diff --git a/0001-Add-triples-for-X86_64-AArch64-Riscv64-openEuler-gcc.patch b/0001-Add-triples-for-X86_64-AArch64-Riscv64-openEuler-gcc.patch index 5b75b97cc564b77d8cd3e9368d6ea006d2298d94..ec0aa64dcf9cd6c69a6ea181a5371791b6b86d57 100644 --- a/0001-Add-triples-for-X86_64-AArch64-Riscv64-openEuler-gcc.patch +++ b/0001-Add-triples-for-X86_64-AArch64-Riscv64-openEuler-gcc.patch @@ -29,6 +29,16 @@ index 665cdc3132fb..fc56935e7513 100644 static const char *const X32Triples[] = {"x86_64-linux-gnux32", "x86_64-pc-linux-gnux32"}; static const char *const X32LibDirs[] = {"/libx32", "/lib"}; +@@ -2341,7 +2341,8 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( + + static const char *const LoongArch64LibDirs[] = {"/lib64", "/lib"}; + static const char *const LoongArch64Triples[] = { +- "loongarch64-linux-gnu", "loongarch64-unknown-linux-gnu"}; ++ "loongarch64-linux-gnu", "loongarch64-unknown-linux-gnu", ++ "loongarch64-openEuler-linux"}; + + static const char *const M68kLibDirs[] = {"/lib"}; + static const char *const M68kTriples[] = { @@ -2404,7 +2404,7 @@ static const char *const RISCV64LibDirs[] = {"/lib64", "/lib"}; static const char *const RISCV64Triples[] = {"riscv64-unknown-linux-gnu", diff --git a/0002-Revert-Clang-Change-the-default-DWARF-version-to-5.patch b/0002-Revert-Clang-Change-the-default-DWARF-version-to-5.patch deleted file mode 100644 index 2112ad5fb32f15a1dfe1b336f24a3273c006f5a1..0000000000000000000000000000000000000000 --- a/0002-Revert-Clang-Change-the-default-DWARF-version-to-5.patch +++ /dev/null @@ -1,121 +0,0 @@ -From a6925b9f000008d3c1d34a401f6dcaac6e57fbbb Mon Sep 17 00:00:00 2001 -From: liyunfei -Date: Wed, 5 Jul 2023 10:55:14 +0800 -Subject: Revert "Clang: Change the default DWARF version to 5" - -This reverts commit d3b26dea16108c427b19b5480c9edc76edf8f5b4. ---- - clang/lib/Driver/ToolChain.cpp | 2 +- - clang/test/CodeGen/dwarf-version.c | 4 ++-- - clang/test/Driver/as-options.s | 2 +- - clang/test/Driver/cl-options.c | 2 +- - clang/test/Driver/clang-g-opts.c | 2 +- - clang/test/Driver/ve-toolchain.c | 2 +- - clang/test/Driver/ve-toolchain.cpp | 2 +- - 7 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp -index f20ab164531b..53c86ee82936 100644 ---- a/clang/lib/Driver/ToolChain.cpp -+++ b/clang/lib/Driver/ToolChain.cpp -@@ -430,7 +430,7 @@ public: - unsigned ToolChain::GetDefaultDwarfVersion() const { - // TODO: Remove the RISC-V special case when R_RISCV_SET_ULEB128 linker - // support becomes more widely available. -- return getTriple().isRISCV() ? 4 : 5; -+ return getTriple().isRISCV() ? 
4 : 5;
-+ return getTriple().isRISCV() ? 4 : 4;
- }
- 
- Tool *ToolChain::getClang() const {
-diff --git a/clang/test/CodeGen/dwarf-version.c b/clang/test/CodeGen/dwarf-version.c
-index 0a6fa4768026..96f01749d0d8 100644
---- a/clang/test/CodeGen/dwarf-version.c
-+++ b/clang/test/CodeGen/dwarf-version.c
-@@ -2,8 +2,8 @@
- // RUN: %clang -target x86_64-linux-gnu -gdwarf-3 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER3
- // RUN: %clang -target x86_64-linux-gnu -gdwarf-4 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER4
- // RUN: %clang -target x86_64-linux-gnu -gdwarf-5 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER5
--// RUN: %clang -target x86_64-linux-gnu -g -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER5
--// RUN: %clang -target x86_64-linux-gnu -gdwarf -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER5
-+// RUN: %clang -target x86_64-linux-gnu -g -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER4
-+// RUN: %clang -target x86_64-linux-gnu -gdwarf -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER4
- 
- // The -isysroot is used as a hack to avoid LIT messing with the SDKROOT
- // environment variable which indirecty overrides the version in the target
-diff --git a/clang/test/Driver/as-options.s b/clang/test/Driver/as-options.s
-index a2e350a0a835..b6159e111052 100644
---- a/clang/test/Driver/as-options.s
-+++ b/clang/test/Driver/as-options.s
-@@ -125,7 +125,7 @@
- // RUN: FileCheck --check-prefix=DEBUG %s
- // RUN: %clang --target=aarch64-linux-gnu -fno-integrated-as -g0 -g %s -### 2>&1 | \
- // RUN: FileCheck --check-prefix=DEBUG %s
--// DEBUG: "-g" "-gdwarf-5"
-+// DEBUG: "-g" "-gdwarf-4"
- // RUN: %clang --target=aarch64-linux-gnu -fno-integrated-as -g -g0 %s -### 2>&1 | \
- // RUN: FileCheck --check-prefix=NODEBUG %s
- // RUN: %clang --target=aarch64-linux-gnu -fno-integrated-as -gdwarf-5 -g0 %s -### 2>&1 | \
-@@ -144,7 +144,7 @@
- // RUN: %clang --target=aarch64-linux-gnu -fno-integrated-as -gdwarf-2 %s -### 2>&1 | \
- // RUN: FileCheck --check-prefix=GDWARF2 %s
- // RUN: %clang --target=aarch64-linux-gnu -fno-integrated-as -gdwarf %s -### 2>&1 | \
--// RUN: FileCheck --check-prefix=GDWARF5 %s
-+// RUN: FileCheck --check-prefix=GDWARF4 %s
- 
- // RUN: %clang --target=aarch64-linux-gnu -fno-integrated-as -gdwarf-5 %s -### 2>&1 | \
- // RUN: FileCheck --check-prefix=GDWARF5 %s
-diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c
-index a2e350a0a835..b6159e111052 100644
---- a/clang/test/Driver/cl-options.c
-+++ b/clang/test/Driver/cl-options.c
-@@ -570,7 +570,7 @@
- // RUN: %clang_cl /Z7 -gdwarf /c -### -- %s 2>&1 | FileCheck -check-prefix=Z7_gdwarf %s
- // Z7_gdwarf: "-gcodeview"
- // Z7_gdwarf: "-debug-info-kind=constructor"
--// Z7_gdwarf: "-dwarf-version=5"
-+// Z7_gdwarf: "-dwarf-version=4"
- 
- // RUN: %clang_cl /ZH:MD5 /c -### -- %s 2>&1 | FileCheck -check-prefix=ZH_MD5 %s
- // ZH_MD5: "-gsrc-hash=md5"
-diff --git a/clang/test/Driver/clang-g-opts.c b/clang/test/Driver/clang-g-opts.c
-index d982b1070cae..bb129e75769c 100644
---- a/clang/test/Driver/clang-g-opts.c
-+++ b/clang/test/Driver/clang-g-opts.c
-@@ -32,7 +32,7 @@
- 
- // CHECK-WITHOUT-G-NOT: -debug-info-kind
- // CHECK-WITH-G: "-debug-info-kind=constructor"
--// CHECK-WITH-G: "-dwarf-version=5"
-+// CHECK-WITH-G: "-dwarf-version=4"
- // CHECK-WITH-G-DWARF2: "-dwarf-version=2"
- 
- // CHECK-WITH-G-STANDALONE: "-debug-info-kind=standalone"
-diff --git a/clang/test/Driver/ve-toolchain.c b/clang/test/Driver/ve-toolchain.c
-index 32e25769b6da..b8a2852daba8 100644
---- 
a/clang/test/Driver/ve-toolchain.c -+++ b/clang/test/Driver/ve-toolchain.c -@@ -6,7 +6,7 @@ - /// Checking dwarf-version - - // RUN: %clang -### -g --target=ve %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s --// DWARF_VER: "-dwarf-version=5" -+// DWARF_VER: "-dwarf-version=4" - - ///----------------------------------------------------------------------------- - /// Checking include-path -diff --git a/clang/test/Driver/ve-toolchain.cpp b/clang/test/Driver/ve-toolchain.cpp -index 5a33d5eceb61..cedf895b36dc 100644 ---- a/clang/test/Driver/ve-toolchain.cpp -+++ b/clang/test/Driver/ve-toolchain.cpp -@@ -7,7 +7,7 @@ - - // RUN: %clangxx -### -g --target=ve-unknown-linux-gnu \ - // RUN: %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s --// DWARF_VER: "-dwarf-version=5" -+// DWARF_VER: "-dwarf-version=4" - - ///----------------------------------------------------------------------------- - /// Checking include-path --- -2.28.0.windows.1 - diff --git a/0003-add-BUILD_FOR_OPENEULER-build-option-to-clang.patch b/0003-add-BUILD_FOR_OPENEULER-build-option-to-clang.patch deleted file mode 100644 index 2819c4b0d796dbc852c3243d6658937deb48c4d5..0000000000000000000000000000000000000000 --- a/0003-add-BUILD_FOR_OPENEULER-build-option-to-clang.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 752af60afc7fd9cc986adf280d4d03714228fb04 Mon Sep 17 00:00:00 2001 -From: liyunfei -Date: Tue, 16 Jan 2024 14:47:02 +0800 -Subject: [PATCH] add BUILD_FOR_OPENEULER build option to clang - ---- - clang/CMakeLists.txt | 5 +++++ - clang/include/clang/Driver/CMakeLists.txt | 4 ++++ - 2 files changed, 9 insertions(+) - -diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt -index f7936d72e088..d558b0522e82 100644 ---- a/clang/CMakeLists.txt -+++ b/clang/CMakeLists.txt -@@ -317,6 +317,11 @@ if (LLVM_COMPILER_IS_GCC_COMPATIBLE) - endif() - endif () - -+option(BUILD_FOR_OPENEULER "Add gcc compatible options for openEuler toolchain" OFF) -+if (BUILD_FOR_OPENEULER) -+ add_definitions( -DBUILD_FOR_OPENEULER ) -+endif() -+ - # Determine HOST_LINK_VERSION on Darwin. 
- set(HOST_LINK_VERSION) - if (APPLE AND NOT CMAKE_LINKER MATCHES ".*lld.*") -diff --git a/clang/include/clang/Driver/CMakeLists.txt b/clang/include/clang/Driver/CMakeLists.txt -index a9d988047920..ea55ba0f1f27 100644 ---- a/clang/include/clang/Driver/CMakeLists.txt -+++ b/clang/include/clang/Driver/CMakeLists.txt -@@ -1,3 +1,7 @@ - set(LLVM_TARGET_DEFINITIONS Options.td) -+if (BUILD_FOR_OPENEULER) -+tablegen(LLVM Options.inc -gen-opt-parser-defs -DBUILD_FOR_OPENEULER) -+else() - tablegen(LLVM Options.inc -gen-opt-parser-defs) -+endif() - add_public_tablegen_target(ClangDriverOptions) --- -Gitee diff --git a/0004-add-gcc-compatible-in-BUILD_FOR_OPENEULER.patch b/0004-add-gcc-compatible-in-BUILD_FOR_OPENEULER.patch deleted file mode 100644 index d04f5ec6f244b497c983b1b7d020a218ac662f74..0000000000000000000000000000000000000000 --- a/0004-add-gcc-compatible-in-BUILD_FOR_OPENEULER.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 6503d6b87786e005c0557961aadba739d833f80c Mon Sep 17 00:00:00 2001 -From: liyunfei -Date: Tue, 16 Jan 2024 14:48:53 +0800 -Subject: [PATCH] add gcc compatible in BUILD_FOR_OPENEULER - ---- - clang/include/clang/Driver/Options.td | 14 ++++++++++++++ - clang/lib/Driver/Driver.cpp | 8 ++++++++ - clang/lib/Driver/ToolChains/Clang.cpp | 11 +++++++++++ - 3 files changed, 33 insertions(+) - -diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td -index 37e8c56b2d29..d4f7315bf8cb 100644 ---- a/clang/include/clang/Driver/Options.td -+++ b/clang/include/clang/Driver/Options.td -@@ -1786,6 +1786,12 @@ def fmemory_profile_use_EQ : Joined<["-"], "fmemory-profile-use=">, - HelpText<"Use memory profile for profile-guided memory optimization">, - MarshallingInfoString>; - -+#ifdef BUILD_FOR_OPENEULER -+def fgcc_compatible : Flag<["-"], "fgcc-compatible">, Group, -+ HelpText<"Enable gcc compatibility for openEuler.">; -+def fno_gcc_compatible : Flag<["-"], "fno-gcc-compatible">, Group; -+#endif -+ - // Begin sanitizer flags. These should all be core options exposed in all driver - // modes. - let Flags = [CC1Option, CoreOption] in { -@@ -5152,6 +5158,14 @@ def falign_jumps_EQ : Joined<["-"], "falign-jumps=">, Group, Group; -+defm peephole2 : BooleanFFlag<"peephole2">, Group; -+defm aggressive_loop_optiomizations : BooleanFFlag<"aggressive-loop-optiomizations">, Group; -+def flto_partition_EQ : Joined<["-"], "flto-partition=">, Group; -+#endif -+ - defm check_new : BoolOption<"f", "check-new", - LangOpts<"CheckNew">, DefaultFalse, - PosFlag, -diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp -index bdbdad9362e1..87736112fb76 100644 ---- a/clang/lib/Driver/Driver.cpp -+++ b/clang/lib/Driver/Driver.cpp -@@ -1491,6 +1491,14 @@ Compilation *Driver::BuildCompilation(ArrayRef ArgList) { - // Populate the tool chains for the offloading devices, if any. - CreateOffloadingDeviceToolChains(*C, Inputs); - -+#ifdef BUILD_FOR_OPENEULER -+ if(C->getArgs().hasFlag(options::OPT_fgcc_compatible, -+ options::OPT_fno_gcc_compatible, false)) { -+ getDiags().setDiagnosticGroupWarningAsError("unused-command-line-argument", 0); -+ getDiags().setDiagnosticGroupWarningAsError("ignored-optimization-argument", 0); -+ } -+#endif -+ - // Construct the list of abstract actions to perform for this compilation. On - // MachO targets this uses the driver-driver and universal actions. 
- if (TC.getTriple().isOSBinFormatMachO()) -diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp -index 37a07b8f224d..0921e6071d26 100644 ---- a/clang/lib/Driver/ToolChains/Clang.cpp -+++ b/clang/lib/Driver/ToolChains/Clang.cpp -@@ -4680,6 +4680,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, - CmdArgs.push_back("-triple"); - CmdArgs.push_back(Args.MakeArgString(TripleStr)); - -+#ifdef BUILD_FOR_OPENEULER -+ if (Args.hasFlag(options::OPT_fgcc_compatible, -+ options::OPT_fno_gcc_compatible, false)) { -+ CmdArgs.push_back("-Wno-error=unknown-warning-option"); -+ CmdArgs.push_back("-Wno-error=unused-parameter"); -+ CmdArgs.push_back("-Wno-error=unused-function"); -+ CmdArgs.push_back("-Wno-error=unused-but-set-parameter"); -+ CmdArgs.push_back("-Wno-error=unused-but-set-variable"); -+ } -+#endif -+ - if (const Arg *MJ = Args.getLastArg(options::OPT_MJ)) { - DumpCompilationDatabase(C, MJ->getValue(), TripleStr, Output, Input, Args); - Args.ClaimAllArgs(options::OPT_MJ); --- -Gitee diff --git a/0005-backport-Disable-InterpreterExceptionTest-on-RISC-V.patch b/0005-backport-Disable-InterpreterExceptionTest-on-RISC-V.patch deleted file mode 100644 index 4805455536041fc0f47d5f83f59041083fcc739d..0000000000000000000000000000000000000000 --- a/0005-backport-Disable-InterpreterExceptionTest-on-RISC-V.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 8943c1fb16c225f056e93c3a9a1bbeb535370e4e Mon Sep 17 00:00:00 2001 -From: laokz -Date: Fri, 1 Mar 2024 06:14:24 +0000 -Subject: [PATCH] [backport] Disable InterpreterExceptionTest on RISC-V - Reference: - https://github.com/llvm/llvm-project/commit/ca003ee06d0eac7e8facc179181298a05e4d03ed - -Signed-off-by: laokz ---- - -From ca003ee06d0eac7e8facc179181298a05e4d03ed Mon Sep 17 00:00:00 2001 -From: Alex Bradbury -Date: Wed, 4 Oct 2023 14:33:31 +0100 -Subject: [PATCH] [clang-repl] Disable InterpreterExceptionTest on RISC-V - (#68216) - -This test fails as .eh_frame handling is not yet implemented for RISC-V -in JITLink. #66067 is proposed to address this. - -Skip the test until the issue is resolved. It seems that D159167 enabled -this test for more than just ppc64. As the test always failed, it just -wasn't run until now, I think skipping is the correct interim approach -(as is already done for Arm, Darwin, and others). ---- - .../Interpreter/ExceptionTests/InterpreterExceptionTest.cpp | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp b/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp -index 70e10b1e53..3857b75598 100644 ---- a/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp -+++ b/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp -@@ -114,6 +114,11 @@ extern "C" int throw_exception() { - Triple.getArch() == llvm::Triple::aarch64_32)) - GTEST_SKIP(); - -+ // FIXME: RISC-V fails as .eh_frame handling is not yet implemented in -+ // JITLink for RISC-V. See PR #66067. 
-+ if (Triple.isRISCV()) -+ GTEST_SKIP(); -+ - llvm::cantFail(Interp->ParseAndExecute(ExceptionCode)); - testing::internal::CaptureStdout(); - auto ThrowException = --- -2.43.0 - diff --git a/0006-clang-LoongArch-Add-loongarch64-to-os-triple.patch b/0006-clang-LoongArch-Add-loongarch64-to-os-triple.patch deleted file mode 100644 index c19f2fc49f8b68ea0be9dc312921c327750a7f5b..0000000000000000000000000000000000000000 --- a/0006-clang-LoongArch-Add-loongarch64-to-os-triple.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 5f03c1fa84487fa7c7c5db1dd461efec67e9c323 Mon Sep 17 00:00:00 2001 -From: zhanglimin -Date: Tue, 12 Mar 2024 17:38:39 +0800 -Subject: [PATCH] [clang][LoongArch] Add loongarch64 to os-triple - ---- - clang/lib/Driver/ToolChains/Gnu.cpp | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp -index 40038dce47d8..e9cba05e8680 100644 ---- a/clang/lib/Driver/ToolChains/Gnu.cpp -+++ b/clang/lib/Driver/ToolChains/Gnu.cpp -@@ -2341,7 +2341,8 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( - - static const char *const LoongArch64LibDirs[] = {"/lib64", "/lib"}; - static const char *const LoongArch64Triples[] = { -- "loongarch64-linux-gnu", "loongarch64-unknown-linux-gnu"}; -+ "loongarch64-linux-gnu", "loongarch64-unknown-linux-gnu", -+ "loongarch64-openEuler-linux"}; - - static const char *const M68kLibDirs[] = {"/lib"}; - static const char *const M68kTriples[] = { --- -2.20.1 - diff --git a/0007-add-more-warning-options-to-fgcc-compatible.patch b/0007-add-more-warning-options-to-fgcc-compatible.patch deleted file mode 100644 index c25bfba062680225578a8a068683a863a623e2bf..0000000000000000000000000000000000000000 --- a/0007-add-more-warning-options-to-fgcc-compatible.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 929e5c1d4f2c36e233a26b480f1dd172b6d63362 Mon Sep 17 00:00:00 2001 -From: liyunfei -Date: Thu, 14 Mar 2024 16:04:40 +0800 -Subject: [PATCH] add more warning options to -fgcc-compatible - ---- - clang/lib/Driver/ToolChains/Clang.cpp | 20 ++++++++++++++++++++ - 1 file changed, 25 insertions(+) - -diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp -index 793af55a1e5f..f0da323d8adb 100644 ---- a/clang/lib/Driver/ToolChains/Clang.cpp -+++ b/clang/lib/Driver/ToolChains/Clang.cpp -@@ -4683,11 +4683,42 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, - #ifdef BUILD_FOR_OPENEULER - if (Args.hasFlag(options::OPT_fgcc_compatible, - options::OPT_fno_gcc_compatible, false)) { -+ // compatibility relevent warnings - CmdArgs.push_back("-Wno-error=unknown-warning-option"); -+ CmdArgs.push_back("-Wno-error=ignored-attributes"); -+ // By default, clang reports warnings, but gcc does not. 
- CmdArgs.push_back("-Wno-error=unused-parameter"); - CmdArgs.push_back("-Wno-error=unused-function"); - CmdArgs.push_back("-Wno-error=unused-but-set-parameter"); - CmdArgs.push_back("-Wno-error=unused-but-set-variable"); -+ CmdArgs.push_back("-Wno-error=deprecated-non-prototype"); -+ CmdArgs.push_back("-Wno-error=unsafe-buffer-usage"); -+ CmdArgs.push_back("-Wno-error=string-plus-int"); -+ CmdArgs.push_back("-Wno-error=language-extension-token"); -+ CmdArgs.push_back("-Wno-error=single-bit-bitfield-constant-conversion"); -+ CmdArgs.push_back("-Wno-error=gnu-variable-sized-type-not-at-end"); -+ CmdArgs.push_back("-Wno-error=header-guard"); -+ CmdArgs.push_back("-Wno-error=return-type-c-linkage"); -+ // By default, clang reports errors, but gcc reports warnings. -+ // when -Werror is passed don't add -Wno-error=*. -+ if(!D.getDiags().getWarningsAsErrors()) { -+ CmdArgs.push_back("-Wno-error=implicit-function-declaration"); -+ CmdArgs.push_back("-Wno-error=incompatible-function-pointer-types"); -+ CmdArgs.push_back("-Wno-error=register"); -+ CmdArgs.push_back("-Wno-error=int-conversion"); -+ CmdArgs.push_back("-Wno-error=implicit-int"); -+ CmdArgs.push_back("-Wno-error=enum-constexpr-conversion"); -+ CmdArgs.push_back("-Wno-error=return-type"); -+ CmdArgs.push_back("-Wno-error=reserved-user-defined-literal"); -+ } -+ //other warnings -+ CmdArgs.push_back("-Wno-error=cast-align"); -+ CmdArgs.push_back("-Wno-error=enum-conversion"); -+ CmdArgs.push_back("-Wno-error=switch"); -+ CmdArgs.push_back("-Wno-error=cast-qual"); -+ CmdArgs.push_back("-Wno-error=varargs"); -+ CmdArgs.push_back("-Wno-error=unused-value"); -+ CmdArgs.push_back("-Wno-error=format-nonliteral"); - } - #endif - --- -2.42.0.windows.2 - diff --git a/0008-Backport-LoongArch-Add-the-support-for-vector.patch b/0008-Backport-LoongArch-Add-the-support-for-vector.patch deleted file mode 100644 index 176d04ba4dee8889e33fec4d0ff7b6facefce668..0000000000000000000000000000000000000000 --- a/0008-Backport-LoongArch-Add-the-support-for-vector.patch +++ /dev/null @@ -1,67252 +0,0 @@ -From 6ff32ae0ca7a400249535b19d9ca489b44deae19 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Wed, 9 Aug 2023 16:01:37 +0800 -Subject: [PATCH 1/8] [Clang][LoongArch] Use the ClangBuiltin class to - automatically generate support for CBE and CFE - -Fixed the type modifier (L->W), removed redundant feature checking code -since the feature has already been checked in `EmitBuiltinExpr`. And -Cleaned up unused diagnostic information. - -Reviewed By: SixWeining - -Differential Revision: https://reviews.llvm.org/D156866 - -(cherry picked from commit ea8d3b1f9f2d7385d97fcd34d14db0eb2cb2795c) ---- - .../include/clang/Basic/BuiltinsLoongArch.def | 25 ++-- - .../clang/Basic/DiagnosticSemaKinds.td | 7 - - clang/lib/CodeGen/CGBuiltin.cpp | 130 ------------------ - clang/lib/CodeGen/CodeGenFunction.h | 1 - - clang/lib/Sema/SemaChecking.cpp | 50 +------ - .../CodeGen/LoongArch/intrinsic-la32-error.c | 118 +++++++--------- - 6 files changed, 64 insertions(+), 267 deletions(-) - -diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def -index 7f2c8403410d..20510e18fe58 100644 ---- a/clang/include/clang/Basic/BuiltinsLoongArch.def -+++ b/clang/include/clang/Basic/BuiltinsLoongArch.def -@@ -16,8 +16,7 @@ - #endif - - // TODO: Support more builtins. --// TODO: Added feature constraints. 
--TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vLiULiLi", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vWiUWiWi", "nc", "64bit") - TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") - TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") - TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") -@@ -26,36 +25,36 @@ TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") - TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") - TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") - TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vLiLi", "nc", "64bit") --TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vLiLi", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vWiWi", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vWiWi", "nc", "64bit") - - TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") - TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") - TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") --TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iLii", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iWii", "nc", "64bit") - TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") - TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") - TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") --TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iLii", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iWii", "nc", "64bit") - - TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "ULiIUi", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "UWiIUi", "nc", "64bit") - TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "ULiULiIUi", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "UWiUWiIUi", "nc", "64bit") - TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "ULiULiULiIUi", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "UWiUWiUWiIUi", "nc", "64bit") - - TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") - TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") - TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "ULiUi", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "UWiUi", "nc", "64bit") - TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") - TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") - TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vULiUi", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") - --TARGET_BUILTIN(__builtin_loongarch_lddir_d, "LiLiIULi", "nc", "64bit") --TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vLiIULi", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") - - #undef BUILTIN - #undef TARGET_BUILTIN -diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td -index c88f25209fc0..0e97620945af 100644 ---- a/clang/include/clang/Basic/DiagnosticSemaKinds.td -+++ 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td -@@ -11853,10 +11853,6 @@ def err_non_designated_init_used : Error< - def err_cast_from_randomized_struct : Error< - "casting from randomized structure pointer type %0 to %1">; - --// LoongArch-specific Diagnostics --def err_loongarch_builtin_requires_la64 : Error< -- "this builtin requires target: loongarch64">; -- - // Unsafe buffer usage diagnostics. - def warn_unsafe_buffer_variable : Warning< - "%0 is an %select{unsafe pointer used for buffer access|unsafe buffer that " -@@ -11872,9 +11868,6 @@ def note_unsafe_buffer_variable_fixit_group : Note< - "change type of %0 to '%select{std::span|std::array|std::span::iterator}1' to preserve bounds information%select{|, and change %2 to '%select{std::span|std::array|std::span::iterator}1' to propagate bounds information between them}3">; - def note_safe_buffer_usage_suggestions_disabled : Note< - "pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions">; --def err_loongarch_builtin_requires_la32 : Error< -- "this builtin requires target: loongarch32">; -- - def err_builtin_pass_in_regs_non_class : Error< - "argument %0 is not an unqualified class type">; - -diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp -index 30f5f4e7061c..e512762fafaf 100644 ---- a/clang/lib/CodeGen/CGBuiltin.cpp -+++ b/clang/lib/CodeGen/CGBuiltin.cpp -@@ -43,7 +43,6 @@ - #include "llvm/IR/IntrinsicsARM.h" - #include "llvm/IR/IntrinsicsBPF.h" - #include "llvm/IR/IntrinsicsHexagon.h" --#include "llvm/IR/IntrinsicsLoongArch.h" - #include "llvm/IR/IntrinsicsNVPTX.h" - #include "llvm/IR/IntrinsicsPowerPC.h" - #include "llvm/IR/IntrinsicsR600.h" -@@ -5588,9 +5587,6 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, - case llvm::Triple::riscv32: - case llvm::Triple::riscv64: - return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue); -- case llvm::Triple::loongarch32: -- case llvm::Triple::loongarch64: -- return CGF->EmitLoongArchBuiltinExpr(BuiltinID, E); - default: - return nullptr; - } -@@ -20418,129 +20414,3 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, - llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); - return Builder.CreateCall(F, Ops, ""); - } -- --Value *CodeGenFunction::EmitLoongArchBuiltinExpr(unsigned BuiltinID, -- const CallExpr *E) { -- SmallVector Ops; -- -- for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) -- Ops.push_back(EmitScalarExpr(E->getArg(i))); -- -- Intrinsic::ID ID = Intrinsic::not_intrinsic; -- -- switch (BuiltinID) { -- default: -- llvm_unreachable("unexpected builtin ID."); -- case LoongArch::BI__builtin_loongarch_cacop_d: -- ID = Intrinsic::loongarch_cacop_d; -- break; -- case LoongArch::BI__builtin_loongarch_cacop_w: -- ID = Intrinsic::loongarch_cacop_w; -- break; -- case LoongArch::BI__builtin_loongarch_dbar: -- ID = Intrinsic::loongarch_dbar; -- break; -- case LoongArch::BI__builtin_loongarch_break: -- ID = Intrinsic::loongarch_break; -- break; -- case LoongArch::BI__builtin_loongarch_ibar: -- ID = Intrinsic::loongarch_ibar; -- break; -- case LoongArch::BI__builtin_loongarch_movfcsr2gr: -- ID = Intrinsic::loongarch_movfcsr2gr; -- break; -- case LoongArch::BI__builtin_loongarch_movgr2fcsr: -- ID = Intrinsic::loongarch_movgr2fcsr; -- break; -- case LoongArch::BI__builtin_loongarch_syscall: -- ID = Intrinsic::loongarch_syscall; -- break; -- case LoongArch::BI__builtin_loongarch_crc_w_b_w: -- ID = Intrinsic::loongarch_crc_w_b_w; -- break; -- case LoongArch::BI__builtin_loongarch_crc_w_h_w: -- ID = 
Intrinsic::loongarch_crc_w_h_w; -- break; -- case LoongArch::BI__builtin_loongarch_crc_w_w_w: -- ID = Intrinsic::loongarch_crc_w_w_w; -- break; -- case LoongArch::BI__builtin_loongarch_crc_w_d_w: -- ID = Intrinsic::loongarch_crc_w_d_w; -- break; -- case LoongArch::BI__builtin_loongarch_crcc_w_b_w: -- ID = Intrinsic::loongarch_crcc_w_b_w; -- break; -- case LoongArch::BI__builtin_loongarch_crcc_w_h_w: -- ID = Intrinsic::loongarch_crcc_w_h_w; -- break; -- case LoongArch::BI__builtin_loongarch_crcc_w_w_w: -- ID = Intrinsic::loongarch_crcc_w_w_w; -- break; -- case LoongArch::BI__builtin_loongarch_crcc_w_d_w: -- ID = Intrinsic::loongarch_crcc_w_d_w; -- break; -- case LoongArch::BI__builtin_loongarch_csrrd_w: -- ID = Intrinsic::loongarch_csrrd_w; -- break; -- case LoongArch::BI__builtin_loongarch_csrwr_w: -- ID = Intrinsic::loongarch_csrwr_w; -- break; -- case LoongArch::BI__builtin_loongarch_csrxchg_w: -- ID = Intrinsic::loongarch_csrxchg_w; -- break; -- case LoongArch::BI__builtin_loongarch_csrrd_d: -- ID = Intrinsic::loongarch_csrrd_d; -- break; -- case LoongArch::BI__builtin_loongarch_csrwr_d: -- ID = Intrinsic::loongarch_csrwr_d; -- break; -- case LoongArch::BI__builtin_loongarch_csrxchg_d: -- ID = Intrinsic::loongarch_csrxchg_d; -- break; -- case LoongArch::BI__builtin_loongarch_iocsrrd_b: -- ID = Intrinsic::loongarch_iocsrrd_b; -- break; -- case LoongArch::BI__builtin_loongarch_iocsrrd_h: -- ID = Intrinsic::loongarch_iocsrrd_h; -- break; -- case LoongArch::BI__builtin_loongarch_iocsrrd_w: -- ID = Intrinsic::loongarch_iocsrrd_w; -- break; -- case LoongArch::BI__builtin_loongarch_iocsrrd_d: -- ID = Intrinsic::loongarch_iocsrrd_d; -- break; -- case LoongArch::BI__builtin_loongarch_iocsrwr_b: -- ID = Intrinsic::loongarch_iocsrwr_b; -- break; -- case LoongArch::BI__builtin_loongarch_iocsrwr_h: -- ID = Intrinsic::loongarch_iocsrwr_h; -- break; -- case LoongArch::BI__builtin_loongarch_iocsrwr_w: -- ID = Intrinsic::loongarch_iocsrwr_w; -- break; -- case LoongArch::BI__builtin_loongarch_iocsrwr_d: -- ID = Intrinsic::loongarch_iocsrwr_d; -- break; -- case LoongArch::BI__builtin_loongarch_cpucfg: -- ID = Intrinsic::loongarch_cpucfg; -- break; -- case LoongArch::BI__builtin_loongarch_asrtle_d: -- ID = Intrinsic::loongarch_asrtle_d; -- break; -- case LoongArch::BI__builtin_loongarch_asrtgt_d: -- ID = Intrinsic::loongarch_asrtgt_d; -- break; -- case LoongArch::BI__builtin_loongarch_lddir_d: -- ID = Intrinsic::loongarch_lddir_d; -- break; -- case LoongArch::BI__builtin_loongarch_ldpte_d: -- ID = Intrinsic::loongarch_ldpte_d; -- break; -- // TODO: Support more Intrinsics. 
-- } -- -- assert(ID != Intrinsic::not_intrinsic); -- -- llvm::Function *F = CGM.getIntrinsic(ID); -- return Builder.CreateCall(F, Ops); --} -diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h -index 8722fd4550e4..143e0707b942 100644 ---- a/clang/lib/CodeGen/CodeGenFunction.h -+++ b/clang/lib/CodeGen/CodeGenFunction.h -@@ -4316,7 +4316,6 @@ public: - llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E); - llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, - ReturnValueSlot ReturnValue); -- llvm::Value *EmitLoongArchBuiltinExpr(unsigned BuiltinID, const CallExpr *E); - void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, - llvm::AtomicOrdering &AO, - llvm::SyncScope::ID &SSID); -diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp -index a94f009f3fa6..a8416bf4de92 100644 ---- a/clang/lib/Sema/SemaChecking.cpp -+++ b/clang/lib/Sema/SemaChecking.cpp -@@ -3827,39 +3827,12 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, - default: - break; - case LoongArch::BI__builtin_loongarch_cacop_d: -- if (!TI.hasFeature("64bit")) -- return Diag(TheCall->getBeginLoc(), -- diag::err_loongarch_builtin_requires_la64) -- << TheCall->getSourceRange(); -- [[fallthrough]]; - case LoongArch::BI__builtin_loongarch_cacop_w: { -- if (BuiltinID == LoongArch::BI__builtin_loongarch_cacop_w && -- !TI.hasFeature("32bit")) -- return Diag(TheCall->getBeginLoc(), -- diag::err_loongarch_builtin_requires_la32) -- << TheCall->getSourceRange(); - SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(5)); - SemaBuiltinConstantArgRange(TheCall, 2, llvm::minIntN(12), - llvm::maxIntN(12)); - break; - } -- case LoongArch::BI__builtin_loongarch_crc_w_b_w: -- case LoongArch::BI__builtin_loongarch_crc_w_h_w: -- case LoongArch::BI__builtin_loongarch_crc_w_w_w: -- case LoongArch::BI__builtin_loongarch_crc_w_d_w: -- case LoongArch::BI__builtin_loongarch_crcc_w_b_w: -- case LoongArch::BI__builtin_loongarch_crcc_w_h_w: -- case LoongArch::BI__builtin_loongarch_crcc_w_w_w: -- case LoongArch::BI__builtin_loongarch_crcc_w_d_w: -- case LoongArch::BI__builtin_loongarch_iocsrrd_d: -- case LoongArch::BI__builtin_loongarch_iocsrwr_d: -- case LoongArch::BI__builtin_loongarch_asrtle_d: -- case LoongArch::BI__builtin_loongarch_asrtgt_d: -- if (!TI.hasFeature("64bit")) -- return Diag(TheCall->getBeginLoc(), -- diag::err_loongarch_builtin_requires_la64) -- << TheCall->getSourceRange(); -- break; - case LoongArch::BI__builtin_loongarch_break: - case LoongArch::BI__builtin_loongarch_dbar: - case LoongArch::BI__builtin_loongarch_ibar: -@@ -3867,35 +3840,16 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, - // Check if immediate is in [0, 32767]. 
- return SemaBuiltinConstantArgRange(TheCall, 0, 0, 32767); - case LoongArch::BI__builtin_loongarch_csrrd_w: -- return SemaBuiltinConstantArgRange(TheCall, 0, 0, 16383); -- case LoongArch::BI__builtin_loongarch_csrwr_w: -- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 16383); -- case LoongArch::BI__builtin_loongarch_csrxchg_w: -- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 16383); - case LoongArch::BI__builtin_loongarch_csrrd_d: -- if (!TI.hasFeature("64bit")) -- return Diag(TheCall->getBeginLoc(), -- diag::err_loongarch_builtin_requires_la64) -- << TheCall->getSourceRange(); - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 16383); -+ case LoongArch::BI__builtin_loongarch_csrwr_w: - case LoongArch::BI__builtin_loongarch_csrwr_d: -- if (!TI.hasFeature("64bit")) -- return Diag(TheCall->getBeginLoc(), -- diag::err_loongarch_builtin_requires_la64) -- << TheCall->getSourceRange(); - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 16383); -+ case LoongArch::BI__builtin_loongarch_csrxchg_w: - case LoongArch::BI__builtin_loongarch_csrxchg_d: -- if (!TI.hasFeature("64bit")) -- return Diag(TheCall->getBeginLoc(), -- diag::err_loongarch_builtin_requires_la64) -- << TheCall->getSourceRange(); - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 16383); - case LoongArch::BI__builtin_loongarch_lddir_d: - case LoongArch::BI__builtin_loongarch_ldpte_d: -- if (!TI.hasFeature("64bit")) -- return Diag(TheCall->getBeginLoc(), -- diag::err_loongarch_builtin_requires_la64) -- << TheCall->getSourceRange(); - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); - case LoongArch::BI__builtin_loongarch_movfcsr2gr: - case LoongArch::BI__builtin_loongarch_movgr2fcsr: -diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c b/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c -index 0264c2948934..db113a13eb5a 100644 ---- a/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c -+++ b/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c -@@ -1,9 +1,58 @@ - // RUN: %clang_cc1 -triple loongarch32 -emit-llvm -S -verify %s -o /dev/null -+// RUN: not %clang_cc1 -triple loongarch32 -DFEATURE_CHECK -emit-llvm %s 2>&1 \ -+// RUN: | FileCheck %s - - #include - -+#ifdef FEATURE_CHECK -+void test_feature(long *v_l, unsigned long *v_ul, int *v_i, unsigned ui, char c, short s) { -+// CHECK: error: '__builtin_loongarch_cacop_d' needs target feature 64bit -+ __builtin_loongarch_cacop_d(1, v_ul[0], 1024); -+ -+// CHECK: error: '__builtin_loongarch_crc_w_b_w' needs target feature 64bit -+ v_i[0] = __builtin_loongarch_crc_w_b_w(c, v_i[0]); -+// CHECK: error: '__builtin_loongarch_crc_w_h_w' needs target feature 64bit -+ v_i[1] = __builtin_loongarch_crc_w_h_w(c, v_i[0]); -+// CHECK: error: '__builtin_loongarch_crc_w_w_w' needs target feature 64bit -+ v_i[2] = __builtin_loongarch_crc_w_w_w(c, v_i[0]); -+// CHECK: error: '__builtin_loongarch_crc_w_d_w' needs target feature 64bit -+ v_i[3] = __builtin_loongarch_crc_w_d_w(c, v_i[0]); -+ -+// CHECK: error: '__builtin_loongarch_crcc_w_b_w' needs target feature 64bit -+ v_i[4] = __builtin_loongarch_crcc_w_b_w(c, v_i[0]); -+// CHECK: error: '__builtin_loongarch_crcc_w_h_w' needs target feature 64bit -+ v_i[5] = __builtin_loongarch_crcc_w_h_w(s, v_i[0]); -+// CHECK: error: '__builtin_loongarch_crcc_w_w_w' needs target feature 64bit -+ v_i[6] = __builtin_loongarch_crcc_w_w_w(v_i[0], v_i[1]); -+// CHECK: error: '__builtin_loongarch_crcc_w_d_w' needs target feature 64bit -+ v_i[7] = __builtin_loongarch_crcc_w_d_w(v_l[0], v_i[0]); -+ -+// CHECK: error: 
'__builtin_loongarch_csrrd_d' needs target feature 64bit -+ v_ul[0] = __builtin_loongarch_csrrd_d(1); -+// CHECK: error: '__builtin_loongarch_csrwr_d' needs target feature 64bit -+ v_ul[1] = __builtin_loongarch_csrwr_d(v_ul[0], 1); -+// CHECK: error: '__builtin_loongarch_csrxchg_d' needs target feature 64bit -+ v_ul[2] = __builtin_loongarch_csrxchg_d(v_ul[0], v_ul[1], 1); -+ -+ -+// CHECK: error: '__builtin_loongarch_iocsrrd_d' needs target feature 64bit -+ v_ul[3] = __builtin_loongarch_iocsrrd_d(ui); -+// CHECK: error: '__builtin_loongarch_iocsrwr_d' needs target feature 64bit -+ __builtin_loongarch_iocsrwr_d(v_ul[0], ui); -+ -+// CHECK: error: '__builtin_loongarch_asrtle_d' needs target feature 64bit -+ __builtin_loongarch_asrtle_d(v_l[0], v_l[1]); -+// CHECK: error: '__builtin_loongarch_asrtgt_d' needs target feature 64bit -+ __builtin_loongarch_asrtgt_d(v_l[0], v_l[1]); -+ -+// CHECK: error: '__builtin_loongarch_lddir_d' needs target feature 64bit -+ v_ul[4] = __builtin_loongarch_lddir_d(v_l[0], 1); -+// CHECK: error: '__builtin_loongarch_ldpte_d' needs target feature 64bit -+ __builtin_loongarch_ldpte_d(v_l[0], 1); -+} -+#endif -+ - void cacop_d(unsigned long int a) { -- __builtin_loongarch_cacop_d(1, a, 1024); // expected-error {{this builtin requires target: loongarch64}} - __builtin_loongarch_cacop_w(-1, a, 1024); // expected-error {{argument value -1 is outside the valid range [0, 31]}} - __builtin_loongarch_cacop_w(32, a, 1024); // expected-error {{argument value 32 is outside the valid range [0, 31]}} - __builtin_loongarch_cacop_w(1, a, -4096); // expected-error {{argument value -4096 is outside the valid range [-2048, 2047]}} -@@ -47,49 +96,6 @@ void syscall(int a) { - __builtin_loongarch_syscall(a); // expected-error {{argument to '__builtin_loongarch_syscall' must be a constant integer}} - } - --int crc_w_b_w(char a, int b) { -- return __builtin_loongarch_crc_w_b_w(a, b); // expected-error {{this builtin requires target: loongarch64}} --} -- --int crc_w_h_w(short a, int b) { -- return __builtin_loongarch_crc_w_h_w(a, b); // expected-error {{this builtin requires target: loongarch64}} --} -- --int crc_w_w_w(int a, int b) { -- return __builtin_loongarch_crc_w_w_w(a, b); // expected-error {{this builtin requires target: loongarch64}} --} -- --int crc_w_d_w(long int a, int b) { -- return __builtin_loongarch_crc_w_d_w(a, b); // expected-error {{this builtin requires target: loongarch64}} --} --int crcc_w_b_w(char a, int b) { -- return __builtin_loongarch_crcc_w_b_w(a, b); // expected-error {{this builtin requires target: loongarch64}} --} -- --int crcc_w_h_w(short a, int b) { -- return __builtin_loongarch_crcc_w_h_w(a, b); // expected-error {{this builtin requires target: loongarch64}} --} -- --int crcc_w_w_w(int a, int b) { -- return __builtin_loongarch_crcc_w_w_w(a, b); // expected-error {{this builtin requires target: loongarch64}} --} -- --int crcc_w_d_w(long int a, int b) { -- return __builtin_loongarch_crcc_w_d_w(a, b); // expected-error {{this builtin requires target: loongarch64}} --} -- --unsigned long int csrrd_d() { -- return __builtin_loongarch_csrrd_d(1); // expected-error {{this builtin requires target: loongarch64}} --} -- --unsigned long int csrwr_d(unsigned long int a) { -- return __builtin_loongarch_csrwr_d(a, 1); // expected-error {{this builtin requires target: loongarch64}} --} -- --unsigned long int csrxchg_d(unsigned long int a, unsigned long int b) { -- return __builtin_loongarch_csrxchg_d(a, b, 1); // expected-error {{this builtin requires target: 
loongarch64}} --} -- - void csrrd_w(int a) { - __builtin_loongarch_csrrd_w(16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} - __builtin_loongarch_csrrd_w(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} -@@ -108,30 +114,6 @@ void csrxchg_w(unsigned int a, unsigned int b) { - __builtin_loongarch_csrxchg_w(a, b, b); // expected-error {{argument to '__builtin_loongarch_csrxchg_w' must be a constant integer}} - } - --unsigned long int iocsrrd_d(unsigned int a) { -- return __builtin_loongarch_iocsrrd_d(a); // expected-error {{this builtin requires target: loongarch64}} --} -- --void iocsrwr_d(unsigned long int a, unsigned int b) { -- __builtin_loongarch_iocsrwr_d(a, b); // expected-error {{this builtin requires target: loongarch64}} --} -- --void asrtle_d(long int a, long int b) { -- __builtin_loongarch_asrtle_d(a, b); // expected-error {{this builtin requires target: loongarch64}} --} -- --void asrtgt_d(long int a, long int b) { -- __builtin_loongarch_asrtgt_d(a, b); // expected-error {{this builtin requires target: loongarch64}} --} -- --void lddir_d(long int a, int b) { -- __builtin_loongarch_lddir_d(a, 1); // expected-error {{this builtin requires target: loongarch64}} --} -- --void ldpte_d(long int a, int b) { -- __builtin_loongarch_ldpte_d(a, 1); // expected-error {{this builtin requires target: loongarch64}} --} -- - void rdtime_d() { - __rdtime_d(); // expected-error {{call to undeclared function '__rdtime_d'}} - } --- -2.20.1 - - -From ec68e541ae9c22826292d5db1bf2ce9e39b6a57b Mon Sep 17 00:00:00 2001 -From: licongtian -Date: Wed, 20 Sep 2023 11:21:56 +0800 -Subject: [PATCH 2/8] [Clang][LoongArch] Support compiler options -mlsx/-mlasx - for clang - -This patch adds compiler options -mlsx/-mlasx which enables the -instruction sets of LSX and LASX, and sets related predefined macros -according to the options. 
- -(cherry picked from commit 8d4e35600f3ba90997a59fdb9baeb196e723eec9) ---- - .../clang/Basic/DiagnosticDriverKinds.td | 6 +++ - clang/include/clang/Driver/Options.td | 10 +++++ - clang/lib/Basic/Targets/LoongArch.cpp | 12 +++++- - clang/lib/Basic/Targets/LoongArch.h | 4 ++ - .../lib/Driver/ToolChains/Arch/LoongArch.cpp | 32 +++++++++++++++ - clang/test/Driver/loongarch-mlasx-error.c | 15 +++++++ - clang/test/Driver/loongarch-mlasx.c | 37 +++++++++++++++++ - clang/test/Driver/loongarch-mlsx-error.c | 12 ++++++ - clang/test/Driver/loongarch-mlsx.c | 41 +++++++++++++++++++ - clang/test/Preprocessor/init-loongarch.c | 35 ++++++++++++++++ - 10 files changed, 203 insertions(+), 1 deletion(-) - create mode 100644 clang/test/Driver/loongarch-mlasx-error.c - create mode 100644 clang/test/Driver/loongarch-mlasx.c - create mode 100644 clang/test/Driver/loongarch-mlsx-error.c - create mode 100644 clang/test/Driver/loongarch-mlsx.c - -diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td -index 1b69324d073a..8c751f2c4bda 100644 ---- a/clang/include/clang/Basic/DiagnosticDriverKinds.td -+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td -@@ -732,6 +732,12 @@ def warn_drv_loongarch_conflicting_implied_val : Warning< - InGroup; - def err_drv_loongarch_invalid_mfpu_EQ : Error< - "invalid argument '%0' to -mfpu=; must be one of: 64, 32, none, 0 (alias for none)">; -+def err_drv_loongarch_wrong_fpu_width_for_lsx : Error< -+ "wrong fpu width; LSX depends on 64-bit FPU.">; -+def err_drv_loongarch_wrong_fpu_width_for_lasx : Error< -+ "wrong fpu width; LASX depends on 64-bit FPU.">; -+def err_drv_loongarch_invalid_simd_option_combination : Error< -+ "invalid option combination; LASX depends on LSX.">; - - def err_drv_expand_response_file : Error< - "failed to expand response file: %0">; -diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td -index d4f7315bf8cb..bb4928293c45 100644 ---- a/clang/include/clang/Driver/Options.td -+++ b/clang/include/clang/Driver/Options.td -@@ -192,6 +192,8 @@ def m_x86_Features_Group : OptionGroup<"">, - Group, Flags<[CoreOption]>, DocName<"X86">; - def m_riscv_Features_Group : OptionGroup<"">, - Group, DocName<"RISC-V">; -+def m_loongarch_Features_Group : OptionGroup<"">, -+ Group, DocName<"LoongArch">; - - def m_libc_Group : OptionGroup<"">, Group, - Flags<[HelpHidden]>; -@@ -4196,6 +4198,14 @@ def mstack_protector_guard_reg_EQ : Joined<["-"], "mstack-protector-guard-reg="> - def mfentry : Flag<["-"], "mfentry">, HelpText<"Insert calls to fentry at function entry (x86/SystemZ only)">, - Flags<[CC1Option]>, Group, - MarshallingInfoFlag>; -+def mlsx : Flag<["-"], "mlsx">, Group, -+ HelpText<"Enable Loongson SIMD Extension (LSX).">; -+def mno_lsx : Flag<["-"], "mno-lsx">, Group, -+ HelpText<"Disable Loongson SIMD Extension (LSX).">; -+def mlasx : Flag<["-"], "mlasx">, Group, -+ HelpText<"Enable Loongson Advanced SIMD Extension (LASX).">; -+def mno_lasx : Flag<["-"], "mno-lasx">, Group, -+ HelpText<"Disable Loongson Advanced SIMD Extension (LASX).">; - def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. 
To activate they need to be patched in.">, - Flags<[CC1Option]>, Group, - MarshallingInfoFlag>; -diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp -index 4448a2ae10a1..88537989a051 100644 ---- a/clang/lib/Basic/Targets/LoongArch.cpp -+++ b/clang/lib/Basic/Targets/LoongArch.cpp -@@ -208,6 +208,11 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, - TuneCPU = ArchName; - Builder.defineMacro("__loongarch_tune", Twine('"') + TuneCPU + Twine('"')); - -+ if (HasFeatureLSX) -+ Builder.defineMacro("__loongarch_sx", Twine(1)); -+ if (HasFeatureLASX) -+ Builder.defineMacro("__loongarch_asx", Twine(1)); -+ - StringRef ABI = getABI(); - if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") - Builder.defineMacro("__loongarch_lp64"); -@@ -257,6 +262,8 @@ bool LoongArchTargetInfo::hasFeature(StringRef Feature) const { - .Case("loongarch64", Is64Bit) - .Case("32bit", !Is64Bit) - .Case("64bit", Is64Bit) -+ .Case("lsx", HasFeatureLSX) -+ .Case("lasx", HasFeatureLASX) - .Default(false); - } - -@@ -274,7 +281,10 @@ bool LoongArchTargetInfo::handleTargetFeatures( - if (Feature == "+d") { - HasFeatureD = true; - } -- } -+ } else if (Feature == "+lsx") -+ HasFeatureLSX = true; -+ else if (Feature == "+lasx") -+ HasFeatureLASX = true; - } - return true; - } -diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h -index 34143f462a24..8f4150b2539d 100644 ---- a/clang/lib/Basic/Targets/LoongArch.h -+++ b/clang/lib/Basic/Targets/LoongArch.h -@@ -27,12 +27,16 @@ protected: - std::string CPU; - bool HasFeatureD; - bool HasFeatureF; -+ bool HasFeatureLSX; -+ bool HasFeatureLASX; - - public: - LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) - : TargetInfo(Triple) { - HasFeatureD = false; - HasFeatureF = false; -+ HasFeatureLSX = false; -+ HasFeatureLASX = false; - LongDoubleWidth = 128; - LongDoubleAlign = 128; - LongDoubleFormat = &llvm::APFloat::IEEEquad(); -diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp -index 65925e9ed610..31153a67ad28 100644 ---- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp -+++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp -@@ -175,6 +175,38 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, - A->ignoreTargetSpecific(); - if (Arg *A = Args.getLastArgNoClaim(options::OPT_mfpu_EQ)) - A->ignoreTargetSpecific(); -+ -+ // Select lsx feature determined by -m[no-]lsx. -+ if (const Arg *A = Args.getLastArg(options::OPT_mlsx, options::OPT_mno_lsx)) { -+ // LSX depends on 64-bit FPU. -+ // -m*-float and -mfpu=none/0/32 conflict with -mlsx. -+ if (A->getOption().matches(options::OPT_mlsx)) { -+ if (llvm::find(Features, "-d") != Features.end()) -+ D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lsx); -+ else /*-mlsx*/ -+ Features.push_back("+lsx"); -+ } else /*-mno-lsx*/ { -+ Features.push_back("-lsx"); -+ } -+ } -+ -+ // Select lasx feature determined by -m[no-]lasx. -+ if (const Arg *A = -+ Args.getLastArg(options::OPT_mlasx, options::OPT_mno_lasx)) { -+ // LASX depends on 64-bit FPU and LSX. -+ // -mno-lsx conflicts with -mlasx. 
-+ if (A->getOption().matches(options::OPT_mlasx)) { -+ if (llvm::find(Features, "-d") != Features.end()) -+ D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx); -+ else if (llvm::find(Features, "-lsx") != Features.end()) -+ D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination); -+ else { /*-mlasx*/ -+ Features.push_back("+lsx"); -+ Features.push_back("+lasx"); -+ } -+ } else /*-mno-lasx*/ -+ Features.push_back("-lasx"); -+ } - } - - std::string loongarch::postProcessTargetCPUString(const std::string &CPU, -diff --git a/clang/test/Driver/loongarch-mlasx-error.c b/clang/test/Driver/loongarch-mlasx-error.c -new file mode 100644 -index 000000000000..e66f277f7c29 ---- /dev/null -+++ b/clang/test/Driver/loongarch-mlasx-error.c -@@ -0,0 +1,15 @@ -+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -msingle-float 2>&1 \ -+// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s -+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -msoft-float 2>&1 \ -+// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s -+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=32 2>&1 \ -+// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s -+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=0 2>&1 \ -+// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s -+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=none 2>&1 \ -+// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s -+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mno-lsx 2>&1 \ -+// RUN: FileCheck --check-prefix=ERROR_LASX_FPU128 %s -+ -+// ERROR_LASX_FPU64: error: wrong fpu width; LASX depends on 64-bit FPU. -+// ERROR_LASX_FPU128: error: invalid option combination; LASX depends on LSX. -diff --git a/clang/test/Driver/loongarch-mlasx.c b/clang/test/Driver/loongarch-mlasx.c -new file mode 100644 -index 000000000000..0b934f125c9e ---- /dev/null -+++ b/clang/test/Driver/loongarch-mlasx.c -@@ -0,0 +1,37 @@ -+/// Test -m[no-]lasx options. 
-+ -+// RUN: %clang --target=loongarch64 -mlasx -fsyntax-only %s -### 2>&1 | \ -+// RUN: FileCheck %s --check-prefix=CC1-LASX -+// RUN: %clang --target=loongarch64 -mno-lasx -fsyntax-only %s -### 2>&1 | \ -+// RUN: FileCheck %s --check-prefix=CC1-NOLASX -+// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -fsyntax-only %s -### 2>&1 | \ -+// RUN: FileCheck %s --check-prefix=CC1-NOLASX -+// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -fsyntax-only %s -### 2>&1 | \ -+// RUN: FileCheck %s --check-prefix=CC1-LASX -+// RUN: %clang --target=loongarch64 -mlsx -mlasx -fsyntax-only %s -### 2>&1 | \ -+// RUN: FileCheck %s --check-prefix=CC1-LASX -+// RUN: %clang --target=loongarch64 -mlasx -mlsx -fsyntax-only %s -### 2>&1 | \ -+// RUN: FileCheck %s --check-prefix=CC1-LASX -+ -+// RUN: %clang --target=loongarch64 -mlasx -S -emit-llvm %s -o - | \ -+// RUN: FileCheck %s --check-prefix=IR-LASX -+// RUN: %clang --target=loongarch64 -mno-lasx -S -emit-llvm %s -o - | \ -+// RUN: FileCheck %s --check-prefix=IR-NOLASX -+// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -S -emit-llvm %s -o - | \ -+// RUN: FileCheck %s --check-prefix=IR-NOLASX -+// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -S -emit-llvm %s -o - | \ -+// RUN: FileCheck %s --check-prefix=IR-LASX -+// RUN: %clang --target=loongarch64 -mlsx -mlasx -S -emit-llvm %s -o - | \ -+// RUN: FileCheck %s --check-prefix=IR-LASX -+// RUN: %clang --target=loongarch64 -mlasx -mlsx -S -emit-llvm %s -o - | \ -+// RUN: FileCheck %s --check-prefix=IR-LASX -+ -+// CC1-LASX: "-target-feature" "+lsx" "-target-feature" "+lasx" -+// CC1-NOLASX: "-target-feature" "-lasx" -+ -+// IR-LASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lasx{{(,.*)?}}" -+// IR-NOLASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lasx{{(,.*)?}}" -+ -+int foo(void){ -+ return 3; -+} -diff --git a/clang/test/Driver/loongarch-mlsx-error.c b/clang/test/Driver/loongarch-mlsx-error.c -new file mode 100644 -index 000000000000..bd6b8e2718bf ---- /dev/null -+++ b/clang/test/Driver/loongarch-mlsx-error.c -@@ -0,0 +1,12 @@ -+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -msingle-float 2>&1 \ -+// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s -+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -msoft-float 2>&1 \ -+// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s -+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=32 2>&1 \ -+// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s -+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=0 2>&1 \ -+// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s -+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=none 2>&1 \ -+// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s -+ -+// ERROR_LSX_FPU64: error: wrong fpu width; LSX depends on 64-bit FPU. -diff --git a/clang/test/Driver/loongarch-mlsx.c b/clang/test/Driver/loongarch-mlsx.c -new file mode 100644 -index 000000000000..7d4307b078e1 ---- /dev/null -+++ b/clang/test/Driver/loongarch-mlsx.c -@@ -0,0 +1,41 @@ -+/// Test -m[no-]lsx options. 
-+ -+// RUN: %clang --target=loongarch64 -mlsx -fsyntax-only %s -### 2>&1 | \ -+// RUN: FileCheck %s --check-prefix=CC1-LSX -+// RUN: %clang --target=loongarch64 -mno-lsx -fsyntax-only %s -### 2>&1 | \ -+// RUN: FileCheck %s --check-prefix=CC1-NOLSX -+// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -fsyntax-only %s -### 2>&1 | \ -+// RUN: FileCheck %s --check-prefix=CC1-NOLSX -+// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -fsyntax-only %s -### 2>&1 | \ -+// RUN: FileCheck %s --check-prefix=CC1-LSX -+// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -fsyntax-only %s -### 2>&1 | \ -+// RUN: FileCheck %s --check-prefix=CC1-LSX -+// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -fsyntax-only %s -### 2>&1 | \ -+// RUN: FileCheck %s --check-prefix=CC1-LSX -+// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -fsyntax-only %s -### 2>&1 | \ -+// RUN: FileCheck %s --check-prefix=CC1-NOLSX -+ -+// RUN: %clang --target=loongarch64 -mlsx -S -emit-llvm %s -o - | \ -+// RUN: FileCheck %s --check-prefix=IR-LSX -+// RUN: %clang --target=loongarch64 -mno-lsx -S -emit-llvm %s -o - | \ -+// RUN: FileCheck %s --check-prefix=IR-NOLSX -+// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -S -emit-llvm %s -o - | \ -+// RUN: FileCheck %s --check-prefix=IR-NOLSX -+// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -S -emit-llvm %s -o - | \ -+// RUN: FileCheck %s --check-prefix=IR-LSX -+// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -S -emit-llvm %s -o - | \ -+// RUN: FileCheck %s --check-prefix=IR-LSX -+// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -S -emit-llvm %s -o - | \ -+// RUN: FileCheck %s --check-prefix=IR-LSX -+// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -S -emit-llvm %s -o - | \ -+// RUN: FileCheck %s --check-prefix=IR-NOLSX -+ -+// CC1-LSX: "-target-feature" "+lsx" -+// CC1-NOLSX: "-target-feature" "-lsx" -+ -+// IR-LSX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lsx{{(,.*)?}}" -+// IR-NOLSX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lsx{{(,.*)?}}" -+ -+int foo(void){ -+ return 3; -+} -diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c -index 4ef42a921ec0..e235a7283021 100644 ---- a/clang/test/Preprocessor/init-loongarch.c -+++ b/clang/test/Preprocessor/init-loongarch.c -@@ -807,3 +807,38 @@ - - // ARCH-TUNE: #define __loongarch_arch "[[ARCH]]" - // ARCH-TUNE: #define __loongarch_tune "[[TUNE]]" -+ -+// RUN: %clang --target=loongarch64 -mlsx -x c -E -dM %s -o - \ -+// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s -+// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -x c -E -dM %s -o - \ -+// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s -+// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -x c -E -dM %s -o - \ -+// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s -+// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -x c -E -dM %s -o - \ -+// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s -+// MLSX-NOT: #define __loongarch_asx -+// MLSX: #define __loongarch_sx 1 -+ -+// RUN: %clang --target=loongarch64 -mlasx -x c -E -dM %s -o - \ -+// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s -+// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -x c -E -dM %s -o - \ -+// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s -+// RUN: %clang --target=loongarch64 -mlsx -mlasx -x c -E -dM %s -o - \ -+// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s -+// RUN: %clang --target=loongarch64 -mlasx -mlsx -x c -E -dM %s -o - \ -+// 
RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s -+// MLASX: #define __loongarch_asx 1 -+// MLASX: #define __loongarch_sx 1 -+ -+// RUN: %clang --target=loongarch64 -mno-lsx -x c -E -dM %s -o - \ -+// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s -+// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -x c -E -dM %s -o - \ -+// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s -+// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -x c -E -dM %s -o - \ -+// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s -+// RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -x c -E -dM %s -o - \ -+// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s -+// RUN: %clang --target=loongarch64 -mno-lasx -x c -E -dM %s -o - \ -+// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s -+// MNO-LSX-NOT: #define __loongarch_asx -+// MNO-LSX-NOT: #define __loongarch_sx --- -2.20.1 - - -From 142597c7df9a7e6debcaffede2057eb0a7c3b33b Mon Sep 17 00:00:00 2001 -From: licongtian -Date: Wed, 25 Oct 2023 17:35:32 +0800 -Subject: [PATCH 3/8] [Clang][LoongArch] Add ABI implementation of passing - vectors - -(cherry picked from commit eb49b86f5a9b54b0e3c37024334a3c6f6ca88e14) ---- - clang/lib/CodeGen/Targets/LoongArch.cpp | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp -index 7483bf6d6d1e..26c68c3583b2 100644 ---- a/clang/lib/CodeGen/Targets/LoongArch.cpp -+++ b/clang/lib/CodeGen/Targets/LoongArch.cpp -@@ -321,6 +321,13 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, - return ABIArgInfo::getDirect(); - } - -+ // Pass 128-bit/256-bit vector values via vector registers directly. -+ if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) && -+ (getTarget().hasFeature("lsx"))) || -+ ((getContext().getTypeSize(Ty) == 256) && -+ getTarget().hasFeature("lasx")))) -+ return ABIArgInfo::getDirect(); -+ - // Complex types for the *f or *d ABI must be passed directly rather than - // using CoerceAndExpand. 
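// Editor's annotation, not patch content: a sketch of what the new vector
// check added above does, assuming -mlsx is enabled so hasFeature("lsx")
// holds. For a generic 128-bit vector such as
//
//   typedef int v4i32 __attribute__((vector_size(16)));
//   v4i32 vadd(v4i32 a, v4i32 b) { return a + b; }
//
// getTypeSize(Ty) is 128, so the parameters (and, since classifyReturnType
// delegates here, the return value) are classified ABIArgInfo::getDirect()
// and travel in LSX registers rather than falling through to the integer
// coercion below; with "lasx" and vector_size(32) the 256-bit case is the
// same.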
- if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) { --- -2.20.1 - - -From f2a409f5da055510125cae5fcf84a6b7d255e64a Mon Sep 17 00:00:00 2001 -From: licongtian -Date: Wed, 25 Oct 2023 17:41:03 +0800 -Subject: [PATCH 4/8] [Clang][LoongArch] Support the builtin functions for LSX - -This patch does the following work: -- Define the builtin functions for LSX -- Add the header file lsxintrin.h -- Add the immediate number range checking for LSX builtins - -(cherry picked from commit d6bfa3341181a80de6c8aede807fc1acc3ce8d9b) ---- - .../include/clang/Basic/BuiltinsLoongArch.def | 43 +- - .../clang/Basic/BuiltinsLoongArchBase.def | 53 + - .../clang/Basic/BuiltinsLoongArchLSX.def | 953 +++++ - clang/lib/Headers/CMakeLists.txt | 1 + - clang/lib/Headers/lsxintrin.h | 3726 +++++++++++++++++ - clang/lib/Sema/SemaChecking.cpp | 229 +- - 6 files changed, 4965 insertions(+), 40 deletions(-) - create mode 100644 clang/include/clang/Basic/BuiltinsLoongArchBase.def - create mode 100644 clang/include/clang/Basic/BuiltinsLoongArchLSX.def - create mode 100644 clang/lib/Headers/lsxintrin.h - -diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def -index 20510e18fe58..9ec19c31095a 100644 ---- a/clang/include/clang/Basic/BuiltinsLoongArch.def -+++ b/clang/include/clang/Basic/BuiltinsLoongArch.def -@@ -15,46 +15,11 @@ - # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) - #endif - --// TODO: Support more builtins. --TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vWiUWiWi", "nc", "64bit") --TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") --TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_movfcsr2gr, "UiIUi", "nc", "f") --TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") --TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vWiWi", "nc", "64bit") --TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vWiWi", "nc", "64bit") -+// Definition of LoongArch basic builtins. 
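// (Editor's note: these .def files follow Clang's X-macro convention -- an
// includer defines BUILTIN and/or TARGET_BUILTIN before the #include and
// receives one macro expansion per builtin. A hedged sketch of a consumer,
// not Clang's actual one:
//
//   #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) #ID "\n"
//   static const char AllLoongArchBuiltinNames[] =
//   #include "clang/Basic/BuiltinsLoongArch.def"
//     ;
//
// Clang's real consumers, e.g. Builtins.cpp, define richer macros that feed
// the builtin tables.)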
-+#include "clang/Basic/BuiltinsLoongArchBase.def" - --TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") --TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") --TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") --TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iWii", "nc", "64bit") --TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") --TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") --TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") --TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iWii", "nc", "64bit") -- --TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "UWiIUi", "nc", "64bit") --TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "UWiUWiIUi", "nc", "64bit") --TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "UWiUWiUWiIUi", "nc", "64bit") -- --TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "UWiUi", "nc", "64bit") --TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") --TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") -- --TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") --TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") -+// Definition of LSX builtins. -+#include "clang/Basic/BuiltinsLoongArchLSX.def" - - #undef BUILTIN - #undef TARGET_BUILTIN -diff --git a/clang/include/clang/Basic/BuiltinsLoongArchBase.def b/clang/include/clang/Basic/BuiltinsLoongArchBase.def -new file mode 100644 -index 000000000000..cbb239223aae ---- /dev/null -+++ b/clang/include/clang/Basic/BuiltinsLoongArchBase.def -@@ -0,0 +1,53 @@ -+//============------------ BuiltinsLoongArchBase.def -------------*- C++ -*-==// -+// -+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -+// See https://llvm.org/LICENSE.txt for license information. -+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+// -+//===----------------------------------------------------------------------===// -+// -+// This file defines the LoongArch-specific basic builtin function database. -+// Users of this file must define the BUILTIN macro to make use of this -+// information. 
-+// -+//===----------------------------------------------------------------------===// -+ -+TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vWiUWiWi", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") -+TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") -+TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") -+TARGET_BUILTIN(__builtin_loongarch_movfcsr2gr, "UiIUi", "nc", "f") -+TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") -+TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") -+TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") -+TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") -+TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vWiWi", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vWiWi", "nc", "64bit") -+ -+TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iWii", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iWii", "nc", "64bit") -+ -+TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") -+TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "UWiIUi", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") -+TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "UWiUWiIUi", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") -+TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "UWiUWiUWiIUi", "nc", "64bit") -+ -+TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") -+TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") -+TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") -+TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "UWiUi", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") -+TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") -+TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") -+TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") -+ -+TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") -+TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") -diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def -new file mode 100644 -index 000000000000..8e6aec886c50 ---- /dev/null -+++ b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def -@@ -0,0 +1,953 @@ -+//=============------------- BuiltinsLoongArchLSX.def --------------- C++ -*-=// -+// -+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -+// See https://llvm.org/LICENSE.txt for license information. -+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+// -+//===----------------------------------------------------------------------===// -+// -+// This file defines the LoongArch-specific LSX builtin function database. -+// Users of this file must define the BUILTIN macro to make use of this -+// information. 
-+// -+//===----------------------------------------------------------------------===// -+ -+TARGET_BUILTIN(__builtin_lsx_vadd_b, "V16cV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vadd_h, "V8sV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vadd_w, "V4iV4iV4i", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vadd_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vadd_q, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsub_b, "V16cV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsub_h, "V8sV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsub_w, "V4iV4iV4i", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsub_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsub_q, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vaddi_bu, "V16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddi_hu, "V8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddi_wu, "V4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddi_du, "V2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsubi_bu, "V16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsubi_hu, "V8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsubi_wu, "V4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsubi_du, "V2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vneg_b, "V16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vneg_h, "V8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vneg_w, "V4iV4i", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vneg_d, "V2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsadd_b, "V16ScV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsadd_h, "V8SsV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsadd_w, "V4SiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsadd_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsadd_bu, "V16UcV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsadd_hu, "V8UsV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsadd_wu, "V4UiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsadd_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vssub_b, "V16ScV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssub_h, "V8SsV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssub_w, "V4SiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssub_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vssub_bu, "V16UcV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssub_hu, "V8UsV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssub_wu, "V4UiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssub_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vhaddw_h_b, "V8SsV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vhaddw_w_h, "V4SiV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vhaddw_d_w, "V2SLLiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vhaddw_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vhaddw_hu_bu, "V8UsV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vhaddw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vhaddw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vhaddw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vhsubw_h_b, "V8SsV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vhsubw_w_h, "V4SiV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vhsubw_d_w, 
"V2SLLiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vhsubw_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vhsubw_hu_bu, "V8UsV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vhsubw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vhsubw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vhsubw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddwev_w_h, "V4SiV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddwod_w_h, "V4SiV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsubwev_w_h, "V4SiV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsubwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsubwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsubwod_w_h, "V4SiV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsubwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsubwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsubwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsubwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsubwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsubwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsubwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsubwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsubwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsubwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vavg_b, "V16ScV16ScV16Sc", "nc", "lsx") 
-+TARGET_BUILTIN(__builtin_lsx_vavg_h, "V8SsV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vavg_w, "V4SiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vavg_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vavg_bu, "V16UcV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vavg_hu, "V8UsV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vavg_wu, "V4UiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vavg_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vavgr_b, "V16ScV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vavgr_h, "V8SsV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vavgr_w, "V4SiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vavgr_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vavgr_bu, "V16UcV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vavgr_hu, "V8UsV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vavgr_wu, "V4UiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vavgr_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vabsd_b, "V16ScV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vabsd_h, "V8SsV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vabsd_w, "V4SiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vabsd_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vabsd_bu, "V16UcV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vabsd_hu, "V8UsV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vabsd_wu, "V4UiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vabsd_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vadda_b, "V16ScV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vadda_h, "V8SsV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vadda_w, "V4SiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vadda_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmax_b, "V16ScV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmax_h, "V8SsV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmax_w, "V4SiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmax_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmaxi_b, "V16ScV16ScIi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaxi_h, "V8SsV8SsIi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaxi_w, "V4SiV4SiIi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaxi_d, "V2SLLiV2SLLiIi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmax_bu, "V16UcV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmax_hu, "V8UsV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmax_wu, "V4UiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmax_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmaxi_bu, "V16UcV16UcIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaxi_hu, "V8UsV8UsIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaxi_wu, "V4UiV4UiIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaxi_du, "V2ULLiV2ULLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmin_b, "V16ScV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmin_h, "V8SsV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmin_w, "V4SiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmin_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmini_b, "V16ScV16ScIi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmini_h, "V8SsV8SsIi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmini_w, 
"V4SiV4SiIi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmini_d, "V2SLLiV2SLLiIi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmin_bu, "V16UcV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmin_hu, "V8UsV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmin_wu, "V4UiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmin_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmini_bu, "V16UcV16UcIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmini_hu, "V8UsV8UsIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmini_wu, "V4UiV4UiIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmini_du, "V2ULLiV2ULLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmul_b, "V16ScV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmul_h, "V8SsV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmul_w, "V4SiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmul_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmuh_b, "V16cV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmuh_h, "V8sV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmuh_w, "V4iV4iV4i", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmuh_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmuh_bu, "V16UcV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmuh_hu, "V8UsV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmuh_wu, "V4UiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmuh_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmulwev_w_h, "V4SiV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmulwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmulwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmulwod_w_h, "V4SiV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmulwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmulwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmadd_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmadd_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmadd_w, "V4SiV4SiV4SiV4Si", 
"nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmadd_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmsub_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmsub_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmsub_w, "V4SiV4SiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmsub_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_h, "V4SiV4SiV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_h, "V4SiV4SiV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu, "V8UsV8UsV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu, "V8UsV8UsV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vdiv_b, "V16ScV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vdiv_h, "V8SsV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vdiv_w, "V4SiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vdiv_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vdiv_bu, "V16UcV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vdiv_hu, "V8UsV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vdiv_wu, "V4UiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vdiv_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmod_b, "V16ScV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmod_h, "V8SsV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmod_w, "V4SiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmod_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") -+ -+ -+TARGET_BUILTIN(__builtin_lsx_vmod_bu, "V16UcV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmod_hu, "V8UsV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmod_wu, "V4UiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmod_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsat_b, 
"V16ScV16ScIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsat_h, "V8SsV8SsIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsat_w, "V4SiV4SiIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsat_d, "V2SLLiV2SLLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsat_bu, "V16UcV16UcIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsat_hu, "V8UsV8UsIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsat_wu, "V4UiV4UiIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsat_du, "V2ULLiV2ULLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vexth_w_h, "V4SiV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vexth_d_w, "V2LLiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vexth_q_d, "V2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vexth_hu_bu, "V8UsV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vexth_wu_hu, "V4UiV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vexth_du_wu, "V2ULLiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vexth_qu_du, "V2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsigncov_b, "V16ScV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsigncov_h, "V8SsV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsigncov_w, "V4SiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsigncov_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmskltz_b, "V16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmskltz_h, "V8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmskltz_w, "V4iV4i", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmskltz_d, "V2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vmskgez_b, "V16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vmsknz_b, "V8sV8s", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vldi, "V2LLiIi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vrepli_b, "V16cIi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vrepli_h, "V8sIi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vrepli_w, "V4iIi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vrepli_d, "V2LLiIi", "nc", "lsx") -+ -+ -+TARGET_BUILTIN(__builtin_lsx_vand_v, "V16UcV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vor_v, "V16UcV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vxor_v, "V16cV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vnor_v, "V16UcV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vandn_v, "V16UcV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vorn_v, "V16ScV16ScV16Sc", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vandi_b, "V16UcV16UcIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vori_b, "V16UcV16UcIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vxori_b, "V16UcV16UcIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vnori_b, "V16UcV16UcIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsll_b, "V16cV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsll_h, "V8sV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsll_w, "V4iV4iV4i", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsll_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vslli_b, "V16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslli_h, "V8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslli_w, "V4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslli_d, "V2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsrl_b, "V16cV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrl_h, "V8sV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrl_w, "V4iV4iV4i", "nc", "lsx") 
-+TARGET_BUILTIN(__builtin_lsx_vsrl_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsrli_b, "V16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrli_h, "V8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrli_w, "V4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrli_d, "V2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsra_b, "V16cV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsra_h, "V8sV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsra_w, "V4iV4iV4i", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsra_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsrai_b, "V16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrai_h, "V8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrai_w, "V4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrai_d, "V2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vrotr_b, "V16cV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vrotr_h, "V8sV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vrotr_w, "V4iV4iV4i", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vrotr_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vrotri_b, "V16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vrotri_h, "V8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vrotri_w, "V4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vrotri_d, "V2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsllwil_w_h, "V4SiV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsllwil_d_w, "V2LLiV4SiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vextl_q_d, "V2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsllwil_hu_bu, "V8UsV16UcIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsllwil_wu_hu, "V4UiV8UsIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsllwil_du_wu, "V2ULLiV4UiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vextl_qu_du, "V2LLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsrlr_b, "V16cV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrlr_h, "V8sV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrlr_w, "V4iV4iV4i", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrlr_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsrlri_b, "V16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrlri_h, "V8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrlri_w, "V4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrlri_d, "V2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsrar_b, "V16cV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrar_h, "V8sV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrar_w, "V4iV4iV4i", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrar_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsrari_b, "V16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrari_h, "V8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrari_w, "V4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrari_d, "V2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsrln_b_h, "V16ScV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrln_h_w, "V8sV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrln_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsran_b_h, "V16ScV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsran_h_w, "V8sV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsran_w_d, 
"V4SiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsrani_b_h, "V16cV16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrani_h_w, "V8sV8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrani_w_d, "V4iV4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsrlrn_b_h, "V16ScV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrlrn_h_w, "V8sV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrlrn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsrarn_b_h, "V16ScV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrarn_h_w, "V8sV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vssrln_b_h, "V16ScV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrln_h_w, "V8sV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrln_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vssran_b_h, "V16ScV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssran_h_w, "V8sV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssran_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vssrln_bu_h, "V16UcV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrln_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrln_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vssran_bu_h, "V16UcV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssran_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssran_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vssrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vssrani_b_h, "V16cV16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrani_h_w, "V8sV8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrani_w_d, "V4iV4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrlrni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrlrni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrlrni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vssrani_bu_h, 
"V16cV16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrani_hu_w, "V8sV8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrani_wu_d, "V4iV4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrani_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vssrlrn_b_h, "V16ScV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrlrn_h_w, "V8sV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrlrn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vssrarn_b_h, "V16ScV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrarn_h_w, "V8sV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vssrlrn_bu_h, "V16UcV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrlrn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrlrn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vssrarn_bu_h, "V16UcV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrarn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrarn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vssrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrlni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrlni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrlni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrarni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrarni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vssrarni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vclo_b, "V16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vclo_h, "V8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vclo_w, "V4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vclo_d, "V2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vclz_b, "V16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vclz_h, "V8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vclz_w, "V4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vclz_d, "V2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vpcnt_b, "V16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpcnt_h, "V8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpcnt_w, "V4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpcnt_d, "V2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vbitclr_b, "V16UcV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbitclr_h, "V8UsV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbitclr_w, "V4UiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbitclr_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vbitclri_b, 
"V16UcV16UcIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbitclri_h, "V8UsV8UsIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbitclri_w, "V4UiV4UiIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbitclri_d, "V2ULLiV2ULLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vbitset_b, "V16UcV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbitset_h, "V8UsV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbitset_w, "V4UiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbitset_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vbitseti_b, "V16UcV16UcIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbitseti_h, "V8UsV8UsIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbitseti_w, "V4UiV4UiIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbitseti_d, "V2ULLiV2ULLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vbitrev_b, "V16UcV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbitrev_h, "V8UsV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbitrev_w, "V4UiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbitrev_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vbitrevi_b, "V16UcV16UcIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbitrevi_h, "V8UsV8UsIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbitrevi_w, "V4UiV4UiIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbitrevi_d, "V2ULLiV2ULLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfrstp_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfrstp_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfrstpi_b, "V16cV16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfrstpi_h, "V8sV8sV8sIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfadd_s, "V4fV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfadd_d, "V2dV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfsub_s, "V4fV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfsub_d, "V2dV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfmul_s, "V4fV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfmul_d, "V2dV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfdiv_s, "V4fV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfdiv_d, "V2dV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfmadd_s, "V4fV4fV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfmadd_d, "V2dV2dV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfmsub_s, "V4fV4fV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfmsub_d, "V2dV2dV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfnmadd_s, "V4fV4fV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfnmadd_d, "V2dV2dV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfnmsub_s, "V4fV4fV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfnmsub_d, "V2dV2dV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfmax_s, "V4fV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfmax_d, "V2dV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfmin_s, "V4fV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfmin_d, "V2dV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfmaxa_s, "V4fV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfmaxa_d, "V2dV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfmina_s, "V4fV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfmina_d, "V2dV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vflogb_s, "V4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vflogb_d, "V2dV2d", "nc", "lsx") -+ 
-+TARGET_BUILTIN(__builtin_lsx_vfclass_s, "V4iV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfclass_d, "V2LLiV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfsqrt_s, "V4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfrsqrt_s, "V4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcvth_s_h, "V4fV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcvth_d_s, "V2dV4f", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcvt_h_s, "V8sV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcvt_s_d, "V4fV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfrintrne_s, "V4SiV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfrintrne_d, "V2LLiV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfrintrz_s, "V4SiV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfrintrz_d, "V2LLiV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfrintrp_s, "V4SiV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfrintrp_d, "V2LLiV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfrintrm_s, "V4SiV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfrintrm_d, "V2LLiV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfrint_s, "V4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfrint_d, "V2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vftintrne_w_s, "V4SiV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vftintrne_l_d, "V2LLiV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vftintrz_w_s, "V4SiV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vftintrz_l_d, "V2LLiV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vftintrp_w_s, "V4SiV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vftintrp_l_d, "V2LLiV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vftintrm_w_s, "V4SiV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vftintrm_l_d, "V2LLiV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vftint_w_s, "V4SiV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vftint_l_d, "V2SLLiV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vftintrz_wu_s, "V4UiV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vftintrz_lu_d, "V2ULLiV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vftint_wu_s, "V4UiV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vftint_lu_d, "V2ULLiV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vftintrne_w_d, "V4SiV2dV2d", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vftintrz_w_d, "V4SiV2dV2d", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vftintrp_w_d, "V4SiV2dV2d", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vftintrm_w_d, "V4SiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vftint_w_d, "V4SiV2dV2d", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vftintrnel_l_s, "V2LLiV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vftintrneh_l_s, "V2LLiV4f", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vftintrzl_l_s, "V2LLiV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vftintrzh_l_s, "V2LLiV4f", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vftintrpl_l_s, "V2LLiV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vftintrph_l_s, "V2LLiV4f", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vftintrml_l_s, "V2LLiV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vftintrmh_l_s, "V2LLiV4f", "nc", 
"lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vftintl_l_s, "V2LLiV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vftinth_l_s, "V2LLiV4f", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vffint_s_w, "V4fV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vffint_d_l, "V2dV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vffint_s_wu, "V4fV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vffint_d_lu, "V2dV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vffintl_d_w, "V2dV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vffinth_d_w, "V2dV4Si", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vffint_s_l, "V4fV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vseq_b, "V16ScV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vseq_h, "V8SsV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vseq_w, "V4SiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vseq_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vseqi_b, "V16ScV16ScISi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vseqi_h, "V8SsV8SsISi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vseqi_w, "V4SiV4SiISi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vseqi_d, "V2SLLiV2SLLiISi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsle_b, "V16ScV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsle_h, "V8SsV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsle_w, "V4SiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsle_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vslei_b, "V16ScV16ScISi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslei_h, "V8SsV8SsISi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslei_w, "V4SiV4SiISi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslei_d, "V2SLLiV2SLLiISi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vsle_bu, "V16ScV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsle_hu, "V8SsV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsle_wu, "V4SiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vsle_du, "V2SLLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vslei_bu, "V16ScV16UcIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslei_hu, "V8SsV8UsIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslei_wu, "V4SiV4UiIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslei_du, "V2SLLiV2ULLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vslt_b, "V16ScV16ScV16Sc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslt_h, "V8SsV8SsV8Ss", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslt_w, "V4SiV4SiV4Si", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslt_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vslti_b, "V16ScV16ScISi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslti_h, "V8SsV8SsISi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslti_w, "V4SiV4SiISi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslti_d, "V2SLLiV2SLLiISi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vslt_bu, "V16ScV16UcV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslt_hu, "V8SsV8UsV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslt_wu, "V4SiV4UiV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslt_du, "V2SLLiV2ULLiV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vslti_bu, "V16ScV16UcIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslti_hu, "V8SsV8UsIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslti_wu, "V4SiV4UiIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vslti_du, "V2SLLiV2ULLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_caf_s, "V4SiV4fV4f", "nc", 
"lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_caf_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_cun_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_cun_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_ceq_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_ceq_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_cueq_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_cueq_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_clt_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_clt_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_cult_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_cult_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_cle_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_cle_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_cule_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_cule_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_cne_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_cne_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_cor_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_cor_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_cune_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_cune_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_saf_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_saf_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_sun_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_sun_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_seq_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_seq_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_sueq_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_sueq_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_slt_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_slt_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_sult_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_sult_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_sle_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_sle_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_sule_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_sule_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_sne_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_sne_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_sor_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_sor_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vfcmp_sune_s, "V4SiV4fV4f", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vfcmp_sune_d, "V2SLLiV2dV2d", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vbitsel_v, "V16UcV16UcV16UcV16Uc", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vbitseli_b, "V16UcV16UcV16UcIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_b, "V16Sci", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_h, "V8Ssi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_w, "V4Sii", "nc", "lsx") 
-+TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_d, "V2SLLiLLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_b, "V16ScV16SciIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_h, "V8SsV8SsiIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_w, "V4SiV4SiiIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_d, "V2SLLiV2SLLiLLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vpickve2gr_b, "iV16ScIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpickve2gr_h, "iV8SsIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpickve2gr_w, "iV4SiIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpickve2gr_d, "LLiV2SLLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vpickve2gr_bu, "iV16UcIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpickve2gr_hu, "iV8UsIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpickve2gr_wu, "iV4UiIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpickve2gr_du, "LLiV2ULLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vreplve_b, "V16cV16cUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vreplve_h, "V8sV8sUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vreplve_w, "V4iV4iUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vreplve_d, "V2LLiV2LLiUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vreplvei_b, "V16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vreplvei_h, "V8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vreplvei_w, "V4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vreplvei_d, "V2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vbsll_v, "V16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vbsrl_v, "V16cV16cIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vpackev_b, "V16cV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpackev_h, "V8sV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpackev_w, "V4iV4iV4i", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpackev_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vpackod_b, "V16cV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpackod_h, "V8sV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpackod_w, "V4iV4iV4i", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpackod_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vpickev_b, "V16cV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpickev_h, "V8sV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpickev_w, "V4iV4iV4i", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpickev_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vpickod_b, "V16cV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpickod_h, "V8sV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpickod_w, "V4iV4iV4i", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vpickod_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vilvl_b, "V16cV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vilvl_h, "V8sV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vilvl_w, "V4iV4iV4i", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vilvl_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vilvh_b, "V16cV16cV16c", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vilvh_h, "V8sV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vilvh_w, "V4iV4iV4i", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vilvh_d, "V2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vshuf_b, "V16UcV16UcV16UcV16Uc", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vshuf_h, "V8sV8sV8sV8s", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vshuf_w, "V4iV4iV4iV4i", 
"nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vshuf_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vshuf4i_b, "V16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vshuf4i_h, "V8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vshuf4i_w, "V4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vshuf4i_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vpermi_w, "V4iV4iV4iIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vextrins_b, "V16cV16cV16cIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vextrins_h, "V8sV8sV8sIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vextrins_w, "V4iV4iV4iIUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vextrins_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vld, "V16ScvC*Ii", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vst, "vV16Scv*Ii", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vldx, "V16ScvC*LLi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vstx, "vV16Scv*LLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vldrepl_b, "V16cvC*Ii", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vldrepl_h, "V8svC*Ii", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vldrepl_w, "V4ivC*Ii", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vldrepl_d, "V2LLivC*Ii", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_vstelm_b, "vV16Scv*IiUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vstelm_h, "vV8Ssv*IiUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vstelm_w, "vV4Siv*IiUi", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_vstelm_d, "vV2SLLiv*IiUi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_bz_v, "iV16Uc", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_bnz_v, "iV16Uc", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_bz_b, "iV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_bz_h, "iV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_bz_w, "iV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_bz_d, "iV2ULLi", "nc", "lsx") -+ -+TARGET_BUILTIN(__builtin_lsx_bnz_b, "iV16Uc", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_bnz_h, "iV8Us", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_bnz_w, "iV4Ui", "nc", "lsx") -+TARGET_BUILTIN(__builtin_lsx_bnz_d, "iV2ULLi", "nc", "lsx") -diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt -index db47de2ad965..1d5573b71e6d 100644 ---- a/clang/lib/Headers/CMakeLists.txt -+++ b/clang/lib/Headers/CMakeLists.txt -@@ -78,6 +78,7 @@ set(hlsl_files - - set(loongarch_files - larchintrin.h -+ lsxintrin.h - ) - - set(mips_msa_files -diff --git a/clang/lib/Headers/lsxintrin.h b/clang/lib/Headers/lsxintrin.h -new file mode 100644 -index 000000000000..a29bc7757ab5 ---- /dev/null -+++ b/clang/lib/Headers/lsxintrin.h -@@ -0,0 +1,3726 @@ -+/*===------------- lsxintrin.h - LoongArch LSX intrinsics ------------------=== -+ * -+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -+ * See https://llvm.org/LICENSE.txt for license information. 
-+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+ * -+ *===-----------------------------------------------------------------------=== -+ */ -+ -+#ifndef _LOONGSON_SXINTRIN_H -+#define _LOONGSON_SXINTRIN_H 1 -+ -+#if defined(__loongarch_sx) -+typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); -+typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); -+typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); -+typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); -+typedef short v8i16 __attribute__((vector_size(16), aligned(16))); -+typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); -+typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); -+typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); -+typedef int v4i32 __attribute__((vector_size(16), aligned(16))); -+typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); -+typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); -+typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); -+typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); -+typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); -+typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); -+typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); -+typedef float v4f32 __attribute__((vector_size(16), aligned(16))); -+typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); -+typedef double v2f64 __attribute__((vector_size(16), aligned(16))); -+typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); -+ -+typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); -+typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); -+typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsll_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsll_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsll_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsll_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsll_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsll_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsll_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsll_d((v2i64)_1, (v2i64)_2); -+} -+ -+#define __lsx_vslli_b(/*__m128i*/ _1, /*ui3*/ _2) \ -+ ((__m128i)__builtin_lsx_vslli_b((v16i8)(_1), (_2))) -+ -+#define __lsx_vslli_h(/*__m128i*/ _1, /*ui4*/ _2) \ -+ ((__m128i)__builtin_lsx_vslli_h((v8i16)(_1), (_2))) -+ -+#define __lsx_vslli_w(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vslli_w((v4i32)(_1), (_2))) -+ -+#define __lsx_vslli_d(/*__m128i*/ _1, /*ui6*/ _2) \ -+ ((__m128i)__builtin_lsx_vslli_d((v2i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsra_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsra_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m128i -+ __lsx_vsra_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsra_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsra_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsra_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsra_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsra_d((v2i64)_1, (v2i64)_2); -+} -+ -+#define __lsx_vsrai_b(/*__m128i*/ _1, /*ui3*/ _2) \ -+ ((__m128i)__builtin_lsx_vsrai_b((v16i8)(_1), (_2))) -+ -+#define __lsx_vsrai_h(/*__m128i*/ _1, /*ui4*/ _2) \ -+ ((__m128i)__builtin_lsx_vsrai_h((v8i16)(_1), (_2))) -+ -+#define __lsx_vsrai_w(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vsrai_w((v4i32)(_1), (_2))) -+ -+#define __lsx_vsrai_d(/*__m128i*/ _1, /*ui6*/ _2) \ -+ ((__m128i)__builtin_lsx_vsrai_d((v2i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrar_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrar_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrar_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrar_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrar_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrar_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrar_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrar_d((v2i64)_1, (v2i64)_2); -+} -+ -+#define __lsx_vsrari_b(/*__m128i*/ _1, /*ui3*/ _2) \ -+ ((__m128i)__builtin_lsx_vsrari_b((v16i8)(_1), (_2))) -+ -+#define __lsx_vsrari_h(/*__m128i*/ _1, /*ui4*/ _2) \ -+ ((__m128i)__builtin_lsx_vsrari_h((v8i16)(_1), (_2))) -+ -+#define __lsx_vsrari_w(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vsrari_w((v4i32)(_1), (_2))) -+ -+#define __lsx_vsrari_d(/*__m128i*/ _1, /*ui6*/ _2) \ -+ ((__m128i)__builtin_lsx_vsrari_d((v2i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrl_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrl_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrl_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrl_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrl_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrl_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrl_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrl_d((v2i64)_1, (v2i64)_2); -+} -+ -+#define __lsx_vsrli_b(/*__m128i*/ _1, /*ui3*/ _2) \ -+ ((__m128i)__builtin_lsx_vsrli_b((v16i8)(_1), (_2))) -+ -+#define __lsx_vsrli_h(/*__m128i*/ _1, /*ui4*/ _2) \ -+ ((__m128i)__builtin_lsx_vsrli_h((v8i16)(_1), (_2))) -+ -+#define __lsx_vsrli_w(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vsrli_w((v4i32)(_1), (_2))) -+ -+#define __lsx_vsrli_d(/*__m128i*/ _1, /*ui6*/ _2) \ -+ ((__m128i)__builtin_lsx_vsrli_d((v2i64)(_1), 
(_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrlr_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrlr_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrlr_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrlr_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrlr_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrlr_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrlr_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrlr_d((v2i64)_1, (v2i64)_2); -+} -+ -+#define __lsx_vsrlri_b(/*__m128i*/ _1, /*ui3*/ _2) \ -+ ((__m128i)__builtin_lsx_vsrlri_b((v16i8)(_1), (_2))) -+ -+#define __lsx_vsrlri_h(/*__m128i*/ _1, /*ui4*/ _2) \ -+ ((__m128i)__builtin_lsx_vsrlri_h((v8i16)(_1), (_2))) -+ -+#define __lsx_vsrlri_w(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vsrlri_w((v4i32)(_1), (_2))) -+ -+#define __lsx_vsrlri_d(/*__m128i*/ _1, /*ui6*/ _2) \ -+ ((__m128i)__builtin_lsx_vsrlri_d((v2i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vbitclr_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vbitclr_b((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vbitclr_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vbitclr_h((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vbitclr_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vbitclr_w((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vbitclr_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vbitclr_d((v2u64)_1, (v2u64)_2); -+} -+ -+#define __lsx_vbitclri_b(/*__m128i*/ _1, /*ui3*/ _2) \ -+ ((__m128i)__builtin_lsx_vbitclri_b((v16u8)(_1), (_2))) -+ -+#define __lsx_vbitclri_h(/*__m128i*/ _1, /*ui4*/ _2) \ -+ ((__m128i)__builtin_lsx_vbitclri_h((v8u16)(_1), (_2))) -+ -+#define __lsx_vbitclri_w(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vbitclri_w((v4u32)(_1), (_2))) -+ -+#define __lsx_vbitclri_d(/*__m128i*/ _1, /*ui6*/ _2) \ -+ ((__m128i)__builtin_lsx_vbitclri_d((v2u64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vbitset_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vbitset_b((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vbitset_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vbitset_h((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vbitset_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vbitset_w((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vbitset_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vbitset_d((v2u64)_1, (v2u64)_2); -+} -+ -+#define __lsx_vbitseti_b(/*__m128i*/ _1, /*ui3*/ _2) \ -+ 
((__m128i)__builtin_lsx_vbitseti_b((v16u8)(_1), (_2))) -+ -+#define __lsx_vbitseti_h(/*__m128i*/ _1, /*ui4*/ _2) \ -+ ((__m128i)__builtin_lsx_vbitseti_h((v8u16)(_1), (_2))) -+ -+#define __lsx_vbitseti_w(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vbitseti_w((v4u32)(_1), (_2))) -+ -+#define __lsx_vbitseti_d(/*__m128i*/ _1, /*ui6*/ _2) \ -+ ((__m128i)__builtin_lsx_vbitseti_d((v2u64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vbitrev_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vbitrev_b((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vbitrev_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vbitrev_h((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vbitrev_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vbitrev_w((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vbitrev_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vbitrev_d((v2u64)_1, (v2u64)_2); -+} -+ -+#define __lsx_vbitrevi_b(/*__m128i*/ _1, /*ui3*/ _2) \ -+ ((__m128i)__builtin_lsx_vbitrevi_b((v16u8)(_1), (_2))) -+ -+#define __lsx_vbitrevi_h(/*__m128i*/ _1, /*ui4*/ _2) \ -+ ((__m128i)__builtin_lsx_vbitrevi_h((v8u16)(_1), (_2))) -+ -+#define __lsx_vbitrevi_w(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vbitrevi_w((v4u32)(_1), (_2))) -+ -+#define __lsx_vbitrevi_d(/*__m128i*/ _1, /*ui6*/ _2) \ -+ ((__m128i)__builtin_lsx_vbitrevi_d((v2u64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vadd_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vadd_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vadd_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vadd_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vadd_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vadd_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vadd_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vadd_d((v2i64)_1, (v2i64)_2); -+} -+ -+#define __lsx_vaddi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vaddi_bu((v16i8)(_1), (_2))) -+ -+#define __lsx_vaddi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vaddi_hu((v8i16)(_1), (_2))) -+ -+#define __lsx_vaddi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vaddi_wu((v4i32)(_1), (_2))) -+ -+#define __lsx_vaddi_du(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vaddi_du((v2i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsub_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsub_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsub_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsub_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ 
__lsx_vsub_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsub_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsub_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsub_d((v2i64)_1, (v2i64)_2); -+} -+ -+#define __lsx_vsubi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vsubi_bu((v16i8)(_1), (_2))) -+ -+#define __lsx_vsubi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vsubi_hu((v8i16)(_1), (_2))) -+ -+#define __lsx_vsubi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vsubi_wu((v4i32)(_1), (_2))) -+ -+#define __lsx_vsubi_du(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vsubi_du((v2i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmax_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmax_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmax_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmax_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmax_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmax_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmax_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmax_d((v2i64)_1, (v2i64)_2); -+} -+ -+#define __lsx_vmaxi_b(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vmaxi_b((v16i8)(_1), (_2))) -+ -+#define __lsx_vmaxi_h(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vmaxi_h((v8i16)(_1), (_2))) -+ -+#define __lsx_vmaxi_w(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vmaxi_w((v4i32)(_1), (_2))) -+ -+#define __lsx_vmaxi_d(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vmaxi_d((v2i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmax_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmax_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmax_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmax_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmax_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmax_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmax_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmax_du((v2u64)_1, (v2u64)_2); -+} -+ -+#define __lsx_vmaxi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vmaxi_bu((v16u8)(_1), (_2))) -+ -+#define __lsx_vmaxi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vmaxi_hu((v8u16)(_1), (_2))) -+ -+#define __lsx_vmaxi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vmaxi_wu((v4u32)(_1), (_2))) -+ -+#define __lsx_vmaxi_du(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vmaxi_du((v2u64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmin_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmin_b((v16i8)_1, (v16i8)_2); -+} 
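
The pattern above repeats throughout the header: operations taking a vector register operand are exposed as gnu_inline functions whose only job is to cast the generic __m128i to the lane-typed vector (v16i8, v8i16, ...) that the corresponding __builtin_lsx_* expects, while operations taking an immediate operand are macros, so the uiN/siN range noted in the parameter comments is checked against a compile-time constant at the call site. A minimal sketch of how the wrappers compose, assuming a loongarch64 target with LSX enabled (e.g. clang --target=loongarch64-linux-gnu -mlsx); the helper name scale_bias_clamp is illustrative, not part of the header:

    #include <lsxintrin.h>

    /* Scale each 32-bit lane by 4 with an immediate shift (2 must fit ui5),
       add a per-lane bias, then clamp from below with a lane-wise signed max.
       Only wrappers already defined above are used. */
    static inline __m128i scale_bias_clamp(__m128i v, __m128i bias, __m128i lo) {
      __m128i scaled = __lsx_vslli_w(v, 2);        /* v << 2 in every lane */
      __m128i biased = __lsx_vadd_w(scaled, bias); /* lane-wise 32-bit add */
      return __lsx_vmax_w(biased, lo);             /* lane-wise signed max */
    }
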
-+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmin_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmin_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmin_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmin_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmin_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmin_d((v2i64)_1, (v2i64)_2); -+} -+ -+#define __lsx_vmini_b(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vmini_b((v16i8)(_1), (_2))) -+ -+#define __lsx_vmini_h(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vmini_h((v8i16)(_1), (_2))) -+ -+#define __lsx_vmini_w(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vmini_w((v4i32)(_1), (_2))) -+ -+#define __lsx_vmini_d(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vmini_d((v2i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmin_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmin_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmin_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmin_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmin_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmin_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmin_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmin_du((v2u64)_1, (v2u64)_2); -+} -+ -+#define __lsx_vmini_bu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vmini_bu((v16u8)(_1), (_2))) -+ -+#define __lsx_vmini_hu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vmini_hu((v8u16)(_1), (_2))) -+ -+#define __lsx_vmini_wu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vmini_wu((v4u32)(_1), (_2))) -+ -+#define __lsx_vmini_du(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vmini_du((v2u64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vseq_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vseq_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vseq_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vseq_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vseq_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vseq_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vseq_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vseq_d((v2i64)_1, (v2i64)_2); -+} -+ -+#define __lsx_vseqi_b(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vseqi_b((v16i8)(_1), (_2))) -+ -+#define __lsx_vseqi_h(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vseqi_h((v8i16)(_1), (_2))) -+ -+#define __lsx_vseqi_w(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vseqi_w((v4i32)(_1), (_2))) -+ -+#define 
__lsx_vseqi_d(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vseqi_d((v2i64)(_1), (_2))) -+ -+#define __lsx_vslti_b(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vslti_b((v16i8)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vslt_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vslt_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vslt_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vslt_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vslt_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vslt_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vslt_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vslt_d((v2i64)_1, (v2i64)_2); -+} -+ -+#define __lsx_vslti_h(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vslti_h((v8i16)(_1), (_2))) -+ -+#define __lsx_vslti_w(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vslti_w((v4i32)(_1), (_2))) -+ -+#define __lsx_vslti_d(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vslti_d((v2i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vslt_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vslt_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vslt_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vslt_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vslt_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vslt_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vslt_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vslt_du((v2u64)_1, (v2u64)_2); -+} -+ -+#define __lsx_vslti_bu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vslti_bu((v16u8)(_1), (_2))) -+ -+#define __lsx_vslti_hu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vslti_hu((v8u16)(_1), (_2))) -+ -+#define __lsx_vslti_wu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vslti_wu((v4u32)(_1), (_2))) -+ -+#define __lsx_vslti_du(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vslti_du((v2u64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsle_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsle_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsle_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsle_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsle_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsle_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsle_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsle_d((v2i64)_1, (v2i64)_2); -+} -+ -+#define __lsx_vslei_b(/*__m128i*/ _1, 
/*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vslei_b((v16i8)(_1), (_2))) -+ -+#define __lsx_vslei_h(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vslei_h((v8i16)(_1), (_2))) -+ -+#define __lsx_vslei_w(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vslei_w((v4i32)(_1), (_2))) -+ -+#define __lsx_vslei_d(/*__m128i*/ _1, /*si5*/ _2) \ -+ ((__m128i)__builtin_lsx_vslei_d((v2i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsle_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsle_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsle_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsle_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsle_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsle_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsle_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsle_du((v2u64)_1, (v2u64)_2); -+} -+ -+#define __lsx_vslei_bu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vslei_bu((v16u8)(_1), (_2))) -+ -+#define __lsx_vslei_hu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vslei_hu((v8u16)(_1), (_2))) -+ -+#define __lsx_vslei_wu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vslei_wu((v4u32)(_1), (_2))) -+ -+#define __lsx_vslei_du(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vslei_du((v2u64)(_1), (_2))) -+ -+#define __lsx_vsat_b(/*__m128i*/ _1, /*ui3*/ _2) \ -+ ((__m128i)__builtin_lsx_vsat_b((v16i8)(_1), (_2))) -+ -+#define __lsx_vsat_h(/*__m128i*/ _1, /*ui4*/ _2) \ -+ ((__m128i)__builtin_lsx_vsat_h((v8i16)(_1), (_2))) -+ -+#define __lsx_vsat_w(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vsat_w((v4i32)(_1), (_2))) -+ -+#define __lsx_vsat_d(/*__m128i*/ _1, /*ui6*/ _2) \ -+ ((__m128i)__builtin_lsx_vsat_d((v2i64)(_1), (_2))) -+ -+#define __lsx_vsat_bu(/*__m128i*/ _1, /*ui3*/ _2) \ -+ ((__m128i)__builtin_lsx_vsat_bu((v16u8)(_1), (_2))) -+ -+#define __lsx_vsat_hu(/*__m128i*/ _1, /*ui4*/ _2) \ -+ ((__m128i)__builtin_lsx_vsat_hu((v8u16)(_1), (_2))) -+ -+#define __lsx_vsat_wu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vsat_wu((v4u32)(_1), (_2))) -+ -+#define __lsx_vsat_du(/*__m128i*/ _1, /*ui6*/ _2) \ -+ ((__m128i)__builtin_lsx_vsat_du((v2u64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vadda_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vadda_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vadda_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vadda_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vadda_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vadda_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vadda_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vadda_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsadd_b(__m128i 
_1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsadd_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsadd_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsadd_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsadd_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsadd_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsadd_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsadd_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsadd_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsadd_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsadd_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsadd_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsadd_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsadd_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsadd_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsadd_du((v2u64)_1, (v2u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vavg_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vavg_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vavg_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vavg_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vavg_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vavg_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vavg_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vavg_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vavg_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vavg_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vavg_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vavg_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vavg_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vavg_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vavg_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vavg_du((v2u64)_1, (v2u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vavgr_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vavgr_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ 
__lsx_vavgr_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vavgr_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vavgr_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vavgr_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vavgr_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vavgr_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vavgr_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vavgr_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vavgr_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vavgr_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vavgr_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vavgr_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vavgr_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vavgr_du((v2u64)_1, (v2u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssub_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssub_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssub_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssub_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssub_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssub_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssub_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssub_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssub_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssub_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssub_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssub_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssub_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssub_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssub_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssub_du((v2u64)_1, (v2u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vabsd_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vabsd_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vabsd_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vabsd_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i -+ __lsx_vabsd_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vabsd_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vabsd_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vabsd_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vabsd_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vabsd_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vabsd_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vabsd_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vabsd_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vabsd_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vabsd_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vabsd_du((v2u64)_1, (v2u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmul_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmul_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmul_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmul_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmul_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmul_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmul_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmul_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmadd_b(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmadd_b((v16i8)_1, (v16i8)_2, (v16i8)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmadd_h(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmadd_h((v8i16)_1, (v8i16)_2, (v8i16)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmadd_w(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmadd_w((v4i32)_1, (v4i32)_2, (v4i32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmadd_d(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmadd_d((v2i64)_1, (v2i64)_2, (v2i64)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmsub_b(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmsub_b((v16i8)_1, (v16i8)_2, (v16i8)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmsub_h(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmsub_h((v8i16)_1, (v8i16)_2, (v8i16)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ 
__lsx_vmsub_w(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmsub_w((v4i32)_1, (v4i32)_2, (v4i32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmsub_d(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmsub_d((v2i64)_1, (v2i64)_2, (v2i64)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vdiv_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vdiv_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vdiv_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vdiv_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vdiv_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vdiv_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vdiv_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vdiv_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vdiv_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vdiv_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vdiv_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vdiv_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vdiv_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vdiv_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vdiv_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vdiv_du((v2u64)_1, (v2u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vhaddw_h_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vhaddw_h_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vhaddw_w_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vhaddw_w_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vhaddw_d_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vhaddw_d_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vhaddw_hu_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vhaddw_hu_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vhaddw_wu_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vhaddw_wu_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vhaddw_du_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vhaddw_du_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vhsubw_h_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vhsubw_h_b((v16i8)_1, 
(v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vhsubw_w_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vhsubw_w_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vhsubw_d_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vhsubw_d_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vhsubw_hu_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vhsubw_hu_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vhsubw_wu_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vhsubw_wu_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vhsubw_du_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vhsubw_du_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmod_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmod_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmod_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmod_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmod_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmod_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmod_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmod_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmod_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmod_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmod_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmod_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmod_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmod_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmod_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmod_du((v2u64)_1, (v2u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vreplve_b(__m128i _1, int _2) { -+ return (__m128i)__builtin_lsx_vreplve_b((v16i8)_1, (int)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vreplve_h(__m128i _1, int _2) { -+ return (__m128i)__builtin_lsx_vreplve_h((v8i16)_1, (int)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vreplve_w(__m128i _1, int _2) { -+ return (__m128i)__builtin_lsx_vreplve_w((v4i32)_1, (int)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vreplve_d(__m128i _1, int _2) { -+ return 
(__m128i)__builtin_lsx_vreplve_d((v2i64)_1, (int)_2); -+} -+ -+#define __lsx_vreplvei_b(/*__m128i*/ _1, /*ui4*/ _2) \ -+ ((__m128i)__builtin_lsx_vreplvei_b((v16i8)(_1), (_2))) -+ -+#define __lsx_vreplvei_h(/*__m128i*/ _1, /*ui3*/ _2) \ -+ ((__m128i)__builtin_lsx_vreplvei_h((v8i16)(_1), (_2))) -+ -+#define __lsx_vreplvei_w(/*__m128i*/ _1, /*ui2*/ _2) \ -+ ((__m128i)__builtin_lsx_vreplvei_w((v4i32)(_1), (_2))) -+ -+#define __lsx_vreplvei_d(/*__m128i*/ _1, /*ui1*/ _2) \ -+ ((__m128i)__builtin_lsx_vreplvei_d((v2i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpickev_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vpickev_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpickev_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vpickev_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpickev_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vpickev_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpickev_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vpickev_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpickod_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vpickod_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpickod_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vpickod_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpickod_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vpickod_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpickod_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vpickod_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vilvh_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vilvh_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vilvh_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vilvh_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vilvh_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vilvh_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vilvh_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vilvh_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vilvl_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vilvl_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vilvl_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vilvl_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vilvl_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vilvl_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vilvl_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vilvl_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpackev_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vpackev_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpackev_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vpackev_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpackev_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vpackev_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpackev_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vpackev_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpackod_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vpackod_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpackod_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vpackod_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpackod_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vpackod_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpackod_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vpackod_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vshuf_h(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vshuf_h((v8i16)_1, (v8i16)_2, (v8i16)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vshuf_w(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vshuf_w((v4i32)_1, (v4i32)_2, (v4i32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vshuf_d(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vshuf_d((v2i64)_1, (v2i64)_2, (v2i64)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vand_v(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vand_v((v16u8)_1, (v16u8)_2); -+} -+ -+#define __lsx_vandi_b(/*__m128i*/ _1, /*ui8*/ _2) \ -+ ((__m128i)__builtin_lsx_vandi_b((v16u8)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vor_v(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vor_v((v16u8)_1, (v16u8)_2); -+} -+ -+#define __lsx_vori_b(/*__m128i*/ _1, /*ui8*/ _2) \ -+ ((__m128i)__builtin_lsx_vori_b((v16u8)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ 
__lsx_vnor_v(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vnor_v((v16u8)_1, (v16u8)_2); -+} -+ -+#define __lsx_vnori_b(/*__m128i*/ _1, /*ui8*/ _2) \ -+ ((__m128i)__builtin_lsx_vnori_b((v16u8)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vxor_v(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vxor_v((v16u8)_1, (v16u8)_2); -+} -+ -+#define __lsx_vxori_b(/*__m128i*/ _1, /*ui8*/ _2) \ -+ ((__m128i)__builtin_lsx_vxori_b((v16u8)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vbitsel_v(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vbitsel_v((v16u8)_1, (v16u8)_2, (v16u8)_3); -+} -+ -+#define __lsx_vbitseli_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ -+ ((__m128i)__builtin_lsx_vbitseli_b((v16u8)(_1), (v16u8)(_2), (_3))) -+ -+#define __lsx_vshuf4i_b(/*__m128i*/ _1, /*ui8*/ _2) \ -+ ((__m128i)__builtin_lsx_vshuf4i_b((v16i8)(_1), (_2))) -+ -+#define __lsx_vshuf4i_h(/*__m128i*/ _1, /*ui8*/ _2) \ -+ ((__m128i)__builtin_lsx_vshuf4i_h((v8i16)(_1), (_2))) -+ -+#define __lsx_vshuf4i_w(/*__m128i*/ _1, /*ui8*/ _2) \ -+ ((__m128i)__builtin_lsx_vshuf4i_w((v4i32)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vreplgr2vr_b(int _1) { -+ return (__m128i)__builtin_lsx_vreplgr2vr_b((int)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vreplgr2vr_h(int _1) { -+ return (__m128i)__builtin_lsx_vreplgr2vr_h((int)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vreplgr2vr_w(int _1) { -+ return (__m128i)__builtin_lsx_vreplgr2vr_w((int)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vreplgr2vr_d(long int _1) { -+ return (__m128i)__builtin_lsx_vreplgr2vr_d((long int)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpcnt_b(__m128i _1) { -+ return (__m128i)__builtin_lsx_vpcnt_b((v16i8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpcnt_h(__m128i _1) { -+ return (__m128i)__builtin_lsx_vpcnt_h((v8i16)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpcnt_w(__m128i _1) { -+ return (__m128i)__builtin_lsx_vpcnt_w((v4i32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vpcnt_d(__m128i _1) { -+ return (__m128i)__builtin_lsx_vpcnt_d((v2i64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vclo_b(__m128i _1) { -+ return (__m128i)__builtin_lsx_vclo_b((v16i8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vclo_h(__m128i _1) { -+ return (__m128i)__builtin_lsx_vclo_h((v8i16)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vclo_w(__m128i _1) { -+ return (__m128i)__builtin_lsx_vclo_w((v4i32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vclo_d(__m128i _1) { -+ return (__m128i)__builtin_lsx_vclo_d((v2i64)_1); -+} -+ -+extern __inline 
-+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vclz_b(__m128i _1) { -+ return (__m128i)__builtin_lsx_vclz_b((v16i8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vclz_h(__m128i _1) { -+ return (__m128i)__builtin_lsx_vclz_h((v8i16)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vclz_w(__m128i _1) { -+ return (__m128i)__builtin_lsx_vclz_w((v4i32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vclz_d(__m128i _1) { -+ return (__m128i)__builtin_lsx_vclz_d((v2i64)_1); -+} -+ -+#define __lsx_vpickve2gr_b(/*__m128i*/ _1, /*ui4*/ _2) \ -+ ((int)__builtin_lsx_vpickve2gr_b((v16i8)(_1), (_2))) -+ -+#define __lsx_vpickve2gr_h(/*__m128i*/ _1, /*ui3*/ _2) \ -+ ((int)__builtin_lsx_vpickve2gr_h((v8i16)(_1), (_2))) -+ -+#define __lsx_vpickve2gr_w(/*__m128i*/ _1, /*ui2*/ _2) \ -+ ((int)__builtin_lsx_vpickve2gr_w((v4i32)(_1), (_2))) -+ -+#define __lsx_vpickve2gr_d(/*__m128i*/ _1, /*ui1*/ _2) \ -+ ((long int)__builtin_lsx_vpickve2gr_d((v2i64)(_1), (_2))) -+ -+#define __lsx_vpickve2gr_bu(/*__m128i*/ _1, /*ui4*/ _2) \ -+ ((unsigned int)__builtin_lsx_vpickve2gr_bu((v16i8)(_1), (_2))) -+ -+#define __lsx_vpickve2gr_hu(/*__m128i*/ _1, /*ui3*/ _2) \ -+ ((unsigned int)__builtin_lsx_vpickve2gr_hu((v8i16)(_1), (_2))) -+ -+#define __lsx_vpickve2gr_wu(/*__m128i*/ _1, /*ui2*/ _2) \ -+ ((unsigned int)__builtin_lsx_vpickve2gr_wu((v4i32)(_1), (_2))) -+ -+#define __lsx_vpickve2gr_du(/*__m128i*/ _1, /*ui1*/ _2) \ -+ ((unsigned long int)__builtin_lsx_vpickve2gr_du((v2i64)(_1), (_2))) -+ -+#define __lsx_vinsgr2vr_b(/*__m128i*/ _1, /*int*/ _2, /*ui4*/ _3) \ -+ ((__m128i)__builtin_lsx_vinsgr2vr_b((v16i8)(_1), (int)(_2), (_3))) -+ -+#define __lsx_vinsgr2vr_h(/*__m128i*/ _1, /*int*/ _2, /*ui3*/ _3) \ -+ ((__m128i)__builtin_lsx_vinsgr2vr_h((v8i16)(_1), (int)(_2), (_3))) -+ -+#define __lsx_vinsgr2vr_w(/*__m128i*/ _1, /*int*/ _2, /*ui2*/ _3) \ -+ ((__m128i)__builtin_lsx_vinsgr2vr_w((v4i32)(_1), (int)(_2), (_3))) -+ -+#define __lsx_vinsgr2vr_d(/*__m128i*/ _1, /*long int*/ _2, /*ui1*/ _3) \ -+ ((__m128i)__builtin_lsx_vinsgr2vr_d((v2i64)(_1), (long int)(_2), (_3))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfadd_s(__m128 _1, __m128 _2) { -+ return (__m128)__builtin_lsx_vfadd_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfadd_d(__m128d _1, __m128d _2) { -+ return (__m128d)__builtin_lsx_vfadd_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfsub_s(__m128 _1, __m128 _2) { -+ return (__m128)__builtin_lsx_vfsub_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfsub_d(__m128d _1, __m128d _2) { -+ return (__m128d)__builtin_lsx_vfsub_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfmul_s(__m128 _1, __m128 _2) { -+ return (__m128)__builtin_lsx_vfmul_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfmul_d(__m128d _1, __m128d _2) { -+ return (__m128d)__builtin_lsx_vfmul_d((v2f64)_1, (v2f64)_2); -+} -+ 
-+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfdiv_s(__m128 _1, __m128 _2) { -+ return (__m128)__builtin_lsx_vfdiv_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfdiv_d(__m128d _1, __m128d _2) { -+ return (__m128d)__builtin_lsx_vfdiv_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcvt_h_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcvt_h_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfcvt_s_d(__m128d _1, __m128d _2) { -+ return (__m128)__builtin_lsx_vfcvt_s_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfmin_s(__m128 _1, __m128 _2) { -+ return (__m128)__builtin_lsx_vfmin_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfmin_d(__m128d _1, __m128d _2) { -+ return (__m128d)__builtin_lsx_vfmin_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfmina_s(__m128 _1, __m128 _2) { -+ return (__m128)__builtin_lsx_vfmina_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfmina_d(__m128d _1, __m128d _2) { -+ return (__m128d)__builtin_lsx_vfmina_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfmax_s(__m128 _1, __m128 _2) { -+ return (__m128)__builtin_lsx_vfmax_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfmax_d(__m128d _1, __m128d _2) { -+ return (__m128d)__builtin_lsx_vfmax_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfmaxa_s(__m128 _1, __m128 _2) { -+ return (__m128)__builtin_lsx_vfmaxa_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfmaxa_d(__m128d _1, __m128d _2) { -+ return (__m128d)__builtin_lsx_vfmaxa_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfclass_s(__m128 _1) { -+ return (__m128i)__builtin_lsx_vfclass_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfclass_d(__m128d _1) { -+ return (__m128i)__builtin_lsx_vfclass_d((v2f64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfsqrt_s(__m128 _1) { -+ return (__m128)__builtin_lsx_vfsqrt_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfsqrt_d(__m128d _1) { -+ return (__m128d)__builtin_lsx_vfsqrt_d((v2f64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfrecip_s(__m128 _1) { -+ return (__m128)__builtin_lsx_vfrecip_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ 
__lsx_vfrecip_d(__m128d _1) { -+ return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfrint_s(__m128 _1) { -+ return (__m128)__builtin_lsx_vfrint_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfrint_d(__m128d _1) { -+ return (__m128d)__builtin_lsx_vfrint_d((v2f64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfrsqrt_s(__m128 _1) { -+ return (__m128)__builtin_lsx_vfrsqrt_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfrsqrt_d(__m128d _1) { -+ return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vflogb_s(__m128 _1) { -+ return (__m128)__builtin_lsx_vflogb_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vflogb_d(__m128d _1) { -+ return (__m128d)__builtin_lsx_vflogb_d((v2f64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfcvth_s_h(__m128i _1) { -+ return (__m128)__builtin_lsx_vfcvth_s_h((v8i16)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfcvth_d_s(__m128 _1) { -+ return (__m128d)__builtin_lsx_vfcvth_d_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfcvtl_s_h(__m128i _1) { -+ return (__m128)__builtin_lsx_vfcvtl_s_h((v8i16)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfcvtl_d_s(__m128 _1) { -+ return (__m128d)__builtin_lsx_vfcvtl_d_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftint_w_s(__m128 _1) { -+ return (__m128i)__builtin_lsx_vftint_w_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftint_l_d(__m128d _1) { -+ return (__m128i)__builtin_lsx_vftint_l_d((v2f64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftint_wu_s(__m128 _1) { -+ return (__m128i)__builtin_lsx_vftint_wu_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftint_lu_d(__m128d _1) { -+ return (__m128i)__builtin_lsx_vftint_lu_d((v2f64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrz_w_s(__m128 _1) { -+ return (__m128i)__builtin_lsx_vftintrz_w_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrz_l_d(__m128d _1) { -+ return (__m128i)__builtin_lsx_vftintrz_l_d((v2f64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrz_wu_s(__m128 _1) { -+ return (__m128i)__builtin_lsx_vftintrz_wu_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrz_lu_d(__m128d _1) { -+ return (__m128i)__builtin_lsx_vftintrz_lu_d((v2f64)_1); -+} -+ 
-+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vffint_s_w(__m128i _1) { -+ return (__m128)__builtin_lsx_vffint_s_w((v4i32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vffint_d_l(__m128i _1) { -+ return (__m128d)__builtin_lsx_vffint_d_l((v2i64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vffint_s_wu(__m128i _1) { -+ return (__m128)__builtin_lsx_vffint_s_wu((v4u32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vffint_d_lu(__m128i _1) { -+ return (__m128d)__builtin_lsx_vffint_d_lu((v2u64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vandn_v(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vandn_v((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vneg_b(__m128i _1) { -+ return (__m128i)__builtin_lsx_vneg_b((v16i8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vneg_h(__m128i _1) { -+ return (__m128i)__builtin_lsx_vneg_h((v8i16)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vneg_w(__m128i _1) { -+ return (__m128i)__builtin_lsx_vneg_w((v4i32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vneg_d(__m128i _1) { -+ return (__m128i)__builtin_lsx_vneg_d((v2i64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmuh_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmuh_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmuh_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmuh_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmuh_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmuh_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmuh_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmuh_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmuh_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmuh_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmuh_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmuh_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmuh_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmuh_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmuh_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmuh_du((v2u64)_1, (v2u64)_2); -+} -+ -+#define __lsx_vsllwil_h_b(/*__m128i*/ _1, /*ui3*/ _2) \ -+ ((__m128i)__builtin_lsx_vsllwil_h_b((v16i8)(_1), (_2))) -+ -+#define __lsx_vsllwil_w_h(/*__m128i*/ 
_1, /*ui4*/ _2) \ -+ ((__m128i)__builtin_lsx_vsllwil_w_h((v8i16)(_1), (_2))) -+ -+#define __lsx_vsllwil_d_w(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vsllwil_d_w((v4i32)(_1), (_2))) -+ -+#define __lsx_vsllwil_hu_bu(/*__m128i*/ _1, /*ui3*/ _2) \ -+ ((__m128i)__builtin_lsx_vsllwil_hu_bu((v16u8)(_1), (_2))) -+ -+#define __lsx_vsllwil_wu_hu(/*__m128i*/ _1, /*ui4*/ _2) \ -+ ((__m128i)__builtin_lsx_vsllwil_wu_hu((v8u16)(_1), (_2))) -+ -+#define __lsx_vsllwil_du_wu(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vsllwil_du_wu((v4u32)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsran_b_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsran_b_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsran_h_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsran_h_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsran_w_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsran_w_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssran_b_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssran_b_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssran_h_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssran_h_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssran_w_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssran_w_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssran_bu_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssran_bu_h((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssran_hu_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssran_hu_w((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssran_wu_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssran_wu_d((v2u64)_1, (v2u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrarn_b_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrarn_b_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrarn_h_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrarn_h_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrarn_w_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrarn_w_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssrarn_b_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssrarn_b_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssrarn_h_w(__m128i _1, __m128i _2) { -+ return 
(__m128i)__builtin_lsx_vssrarn_h_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssrarn_w_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssrarn_w_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssrarn_bu_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssrarn_bu_h((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssrarn_hu_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssrarn_hu_w((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssrarn_wu_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssrarn_wu_d((v2u64)_1, (v2u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrln_b_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrln_b_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrln_h_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrln_h_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrln_w_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrln_w_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssrln_bu_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssrln_bu_h((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssrln_hu_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssrln_hu_w((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssrln_wu_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssrln_wu_d((v2u64)_1, (v2u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrlrn_b_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrlrn_b_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrlrn_h_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrlrn_h_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsrlrn_w_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsrlrn_w_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssrlrn_bu_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssrlrn_bu_h((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssrlrn_hu_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssrlrn_hu_w((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssrlrn_wu_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssrlrn_wu_d((v2u64)_1, (v2u64)_2); -+} -+ 
-+#define __lsx_vfrstpi_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ -+ ((__m128i)__builtin_lsx_vfrstpi_b((v16i8)(_1), (v16i8)(_2), (_3))) -+ -+#define __lsx_vfrstpi_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ -+ ((__m128i)__builtin_lsx_vfrstpi_h((v8i16)(_1), (v8i16)(_2), (_3))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfrstp_b(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vfrstp_b((v16i8)_1, (v16i8)_2, (v16i8)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfrstp_h(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vfrstp_h((v8i16)_1, (v8i16)_2, (v8i16)_3); -+} -+ -+#define __lsx_vshuf4i_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ -+ ((__m128i)__builtin_lsx_vshuf4i_d((v2i64)(_1), (v2i64)(_2), (_3))) -+ -+#define __lsx_vbsrl_v(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vbsrl_v((v16i8)(_1), (_2))) -+ -+#define __lsx_vbsll_v(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vbsll_v((v16i8)(_1), (_2))) -+ -+#define __lsx_vextrins_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ -+ ((__m128i)__builtin_lsx_vextrins_b((v16i8)(_1), (v16i8)(_2), (_3))) -+ -+#define __lsx_vextrins_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ -+ ((__m128i)__builtin_lsx_vextrins_h((v8i16)(_1), (v8i16)(_2), (_3))) -+ -+#define __lsx_vextrins_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ -+ ((__m128i)__builtin_lsx_vextrins_w((v4i32)(_1), (v4i32)(_2), (_3))) -+ -+#define __lsx_vextrins_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ -+ ((__m128i)__builtin_lsx_vextrins_d((v2i64)(_1), (v2i64)(_2), (_3))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmskltz_b(__m128i _1) { -+ return (__m128i)__builtin_lsx_vmskltz_b((v16i8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmskltz_h(__m128i _1) { -+ return (__m128i)__builtin_lsx_vmskltz_h((v8i16)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmskltz_w(__m128i _1) { -+ return (__m128i)__builtin_lsx_vmskltz_w((v4i32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmskltz_d(__m128i _1) { -+ return (__m128i)__builtin_lsx_vmskltz_d((v2i64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsigncov_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsigncov_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsigncov_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsigncov_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsigncov_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsigncov_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsigncov_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsigncov_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfmadd_s(__m128 _1, __m128 _2, __m128 _3) { -+ return (__m128)__builtin_lsx_vfmadd_s((v4f32)_1, 
(v4f32)_2, (v4f32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfmadd_d(__m128d _1, __m128d _2, __m128d _3) { -+ return (__m128d)__builtin_lsx_vfmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfmsub_s(__m128 _1, __m128 _2, __m128 _3) { -+ return (__m128)__builtin_lsx_vfmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfmsub_d(__m128d _1, __m128d _2, __m128d _3) { -+ return (__m128d)__builtin_lsx_vfmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfnmadd_s(__m128 _1, __m128 _2, __m128 _3) { -+ return (__m128)__builtin_lsx_vfnmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfnmadd_d(__m128d _1, __m128d _2, __m128d _3) { -+ return (__m128d)__builtin_lsx_vfnmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfnmsub_s(__m128 _1, __m128 _2, __m128 _3) { -+ return (__m128)__builtin_lsx_vfnmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfnmsub_d(__m128d _1, __m128d _2, __m128d _3) { -+ return (__m128d)__builtin_lsx_vfnmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrne_w_s(__m128 _1) { -+ return (__m128i)__builtin_lsx_vftintrne_w_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrne_l_d(__m128d _1) { -+ return (__m128i)__builtin_lsx_vftintrne_l_d((v2f64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrp_w_s(__m128 _1) { -+ return (__m128i)__builtin_lsx_vftintrp_w_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrp_l_d(__m128d _1) { -+ return (__m128i)__builtin_lsx_vftintrp_l_d((v2f64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrm_w_s(__m128 _1) { -+ return (__m128i)__builtin_lsx_vftintrm_w_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrm_l_d(__m128d _1) { -+ return (__m128i)__builtin_lsx_vftintrm_l_d((v2f64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftint_w_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vftint_w_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vffint_s_l(__m128i _1, __m128i _2) { -+ return (__m128)__builtin_lsx_vffint_s_l((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrz_w_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vftintrz_w_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i -+ __lsx_vftintrp_w_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vftintrp_w_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrm_w_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vftintrm_w_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrne_w_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vftintrne_w_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintl_l_s(__m128 _1) { -+ return (__m128i)__builtin_lsx_vftintl_l_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftinth_l_s(__m128 _1) { -+ return (__m128i)__builtin_lsx_vftinth_l_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vffinth_d_w(__m128i _1) { -+ return (__m128d)__builtin_lsx_vffinth_d_w((v4i32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vffintl_d_w(__m128i _1) { -+ return (__m128d)__builtin_lsx_vffintl_d_w((v4i32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrzl_l_s(__m128 _1) { -+ return (__m128i)__builtin_lsx_vftintrzl_l_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrzh_l_s(__m128 _1) { -+ return (__m128i)__builtin_lsx_vftintrzh_l_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrpl_l_s(__m128 _1) { -+ return (__m128i)__builtin_lsx_vftintrpl_l_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrph_l_s(__m128 _1) { -+ return (__m128i)__builtin_lsx_vftintrph_l_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrml_l_s(__m128 _1) { -+ return (__m128i)__builtin_lsx_vftintrml_l_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrmh_l_s(__m128 _1) { -+ return (__m128i)__builtin_lsx_vftintrmh_l_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrnel_l_s(__m128 _1) { -+ return (__m128i)__builtin_lsx_vftintrnel_l_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vftintrneh_l_s(__m128 _1) { -+ return (__m128i)__builtin_lsx_vftintrneh_l_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfrintrne_s(__m128 _1) { -+ return (__m128)__builtin_lsx_vfrintrne_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfrintrne_d(__m128d _1) { -+ return (__m128d)__builtin_lsx_vfrintrne_d((v2f64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfrintrz_s(__m128 _1) { -+ return (__m128)__builtin_lsx_vfrintrz_s((v4f32)_1); -+} -+ 
-+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfrintrz_d(__m128d _1) { -+ return (__m128d)__builtin_lsx_vfrintrz_d((v2f64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfrintrp_s(__m128 _1) { -+ return (__m128)__builtin_lsx_vfrintrp_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfrintrp_d(__m128d _1) { -+ return (__m128d)__builtin_lsx_vfrintrp_d((v2f64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfrintrm_s(__m128 _1) { -+ return (__m128)__builtin_lsx_vfrintrm_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfrintrm_d(__m128d _1) { -+ return (__m128d)__builtin_lsx_vfrintrm_d((v2f64)_1); -+} -+ -+#define __lsx_vstelm_b(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ -+ ((void)__builtin_lsx_vstelm_b((v16i8)(_1), (void *)(_2), (_3), (_4))) -+ -+#define __lsx_vstelm_h(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ -+ ((void)__builtin_lsx_vstelm_h((v8i16)(_1), (void *)(_2), (_3), (_4))) -+ -+#define __lsx_vstelm_w(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ -+ ((void)__builtin_lsx_vstelm_w((v4i32)(_1), (void *)(_2), (_3), (_4))) -+ -+#define __lsx_vstelm_d(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ -+ ((void)__builtin_lsx_vstelm_d((v2i64)(_1), (void *)(_2), (_3), (_4))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwev_d_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwev_d_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwev_w_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwev_w_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwev_h_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwev_h_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwod_d_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwod_d_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwod_w_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwod_w_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwod_h_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwod_h_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwev_d_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwev_d_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwev_w_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwev_w_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwev_h_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwev_h_bu((v16u8)_1, (v16u8)_2); 
-+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwod_d_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwod_d_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwod_w_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwod_w_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwod_h_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwod_h_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwev_d_wu_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwev_d_wu_w((v4u32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwev_w_hu_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwev_w_hu_h((v8u16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwev_h_bu_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwev_h_bu_b((v16u8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwod_d_wu_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwod_d_wu_w((v4u32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwod_w_hu_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwod_w_hu_h((v8u16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwod_h_bu_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwod_h_bu_b((v16u8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsubwev_d_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsubwev_d_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsubwev_w_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsubwev_w_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsubwev_h_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsubwev_h_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsubwod_d_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsubwod_d_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsubwod_w_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsubwod_w_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsubwod_h_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsubwod_h_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsubwev_d_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsubwev_d_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern 
__inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsubwev_w_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsubwev_w_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsubwev_h_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsubwev_h_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsubwod_d_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsubwod_d_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsubwod_w_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsubwod_w_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsubwod_h_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsubwod_h_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwev_q_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwev_q_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwod_q_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwod_q_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwev_q_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwev_q_du((v2u64)_1, (v2u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwod_q_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwod_q_du((v2u64)_1, (v2u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsubwev_q_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsubwev_q_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsubwod_q_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsubwod_q_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsubwev_q_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsubwev_q_du((v2u64)_1, (v2u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsubwod_q_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsubwod_q_du((v2u64)_1, (v2u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwev_q_du_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwev_q_du_d((v2u64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vaddwod_q_du_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vaddwod_q_du_d((v2u64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwev_d_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwev_d_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwev_w_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwev_w_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwev_h_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwev_h_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwod_d_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwod_d_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwod_w_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwod_w_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwod_h_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwod_h_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwev_d_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwev_d_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwev_w_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwev_w_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwev_h_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwev_h_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwod_d_wu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwod_d_wu((v4u32)_1, (v4u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwod_w_hu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwod_w_hu((v8u16)_1, (v8u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwod_h_bu(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwod_h_bu((v16u8)_1, (v16u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwev_d_wu_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwev_d_wu_w((v4u32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwev_w_hu_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwev_w_hu_h((v8u16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwev_h_bu_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwev_h_bu_b((v16u8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwod_d_wu_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwod_d_wu_w((v4u32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwod_w_hu_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwod_w_hu_h((v8u16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwod_h_bu_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwod_h_bu_b((v16u8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwev_q_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwev_q_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwod_q_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwod_q_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwev_q_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwev_q_du((v2u64)_1, (v2u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwod_q_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwod_q_du((v2u64)_1, (v2u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwev_q_du_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwev_q_du_d((v2u64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmulwod_q_du_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vmulwod_q_du_d((v2u64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vhaddw_q_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vhaddw_q_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vhaddw_qu_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vhaddw_qu_du((v2u64)_1, (v2u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vhsubw_q_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vhsubw_q_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vhsubw_qu_du(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vhsubw_qu_du((v2u64)_1, (v2u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwev_d_w(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwev_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwev_w_h(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwev_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwev_h_b(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwev_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwev_d_wu(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwev_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwev_w_hu(__m128i _1, __m128i _2, __m128i _3) { -+ return 
(__m128i)__builtin_lsx_vmaddwev_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwev_h_bu(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwev_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwod_d_w(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwod_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwod_w_h(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwod_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwod_h_b(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwod_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwod_d_wu(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwod_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwod_w_hu(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwod_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwod_h_bu(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwod_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwev_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwev_d_wu_w((v2i64)_1, (v4u32)_2, -+ (v4i32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwev_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwev_w_hu_h((v4i32)_1, (v8u16)_2, -+ (v8i16)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwev_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwev_h_bu_b((v8i16)_1, (v16u8)_2, -+ (v16i8)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwod_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwod_d_wu_w((v2i64)_1, (v4u32)_2, -+ (v4i32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwod_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwod_w_hu_h((v4i32)_1, (v8u16)_2, -+ (v8i16)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwod_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwod_h_bu_b((v8i16)_1, (v16u8)_2, -+ (v16i8)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwev_q_d(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwev_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); -+} -+ -+extern 
__inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwod_q_d(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwod_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwev_q_du(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwev_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwod_q_du(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwod_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwev_q_du_d(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwev_q_du_d((v2i64)_1, (v2u64)_2, -+ (v2i64)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmaddwod_q_du_d(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vmaddwod_q_du_d((v2i64)_1, (v2u64)_2, -+ (v2i64)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vrotr_b(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vrotr_b((v16i8)_1, (v16i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vrotr_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vrotr_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vrotr_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vrotr_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vrotr_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vrotr_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vadd_q(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vadd_q((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vsub_q(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vsub_q((v2i64)_1, (v2i64)_2); -+} -+ -+#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) \ -+ ((__m128i)__builtin_lsx_vldrepl_b((void const *)(_1), (_2))) -+ -+#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) \ -+ ((__m128i)__builtin_lsx_vldrepl_h((void const *)(_1), (_2))) -+ -+#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) \ -+ ((__m128i)__builtin_lsx_vldrepl_w((void const *)(_1), (_2))) -+ -+#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) \ -+ ((__m128i)__builtin_lsx_vldrepl_d((void const *)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmskgez_b(__m128i _1) { -+ return (__m128i)__builtin_lsx_vmskgez_b((v16i8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vmsknz_b(__m128i _1) { -+ return (__m128i)__builtin_lsx_vmsknz_b((v16i8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vexth_h_b(__m128i _1) { -+ return (__m128i)__builtin_lsx_vexth_h_b((v16i8)_1); -+} -+ -+extern 
__inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vexth_w_h(__m128i _1) { -+ return (__m128i)__builtin_lsx_vexth_w_h((v8i16)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vexth_d_w(__m128i _1) { -+ return (__m128i)__builtin_lsx_vexth_d_w((v4i32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vexth_q_d(__m128i _1) { -+ return (__m128i)__builtin_lsx_vexth_q_d((v2i64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vexth_hu_bu(__m128i _1) { -+ return (__m128i)__builtin_lsx_vexth_hu_bu((v16u8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vexth_wu_hu(__m128i _1) { -+ return (__m128i)__builtin_lsx_vexth_wu_hu((v8u16)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vexth_du_wu(__m128i _1) { -+ return (__m128i)__builtin_lsx_vexth_du_wu((v4u32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vexth_qu_du(__m128i _1) { -+ return (__m128i)__builtin_lsx_vexth_qu_du((v2u64)_1); -+} -+ -+#define __lsx_vrotri_b(/*__m128i*/ _1, /*ui3*/ _2) \ -+ ((__m128i)__builtin_lsx_vrotri_b((v16i8)(_1), (_2))) -+ -+#define __lsx_vrotri_h(/*__m128i*/ _1, /*ui4*/ _2) \ -+ ((__m128i)__builtin_lsx_vrotri_h((v8i16)(_1), (_2))) -+ -+#define __lsx_vrotri_w(/*__m128i*/ _1, /*ui5*/ _2) \ -+ ((__m128i)__builtin_lsx_vrotri_w((v4i32)(_1), (_2))) -+ -+#define __lsx_vrotri_d(/*__m128i*/ _1, /*ui6*/ _2) \ -+ ((__m128i)__builtin_lsx_vrotri_d((v2i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vextl_q_d(__m128i _1) { -+ return (__m128i)__builtin_lsx_vextl_q_d((v2i64)_1); -+} -+ -+#define __lsx_vsrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ -+ ((__m128i)__builtin_lsx_vsrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) -+ -+#define __lsx_vsrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ -+ ((__m128i)__builtin_lsx_vsrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) -+ -+#define __lsx_vsrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ -+ ((__m128i)__builtin_lsx_vsrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) -+ -+#define __lsx_vsrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ -+ ((__m128i)__builtin_lsx_vsrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) -+ -+#define __lsx_vsrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ -+ ((__m128i)__builtin_lsx_vsrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) -+ -+#define __lsx_vsrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ -+ ((__m128i)__builtin_lsx_vsrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) -+ -+#define __lsx_vsrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ -+ ((__m128i)__builtin_lsx_vsrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) -+ -+#define __lsx_vsrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ -+ ((__m128i)__builtin_lsx_vsrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) -+ -+#define __lsx_vssrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) -+ -+#define __lsx_vssrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) -+ -+#define __lsx_vssrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ -+ 
((__m128i)__builtin_lsx_vssrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) -+ -+#define __lsx_vssrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) -+ -+#define __lsx_vssrlni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrlni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) -+ -+#define __lsx_vssrlni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrlni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) -+ -+#define __lsx_vssrlni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrlni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) -+ -+#define __lsx_vssrlni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrlni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) -+ -+#define __lsx_vssrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) -+ -+#define __lsx_vssrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) -+ -+#define __lsx_vssrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) -+ -+#define __lsx_vssrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) -+ -+#define __lsx_vssrlrni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrlrni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) -+ -+#define __lsx_vssrlrni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrlrni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) -+ -+#define __lsx_vssrlrni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrlrni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) -+ -+#define __lsx_vssrlrni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrlrni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) -+ -+#define __lsx_vsrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ -+ ((__m128i)__builtin_lsx_vsrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) -+ -+#define __lsx_vsrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ -+ ((__m128i)__builtin_lsx_vsrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) -+ -+#define __lsx_vsrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ -+ ((__m128i)__builtin_lsx_vsrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) -+ -+#define __lsx_vsrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ -+ ((__m128i)__builtin_lsx_vsrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) -+ -+#define __lsx_vsrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ -+ ((__m128i)__builtin_lsx_vsrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) -+ -+#define __lsx_vsrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ -+ ((__m128i)__builtin_lsx_vsrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) -+ -+#define __lsx_vsrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ -+ ((__m128i)__builtin_lsx_vsrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) -+ -+#define __lsx_vsrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ -+ ((__m128i)__builtin_lsx_vsrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) -+ -+#define __lsx_vssrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) -+ -+#define __lsx_vssrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) -+ -+#define 
__lsx_vssrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) -+ -+#define __lsx_vssrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) -+ -+#define __lsx_vssrani_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrani_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) -+ -+#define __lsx_vssrani_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrani_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) -+ -+#define __lsx_vssrani_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrani_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) -+ -+#define __lsx_vssrani_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrani_du_q((v2u64)(_1), (v2i64)(_2), (_3))) -+ -+#define __lsx_vssrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) -+ -+#define __lsx_vssrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) -+ -+#define __lsx_vssrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) -+ -+#define __lsx_vssrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) -+ -+#define __lsx_vssrarni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrarni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) -+ -+#define __lsx_vssrarni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrarni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) -+ -+#define __lsx_vssrarni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrarni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) -+ -+#define __lsx_vssrarni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ -+ ((__m128i)__builtin_lsx_vssrarni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) -+ -+#define __lsx_vpermi_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ -+ ((__m128i)__builtin_lsx_vpermi_w((v4i32)(_1), (v4i32)(_2), (_3))) -+ -+#define __lsx_vld(/*void **/ _1, /*si12*/ _2) \ -+ ((__m128i)__builtin_lsx_vld((void const *)(_1), (_2))) -+ -+#define __lsx_vst(/*__m128i*/ _1, /*void **/ _2, /*si12*/ _3) \ -+ ((void)__builtin_lsx_vst((v16i8)(_1), (void *)(_2), (_3))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssrlrn_b_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssrlrn_b_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssrlrn_h_w(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssrlrn_h_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssrlrn_w_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssrlrn_w_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssrln_b_h(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssrln_b_h((v8i16)_1, (v8i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssrln_h_w(__m128i _1, __m128i _2) { -+ return 
(__m128i)__builtin_lsx_vssrln_h_w((v4i32)_1, (v4i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vssrln_w_d(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vssrln_w_d((v2i64)_1, (v2i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vorn_v(__m128i _1, __m128i _2) { -+ return (__m128i)__builtin_lsx_vorn_v((v16i8)_1, (v16i8)_2); -+} -+ -+#define __lsx_vldi(/*i13*/ _1) ((__m128i)__builtin_lsx_vldi((_1))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) { -+ return (__m128i)__builtin_lsx_vshuf_b((v16i8)_1, (v16i8)_2, (v16i8)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vldx(void const *_1, long int _2) { -+ return (__m128i)__builtin_lsx_vldx((void const *)_1, (long int)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void -+ __lsx_vstx(__m128i _1, void *_2, long int _3) { -+ return (void)__builtin_lsx_vstx((v16i8)_1, (void *)_2, (long int)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vextl_qu_du(__m128i _1) { -+ return (__m128i)__builtin_lsx_vextl_qu_du((v2u64)_1); -+} -+ -+#define __lsx_bnz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_b((v16u8)(_1))) -+ -+#define __lsx_bnz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_d((v2u64)(_1))) -+ -+#define __lsx_bnz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_h((v8u16)(_1))) -+ -+#define __lsx_bnz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_v((v16u8)(_1))) -+ -+#define __lsx_bnz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_w((v4u32)(_1))) -+ -+#define __lsx_bz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bz_b((v16u8)(_1))) -+ -+#define __lsx_bz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bz_d((v2u64)(_1))) -+ -+#define __lsx_bz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bz_h((v8u16)(_1))) -+ -+#define __lsx_bz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bz_v((v16u8)(_1))) -+ -+#define __lsx_bz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bz_w((v4u32)(_1))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_caf_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_caf_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_caf_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_caf_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_ceq_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_ceq_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_ceq_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_ceq_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_cle_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_cle_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_cle_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_cle_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_clt_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_clt_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_clt_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_clt_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_cne_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_cne_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_cne_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_cne_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_cor_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_cor_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_cor_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_cor_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_cueq_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_cueq_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_cueq_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_cueq_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_cule_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_cule_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_cule_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_cule_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_cult_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_cult_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_cult_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_cult_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_cun_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_cun_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_cune_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_cune_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_cune_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_cune_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_cun_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_cun_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m128i -+ __lsx_vfcmp_saf_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_saf_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_saf_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_saf_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_seq_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_seq_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_seq_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_seq_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_sle_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_sle_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_sle_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_sle_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_slt_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_slt_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_slt_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_slt_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_sne_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_sne_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_sne_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_sne_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_sor_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_sor_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_sor_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_sor_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_sueq_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_sueq_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_sueq_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_sueq_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_sule_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_sule_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_sule_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_sule_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_sult_d(__m128d _1, __m128d _2) { -+ return 
(__m128i)__builtin_lsx_vfcmp_sult_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_sult_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_sult_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_sun_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_sun_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_sune_d(__m128d _1, __m128d _2) { -+ return (__m128i)__builtin_lsx_vfcmp_sune_d((v2f64)_1, (v2f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_sune_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_sune_s((v4f32)_1, (v4f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i -+ __lsx_vfcmp_sun_s(__m128 _1, __m128 _2) { -+ return (__m128i)__builtin_lsx_vfcmp_sun_s((v4f32)_1, (v4f32)_2); -+} -+ -+#define __lsx_vrepli_b(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_b((_1))) -+ -+#define __lsx_vrepli_d(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_d((_1))) -+ -+#define __lsx_vrepli_h(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_h((_1))) -+ -+#define __lsx_vrepli_w(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_w((_1))) -+ -+#endif /* defined(__loongarch_sx) */ -+#endif /* _LOONGSON_SXINTRIN_H */ -diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp -index a8416bf4de92..d1b015502725 100644 ---- a/clang/lib/Sema/SemaChecking.cpp -+++ b/clang/lib/Sema/SemaChecking.cpp -@@ -3826,6 +3826,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, - switch (BuiltinID) { - default: - break; -+ // Basic intrinsics. - case LoongArch::BI__builtin_loongarch_cacop_d: - case LoongArch::BI__builtin_loongarch_cacop_w: { - SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(5)); -@@ -3854,8 +3855,234 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, - case LoongArch::BI__builtin_loongarch_movfcsr2gr: - case LoongArch::BI__builtin_loongarch_movgr2fcsr: - return SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(2)); -- } - -+ // LSX intrinsics. 
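The case list that follows maps each LSX builtin's immediate operand onto SemaBuiltinConstantArgRange(TheCall, ArgIdx, Low, High), which fails the call unless that argument is an integer constant expression within [Low, High]. As a hedged illustration of the user-visible effect (the intrinsic names come from the lsxintrin.h header added above; the build line is an assumption):

/* Illustrative only; assumed build:
   clang --target=loongarch64-unknown-linux-gnu -mlsx -c shifts.c */
#include <lsxintrin.h>

__m128i shift_ok(__m128i v) {
  return __lsx_vslli_b(v, 7); /* ui3 immediate: 7 lies in [0, 7], accepted */
}

/* Both of these are rejected by the checks below:
   __lsx_vslli_b(v, 8);   out-of-range constant
   __lsx_vslli_b(v, n);   not a constant expression; use the vector-count
                          form __lsx_vsll_b(v, counts) for variable shifts */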
-+ case LoongArch::BI__builtin_lsx_vbitclri_b: -+ case LoongArch::BI__builtin_lsx_vbitrevi_b: -+ case LoongArch::BI__builtin_lsx_vbitseti_b: -+ case LoongArch::BI__builtin_lsx_vsat_b: -+ case LoongArch::BI__builtin_lsx_vsat_bu: -+ case LoongArch::BI__builtin_lsx_vslli_b: -+ case LoongArch::BI__builtin_lsx_vsrai_b: -+ case LoongArch::BI__builtin_lsx_vsrari_b: -+ case LoongArch::BI__builtin_lsx_vsrli_b: -+ case LoongArch::BI__builtin_lsx_vsllwil_h_b: -+ case LoongArch::BI__builtin_lsx_vsllwil_hu_bu: -+ case LoongArch::BI__builtin_lsx_vrotri_b: -+ case LoongArch::BI__builtin_lsx_vsrlri_b: -+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); -+ case LoongArch::BI__builtin_lsx_vbitclri_h: -+ case LoongArch::BI__builtin_lsx_vbitrevi_h: -+ case LoongArch::BI__builtin_lsx_vbitseti_h: -+ case LoongArch::BI__builtin_lsx_vsat_h: -+ case LoongArch::BI__builtin_lsx_vsat_hu: -+ case LoongArch::BI__builtin_lsx_vslli_h: -+ case LoongArch::BI__builtin_lsx_vsrai_h: -+ case LoongArch::BI__builtin_lsx_vsrari_h: -+ case LoongArch::BI__builtin_lsx_vsrli_h: -+ case LoongArch::BI__builtin_lsx_vsllwil_w_h: -+ case LoongArch::BI__builtin_lsx_vsllwil_wu_hu: -+ case LoongArch::BI__builtin_lsx_vrotri_h: -+ case LoongArch::BI__builtin_lsx_vsrlri_h: -+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); -+ case LoongArch::BI__builtin_lsx_vssrarni_b_h: -+ case LoongArch::BI__builtin_lsx_vssrarni_bu_h: -+ case LoongArch::BI__builtin_lsx_vssrani_b_h: -+ case LoongArch::BI__builtin_lsx_vssrani_bu_h: -+ case LoongArch::BI__builtin_lsx_vsrarni_b_h: -+ case LoongArch::BI__builtin_lsx_vsrlni_b_h: -+ case LoongArch::BI__builtin_lsx_vsrlrni_b_h: -+ case LoongArch::BI__builtin_lsx_vssrlni_b_h: -+ case LoongArch::BI__builtin_lsx_vssrlni_bu_h: -+ case LoongArch::BI__builtin_lsx_vssrlrni_b_h: -+ case LoongArch::BI__builtin_lsx_vssrlrni_bu_h: -+ case LoongArch::BI__builtin_lsx_vsrani_b_h: -+ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); -+ case LoongArch::BI__builtin_lsx_vslei_bu: -+ case LoongArch::BI__builtin_lsx_vslei_hu: -+ case LoongArch::BI__builtin_lsx_vslei_wu: -+ case LoongArch::BI__builtin_lsx_vslei_du: -+ case LoongArch::BI__builtin_lsx_vslti_bu: -+ case LoongArch::BI__builtin_lsx_vslti_hu: -+ case LoongArch::BI__builtin_lsx_vslti_wu: -+ case LoongArch::BI__builtin_lsx_vslti_du: -+ case LoongArch::BI__builtin_lsx_vmaxi_bu: -+ case LoongArch::BI__builtin_lsx_vmaxi_hu: -+ case LoongArch::BI__builtin_lsx_vmaxi_wu: -+ case LoongArch::BI__builtin_lsx_vmaxi_du: -+ case LoongArch::BI__builtin_lsx_vmini_bu: -+ case LoongArch::BI__builtin_lsx_vmini_hu: -+ case LoongArch::BI__builtin_lsx_vmini_wu: -+ case LoongArch::BI__builtin_lsx_vmini_du: -+ case LoongArch::BI__builtin_lsx_vaddi_bu: -+ case LoongArch::BI__builtin_lsx_vaddi_hu: -+ case LoongArch::BI__builtin_lsx_vaddi_wu: -+ case LoongArch::BI__builtin_lsx_vaddi_du: -+ case LoongArch::BI__builtin_lsx_vbitclri_w: -+ case LoongArch::BI__builtin_lsx_vbitrevi_w: -+ case LoongArch::BI__builtin_lsx_vbitseti_w: -+ case LoongArch::BI__builtin_lsx_vsat_w: -+ case LoongArch::BI__builtin_lsx_vsat_wu: -+ case LoongArch::BI__builtin_lsx_vslli_w: -+ case LoongArch::BI__builtin_lsx_vsrai_w: -+ case LoongArch::BI__builtin_lsx_vsrari_w: -+ case LoongArch::BI__builtin_lsx_vsrli_w: -+ case LoongArch::BI__builtin_lsx_vsllwil_d_w: -+ case LoongArch::BI__builtin_lsx_vsllwil_du_wu: -+ case LoongArch::BI__builtin_lsx_vsrlri_w: -+ case LoongArch::BI__builtin_lsx_vrotri_w: -+ case LoongArch::BI__builtin_lsx_vsubi_bu: -+ case LoongArch::BI__builtin_lsx_vsubi_hu: -+ case 
LoongArch::BI__builtin_lsx_vbsrl_v: -+ case LoongArch::BI__builtin_lsx_vbsll_v: -+ case LoongArch::BI__builtin_lsx_vsubi_wu: -+ case LoongArch::BI__builtin_lsx_vsubi_du: -+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); -+ case LoongArch::BI__builtin_lsx_vssrarni_h_w: -+ case LoongArch::BI__builtin_lsx_vssrarni_hu_w: -+ case LoongArch::BI__builtin_lsx_vssrani_h_w: -+ case LoongArch::BI__builtin_lsx_vssrani_hu_w: -+ case LoongArch::BI__builtin_lsx_vsrarni_h_w: -+ case LoongArch::BI__builtin_lsx_vsrani_h_w: -+ case LoongArch::BI__builtin_lsx_vfrstpi_b: -+ case LoongArch::BI__builtin_lsx_vfrstpi_h: -+ case LoongArch::BI__builtin_lsx_vsrlni_h_w: -+ case LoongArch::BI__builtin_lsx_vsrlrni_h_w: -+ case LoongArch::BI__builtin_lsx_vssrlni_h_w: -+ case LoongArch::BI__builtin_lsx_vssrlni_hu_w: -+ case LoongArch::BI__builtin_lsx_vssrlrni_h_w: -+ case LoongArch::BI__builtin_lsx_vssrlrni_hu_w: -+ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); -+ case LoongArch::BI__builtin_lsx_vbitclri_d: -+ case LoongArch::BI__builtin_lsx_vbitrevi_d: -+ case LoongArch::BI__builtin_lsx_vbitseti_d: -+ case LoongArch::BI__builtin_lsx_vsat_d: -+ case LoongArch::BI__builtin_lsx_vsat_du: -+ case LoongArch::BI__builtin_lsx_vslli_d: -+ case LoongArch::BI__builtin_lsx_vsrai_d: -+ case LoongArch::BI__builtin_lsx_vsrli_d: -+ case LoongArch::BI__builtin_lsx_vsrari_d: -+ case LoongArch::BI__builtin_lsx_vrotri_d: -+ case LoongArch::BI__builtin_lsx_vsrlri_d: -+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 63); -+ case LoongArch::BI__builtin_lsx_vssrarni_w_d: -+ case LoongArch::BI__builtin_lsx_vssrarni_wu_d: -+ case LoongArch::BI__builtin_lsx_vssrani_w_d: -+ case LoongArch::BI__builtin_lsx_vssrani_wu_d: -+ case LoongArch::BI__builtin_lsx_vsrarni_w_d: -+ case LoongArch::BI__builtin_lsx_vsrlni_w_d: -+ case LoongArch::BI__builtin_lsx_vsrlrni_w_d: -+ case LoongArch::BI__builtin_lsx_vssrlni_w_d: -+ case LoongArch::BI__builtin_lsx_vssrlni_wu_d: -+ case LoongArch::BI__builtin_lsx_vssrlrni_w_d: -+ case LoongArch::BI__builtin_lsx_vssrlrni_wu_d: -+ case LoongArch::BI__builtin_lsx_vsrani_w_d: -+ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63); -+ case LoongArch::BI__builtin_lsx_vssrarni_d_q: -+ case LoongArch::BI__builtin_lsx_vssrarni_du_q: -+ case LoongArch::BI__builtin_lsx_vssrani_d_q: -+ case LoongArch::BI__builtin_lsx_vssrani_du_q: -+ case LoongArch::BI__builtin_lsx_vsrarni_d_q: -+ case LoongArch::BI__builtin_lsx_vssrlni_d_q: -+ case LoongArch::BI__builtin_lsx_vssrlni_du_q: -+ case LoongArch::BI__builtin_lsx_vssrlrni_d_q: -+ case LoongArch::BI__builtin_lsx_vssrlrni_du_q: -+ case LoongArch::BI__builtin_lsx_vsrani_d_q: -+ case LoongArch::BI__builtin_lsx_vsrlrni_d_q: -+ case LoongArch::BI__builtin_lsx_vsrlni_d_q: -+ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 127); -+ case LoongArch::BI__builtin_lsx_vseqi_b: -+ case LoongArch::BI__builtin_lsx_vseqi_h: -+ case LoongArch::BI__builtin_lsx_vseqi_w: -+ case LoongArch::BI__builtin_lsx_vseqi_d: -+ case LoongArch::BI__builtin_lsx_vslti_b: -+ case LoongArch::BI__builtin_lsx_vslti_h: -+ case LoongArch::BI__builtin_lsx_vslti_w: -+ case LoongArch::BI__builtin_lsx_vslti_d: -+ case LoongArch::BI__builtin_lsx_vslei_b: -+ case LoongArch::BI__builtin_lsx_vslei_h: -+ case LoongArch::BI__builtin_lsx_vslei_w: -+ case LoongArch::BI__builtin_lsx_vslei_d: -+ case LoongArch::BI__builtin_lsx_vmaxi_b: -+ case LoongArch::BI__builtin_lsx_vmaxi_h: -+ case LoongArch::BI__builtin_lsx_vmaxi_w: -+ case LoongArch::BI__builtin_lsx_vmaxi_d: -+ case LoongArch::BI__builtin_lsx_vmini_b: -+ case 
LoongArch::BI__builtin_lsx_vmini_h: -+ case LoongArch::BI__builtin_lsx_vmini_w: -+ case LoongArch::BI__builtin_lsx_vmini_d: -+ return SemaBuiltinConstantArgRange(TheCall, 1, -16, 15); -+ case LoongArch::BI__builtin_lsx_vandi_b: -+ case LoongArch::BI__builtin_lsx_vnori_b: -+ case LoongArch::BI__builtin_lsx_vori_b: -+ case LoongArch::BI__builtin_lsx_vshuf4i_b: -+ case LoongArch::BI__builtin_lsx_vshuf4i_h: -+ case LoongArch::BI__builtin_lsx_vshuf4i_w: -+ case LoongArch::BI__builtin_lsx_vxori_b: -+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 255); -+ case LoongArch::BI__builtin_lsx_vbitseli_b: -+ case LoongArch::BI__builtin_lsx_vshuf4i_d: -+ case LoongArch::BI__builtin_lsx_vextrins_b: -+ case LoongArch::BI__builtin_lsx_vextrins_h: -+ case LoongArch::BI__builtin_lsx_vextrins_w: -+ case LoongArch::BI__builtin_lsx_vextrins_d: -+ case LoongArch::BI__builtin_lsx_vpermi_w: -+ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 255); -+ case LoongArch::BI__builtin_lsx_vpickve2gr_b: -+ case LoongArch::BI__builtin_lsx_vpickve2gr_bu: -+ case LoongArch::BI__builtin_lsx_vreplvei_b: -+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); -+ case LoongArch::BI__builtin_lsx_vinsgr2vr_b: -+ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); -+ case LoongArch::BI__builtin_lsx_vpickve2gr_h: -+ case LoongArch::BI__builtin_lsx_vpickve2gr_hu: -+ case LoongArch::BI__builtin_lsx_vreplvei_h: -+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); -+ case LoongArch::BI__builtin_lsx_vinsgr2vr_h: -+ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); -+ case LoongArch::BI__builtin_lsx_vpickve2gr_w: -+ case LoongArch::BI__builtin_lsx_vpickve2gr_wu: -+ case LoongArch::BI__builtin_lsx_vreplvei_w: -+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); -+ case LoongArch::BI__builtin_lsx_vinsgr2vr_w: -+ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); -+ case LoongArch::BI__builtin_lsx_vpickve2gr_d: -+ case LoongArch::BI__builtin_lsx_vpickve2gr_du: -+ case LoongArch::BI__builtin_lsx_vreplvei_d: -+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); -+ case LoongArch::BI__builtin_lsx_vinsgr2vr_d: -+ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 1); -+ case LoongArch::BI__builtin_lsx_vstelm_b: -+ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || -+ SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); -+ case LoongArch::BI__builtin_lsx_vstelm_h: -+ return SemaBuiltinConstantArgRange(TheCall, 2, -256, 254) || -+ SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); -+ case LoongArch::BI__builtin_lsx_vstelm_w: -+ return SemaBuiltinConstantArgRange(TheCall, 2, -512, 508) || -+ SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); -+ case LoongArch::BI__builtin_lsx_vstelm_d: -+ return SemaBuiltinConstantArgRange(TheCall, 2, -1024, 1016) || -+ SemaBuiltinConstantArgRange(TheCall, 3, 0, 1); -+ case LoongArch::BI__builtin_lsx_vldrepl_b: -+ case LoongArch::BI__builtin_lsx_vld: -+ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2047); -+ case LoongArch::BI__builtin_lsx_vldrepl_h: -+ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2046); -+ case LoongArch::BI__builtin_lsx_vldrepl_w: -+ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2044); -+ case LoongArch::BI__builtin_lsx_vldrepl_d: -+ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2040); -+ case LoongArch::BI__builtin_lsx_vst: -+ return SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); -+ case LoongArch::BI__builtin_lsx_vldi: -+ return SemaBuiltinConstantArgRange(TheCall, 0, -4096, 4095); -+ case LoongArch::BI__builtin_lsx_vrepli_b: -+ 
case LoongArch::BI__builtin_lsx_vrepli_h: -+ case LoongArch::BI__builtin_lsx_vrepli_w: -+ case LoongArch::BI__builtin_lsx_vrepli_d: -+ return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); -+ } - return false; - } - --- -2.20.1 - - -From c2191962d5c7fb671f1caa7d868cf985ecb1ee5b Mon Sep 17 00:00:00 2001 -From: licongtian -Date: Wed, 25 Oct 2023 17:44:06 +0800 -Subject: [PATCH 5/8] [Clang][LoongArch] Support the builtin functions for LASX - -This patch does the following work: -- Define the builtin functions for LASX -- Add the header files lasxintrin.h - -(cherry picked from commit a4005e729c8d9dba9ba19f3ce4ad5b60e64dc467) ---- - .../include/clang/Basic/BuiltinsLoongArch.def | 3 + - .../clang/Basic/BuiltinsLoongArchLASX.def | 982 +++++ - clang/lib/Headers/CMakeLists.txt | 1 + - clang/lib/Headers/lasxintrin.h | 3860 +++++++++++++++++ - clang/lib/Sema/SemaChecking.cpp | 227 + - 5 files changed, 5073 insertions(+) - create mode 100644 clang/include/clang/Basic/BuiltinsLoongArchLASX.def - create mode 100644 clang/lib/Headers/lasxintrin.h - -diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def -index 9ec19c31095a..95359a3fdc71 100644 ---- a/clang/include/clang/Basic/BuiltinsLoongArch.def -+++ b/clang/include/clang/Basic/BuiltinsLoongArch.def -@@ -21,5 +21,8 @@ - // Definition of LSX builtins. - #include "clang/Basic/BuiltinsLoongArchLSX.def" - -+// Definition of LASX builtins. -+#include "clang/Basic/BuiltinsLoongArchLASX.def" -+ - #undef BUILTIN - #undef TARGET_BUILTIN -diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def -new file mode 100644 -index 000000000000..3de200f665b6 ---- /dev/null -+++ b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def -@@ -0,0 +1,982 @@ -+//=BuiltinsLoongArchLASX.def - LoongArch Builtin function database -- C++ -*-=// -+// -+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -+// See https://llvm.org/LICENSE.txt for license information. -+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+// -+//===----------------------------------------------------------------------===// -+// -+// This file defines the LoongArch-specific LASX builtin function database. -+// Users of this file must define the BUILTIN macro to make use of this -+// information. 
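This is the usual .def X-macro contract: a consumer defines BUILTIN or TARGET_BUILTIN to whatever expansion it needs, includes the file, and every record expands in place. A minimal sketch of the idiom, with an invented printf consumer (clang's real consumers expand the records into builtin-info tables instead):

#include <stdio.h>

/* Expand each record into one line of output. */
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
  printf("%-44s type=%-22s attrs=%-4s feature=%s\n", #ID, TYPE, ATTRS, FEATURE);

void dump_two_records(void) {
  /* In clang this would be: #include "clang/Basic/BuiltinsLoongArchLASX.def" */
  TARGET_BUILTIN(__builtin_lasx_xvadd_b, "V32cV32cV32c", "nc", "lasx")
  TARGET_BUILTIN(__builtin_lasx_xvadd_q, "V4LLiV4LLiV4LLi", "nc", "lasx")
}

#undef TARGET_BUILTIN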
-+// -+//===----------------------------------------------------------------------===// -+ -+TARGET_BUILTIN(__builtin_lasx_xvadd_b, "V32cV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvadd_h, "V16sV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvadd_w, "V8iV8iV8i", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvadd_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvadd_q, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsub_b, "V32cV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsub_h, "V16sV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsub_w, "V8iV8iV8i", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsub_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsub_q, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvaddi_bu, "V32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddi_hu, "V16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddi_wu, "V8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddi_du, "V4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsubi_bu, "V32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsubi_hu, "V16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsubi_wu, "V8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsubi_du, "V4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvneg_b, "V32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvneg_h, "V16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvneg_w, "V8iV8i", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvneg_d, "V4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsadd_b, "V32ScV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsadd_h, "V16SsV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsadd_w, "V8SiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsadd_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsadd_bu, "V32UcV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsadd_hu, "V16UsV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsadd_wu, "V8UiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsadd_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvssub_b, "V32ScV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssub_h, "V16SsV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssub_w, "V8SiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssub_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvssub_bu, "V32UcV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssub_hu, "V16UsV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssub_wu, "V8UiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssub_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvhaddw_h_b, "V16SsV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvhaddw_w_h, "V8SiV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvhaddw_d_w, "V4SLLiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvhaddw_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvhaddw_hu_bu, "V16UsV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvhaddw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvhaddw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvhaddw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") -+ 
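The prototype strings use clang's Builtins.def encoding: VNt is a vector of N elements of type t (c char, s short, i int, LLi long long, with S or U forcing signedness), a leading I marks an operand that must be an integer constant expression, and the "nc" attribute string marks the builtin nothrow and const. So "V32cV32cV32c" takes two 32 x i8 vectors and returns one, which user code can reach through GCC-style vector types; a hedged sketch (the typedef mirrors the v32i8 type lasxintrin.h itself uses, and -mlasx is assumed):

/* "V32cV32cV32c": two 32-byte vectors in, one 32-byte vector out. */
typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));

static v32i8 add_bytes(v32i8 a, v32i8 b) {
  return __builtin_lasx_xvadd_b(a, b);
}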
-+TARGET_BUILTIN(__builtin_lasx_xvhsubw_h_b, "V16SsV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvhsubw_w_h, "V8SiV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvhsubw_d_w, "V4SLLiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvhsubw_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvhsubw_hu_bu, "V16UsV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvhsubw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvhsubw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvhsubw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_h, "V8SiV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_h, "V8SiV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_h, "V8SiV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_h, "V8SiV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, 
"V16sV32UcV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvavg_b, "V32ScV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvavg_h, "V16SsV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvavg_w, "V8SiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvavg_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvavg_bu, "V32UcV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvavg_hu, "V16UsV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvavg_wu, "V8UiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvavg_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvavgr_b, "V32ScV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvavgr_h, "V16SsV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvavgr_w, "V8SiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvavgr_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvavgr_bu, "V32UcV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvavgr_hu, "V16UsV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvavgr_wu, "V8UiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvavgr_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvabsd_b, "V32ScV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvabsd_h, "V16SsV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvabsd_w, "V8SiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvabsd_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvabsd_bu, "V32UcV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvabsd_hu, "V16UsV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvabsd_wu, "V8UiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvabsd_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvadda_b, "V32ScV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvadda_h, "V16SsV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvadda_w, "V8SiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvadda_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmax_b, "V32ScV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmax_h, "V16SsV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmax_w, "V8SiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmax_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmaxi_b, "V32ScV32ScIi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaxi_h, "V16SsV16SsIi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaxi_w, "V8SiV8SiIi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaxi_d, "V4SLLiV4SLLiIi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmax_bu, "V32UcV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmax_hu, "V16UsV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmax_wu, "V8UiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmax_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmaxi_bu, "V32UcV32UcIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaxi_hu, "V16UsV16UsIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaxi_wu, "V8UiV8UiIUi", "nc", "lasx") 
-+TARGET_BUILTIN(__builtin_lasx_xvmaxi_du, "V4ULLiV4ULLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmin_b, "V32ScV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmin_h, "V16SsV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmin_w, "V8SiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmin_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmini_b, "V32ScV32ScIi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmini_h, "V16SsV16SsIi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmini_w, "V8SiV8SiIi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmini_d, "V4SLLiV4SLLiIi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmin_bu, "V32UcV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmin_hu, "V16UsV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmin_wu, "V8UiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmin_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmini_bu, "V32UcV32UcIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmini_hu, "V16UsV16UsIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmini_wu, "V8UiV8UiIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmini_du, "V4ULLiV4ULLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmul_b, "V32ScV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmul_h, "V16SsV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmul_w, "V8SiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmul_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmuh_b, "V32cV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmuh_h, "V16sV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmuh_w, "V8iV8iV8i", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmuh_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmuh_bu, "V32UcV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmuh_hu, "V16UsV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmuh_wu, "V8UiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmuh_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_h, "V8SiV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_h, "V8SiV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu_h, 
"V8SiV16UsV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, "V16sV32UcV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmadd_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmadd_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmadd_w, "V8SiV8SiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmadd_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmsub_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmsub_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmsub_w, "V8SiV8SiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmsub_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_h, "V8SiV8SiV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_h, "V8SiV8SiV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu, "V16UsV16UsV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu, "V16UsV16UsV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx") -+ -+ -+TARGET_BUILTIN(__builtin_lasx_xvdiv_b, "V32ScV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvdiv_h, "V16SsV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvdiv_w, "V8SiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvdiv_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") -+ 
-+TARGET_BUILTIN(__builtin_lasx_xvdiv_bu, "V32UcV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvdiv_hu, "V16UsV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvdiv_wu, "V8UiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvdiv_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmod_b, "V32ScV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmod_h, "V16SsV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmod_w, "V8SiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmod_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmod_bu, "V32UcV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmod_hu, "V16UsV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmod_wu, "V8UiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmod_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsat_b, "V32ScV32ScIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsat_h, "V16SsV16SsIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsat_w, "V8SiV8SiIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsat_d, "V4SLLiV4SLLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsat_bu, "V32UcV32UcIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsat_hu, "V16UsV16UsIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsat_wu, "V8UiV8UiIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsat_du, "V4ULLiV4ULLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvexth_w_h, "V8SiV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvexth_d_w, "V4LLiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvexth_q_d, "V4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvexth_hu_bu, "V16UsV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvexth_wu_hu, "V8UiV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvexth_du_wu, "V4ULLiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvexth_qu_du, "V4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32c", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_vext2xv_w_h, "V8SiV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_vext2xv_d_h, "V4LLiV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_vext2xv_d_w, "V4LLiV8Si", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32c", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_hu, "V8SiV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_vext2xv_du_hu, "V4LLiV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_vext2xv_du_wu, "V4LLiV8Si", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsigncov_b, "V32ScV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsigncov_h, "V16SsV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsigncov_w, "V8SiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsigncov_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvmskltz_b, "V32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmskltz_h, "V16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmskltz_w, "V8iV8i", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmskltz_d, "V4LLiV4LLi", "nc", "lasx") 
-+ -+TARGET_BUILTIN(__builtin_lasx_xvmskgez_b, "V32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvmsknz_b, "V16sV16s", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvldi, "V4LLiIi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvrepli_b, "V32cIi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvrepli_h, "V16sIi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvrepli_w, "V8iIi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvrepli_d, "V4LLiIi", "nc", "lasx") -+ -+ -+TARGET_BUILTIN(__builtin_lasx_xvand_v, "V32UcV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvor_v, "V32UcV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvxor_v, "V32cV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvnor_v, "V32UcV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvandn_v, "V32UcV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvorn_v, "V32ScV32ScV32Sc", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvandi_b, "V32UcV32UcIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvori_b, "V32UcV32UcIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvxori_b, "V32UcV32UcIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvnori_b, "V32UcV32UcIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsll_b, "V32cV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsll_h, "V16sV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsll_w, "V8iV8iV8i", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsll_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvslli_b, "V32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslli_h, "V16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslli_w, "V8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslli_d, "V4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsrl_b, "V32cV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrl_h, "V16sV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrl_w, "V8iV8iV8i", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrl_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsrli_b, "V32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrli_h, "V16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrli_w, "V8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrli_d, "V4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsra_b, "V32cV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsra_h, "V16sV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsra_w, "V8iV8iV8i", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsra_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsrai_b, "V32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrai_h, "V16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrai_w, "V8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrai_d, "V4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvrotr_b, "V32cV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvrotr_h, "V16sV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvrotr_w, "V8iV8iV8i", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvrotr_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvrotri_b, "V32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvrotri_h, "V16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvrotri_w, "V8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvrotri_d, "V4LLiV4LLiIUi", "nc", "lasx") -+ 
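The shifts and rotates come in pairs: a register form that takes per-lane counts from a second vector, and an _i form whose trailing IUi operand must be a compile-time constant (range-checked by the SemaChecking hunk later in this patch). A hedged sketch of the two call shapes, using the prototypes just listed:

typedef int v8i32 __attribute__((vector_size(32), aligned(32)));

v8i32 shift_forms(v8i32 v, v8i32 counts) {
  v8i32 by_imm = __builtin_lasx_xvslli_w(v, 3);     /* "V8iV8iIUi": 3 must be constant */
  v8i32 by_vec = __builtin_lasx_xvsll_w(v, counts); /* "V8iV8iV8i": per-lane counts */
  return by_imm + by_vec;
}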
-+TARGET_BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsllwil_w_h, "V8SiV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsllwil_d_w, "V4LLiV8SiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvextl_q_d, "V4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsllwil_hu_bu, "V16UsV32UcIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsllwil_wu_hu, "V8UiV16UsIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsllwil_du_wu, "V4ULLiV8UiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvextl_qu_du, "V4LLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsrlr_b, "V32cV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrlr_h, "V16sV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrlr_w, "V8iV8iV8i", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrlr_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsrlri_b, "V32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrlri_h, "V16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrlri_w, "V8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrlri_d, "V4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsrar_b, "V32cV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrar_h, "V16sV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrar_w, "V8iV8iV8i", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrar_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsrari_b, "V32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrari_h, "V16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrari_w, "V8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrari_d, "V4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsrln_b_h, "V32ScV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrln_h_w, "V16sV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrln_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsran_b_h, "V32ScV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsran_h_w, "V16sV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsrani_b_h, "V32cV32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrani_h_w, "V16sV16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrani_w_d, "V8iV8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsrlrn_b_h, "V32ScV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrlrn_h_w, "V16sV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrlrn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsrarn_b_h, "V32ScV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrarn_h_w, "V16sV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrarn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrlrni_w_d, "V8iV8iV8iIUi", 
"nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvssrln_b_h, "V32ScV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrln_h_w, "V16sV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrln_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvssran_b_h, "V32ScV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssran_h_w, "V16sV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvssrln_bu_h, "V32UcV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrln_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrln_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvssran_bu_h, "V32UcV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssran_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssran_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvssrani_b_h, "V32cV32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrani_h_w, "V16sV16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrani_w_d, "V8iV8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrlrni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrlrni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrlrni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32cV32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrani_hu_w, "V16sV16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrani_wu_d, "V8iV8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrani_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvssrlrn_b_h, "V32ScV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrlrn_h_w, "V16sV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrlrn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvssrarn_b_h, "V32ScV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrarn_h_w, "V16sV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrarn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvssrlrn_bu_h, "V32UcV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrlrn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrlrn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvssrarn_bu_h, "V32UcV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrarn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") 
-+TARGET_BUILTIN(__builtin_lasx_xvssrarn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrlrni_w_d, "V8iV8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrlni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrlni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrlni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrarni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrarni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvssrarni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvclo_b, "V32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvclo_h, "V16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvclo_w, "V8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvclo_d, "V4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvclz_b, "V32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvclz_h, "V16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvclz_w, "V8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvclz_d, "V4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvpcnt_b, "V32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpcnt_h, "V16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpcnt_w, "V8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpcnt_d, "V4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvbitclr_b, "V32UcV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvbitclr_h, "V16UsV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvbitclr_w, "V8UiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvbitclr_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvbitclri_b, "V32UcV32UcIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvbitclri_h, "V16UsV16UsIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvbitclri_w, "V8UiV8UiIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvbitclri_d, "V4ULLiV4ULLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvbitset_b, "V32UcV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvbitset_h, "V16UsV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvbitset_w, "V8UiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvbitset_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvbitseti_b, "V32UcV32UcIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvbitseti_h, "V16UsV16UsIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvbitseti_w, "V8UiV8UiIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvbitseti_d, "V4ULLiV4ULLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvbitrev_b, "V32UcV32UcV32Uc", "nc", "lasx") 
-+TARGET_BUILTIN(__builtin_lasx_xvbitrev_h, "V16UsV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvbitrev_w, "V8UiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvbitrev_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvbitrevi_b, "V32UcV32UcIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvbitrevi_h, "V16UsV16UsIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvbitrevi_w, "V8UiV8UiIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvbitrevi_d, "V4ULLiV4ULLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfrstp_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfrstp_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfrstpi_b, "V32cV32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfrstpi_h, "V16sV16sV16sIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfadd_s, "V8fV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfadd_d, "V4dV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfsub_s, "V8fV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfsub_d, "V4dV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfmul_s, "V8fV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfmul_d, "V4dV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfdiv_s, "V8fV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfdiv_d, "V4dV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfmadd_s, "V8fV8fV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfmadd_d, "V4dV4dV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfmsub_s, "V8fV8fV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfmsub_d, "V4dV4dV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfnmadd_s, "V8fV8fV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfnmadd_d, "V4dV4dV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfnmsub_s, "V8fV8fV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfnmsub_d, "V4dV4dV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfmax_s, "V8fV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfmax_d, "V4dV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfmin_s, "V8fV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfmin_d, "V4dV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfmaxa_s, "V8fV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfmaxa_d, "V4dV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfmina_s, "V8fV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfmina_d, "V4dV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvflogb_s, "V8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvflogb_d, "V4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfclass_s, "V8iV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfclass_d, "V4LLiV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfsqrt_s, "V8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcvth_d_s, "V4dV8f", 
"nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcvt_h_s, "V16sV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcvt_s_d, "V8fV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfrintrne_s, "V8SiV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfrintrne_d, "V4LLiV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfrintrz_s, "V8SiV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfrintrz_d, "V4LLiV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfrintrp_s, "V8SiV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfrintrp_d, "V4LLiV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfrintrm_s, "V8SiV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfrintrm_d, "V4LLiV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfrint_s, "V8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfrint_d, "V4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvftintrne_w_s, "V8SiV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvftintrne_l_d, "V4LLiV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvftintrz_w_s, "V8SiV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvftintrz_l_d, "V4LLiV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvftintrp_w_s, "V8SiV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvftintrp_l_d, "V4LLiV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvftintrm_w_s, "V8SiV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvftintrm_l_d, "V4LLiV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvftint_w_s, "V8SiV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvftint_l_d, "V4SLLiV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvftintrz_wu_s, "V8UiV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvftintrz_lu_d, "V4ULLiV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvftint_wu_s, "V8UiV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvftint_lu_d, "V4ULLiV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvftintrne_w_d, "V8SiV4dV4d", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvftintrz_w_d, "V8SiV4dV4d", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvftintrp_w_d, "V8SiV4dV4d", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvftintrm_w_d, "V8SiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvftint_w_d, "V8SiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvftintrnel_l_s, "V4LLiV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvftintrneh_l_s, "V4LLiV8f", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvftintrzl_l_s, "V4LLiV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvftintrzh_l_s, "V4LLiV8f", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvftintrpl_l_s, "V4LLiV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvftintrph_l_s, "V4LLiV8f", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvftintrml_l_s, "V4LLiV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvftintrmh_l_s, "V4LLiV8f", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvftintl_l_s, "V4LLiV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvftinth_l_s, "V4LLiV8f", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvffint_s_w, "V8fV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvffint_d_l, "V4dV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvffint_s_wu, "V8fV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvffint_d_lu, "V4dV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvffintl_d_w, "V4dV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvffinth_d_w, "V4dV8Si", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvffint_s_l, "V8fV4LLiV4LLi", "nc", 
"lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvseq_b, "V32ScV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvseq_h, "V16SsV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvseq_w, "V8SiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvseq_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvseqi_b, "V32ScV32ScISi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvseqi_h, "V16SsV16SsISi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvseqi_w, "V8SiV8SiISi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvseqi_d, "V4SLLiV4SLLiISi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsle_b, "V32ScV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsle_h, "V16SsV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsle_w, "V8SiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsle_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvslei_b, "V32ScV32ScISi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslei_h, "V16SsV16SsISi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslei_w, "V8SiV8SiISi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslei_d, "V4SLLiV4SLLiISi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvsle_bu, "V32ScV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsle_hu, "V16SsV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsle_wu, "V8SiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvsle_du, "V4SLLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvslei_bu, "V32ScV32UcIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslei_hu, "V16SsV16UsIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslei_wu, "V8SiV8UiIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslei_du, "V4SLLiV4ULLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvslt_b, "V32ScV32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslt_h, "V16SsV16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslt_w, "V8SiV8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslt_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvslti_b, "V32ScV32ScISi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslti_h, "V16SsV16SsISi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslti_w, "V8SiV8SiISi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslti_d, "V4SLLiV4SLLiISi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvslt_bu, "V32ScV32UcV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslt_hu, "V16SsV16UsV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslt_wu, "V8SiV8UiV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslt_du, "V4SLLiV4ULLiV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvslti_bu, "V32ScV32UcIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslti_hu, "V16SsV16UsIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslti_wu, "V8SiV8UiIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvslti_du, "V4SLLiV4ULLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_caf_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_caf_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_cun_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_cun_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_ceq_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_ceq_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_cueq_s, "V8SiV8fV8f", "nc", "lasx") 
-+TARGET_BUILTIN(__builtin_lasx_xvfcmp_cueq_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_clt_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_clt_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_cult_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_cult_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_cle_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_cle_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_cule_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_cule_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_cne_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_cne_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_cor_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_cor_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_cune_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_cune_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_saf_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_saf_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_sun_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_sun_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_seq_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_seq_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_sueq_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_sueq_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_slt_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_slt_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_sult_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_sult_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_sle_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_sle_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_sule_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_sule_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_sne_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_sne_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_sor_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_sor_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_sune_s, "V8SiV8fV8f", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvfcmp_sune_d, "V4SLLiV4dV4d", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvbitsel_v, "V32UcV32UcV32UcV32Uc", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvbitseli_b, "V32UcV32UcV32UcIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_b, "V32Sci", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_h, "V16Ssi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_w, "V8Sii", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_d, "V4SLLiLLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvinsgr2vr_w, "V8SiV8SiiIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvinsgr2vr_d, "V4SLLiV4SLLiLLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_w, "iV8SiIUi", "nc", "lasx") 
-+TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_d, "LLiV4SLLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_wu, "iV8UiIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_du, "LLiV4ULLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvreplve_b, "V32cV32cUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvreplve_h, "V16sV16sUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvreplve_w, "V8iV8iUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvreplve_d, "V4LLiV4LLiUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_h, "V16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_w, "V8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_d, "V4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvreplve0_b, "V32ScV32Sc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvreplve0_h, "V16SsV16Ss", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvreplve0_w, "V8SiV8Si", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvreplve0_d, "V4SLLiV4SLLi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvreplve0_q, "V32ScV32Sc", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvinsve0_w, "V8iV8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvinsve0_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvpickve_w, "V8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpickve_d, "V4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvpickve_w_f, "V8fV8fIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpickve_d_f, "V4dV4dIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvbsll_v, "V32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvbsrl_v, "V32cV32cIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvpackev_b, "V32cV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpackev_h, "V16sV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpackev_w, "V8iV8iV8i", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpackev_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvpackod_b, "V32cV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpackod_h, "V16sV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpackod_w, "V8iV8iV8i", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpackod_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvpickev_b, "V32cV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpickev_h, "V16sV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpickev_w, "V8iV8iV8i", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpickev_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvpickod_b, "V32cV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpickod_h, "V16sV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpickod_w, "V8iV8iV8i", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpickod_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvilvl_b, "V32cV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvilvl_h, "V16sV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvilvl_w, "V8iV8iV8i", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvilvl_d, "V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvilvh_b, "V32cV32cV32c", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvilvh_h, "V16sV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvilvh_w, "V8iV8iV8i", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvilvh_d, 
"V4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvshuf_b, "V32UcV32UcV32UcV32Uc", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvshuf_h, "V16sV16sV16sV16s", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvshuf_w, "V8iV8iV8iV8i", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvshuf_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvperm_w, "V8iV8iV8i", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvshuf4i_b, "V32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvshuf4i_h, "V16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvshuf4i_w, "V8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvshuf4i_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvpermi_w, "V8iV8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpermi_d, "V4LLiV4LLiIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvpermi_q, "V32cV32cV32cIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvextrins_b, "V32cV32cV32cIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvextrins_h, "V16sV16sV16sIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvextrins_w, "V8iV8iV8iIUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvextrins_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvld, "V32ScvC*Ii", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvst, "vV32Scv*Ii", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvldx, "V32ScvC*LLi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvstx, "vV32Scv*LLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvldrepl_b, "V32cvC*Ii", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvldrepl_h, "V16svC*Ii", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvldrepl_w, "V8ivC*Ii", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvldrepl_d, "V4LLivC*Ii", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xvstelm_b, "vV32Scv*IiUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvstelm_h, "vV16Ssv*IiUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvstelm_w, "vV8Siv*IiUi", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xvstelm_d, "vV4SLLiv*IiUi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xbz_v, "iV32Uc", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xbnz_v, "iV32Uc", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xbz_b, "iV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xbz_h, "iV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xbz_w, "iV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xbz_d, "iV4ULLi", "nc", "lasx") -+ -+TARGET_BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc", "lasx") -+TARGET_BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc", "lasx") -diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt -index 1d5573b71e6d..356009ae9157 100644 ---- a/clang/lib/Headers/CMakeLists.txt -+++ b/clang/lib/Headers/CMakeLists.txt -@@ -78,6 +78,7 @@ set(hlsl_files - - set(loongarch_files - larchintrin.h -+ lasxintrin.h - lsxintrin.h - ) - -diff --git a/clang/lib/Headers/lasxintrin.h b/clang/lib/Headers/lasxintrin.h -new file mode 100644 -index 000000000000..6b4d5012a24b ---- /dev/null -+++ b/clang/lib/Headers/lasxintrin.h -@@ -0,0 +1,3860 @@ -+/*===------------ lasxintrin.h - LoongArch LASX intrinsics -----------------=== -+ * -+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -+ * See https://llvm.org/LICENSE.txt for license information. 
-+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+ * -+ *===-----------------------------------------------------------------------=== -+ */ -+ -+#ifndef _LOONGSON_ASXINTRIN_H -+#define _LOONGSON_ASXINTRIN_H 1 -+ -+#if defined(__loongarch_asx) -+ -+typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); -+typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); -+typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); -+typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); -+typedef short v16i16 __attribute__((vector_size(32), aligned(32))); -+typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); -+typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); -+typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); -+typedef int v8i32 __attribute__((vector_size(32), aligned(32))); -+typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); -+typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); -+typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); -+typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); -+typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); -+typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); -+typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); -+typedef float v8f32 __attribute__((vector_size(32), aligned(32))); -+typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); -+typedef double v4f64 __attribute__((vector_size(32), aligned(32))); -+typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); -+ -+typedef double v4f64 __attribute__((vector_size(32), aligned(32))); -+typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); -+ -+typedef float __m256 __attribute__((__vector_size__(32), __may_alias__)); -+typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__)); -+typedef double __m256d __attribute__((__vector_size__(32), __may_alias__)); -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsll_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsll_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsll_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsll_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsll_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsll_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsll_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsll_d((v4i64)_1, (v4i64)_2); -+} -+ -+#define __lasx_xvslli_b(/*__m256i*/ _1, /*ui3*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslli_b((v32i8)(_1), (_2))) -+ -+#define __lasx_xvslli_h(/*__m256i*/ _1, /*ui4*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslli_h((v16i16)(_1), (_2))) -+ -+#define __lasx_xvslli_w(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslli_w((v8i32)(_1), (_2))) -+ -+#define __lasx_xvslli_d(/*__m256i*/ _1, /*ui6*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslli_d((v4i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ 
__lasx_xvsra_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsra_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsra_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsra_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsra_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsra_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsra_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsra_d((v4i64)_1, (v4i64)_2); -+} -+ -+#define __lasx_xvsrai_b(/*__m256i*/ _1, /*ui3*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsrai_b((v32i8)(_1), (_2))) -+ -+#define __lasx_xvsrai_h(/*__m256i*/ _1, /*ui4*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsrai_h((v16i16)(_1), (_2))) -+ -+#define __lasx_xvsrai_w(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsrai_w((v8i32)(_1), (_2))) -+ -+#define __lasx_xvsrai_d(/*__m256i*/ _1, /*ui6*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsrai_d((v4i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsrar_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsrar_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsrar_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsrar_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsrar_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsrar_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsrar_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsrar_d((v4i64)_1, (v4i64)_2); -+} -+ -+#define __lasx_xvsrari_b(/*__m256i*/ _1, /*ui3*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsrari_b((v32i8)(_1), (_2))) -+ -+#define __lasx_xvsrari_h(/*__m256i*/ _1, /*ui4*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsrari_h((v16i16)(_1), (_2))) -+ -+#define __lasx_xvsrari_w(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsrari_w((v8i32)(_1), (_2))) -+ -+#define __lasx_xvsrari_d(/*__m256i*/ _1, /*ui6*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsrari_d((v4i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsrl_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsrl_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsrl_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsrl_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsrl_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsrl_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsrl_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsrl_d((v4i64)_1, (v4i64)_2); -+} -+ -+#define __lasx_xvsrli_b(/*__m256i*/ _1, /*ui3*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsrli_b((v32i8)(_1), (_2))) -+ -+#define __lasx_xvsrli_h(/*__m256i*/ _1, 
/*ui4*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsrli_h((v16i16)(_1), (_2))) -+ -+#define __lasx_xvsrli_w(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsrli_w((v8i32)(_1), (_2))) -+ -+#define __lasx_xvsrli_d(/*__m256i*/ _1, /*ui6*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsrli_d((v4i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsrlr_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsrlr_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsrlr_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsrlr_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsrlr_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsrlr_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsrlr_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsrlr_d((v4i64)_1, (v4i64)_2); -+} -+ -+#define __lasx_xvsrlri_b(/*__m256i*/ _1, /*ui3*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsrlri_b((v32i8)(_1), (_2))) -+ -+#define __lasx_xvsrlri_h(/*__m256i*/ _1, /*ui4*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsrlri_h((v16i16)(_1), (_2))) -+ -+#define __lasx_xvsrlri_w(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsrlri_w((v8i32)(_1), (_2))) -+ -+#define __lasx_xvsrlri_d(/*__m256i*/ _1, /*ui6*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsrlri_d((v4i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvbitclr_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvbitclr_b((v32u8)_1, (v32u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvbitclr_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvbitclr_h((v16u16)_1, (v16u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvbitclr_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvbitclr_w((v8u32)_1, (v8u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvbitclr_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvbitclr_d((v4u64)_1, (v4u64)_2); -+} -+ -+#define __lasx_xvbitclri_b(/*__m256i*/ _1, /*ui3*/ _2) \ -+ ((__m256i)__builtin_lasx_xvbitclri_b((v32u8)(_1), (_2))) -+ -+#define __lasx_xvbitclri_h(/*__m256i*/ _1, /*ui4*/ _2) \ -+ ((__m256i)__builtin_lasx_xvbitclri_h((v16u16)(_1), (_2))) -+ -+#define __lasx_xvbitclri_w(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvbitclri_w((v8u32)(_1), (_2))) -+ -+#define __lasx_xvbitclri_d(/*__m256i*/ _1, /*ui6*/ _2) \ -+ ((__m256i)__builtin_lasx_xvbitclri_d((v4u64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvbitset_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvbitset_b((v32u8)_1, (v32u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvbitset_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvbitset_h((v16u16)_1, (v16u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ 
__lasx_xvbitset_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvbitset_w((v8u32)_1, (v8u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvbitset_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvbitset_d((v4u64)_1, (v4u64)_2); -+} -+ -+#define __lasx_xvbitseti_b(/*__m256i*/ _1, /*ui3*/ _2) \ -+ ((__m256i)__builtin_lasx_xvbitseti_b((v32u8)(_1), (_2))) -+ -+#define __lasx_xvbitseti_h(/*__m256i*/ _1, /*ui4*/ _2) \ -+ ((__m256i)__builtin_lasx_xvbitseti_h((v16u16)(_1), (_2))) -+ -+#define __lasx_xvbitseti_w(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvbitseti_w((v8u32)(_1), (_2))) -+ -+#define __lasx_xvbitseti_d(/*__m256i*/ _1, /*ui6*/ _2) \ -+ ((__m256i)__builtin_lasx_xvbitseti_d((v4u64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvbitrev_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvbitrev_b((v32u8)_1, (v32u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvbitrev_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvbitrev_h((v16u16)_1, (v16u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvbitrev_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvbitrev_w((v8u32)_1, (v8u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvbitrev_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvbitrev_d((v4u64)_1, (v4u64)_2); -+} -+ -+#define __lasx_xvbitrevi_b(/*__m256i*/ _1, /*ui3*/ _2) \ -+ ((__m256i)__builtin_lasx_xvbitrevi_b((v32u8)(_1), (_2))) -+ -+#define __lasx_xvbitrevi_h(/*__m256i*/ _1, /*ui4*/ _2) \ -+ ((__m256i)__builtin_lasx_xvbitrevi_h((v16u16)(_1), (_2))) -+ -+#define __lasx_xvbitrevi_w(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvbitrevi_w((v8u32)(_1), (_2))) -+ -+#define __lasx_xvbitrevi_d(/*__m256i*/ _1, /*ui6*/ _2) \ -+ ((__m256i)__builtin_lasx_xvbitrevi_d((v4u64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvadd_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvadd_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvadd_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvadd_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvadd_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvadd_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvadd_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvadd_d((v4i64)_1, (v4i64)_2); -+} -+ -+#define __lasx_xvaddi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvaddi_bu((v32i8)(_1), (_2))) -+ -+#define __lasx_xvaddi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvaddi_hu((v16i16)(_1), (_2))) -+ -+#define __lasx_xvaddi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvaddi_wu((v8i32)(_1), (_2))) -+ -+#define __lasx_xvaddi_du(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvaddi_du((v4i64)(_1), (_2))) -+ -+extern __inline -+ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsub_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsub_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsub_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsub_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsub_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsub_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsub_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsub_d((v4i64)_1, (v4i64)_2); -+} -+ -+#define __lasx_xvsubi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsubi_bu((v32i8)(_1), (_2))) -+ -+#define __lasx_xvsubi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsubi_hu((v16i16)(_1), (_2))) -+ -+#define __lasx_xvsubi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsubi_wu((v8i32)(_1), (_2))) -+ -+#define __lasx_xvsubi_du(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsubi_du((v4i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmax_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmax_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmax_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmax_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmax_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmax_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmax_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmax_d((v4i64)_1, (v4i64)_2); -+} -+ -+#define __lasx_xvmaxi_b(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvmaxi_b((v32i8)(_1), (_2))) -+ -+#define __lasx_xvmaxi_h(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvmaxi_h((v16i16)(_1), (_2))) -+ -+#define __lasx_xvmaxi_w(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvmaxi_w((v8i32)(_1), (_2))) -+ -+#define __lasx_xvmaxi_d(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvmaxi_d((v4i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmax_bu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmax_bu((v32u8)_1, (v32u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmax_hu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmax_hu((v16u16)_1, (v16u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmax_wu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmax_wu((v8u32)_1, (v8u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmax_du(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmax_du((v4u64)_1, (v4u64)_2); -+} -+ -+#define __lasx_xvmaxi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ 
((__m256i)__builtin_lasx_xvmaxi_bu((v32u8)(_1), (_2))) -+ -+#define __lasx_xvmaxi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvmaxi_hu((v16u16)(_1), (_2))) -+ -+#define __lasx_xvmaxi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvmaxi_wu((v8u32)(_1), (_2))) -+ -+#define __lasx_xvmaxi_du(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvmaxi_du((v4u64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmin_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmin_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmin_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmin_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmin_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmin_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmin_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmin_d((v4i64)_1, (v4i64)_2); -+} -+ -+#define __lasx_xvmini_b(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvmini_b((v32i8)(_1), (_2))) -+ -+#define __lasx_xvmini_h(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvmini_h((v16i16)(_1), (_2))) -+ -+#define __lasx_xvmini_w(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvmini_w((v8i32)(_1), (_2))) -+ -+#define __lasx_xvmini_d(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvmini_d((v4i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmin_bu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmin_bu((v32u8)_1, (v32u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmin_hu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmin_hu((v16u16)_1, (v16u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmin_wu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmin_wu((v8u32)_1, (v8u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmin_du(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmin_du((v4u64)_1, (v4u64)_2); -+} -+ -+#define __lasx_xvmini_bu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvmini_bu((v32u8)(_1), (_2))) -+ -+#define __lasx_xvmini_hu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvmini_hu((v16u16)(_1), (_2))) -+ -+#define __lasx_xvmini_wu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvmini_wu((v8u32)(_1), (_2))) -+ -+#define __lasx_xvmini_du(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvmini_du((v4u64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvseq_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvseq_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvseq_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvseq_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i -+ __lasx_xvseq_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvseq_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvseq_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvseq_d((v4i64)_1, (v4i64)_2); -+} -+ -+#define __lasx_xvseqi_b(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvseqi_b((v32i8)(_1), (_2))) -+ -+#define __lasx_xvseqi_h(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvseqi_h((v16i16)(_1), (_2))) -+ -+#define __lasx_xvseqi_w(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvseqi_w((v8i32)(_1), (_2))) -+ -+#define __lasx_xvseqi_d(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvseqi_d((v4i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvslt_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvslt_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvslt_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvslt_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvslt_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvslt_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvslt_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvslt_d((v4i64)_1, (v4i64)_2); -+} -+ -+#define __lasx_xvslti_b(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslti_b((v32i8)(_1), (_2))) -+ -+#define __lasx_xvslti_h(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslti_h((v16i16)(_1), (_2))) -+ -+#define __lasx_xvslti_w(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslti_w((v8i32)(_1), (_2))) -+ -+#define __lasx_xvslti_d(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslti_d((v4i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvslt_bu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvslt_bu((v32u8)_1, (v32u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvslt_hu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvslt_hu((v16u16)_1, (v16u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvslt_wu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvslt_wu((v8u32)_1, (v8u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvslt_du(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvslt_du((v4u64)_1, (v4u64)_2); -+} -+ -+#define __lasx_xvslti_bu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslti_bu((v32u8)(_1), (_2))) -+ -+#define __lasx_xvslti_hu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslti_hu((v16u16)(_1), (_2))) -+ -+#define __lasx_xvslti_wu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslti_wu((v8u32)(_1), (_2))) -+ -+#define __lasx_xvslti_du(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslti_du((v4u64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m256i -+ __lasx_xvsle_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsle_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsle_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsle_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsle_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsle_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsle_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsle_d((v4i64)_1, (v4i64)_2); -+} -+ -+#define __lasx_xvslei_b(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslei_b((v32i8)(_1), (_2))) -+ -+#define __lasx_xvslei_h(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslei_h((v16i16)(_1), (_2))) -+ -+#define __lasx_xvslei_w(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslei_w((v8i32)(_1), (_2))) -+ -+#define __lasx_xvslei_d(/*__m256i*/ _1, /*si5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslei_d((v4i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsle_bu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsle_bu((v32u8)_1, (v32u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsle_hu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsle_hu((v16u16)_1, (v16u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsle_wu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsle_wu((v8u32)_1, (v8u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsle_du(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsle_du((v4u64)_1, (v4u64)_2); -+} -+ -+#define __lasx_xvslei_bu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslei_bu((v32u8)(_1), (_2))) -+ -+#define __lasx_xvslei_hu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslei_hu((v16u16)(_1), (_2))) -+ -+#define __lasx_xvslei_wu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslei_wu((v8u32)(_1), (_2))) -+ -+#define __lasx_xvslei_du(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvslei_du((v4u64)(_1), (_2))) -+ -+#define __lasx_xvsat_b(/*__m256i*/ _1, /*ui3*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsat_b((v32i8)(_1), (_2))) -+ -+#define __lasx_xvsat_h(/*__m256i*/ _1, /*ui4*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsat_h((v16i16)(_1), (_2))) -+ -+#define __lasx_xvsat_w(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsat_w((v8i32)(_1), (_2))) -+ -+#define __lasx_xvsat_d(/*__m256i*/ _1, /*ui6*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsat_d((v4i64)(_1), (_2))) -+ -+#define __lasx_xvsat_bu(/*__m256i*/ _1, /*ui3*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsat_bu((v32u8)(_1), (_2))) -+ -+#define __lasx_xvsat_hu(/*__m256i*/ _1, /*ui4*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsat_hu((v16u16)(_1), (_2))) -+ -+#define __lasx_xvsat_wu(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsat_wu((v8u32)(_1), (_2))) -+ -+#define __lasx_xvsat_du(/*__m256i*/ _1, /*ui6*/ _2) \ -+ ((__m256i)__builtin_lasx_xvsat_du((v4u64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, 
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvadda_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvadda_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvadda_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvadda_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvadda_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvadda_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvadda_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvadda_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsadd_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsadd_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsadd_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsadd_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsadd_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsadd_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsadd_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsadd_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsadd_bu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsadd_bu((v32u8)_1, (v32u8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsadd_hu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsadd_hu((v16u16)_1, (v16u16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsadd_wu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsadd_wu((v8u32)_1, (v8u32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsadd_du(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsadd_du((v4u64)_1, (v4u64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvavg_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvavg_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvavg_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvavg_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvavg_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvavg_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvavg_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvavg_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvavg_bu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvavg_bu((v32u8)_1, (v32u8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvavg_hu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvavg_hu((v16u16)_1, (v16u16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvavg_wu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvavg_wu((v8u32)_1, (v8u32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvavg_du(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvavg_du((v4u64)_1, (v4u64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvavgr_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvavgr_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvavgr_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvavgr_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvavgr_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvavgr_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvavgr_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvavgr_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvavgr_bu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvavgr_bu((v32u8)_1, (v32u8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvavgr_hu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvavgr_hu((v16u16)_1, (v16u16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvavgr_wu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvavgr_wu((v8u32)_1, (v8u32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvavgr_du(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvavgr_du((v4u64)_1, (v4u64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssub_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssub_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssub_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssub_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssub_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssub_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssub_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssub_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssub_bu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssub_bu((v32u8)_1, (v32u8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssub_hu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssub_hu((v16u16)_1, (v16u16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssub_wu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssub_wu((v8u32)_1, (v8u32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssub_du(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssub_du((v4u64)_1, (v4u64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvabsd_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvabsd_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvabsd_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvabsd_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvabsd_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvabsd_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvabsd_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvabsd_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvabsd_bu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvabsd_bu((v32u8)_1, (v32u8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvabsd_hu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvabsd_hu((v16u16)_1, (v16u16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvabsd_wu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvabsd_wu((v8u32)_1, (v8u32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvabsd_du(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvabsd_du((v4u64)_1, (v4u64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmul_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmul_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmul_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmul_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmul_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmul_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmul_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmul_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmadd_b(__m256i _1, __m256i _2, __m256i _3) {
-+  return (__m256i)__builtin_lasx_xvmadd_b((v32i8)_1, (v32i8)_2, (v32i8)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmadd_h(__m256i _1, __m256i _2, __m256i _3) {
-+  return (__m256i)__builtin_lasx_xvmadd_h((v16i16)_1, (v16i16)_2, (v16i16)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmadd_w(__m256i _1, __m256i _2, __m256i _3) {
-+  return (__m256i)__builtin_lasx_xvmadd_w((v8i32)_1, (v8i32)_2, (v8i32)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmadd_d(__m256i _1, __m256i _2, __m256i _3) {
-+  return (__m256i)__builtin_lasx_xvmadd_d((v4i64)_1, (v4i64)_2, (v4i64)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmsub_b(__m256i _1, __m256i _2, __m256i _3) {
-+  return (__m256i)__builtin_lasx_xvmsub_b((v32i8)_1, (v32i8)_2, (v32i8)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmsub_h(__m256i _1, __m256i _2, __m256i _3) {
-+  return (__m256i)__builtin_lasx_xvmsub_h((v16i16)_1, (v16i16)_2, (v16i16)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmsub_w(__m256i _1, __m256i _2, __m256i _3) {
-+  return (__m256i)__builtin_lasx_xvmsub_w((v8i32)_1, (v8i32)_2, (v8i32)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmsub_d(__m256i _1, __m256i _2, __m256i _3) {
-+  return (__m256i)__builtin_lasx_xvmsub_d((v4i64)_1, (v4i64)_2, (v4i64)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvdiv_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvdiv_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvdiv_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvdiv_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvdiv_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvdiv_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvdiv_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvdiv_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvdiv_bu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvdiv_bu((v32u8)_1, (v32u8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvdiv_hu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvdiv_hu((v16u16)_1, (v16u16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvdiv_wu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvdiv_wu((v8u32)_1, (v8u32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvdiv_du(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvdiv_du((v4u64)_1, (v4u64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvhaddw_h_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvhaddw_h_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvhaddw_w_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvhaddw_w_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvhaddw_d_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvhaddw_d_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvhaddw_hu_bu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvhaddw_hu_bu((v32u8)_1, (v32u8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvhaddw_wu_hu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvhaddw_wu_hu((v16u16)_1, (v16u16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvhaddw_du_wu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvhaddw_du_wu((v8u32)_1, (v8u32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvhsubw_h_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvhsubw_h_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvhsubw_w_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvhsubw_w_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvhsubw_d_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvhsubw_d_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvhsubw_hu_bu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvhsubw_hu_bu((v32u8)_1, (v32u8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvhsubw_wu_hu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvhsubw_wu_hu((v16u16)_1, (v16u16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvhsubw_du_wu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvhsubw_du_wu((v8u32)_1, (v8u32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmod_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmod_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmod_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmod_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmod_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmod_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmod_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmod_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmod_bu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmod_bu((v32u8)_1, (v32u8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmod_hu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmod_hu((v16u16)_1, (v16u16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmod_wu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmod_wu((v8u32)_1, (v8u32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmod_du(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmod_du((v4u64)_1, (v4u64)_2);
-+}
-+
-+#define __lasx_xvrepl128vei_b(/*__m256i*/ _1, /*ui4*/ _2) \
-+  ((__m256i)__builtin_lasx_xvrepl128vei_b((v32i8)(_1), (_2)))
-+
-+#define __lasx_xvrepl128vei_h(/*__m256i*/ _1, /*ui3*/ _2) \
-+  ((__m256i)__builtin_lasx_xvrepl128vei_h((v16i16)(_1), (_2)))
-+
-+#define __lasx_xvrepl128vei_w(/*__m256i*/ _1, /*ui2*/ _2) \
-+  ((__m256i)__builtin_lasx_xvrepl128vei_w((v8i32)(_1), (_2)))
-+
-+#define __lasx_xvrepl128vei_d(/*__m256i*/ _1, /*ui1*/ _2) \
-+  ((__m256i)__builtin_lasx_xvrepl128vei_d((v4i64)(_1), (_2)))
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpickev_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvpickev_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpickev_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvpickev_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpickev_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvpickev_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpickev_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvpickev_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpickod_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvpickod_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpickod_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvpickod_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpickod_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvpickod_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpickod_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvpickod_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvilvh_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvilvh_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvilvh_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvilvh_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvilvh_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvilvh_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvilvh_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvilvh_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvilvl_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvilvl_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvilvl_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvilvl_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvilvl_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvilvl_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvilvl_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvilvl_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpackev_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvpackev_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpackev_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvpackev_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpackev_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvpackev_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpackev_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvpackev_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpackod_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvpackod_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpackod_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvpackod_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpackod_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvpackod_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpackod_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvpackod_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvshuf_b(__m256i _1, __m256i _2, __m256i _3) {
-+  return (__m256i)__builtin_lasx_xvshuf_b((v32i8)_1, (v32i8)_2, (v32i8)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvshuf_h(__m256i _1, __m256i _2, __m256i _3) {
-+  return (__m256i)__builtin_lasx_xvshuf_h((v16i16)_1, (v16i16)_2, (v16i16)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvshuf_w(__m256i _1, __m256i _2, __m256i _3) {
-+  return (__m256i)__builtin_lasx_xvshuf_w((v8i32)_1, (v8i32)_2, (v8i32)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvshuf_d(__m256i _1, __m256i _2, __m256i _3) {
-+  return (__m256i)__builtin_lasx_xvshuf_d((v4i64)_1, (v4i64)_2, (v4i64)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvand_v(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvand_v((v32u8)_1, (v32u8)_2);
-+}
-+
-+#define __lasx_xvandi_b(/*__m256i*/ _1, /*ui8*/ _2) \
-+  ((__m256i)__builtin_lasx_xvandi_b((v32u8)(_1), (_2)))
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvor_v(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvor_v((v32u8)_1, (v32u8)_2);
-+}
-+
-+#define __lasx_xvori_b(/*__m256i*/ _1, /*ui8*/ _2) \
-+  ((__m256i)__builtin_lasx_xvori_b((v32u8)(_1), (_2)))
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvnor_v(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvnor_v((v32u8)_1, (v32u8)_2);
-+}
-+
-+#define __lasx_xvnori_b(/*__m256i*/ _1, /*ui8*/ _2) \
-+  ((__m256i)__builtin_lasx_xvnori_b((v32u8)(_1), (_2)))
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvxor_v(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvxor_v((v32u8)_1, (v32u8)_2);
-+}
-+
-+#define __lasx_xvxori_b(/*__m256i*/ _1, /*ui8*/ _2) \
-+  ((__m256i)__builtin_lasx_xvxori_b((v32u8)(_1), (_2)))
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvbitsel_v(__m256i _1, __m256i _2, __m256i _3) {
-+  return (__m256i)__builtin_lasx_xvbitsel_v((v32u8)_1, (v32u8)_2, (v32u8)_3);
-+}
-+
-+#define __lasx_xvbitseli_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \
-+  ((__m256i)__builtin_lasx_xvbitseli_b((v32u8)(_1), (v32u8)(_2), (_3)))
-+
-+#define __lasx_xvshuf4i_b(/*__m256i*/ _1, /*ui8*/ _2) \
-+  ((__m256i)__builtin_lasx_xvshuf4i_b((v32i8)(_1), (_2)))
-+
-+#define __lasx_xvshuf4i_h(/*__m256i*/ _1, /*ui8*/ _2) \
-+  ((__m256i)__builtin_lasx_xvshuf4i_h((v16i16)(_1), (_2)))
-+
-+#define __lasx_xvshuf4i_w(/*__m256i*/ _1, /*ui8*/ _2) \
-+  ((__m256i)__builtin_lasx_xvshuf4i_w((v8i32)(_1), (_2)))
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvreplgr2vr_b(int _1) {
-+  return (__m256i)__builtin_lasx_xvreplgr2vr_b((int)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvreplgr2vr_h(int _1) {
-+  return (__m256i)__builtin_lasx_xvreplgr2vr_h((int)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvreplgr2vr_w(int _1) {
-+  return (__m256i)__builtin_lasx_xvreplgr2vr_w((int)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvreplgr2vr_d(long int _1) {
-+  return (__m256i)__builtin_lasx_xvreplgr2vr_d((long int)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpcnt_b(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvpcnt_b((v32i8)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpcnt_h(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvpcnt_h((v16i16)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpcnt_w(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvpcnt_w((v8i32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvpcnt_d(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvpcnt_d((v4i64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvclo_b(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvclo_b((v32i8)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvclo_h(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvclo_h((v16i16)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvclo_w(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvclo_w((v8i32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvclo_d(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvclo_d((v4i64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvclz_b(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvclz_b((v32i8)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvclz_h(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvclz_h((v16i16)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvclz_w(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvclz_w((v8i32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvclz_d(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvclz_d((v4i64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfadd_s(__m256 _1, __m256 _2) {
-+  return (__m256)__builtin_lasx_xvfadd_s((v8f32)_1, (v8f32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfadd_d(__m256d _1, __m256d _2) {
-+  return (__m256d)__builtin_lasx_xvfadd_d((v4f64)_1, (v4f64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfsub_s(__m256 _1, __m256 _2) {
-+  return (__m256)__builtin_lasx_xvfsub_s((v8f32)_1, (v8f32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfsub_d(__m256d _1, __m256d _2) {
-+  return (__m256d)__builtin_lasx_xvfsub_d((v4f64)_1, (v4f64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfmul_s(__m256 _1, __m256 _2) {
-+  return (__m256)__builtin_lasx_xvfmul_s((v8f32)_1, (v8f32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfmul_d(__m256d _1, __m256d _2) {
-+  return (__m256d)__builtin_lasx_xvfmul_d((v4f64)_1, (v4f64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfdiv_s(__m256 _1, __m256 _2) {
-+  return (__m256)__builtin_lasx_xvfdiv_s((v8f32)_1, (v8f32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfdiv_d(__m256d _1, __m256d _2) {
-+  return (__m256d)__builtin_lasx_xvfdiv_d((v4f64)_1, (v4f64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvfcvt_h_s(__m256 _1, __m256 _2) {
-+  return (__m256i)__builtin_lasx_xvfcvt_h_s((v8f32)_1, (v8f32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfcvt_s_d(__m256d _1, __m256d _2) {
-+  return (__m256)__builtin_lasx_xvfcvt_s_d((v4f64)_1, (v4f64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfmin_s(__m256 _1, __m256 _2) {
-+  return (__m256)__builtin_lasx_xvfmin_s((v8f32)_1, (v8f32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfmin_d(__m256d _1, __m256d _2) {
-+  return (__m256d)__builtin_lasx_xvfmin_d((v4f64)_1, (v4f64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfmina_s(__m256 _1, __m256 _2) {
-+  return (__m256)__builtin_lasx_xvfmina_s((v8f32)_1, (v8f32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfmina_d(__m256d _1, __m256d _2) {
-+  return (__m256d)__builtin_lasx_xvfmina_d((v4f64)_1, (v4f64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfmax_s(__m256 _1, __m256 _2) {
-+  return (__m256)__builtin_lasx_xvfmax_s((v8f32)_1, (v8f32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfmax_d(__m256d _1, __m256d _2) {
-+  return (__m256d)__builtin_lasx_xvfmax_d((v4f64)_1, (v4f64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfmaxa_s(__m256 _1, __m256 _2) {
-+  return (__m256)__builtin_lasx_xvfmaxa_s((v8f32)_1, (v8f32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfmaxa_d(__m256d _1, __m256d _2) {
-+  return (__m256d)__builtin_lasx_xvfmaxa_d((v4f64)_1, (v4f64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvfclass_s(__m256 _1) {
-+  return (__m256i)__builtin_lasx_xvfclass_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvfclass_d(__m256d _1) {
-+  return (__m256i)__builtin_lasx_xvfclass_d((v4f64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfsqrt_s(__m256 _1) {
-+  return (__m256)__builtin_lasx_xvfsqrt_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfsqrt_d(__m256d _1) {
-+  return (__m256d)__builtin_lasx_xvfsqrt_d((v4f64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfrecip_s(__m256 _1) {
-+  return (__m256)__builtin_lasx_xvfrecip_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfrecip_d(__m256d _1) {
-+  return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfrint_s(__m256 _1) {
-+  return (__m256)__builtin_lasx_xvfrint_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfrint_d(__m256d _1) {
-+  return (__m256d)__builtin_lasx_xvfrint_d((v4f64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfrsqrt_s(__m256 _1) {
-+  return (__m256)__builtin_lasx_xvfrsqrt_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfrsqrt_d(__m256d _1) {
-+  return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvflogb_s(__m256 _1) {
-+  return (__m256)__builtin_lasx_xvflogb_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvflogb_d(__m256d _1) {
-+  return (__m256d)__builtin_lasx_xvflogb_d((v4f64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfcvth_s_h(__m256i _1) {
-+  return (__m256)__builtin_lasx_xvfcvth_s_h((v16i16)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfcvth_d_s(__m256 _1) {
-+  return (__m256d)__builtin_lasx_xvfcvth_d_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfcvtl_s_h(__m256i _1) {
-+  return (__m256)__builtin_lasx_xvfcvtl_s_h((v16i16)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfcvtl_d_s(__m256 _1) {
-+  return (__m256d)__builtin_lasx_xvfcvtl_d_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftint_w_s(__m256 _1) {
-+  return (__m256i)__builtin_lasx_xvftint_w_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftint_l_d(__m256d _1) {
-+  return (__m256i)__builtin_lasx_xvftint_l_d((v4f64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftint_wu_s(__m256 _1) {
-+  return (__m256i)__builtin_lasx_xvftint_wu_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftint_lu_d(__m256d _1) {
-+  return (__m256i)__builtin_lasx_xvftint_lu_d((v4f64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrz_w_s(__m256 _1) {
-+  return (__m256i)__builtin_lasx_xvftintrz_w_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrz_l_d(__m256d _1) {
-+  return (__m256i)__builtin_lasx_xvftintrz_l_d((v4f64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrz_wu_s(__m256 _1) {
-+  return (__m256i)__builtin_lasx_xvftintrz_wu_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrz_lu_d(__m256d _1) {
-+  return (__m256i)__builtin_lasx_xvftintrz_lu_d((v4f64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvffint_s_w(__m256i _1) {
-+  return (__m256)__builtin_lasx_xvffint_s_w((v8i32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvffint_d_l(__m256i _1) {
-+  return (__m256d)__builtin_lasx_xvffint_d_l((v4i64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvffint_s_wu(__m256i _1) {
-+  return (__m256)__builtin_lasx_xvffint_s_wu((v8u32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvffint_d_lu(__m256i _1) {
-+  return (__m256d)__builtin_lasx_xvffint_d_lu((v4u64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvreplve_b(__m256i _1, int _2) {
-+  return (__m256i)__builtin_lasx_xvreplve_b((v32i8)_1, (int)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvreplve_h(__m256i _1, int _2) {
-+  return (__m256i)__builtin_lasx_xvreplve_h((v16i16)_1, (int)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvreplve_w(__m256i _1, int _2) {
-+  return (__m256i)__builtin_lasx_xvreplve_w((v8i32)_1, (int)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvreplve_d(__m256i _1, int _2) {
-+  return (__m256i)__builtin_lasx_xvreplve_d((v4i64)_1, (int)_2);
-+}
-+
-+#define __lasx_xvpermi_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \
-+  ((__m256i)__builtin_lasx_xvpermi_w((v8i32)(_1), (v8i32)(_2), (_3)))
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvandn_v(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvandn_v((v32u8)_1, (v32u8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvneg_b(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvneg_b((v32i8)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvneg_h(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvneg_h((v16i16)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvneg_w(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvneg_w((v8i32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvneg_d(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvneg_d((v4i64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmuh_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmuh_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmuh_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmuh_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmuh_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmuh_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmuh_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmuh_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmuh_bu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmuh_bu((v32u8)_1, (v32u8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmuh_hu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmuh_hu((v16u16)_1, (v16u16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmuh_wu(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmuh_wu((v8u32)_1, (v8u32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmuh_du(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvmuh_du((v4u64)_1, (v4u64)_2);
-+}
-+
-+#define __lasx_xvsllwil_h_b(/*__m256i*/ _1, /*ui3*/ _2) \
-+  ((__m256i)__builtin_lasx_xvsllwil_h_b((v32i8)(_1), (_2)))
-+
-+#define __lasx_xvsllwil_w_h(/*__m256i*/ _1, /*ui4*/ _2) \
-+  ((__m256i)__builtin_lasx_xvsllwil_w_h((v16i16)(_1), (_2)))
-+
-+#define __lasx_xvsllwil_d_w(/*__m256i*/ _1, /*ui5*/ _2) \
-+  ((__m256i)__builtin_lasx_xvsllwil_d_w((v8i32)(_1), (_2)))
-+
-+#define __lasx_xvsllwil_hu_bu(/*__m256i*/ _1, /*ui3*/ _2) \
-+  ((__m256i)__builtin_lasx_xvsllwil_hu_bu((v32u8)(_1), (_2)))
-+
-+#define __lasx_xvsllwil_wu_hu(/*__m256i*/ _1, /*ui4*/ _2) \
-+  ((__m256i)__builtin_lasx_xvsllwil_wu_hu((v16u16)(_1), (_2)))
-+
-+#define __lasx_xvsllwil_du_wu(/*__m256i*/ _1, /*ui5*/ _2) \
-+  ((__m256i)__builtin_lasx_xvsllwil_du_wu((v8u32)(_1), (_2)))
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsran_b_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsran_b_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsran_h_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsran_h_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsran_w_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsran_w_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssran_b_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssran_b_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssran_h_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssran_h_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssran_w_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssran_w_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssran_bu_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssran_bu_h((v16u16)_1, (v16u16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssran_hu_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssran_hu_w((v8u32)_1, (v8u32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssran_wu_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssran_wu_d((v4u64)_1, (v4u64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsrarn_b_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsrarn_b_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsrarn_h_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsrarn_h_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsrarn_w_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsrarn_w_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssrarn_b_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssrarn_b_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssrarn_h_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssrarn_h_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssrarn_w_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssrarn_w_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssrarn_bu_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssrarn_bu_h((v16u16)_1, (v16u16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssrarn_hu_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssrarn_hu_w((v8u32)_1, (v8u32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssrarn_wu_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssrarn_wu_d((v4u64)_1, (v4u64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsrln_b_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsrln_b_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsrln_h_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsrln_h_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsrln_w_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsrln_w_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssrln_bu_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssrln_bu_h((v16u16)_1, (v16u16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssrln_hu_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssrln_hu_w((v8u32)_1, (v8u32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssrln_wu_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssrln_wu_d((v4u64)_1, (v4u64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsrlrn_b_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsrlrn_b_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsrlrn_h_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsrlrn_h_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsrlrn_w_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsrlrn_w_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssrlrn_bu_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssrlrn_bu_h((v16u16)_1, (v16u16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssrlrn_hu_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssrlrn_hu_w((v8u32)_1, (v8u32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvssrlrn_wu_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvssrlrn_wu_d((v4u64)_1, (v4u64)_2);
-+}
-+
-+#define __lasx_xvfrstpi_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
-+  ((__m256i)__builtin_lasx_xvfrstpi_b((v32i8)(_1), (v32i8)(_2), (_3)))
-+
-+#define __lasx_xvfrstpi_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
-+  ((__m256i)__builtin_lasx_xvfrstpi_h((v16i16)(_1), (v16i16)(_2), (_3)))
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvfrstp_b(__m256i _1, __m256i _2, __m256i _3) {
-+  return (__m256i)__builtin_lasx_xvfrstp_b((v32i8)_1, (v32i8)_2, (v32i8)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvfrstp_h(__m256i _1, __m256i _2, __m256i _3) {
-+  return (__m256i)__builtin_lasx_xvfrstp_h((v16i16)_1, (v16i16)_2, (v16i16)_3);
-+}
-+
-+#define __lasx_xvshuf4i_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \
-+  ((__m256i)__builtin_lasx_xvshuf4i_d((v4i64)(_1), (v4i64)(_2), (_3)))
-+
-+#define __lasx_xvbsrl_v(/*__m256i*/ _1, /*ui5*/ _2) \
-+  ((__m256i)__builtin_lasx_xvbsrl_v((v32i8)(_1), (_2)))
-+
-+#define __lasx_xvbsll_v(/*__m256i*/ _1, /*ui5*/ _2) \
-+  ((__m256i)__builtin_lasx_xvbsll_v((v32i8)(_1), (_2)))
-+
-+#define __lasx_xvextrins_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \
-+  ((__m256i)__builtin_lasx_xvextrins_b((v32i8)(_1), (v32i8)(_2), (_3)))
-+
-+#define __lasx_xvextrins_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \
-+  ((__m256i)__builtin_lasx_xvextrins_h((v16i16)(_1), (v16i16)(_2), (_3)))
-+
-+#define __lasx_xvextrins_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \
-+  ((__m256i)__builtin_lasx_xvextrins_w((v8i32)(_1), (v8i32)(_2), (_3)))
-+
-+#define __lasx_xvextrins_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \
-+  ((__m256i)__builtin_lasx_xvextrins_d((v4i64)(_1), (v4i64)(_2), (_3)))
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmskltz_b(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvmskltz_b((v32i8)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmskltz_h(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvmskltz_h((v16i16)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmskltz_w(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvmskltz_w((v8i32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvmskltz_d(__m256i _1) {
-+  return (__m256i)__builtin_lasx_xvmskltz_d((v4i64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsigncov_b(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsigncov_b((v32i8)_1, (v32i8)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsigncov_h(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsigncov_h((v16i16)_1, (v16i16)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsigncov_w(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsigncov_w((v8i32)_1, (v8i32)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvsigncov_d(__m256i _1, __m256i _2) {
-+  return (__m256i)__builtin_lasx_xvsigncov_d((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfmadd_s(__m256 _1, __m256 _2, __m256 _3) {
-+  return (__m256)__builtin_lasx_xvfmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfmadd_d(__m256d _1, __m256d _2, __m256d _3) {
-+  return (__m256d)__builtin_lasx_xvfmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfmsub_s(__m256 _1, __m256 _2, __m256 _3) {
-+  return (__m256)__builtin_lasx_xvfmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfmsub_d(__m256d _1, __m256d _2, __m256d _3) {
-+  return (__m256d)__builtin_lasx_xvfmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfnmadd_s(__m256 _1, __m256 _2, __m256 _3) {
-+  return (__m256)__builtin_lasx_xvfnmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfnmadd_d(__m256d _1, __m256d _2, __m256d _3) {
-+  return (__m256d)__builtin_lasx_xvfnmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfnmsub_s(__m256 _1, __m256 _2, __m256 _3) {
-+  return (__m256)__builtin_lasx_xvfnmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfnmsub_d(__m256d _1, __m256d _2, __m256d _3) {
-+  return (__m256d)__builtin_lasx_xvfnmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrne_w_s(__m256 _1) {
-+  return (__m256i)__builtin_lasx_xvftintrne_w_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrne_l_d(__m256d _1) {
-+  return (__m256i)__builtin_lasx_xvftintrne_l_d((v4f64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrp_w_s(__m256 _1) {
-+  return (__m256i)__builtin_lasx_xvftintrp_w_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrp_l_d(__m256d _1) {
-+  return (__m256i)__builtin_lasx_xvftintrp_l_d((v4f64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrm_w_s(__m256 _1) {
-+  return (__m256i)__builtin_lasx_xvftintrm_w_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrm_l_d(__m256d _1) {
-+  return (__m256i)__builtin_lasx_xvftintrm_l_d((v4f64)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftint_w_d(__m256d _1, __m256d _2) {
-+  return (__m256i)__builtin_lasx_xvftint_w_d((v4f64)_1, (v4f64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvffint_s_l(__m256i _1, __m256i _2) {
-+  return (__m256)__builtin_lasx_xvffint_s_l((v4i64)_1, (v4i64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrz_w_d(__m256d _1, __m256d _2) {
-+  return (__m256i)__builtin_lasx_xvftintrz_w_d((v4f64)_1, (v4f64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrp_w_d(__m256d _1, __m256d _2) {
-+  return (__m256i)__builtin_lasx_xvftintrp_w_d((v4f64)_1, (v4f64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrm_w_d(__m256d _1, __m256d _2) {
-+  return (__m256i)__builtin_lasx_xvftintrm_w_d((v4f64)_1, (v4f64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrne_w_d(__m256d _1, __m256d _2) {
-+  return (__m256i)__builtin_lasx_xvftintrne_w_d((v4f64)_1, (v4f64)_2);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftinth_l_s(__m256 _1) {
-+  return (__m256i)__builtin_lasx_xvftinth_l_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintl_l_s(__m256 _1) {
-+  return (__m256i)__builtin_lasx_xvftintl_l_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvffinth_d_w(__m256i _1) {
-+  return (__m256d)__builtin_lasx_xvffinth_d_w((v8i32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvffintl_d_w(__m256i _1) {
-+  return (__m256d)__builtin_lasx_xvffintl_d_w((v8i32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrzh_l_s(__m256 _1) {
-+  return (__m256i)__builtin_lasx_xvftintrzh_l_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrzl_l_s(__m256 _1) {
-+  return (__m256i)__builtin_lasx_xvftintrzl_l_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrph_l_s(__m256 _1) {
-+  return (__m256i)__builtin_lasx_xvftintrph_l_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrpl_l_s(__m256 _1) {
-+  return (__m256i)__builtin_lasx_xvftintrpl_l_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
-+    __lasx_xvftintrmh_l_s(__m256 _1) {
-+  return (__m256i)__builtin_lasx_xvftintrmh_l_s((v8f32)_1);
-+}
-+
-+extern __inline
__always_inline__, __artificial__)) __m256i -+ __lasx_xvftintrml_l_s(__m256 _1) { -+ return (__m256i)__builtin_lasx_xvftintrml_l_s((v8f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvftintrneh_l_s(__m256 _1) { -+ return (__m256i)__builtin_lasx_xvftintrneh_l_s((v8f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvftintrnel_l_s(__m256 _1) { -+ return (__m256i)__builtin_lasx_xvftintrnel_l_s((v8f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 -+ __lasx_xvfrintrne_s(__m256 _1) { -+ return (__m256)__builtin_lasx_xvfrintrne_s((v8f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d -+ __lasx_xvfrintrne_d(__m256d _1) { -+ return (__m256d)__builtin_lasx_xvfrintrne_d((v4f64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 -+ __lasx_xvfrintrz_s(__m256 _1) { -+ return (__m256)__builtin_lasx_xvfrintrz_s((v8f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d -+ __lasx_xvfrintrz_d(__m256d _1) { -+ return (__m256d)__builtin_lasx_xvfrintrz_d((v4f64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 -+ __lasx_xvfrintrp_s(__m256 _1) { -+ return (__m256)__builtin_lasx_xvfrintrp_s((v8f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d -+ __lasx_xvfrintrp_d(__m256d _1) { -+ return (__m256d)__builtin_lasx_xvfrintrp_d((v4f64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 -+ __lasx_xvfrintrm_s(__m256 _1) { -+ return (__m256)__builtin_lasx_xvfrintrm_s((v8f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d -+ __lasx_xvfrintrm_d(__m256d _1) { -+ return (__m256d)__builtin_lasx_xvfrintrm_d((v4f64)_1); -+} -+ -+#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) \ -+ ((__m256i)__builtin_lasx_xvld((void const *)(_1), (_2))) -+ -+#define __lasx_xvst(/*__m256i*/ _1, /*void **/ _2, /*si12*/ _3) \ -+ ((void)__builtin_lasx_xvst((v32i8)(_1), (void *)(_2), (_3))) -+ -+#define __lasx_xvstelm_b(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ -+ /*idx*/ _4) \ -+ ((void)__builtin_lasx_xvstelm_b((v32i8)(_1), (void *)(_2), (_3), (_4))) -+ -+#define __lasx_xvstelm_h(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ -+ /*idx*/ _4) \ -+ ((void)__builtin_lasx_xvstelm_h((v16i16)(_1), (void *)(_2), (_3), (_4))) -+ -+#define __lasx_xvstelm_w(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ -+ /*idx*/ _4) \ -+ ((void)__builtin_lasx_xvstelm_w((v8i32)(_1), (void *)(_2), (_3), (_4))) -+ -+#define __lasx_xvstelm_d(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ -+ /*idx*/ _4) \ -+ ((void)__builtin_lasx_xvstelm_d((v4i64)(_1), (void *)(_2), (_3), (_4))) -+ -+#define __lasx_xvinsve0_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui3*/ _3) \ -+ ((__m256i)__builtin_lasx_xvinsve0_w((v8i32)(_1), (v8i32)(_2), (_3))) -+ -+#define __lasx_xvinsve0_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui2*/ _3) \ -+ ((__m256i)__builtin_lasx_xvinsve0_d((v4i64)(_1), (v4i64)(_2), (_3))) -+ -+#define __lasx_xvpickve_w(/*__m256i*/ _1, /*ui3*/ _2) \ -+ ((__m256i)__builtin_lasx_xvpickve_w((v8i32)(_1), (_2))) -+ -+#define __lasx_xvpickve_d(/*__m256i*/ _1, /*ui2*/ _2) \ -+ 
((__m256i)__builtin_lasx_xvpickve_d((v4i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvssrlrn_b_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvssrlrn_b_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvssrlrn_h_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvssrlrn_h_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvssrlrn_w_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvssrlrn_w_d((v4i64)_1, (v4i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvssrln_b_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvssrln_b_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvssrln_h_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvssrln_h_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvssrln_w_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvssrln_w_d((v4i64)_1, (v4i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvorn_v(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvorn_v((v32i8)_1, (v32i8)_2); -+} -+ -+#define __lasx_xvldi(/*i13*/ _1) ((__m256i)__builtin_lasx_xvldi((_1))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvldx(void const *_1, long int _2) { -+ return (__m256i)__builtin_lasx_xvldx((void const *)_1, (long int)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void -+ __lasx_xvstx(__m256i _1, void *_2, long int _3) { -+ return (void)__builtin_lasx_xvstx((v32i8)_1, (void *)_2, (long int)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvextl_qu_du(__m256i _1) { -+ return (__m256i)__builtin_lasx_xvextl_qu_du((v4u64)_1); -+} -+ -+#define __lasx_xvinsgr2vr_w(/*__m256i*/ _1, /*int*/ _2, /*ui3*/ _3) \ -+ ((__m256i)__builtin_lasx_xvinsgr2vr_w((v8i32)(_1), (int)(_2), (_3))) -+ -+#define __lasx_xvinsgr2vr_d(/*__m256i*/ _1, /*long int*/ _2, /*ui2*/ _3) \ -+ ((__m256i)__builtin_lasx_xvinsgr2vr_d((v4i64)(_1), (long int)(_2), (_3))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvreplve0_b(__m256i _1) { -+ return (__m256i)__builtin_lasx_xvreplve0_b((v32i8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvreplve0_h(__m256i _1) { -+ return (__m256i)__builtin_lasx_xvreplve0_h((v16i16)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvreplve0_w(__m256i _1) { -+ return (__m256i)__builtin_lasx_xvreplve0_w((v8i32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvreplve0_d(__m256i _1) { -+ return (__m256i)__builtin_lasx_xvreplve0_d((v4i64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvreplve0_q(__m256i _1) { -+ 
return (__m256i)__builtin_lasx_xvreplve0_q((v32i8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_vext2xv_h_b(__m256i _1) { -+ return (__m256i)__builtin_lasx_vext2xv_h_b((v32i8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_vext2xv_w_h(__m256i _1) { -+ return (__m256i)__builtin_lasx_vext2xv_w_h((v16i16)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_vext2xv_d_w(__m256i _1) { -+ return (__m256i)__builtin_lasx_vext2xv_d_w((v8i32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_vext2xv_w_b(__m256i _1) { -+ return (__m256i)__builtin_lasx_vext2xv_w_b((v32i8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_vext2xv_d_h(__m256i _1) { -+ return (__m256i)__builtin_lasx_vext2xv_d_h((v16i16)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_vext2xv_d_b(__m256i _1) { -+ return (__m256i)__builtin_lasx_vext2xv_d_b((v32i8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_vext2xv_hu_bu(__m256i _1) { -+ return (__m256i)__builtin_lasx_vext2xv_hu_bu((v32i8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_vext2xv_wu_hu(__m256i _1) { -+ return (__m256i)__builtin_lasx_vext2xv_wu_hu((v16i16)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_vext2xv_du_wu(__m256i _1) { -+ return (__m256i)__builtin_lasx_vext2xv_du_wu((v8i32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_vext2xv_wu_bu(__m256i _1) { -+ return (__m256i)__builtin_lasx_vext2xv_wu_bu((v32i8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_vext2xv_du_hu(__m256i _1) { -+ return (__m256i)__builtin_lasx_vext2xv_du_hu((v16i16)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_vext2xv_du_bu(__m256i _1) { -+ return (__m256i)__builtin_lasx_vext2xv_du_bu((v32i8)_1); -+} -+ -+#define __lasx_xvpermi_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ -+ ((__m256i)__builtin_lasx_xvpermi_q((v32i8)(_1), (v32i8)(_2), (_3))) -+ -+#define __lasx_xvpermi_d(/*__m256i*/ _1, /*ui8*/ _2) \ -+ ((__m256i)__builtin_lasx_xvpermi_d((v4i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvperm_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvperm_w((v8i32)_1, (v8i32)_2); -+} -+ -+#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) \ -+ ((__m256i)__builtin_lasx_xvldrepl_b((void const *)(_1), (_2))) -+ -+#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) \ -+ ((__m256i)__builtin_lasx_xvldrepl_h((void const *)(_1), (_2))) -+ -+#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) \ -+ ((__m256i)__builtin_lasx_xvldrepl_w((void const *)(_1), (_2))) -+ -+#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) \ -+ ((__m256i)__builtin_lasx_xvldrepl_d((void const *)(_1), (_2))) -+ -+#define __lasx_xvpickve2gr_w(/*__m256i*/ _1, /*ui3*/ _2) \ -+ ((int)__builtin_lasx_xvpickve2gr_w((v8i32)(_1), 
(_2))) -+ -+#define __lasx_xvpickve2gr_wu(/*__m256i*/ _1, /*ui3*/ _2) \ -+ ((unsigned int)__builtin_lasx_xvpickve2gr_wu((v8i32)(_1), (_2))) -+ -+#define __lasx_xvpickve2gr_d(/*__m256i*/ _1, /*ui2*/ _2) \ -+ ((long int)__builtin_lasx_xvpickve2gr_d((v4i64)(_1), (_2))) -+ -+#define __lasx_xvpickve2gr_du(/*__m256i*/ _1, /*ui2*/ _2) \ -+ ((unsigned long int)__builtin_lasx_xvpickve2gr_du((v4i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwev_q_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwev_q_d((v4i64)_1, (v4i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwev_d_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwev_d_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwev_w_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwev_w_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwev_h_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwev_h_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwev_q_du(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwev_q_du((v4u64)_1, (v4u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwev_d_wu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwev_d_wu((v8u32)_1, (v8u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwev_w_hu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwev_w_hu((v16u16)_1, (v16u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwev_h_bu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwev_h_bu((v32u8)_1, (v32u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsubwev_q_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsubwev_q_d((v4i64)_1, (v4i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsubwev_d_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsubwev_d_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsubwev_w_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsubwev_w_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsubwev_h_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsubwev_h_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsubwev_q_du(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsubwev_q_du((v4u64)_1, (v4u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsubwev_d_wu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsubwev_d_wu((v8u32)_1, (v8u32)_2); -+} -+ -+extern __inline -+ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsubwev_w_hu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsubwev_w_hu((v16u16)_1, (v16u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsubwev_h_bu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsubwev_h_bu((v32u8)_1, (v32u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwev_q_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwev_q_d((v4i64)_1, (v4i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwev_d_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwev_d_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwev_w_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwev_w_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwev_h_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwev_h_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwev_q_du(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwev_q_du((v4u64)_1, (v4u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwev_d_wu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwev_d_wu((v8u32)_1, (v8u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwev_w_hu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwev_w_hu((v16u16)_1, (v16u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwev_h_bu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwev_h_bu((v32u8)_1, (v32u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwod_q_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwod_q_d((v4i64)_1, (v4i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwod_d_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwod_d_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwod_w_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwod_w_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwod_h_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwod_h_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwod_q_du(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwod_q_du((v4u64)_1, (v4u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwod_d_wu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwod_d_wu((v8u32)_1, (v8u32)_2); -+} -+ 
-+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwod_w_hu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwod_w_hu((v16u16)_1, (v16u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwod_h_bu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwod_h_bu((v32u8)_1, (v32u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsubwod_q_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsubwod_q_d((v4i64)_1, (v4i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsubwod_d_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsubwod_d_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsubwod_w_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsubwod_w_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsubwod_h_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsubwod_h_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsubwod_q_du(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsubwod_q_du((v4u64)_1, (v4u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsubwod_d_wu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsubwod_d_wu((v8u32)_1, (v8u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsubwod_w_hu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsubwod_w_hu((v16u16)_1, (v16u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsubwod_h_bu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsubwod_h_bu((v32u8)_1, (v32u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwod_q_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwod_q_d((v4i64)_1, (v4i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwod_d_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwod_d_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwod_w_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwod_w_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwod_h_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwod_h_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwod_q_du(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwod_q_du((v4u64)_1, (v4u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwod_d_wu(__m256i _1, __m256i _2) { -+ return 
(__m256i)__builtin_lasx_xvmulwod_d_wu((v8u32)_1, (v8u32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwod_w_hu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwod_w_hu((v16u16)_1, (v16u16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwod_h_bu(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwod_h_bu((v32u8)_1, (v32u8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwev_d_wu_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwev_d_wu_w((v8u32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwev_w_hu_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwev_w_hu_h((v16u16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwev_h_bu_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwev_h_bu_b((v32u8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwev_d_wu_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwev_d_wu_w((v8u32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwev_w_hu_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwev_w_hu_h((v16u16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwev_h_bu_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwev_h_bu_b((v32u8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwod_d_wu_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwod_d_wu_w((v8u32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwod_w_hu_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwod_w_hu_h((v16u16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwod_h_bu_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwod_h_bu_b((v32u8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwod_d_wu_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwod_d_wu_w((v8u32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwod_w_hu_h(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwod_w_hu_h((v16u16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwod_h_bu_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwod_h_bu_b((v32u8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvhaddw_q_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvhaddw_q_d((v4i64)_1, (v4i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m256i -+ __lasx_xvhaddw_qu_du(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvhaddw_qu_du((v4u64)_1, (v4u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvhsubw_q_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvhsubw_q_d((v4i64)_1, (v4i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvhsubw_qu_du(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvhsubw_qu_du((v4u64)_1, (v4u64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwev_q_d(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwev_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwev_d_w(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwev_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwev_w_h(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwev_w_h((v8i32)_1, (v16i16)_2, -+ (v16i16)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwev_h_b(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwev_h_b((v16i16)_1, (v32i8)_2, -+ (v32i8)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwev_q_du(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwev_q_du((v4u64)_1, (v4u64)_2, -+ (v4u64)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwev_d_wu(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwev_d_wu((v4u64)_1, (v8u32)_2, -+ (v8u32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwev_w_hu(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwev_w_hu((v8u32)_1, (v16u16)_2, -+ (v16u16)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwev_h_bu(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwev_h_bu((v16u16)_1, (v32u8)_2, -+ (v32u8)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwod_q_d(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwod_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwod_d_w(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwod_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwod_w_h(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwod_w_h((v8i32)_1, (v16i16)_2, -+ (v16i16)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwod_h_b(__m256i _1, __m256i _2, __m256i _3) { -+ return 
(__m256i)__builtin_lasx_xvmaddwod_h_b((v16i16)_1, (v32i8)_2, -+ (v32i8)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwod_q_du(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwod_q_du((v4u64)_1, (v4u64)_2, -+ (v4u64)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwod_d_wu(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwod_d_wu((v4u64)_1, (v8u32)_2, -+ (v8u32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwod_w_hu(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwod_w_hu((v8u32)_1, (v16u16)_2, -+ (v16u16)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwod_h_bu(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwod_h_bu((v16u16)_1, (v32u8)_2, -+ (v32u8)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwev_q_du_d(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwev_q_du_d((v4i64)_1, (v4u64)_2, -+ (v4i64)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwev_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwev_d_wu_w((v4i64)_1, (v8u32)_2, -+ (v8i32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwev_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwev_w_hu_h((v8i32)_1, (v16u16)_2, -+ (v16i16)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwev_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwev_h_bu_b((v16i16)_1, (v32u8)_2, -+ (v32i8)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwod_q_du_d(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwod_q_du_d((v4i64)_1, (v4u64)_2, -+ (v4i64)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwod_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwod_d_wu_w((v4i64)_1, (v8u32)_2, -+ (v8i32)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwod_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwod_w_hu_h((v8i32)_1, (v16u16)_2, -+ (v16i16)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmaddwod_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { -+ return (__m256i)__builtin_lasx_xvmaddwod_h_bu_b((v16i16)_1, (v32u8)_2, -+ (v32i8)_3); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvrotr_b(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvrotr_b((v32i8)_1, (v32i8)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvrotr_h(__m256i _1, __m256i _2) { -+ return 
(__m256i)__builtin_lasx_xvrotr_h((v16i16)_1, (v16i16)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvrotr_w(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvrotr_w((v8i32)_1, (v8i32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvrotr_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvrotr_d((v4i64)_1, (v4i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvadd_q(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvadd_q((v4i64)_1, (v4i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvsub_q(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvsub_q((v4i64)_1, (v4i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwev_q_du_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwev_q_du_d((v4u64)_1, (v4i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvaddwod_q_du_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvaddwod_q_du_d((v4u64)_1, (v4i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwev_q_du_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwev_q_du_d((v4u64)_1, (v4i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmulwod_q_du_d(__m256i _1, __m256i _2) { -+ return (__m256i)__builtin_lasx_xvmulwod_q_du_d((v4u64)_1, (v4i64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmskgez_b(__m256i _1) { -+ return (__m256i)__builtin_lasx_xvmskgez_b((v32i8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvmsknz_b(__m256i _1) { -+ return (__m256i)__builtin_lasx_xvmsknz_b((v32i8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvexth_h_b(__m256i _1) { -+ return (__m256i)__builtin_lasx_xvexth_h_b((v32i8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvexth_w_h(__m256i _1) { -+ return (__m256i)__builtin_lasx_xvexth_w_h((v16i16)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvexth_d_w(__m256i _1) { -+ return (__m256i)__builtin_lasx_xvexth_d_w((v8i32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvexth_q_d(__m256i _1) { -+ return (__m256i)__builtin_lasx_xvexth_q_d((v4i64)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvexth_hu_bu(__m256i _1) { -+ return (__m256i)__builtin_lasx_xvexth_hu_bu((v32u8)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvexth_wu_hu(__m256i _1) { -+ return (__m256i)__builtin_lasx_xvexth_wu_hu((v16u16)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvexth_du_wu(__m256i _1) { -+ return 
(__m256i)__builtin_lasx_xvexth_du_wu((v8u32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvexth_qu_du(__m256i _1) { -+ return (__m256i)__builtin_lasx_xvexth_qu_du((v4u64)_1); -+} -+ -+#define __lasx_xvrotri_b(/*__m256i*/ _1, /*ui3*/ _2) \ -+ ((__m256i)__builtin_lasx_xvrotri_b((v32i8)(_1), (_2))) -+ -+#define __lasx_xvrotri_h(/*__m256i*/ _1, /*ui4*/ _2) \ -+ ((__m256i)__builtin_lasx_xvrotri_h((v16i16)(_1), (_2))) -+ -+#define __lasx_xvrotri_w(/*__m256i*/ _1, /*ui5*/ _2) \ -+ ((__m256i)__builtin_lasx_xvrotri_w((v8i32)(_1), (_2))) -+ -+#define __lasx_xvrotri_d(/*__m256i*/ _1, /*ui6*/ _2) \ -+ ((__m256i)__builtin_lasx_xvrotri_d((v4i64)(_1), (_2))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvextl_q_d(__m256i _1) { -+ return (__m256i)__builtin_lasx_xvextl_q_d((v4i64)_1); -+} -+ -+#define __lasx_xvsrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ -+ ((__m256i)__builtin_lasx_xvsrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) -+ -+#define __lasx_xvsrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ -+ ((__m256i)__builtin_lasx_xvsrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) -+ -+#define __lasx_xvsrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ -+ ((__m256i)__builtin_lasx_xvsrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) -+ -+#define __lasx_xvsrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ -+ ((__m256i)__builtin_lasx_xvsrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) -+ -+#define __lasx_xvsrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ -+ ((__m256i)__builtin_lasx_xvsrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) -+ -+#define __lasx_xvsrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ -+ ((__m256i)__builtin_lasx_xvsrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) -+ -+#define __lasx_xvsrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ -+ ((__m256i)__builtin_lasx_xvsrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) -+ -+#define __lasx_xvsrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ -+ ((__m256i)__builtin_lasx_xvsrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) -+ -+#define __lasx_xvssrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) -+ -+#define __lasx_xvssrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) -+ -+#define __lasx_xvssrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) -+ -+#define __lasx_xvssrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) -+ -+#define __lasx_xvssrlni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrlni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) -+ -+#define __lasx_xvssrlni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrlni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) -+ -+#define __lasx_xvssrlni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrlni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) -+ -+#define __lasx_xvssrlni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrlni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) -+ -+#define __lasx_xvssrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) -+ -+#define 
__lasx_xvssrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) -+ -+#define __lasx_xvssrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) -+ -+#define __lasx_xvssrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) -+ -+#define __lasx_xvssrlrni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrlrni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) -+ -+#define __lasx_xvssrlrni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrlrni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) -+ -+#define __lasx_xvssrlrni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrlrni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) -+ -+#define __lasx_xvssrlrni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrlrni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) -+ -+#define __lasx_xvsrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ -+ ((__m256i)__builtin_lasx_xvsrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) -+ -+#define __lasx_xvsrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ -+ ((__m256i)__builtin_lasx_xvsrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) -+ -+#define __lasx_xvsrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ -+ ((__m256i)__builtin_lasx_xvsrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) -+ -+#define __lasx_xvsrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ -+ ((__m256i)__builtin_lasx_xvsrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) -+ -+#define __lasx_xvsrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ -+ ((__m256i)__builtin_lasx_xvsrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) -+ -+#define __lasx_xvsrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ -+ ((__m256i)__builtin_lasx_xvsrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) -+ -+#define __lasx_xvsrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ -+ ((__m256i)__builtin_lasx_xvsrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) -+ -+#define __lasx_xvsrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ -+ ((__m256i)__builtin_lasx_xvsrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) -+ -+#define __lasx_xvssrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) -+ -+#define __lasx_xvssrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) -+ -+#define __lasx_xvssrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) -+ -+#define __lasx_xvssrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) -+ -+#define __lasx_xvssrani_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrani_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) -+ -+#define __lasx_xvssrani_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrani_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) -+ -+#define __lasx_xvssrani_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrani_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) -+ -+#define __lasx_xvssrani_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrani_du_q((v4u64)(_1), (v4i64)(_2), (_3))) -+ -+#define 
__lasx_xvssrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) -+ -+#define __lasx_xvssrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) -+ -+#define __lasx_xvssrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) -+ -+#define __lasx_xvssrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) -+ -+#define __lasx_xvssrarni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrarni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) -+ -+#define __lasx_xvssrarni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrarni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) -+ -+#define __lasx_xvssrarni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrarni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) -+ -+#define __lasx_xvssrarni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ -+ ((__m256i)__builtin_lasx_xvssrarni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) -+ -+#define __lasx_xbnz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_b((v32u8)(_1))) -+ -+#define __lasx_xbnz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_d((v4u64)(_1))) -+ -+#define __lasx_xbnz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_h((v16u16)(_1))) -+ -+#define __lasx_xbnz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_v((v32u8)(_1))) -+ -+#define __lasx_xbnz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_w((v8u32)(_1))) -+ -+#define __lasx_xbz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_b((v32u8)(_1))) -+ -+#define __lasx_xbz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_d((v4u64)(_1))) -+ -+#define __lasx_xbz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_h((v16u16)(_1))) -+ -+#define __lasx_xbz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_v((v32u8)(_1))) -+ -+#define __lasx_xbz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_w((v8u32)(_1))) -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_caf_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_caf_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_caf_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_caf_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_ceq_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_ceq_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_ceq_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_ceq_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_cle_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_cle_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_cle_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_cle_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_clt_d(__m256d _1, __m256d _2) { 
-+ return (__m256i)__builtin_lasx_xvfcmp_clt_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_clt_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_clt_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_cne_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_cne_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_cne_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_cne_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_cor_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_cor_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_cor_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_cor_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_cueq_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_cueq_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_cueq_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_cueq_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_cule_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_cule_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_cule_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_cule_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_cult_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_cult_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_cult_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_cult_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_cun_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_cun_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_cune_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_cune_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_cune_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_cune_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_cun_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_cun_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_saf_d(__m256d _1, __m256d _2) { -+ 
return (__m256i)__builtin_lasx_xvfcmp_saf_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_saf_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_saf_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_seq_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_seq_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_seq_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_seq_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_sle_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_sle_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_sle_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_sle_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_slt_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_slt_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_slt_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_slt_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_sne_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_sne_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_sne_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_sne_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_sor_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_sor_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_sor_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_sor_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_sueq_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_sueq_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_sueq_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_sueq_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_sule_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_sule_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_sule_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_sule_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_sult_d(__m256d _1, __m256d _2) { -+ return 
(__m256i)__builtin_lasx_xvfcmp_sult_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_sult_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_sult_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_sun_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_sun_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_sune_d(__m256d _1, __m256d _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_sune_d((v4f64)_1, (v4f64)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_sune_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_sune_s((v8f32)_1, (v8f32)_2); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i -+ __lasx_xvfcmp_sun_s(__m256 _1, __m256 _2) { -+ return (__m256i)__builtin_lasx_xvfcmp_sun_s((v8f32)_1, (v8f32)_2); -+} -+ -+#define __lasx_xvpickve_d_f(/*__m256d*/ _1, /*ui2*/ _2) \ -+ ((__m256d)__builtin_lasx_xvpickve_d_f((v4f64)(_1), (_2))) -+ -+#define __lasx_xvpickve_w_f(/*__m256*/ _1, /*ui3*/ _2) \ -+ ((__m256)__builtin_lasx_xvpickve_w_f((v8f32)(_1), (_2))) -+ -+#define __lasx_xvrepli_b(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_b((_1))) -+ -+#define __lasx_xvrepli_d(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_d((_1))) -+ -+#define __lasx_xvrepli_h(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_h((_1))) -+ -+#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1))) -+ -+#endif /* defined(__loongarch_asx). */ -+#endif /* _LOONGSON_ASXINTRIN_H. */ -diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp -index d1b015502725..5ee20554c4cf 100644 ---- a/clang/lib/Sema/SemaChecking.cpp -+++ b/clang/lib/Sema/SemaChecking.cpp -@@ -4082,6 +4082,233 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, - case LoongArch::BI__builtin_lsx_vrepli_w: - case LoongArch::BI__builtin_lsx_vrepli_d: - return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); -+ -+ // LASX intrinsics. 
-+ case LoongArch::BI__builtin_lasx_xvbitclri_b: -+ case LoongArch::BI__builtin_lasx_xvbitrevi_b: -+ case LoongArch::BI__builtin_lasx_xvbitseti_b: -+ case LoongArch::BI__builtin_lasx_xvsat_b: -+ case LoongArch::BI__builtin_lasx_xvsat_bu: -+ case LoongArch::BI__builtin_lasx_xvslli_b: -+ case LoongArch::BI__builtin_lasx_xvsrai_b: -+ case LoongArch::BI__builtin_lasx_xvsrari_b: -+ case LoongArch::BI__builtin_lasx_xvsrli_b: -+ case LoongArch::BI__builtin_lasx_xvsllwil_h_b: -+ case LoongArch::BI__builtin_lasx_xvsllwil_hu_bu: -+ case LoongArch::BI__builtin_lasx_xvrotri_b: -+ case LoongArch::BI__builtin_lasx_xvsrlri_b: -+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); -+ case LoongArch::BI__builtin_lasx_xvbitclri_h: -+ case LoongArch::BI__builtin_lasx_xvbitrevi_h: -+ case LoongArch::BI__builtin_lasx_xvbitseti_h: -+ case LoongArch::BI__builtin_lasx_xvsat_h: -+ case LoongArch::BI__builtin_lasx_xvsat_hu: -+ case LoongArch::BI__builtin_lasx_xvslli_h: -+ case LoongArch::BI__builtin_lasx_xvsrai_h: -+ case LoongArch::BI__builtin_lasx_xvsrari_h: -+ case LoongArch::BI__builtin_lasx_xvsrli_h: -+ case LoongArch::BI__builtin_lasx_xvsllwil_w_h: -+ case LoongArch::BI__builtin_lasx_xvsllwil_wu_hu: -+ case LoongArch::BI__builtin_lasx_xvrotri_h: -+ case LoongArch::BI__builtin_lasx_xvsrlri_h: -+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); -+ case LoongArch::BI__builtin_lasx_xvssrarni_b_h: -+ case LoongArch::BI__builtin_lasx_xvssrarni_bu_h: -+ case LoongArch::BI__builtin_lasx_xvssrani_b_h: -+ case LoongArch::BI__builtin_lasx_xvssrani_bu_h: -+ case LoongArch::BI__builtin_lasx_xvsrarni_b_h: -+ case LoongArch::BI__builtin_lasx_xvsrlni_b_h: -+ case LoongArch::BI__builtin_lasx_xvsrlrni_b_h: -+ case LoongArch::BI__builtin_lasx_xvssrlni_b_h: -+ case LoongArch::BI__builtin_lasx_xvssrlni_bu_h: -+ case LoongArch::BI__builtin_lasx_xvssrlrni_b_h: -+ case LoongArch::BI__builtin_lasx_xvssrlrni_bu_h: -+ case LoongArch::BI__builtin_lasx_xvsrani_b_h: -+ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); -+ case LoongArch::BI__builtin_lasx_xvslei_bu: -+ case LoongArch::BI__builtin_lasx_xvslei_hu: -+ case LoongArch::BI__builtin_lasx_xvslei_wu: -+ case LoongArch::BI__builtin_lasx_xvslei_du: -+ case LoongArch::BI__builtin_lasx_xvslti_bu: -+ case LoongArch::BI__builtin_lasx_xvslti_hu: -+ case LoongArch::BI__builtin_lasx_xvslti_wu: -+ case LoongArch::BI__builtin_lasx_xvslti_du: -+ case LoongArch::BI__builtin_lasx_xvmaxi_bu: -+ case LoongArch::BI__builtin_lasx_xvmaxi_hu: -+ case LoongArch::BI__builtin_lasx_xvmaxi_wu: -+ case LoongArch::BI__builtin_lasx_xvmaxi_du: -+ case LoongArch::BI__builtin_lasx_xvmini_bu: -+ case LoongArch::BI__builtin_lasx_xvmini_hu: -+ case LoongArch::BI__builtin_lasx_xvmini_wu: -+ case LoongArch::BI__builtin_lasx_xvmini_du: -+ case LoongArch::BI__builtin_lasx_xvaddi_bu: -+ case LoongArch::BI__builtin_lasx_xvaddi_hu: -+ case LoongArch::BI__builtin_lasx_xvaddi_wu: -+ case LoongArch::BI__builtin_lasx_xvaddi_du: -+ case LoongArch::BI__builtin_lasx_xvbitclri_w: -+ case LoongArch::BI__builtin_lasx_xvbitrevi_w: -+ case LoongArch::BI__builtin_lasx_xvbitseti_w: -+ case LoongArch::BI__builtin_lasx_xvsat_w: -+ case LoongArch::BI__builtin_lasx_xvsat_wu: -+ case LoongArch::BI__builtin_lasx_xvslli_w: -+ case LoongArch::BI__builtin_lasx_xvsrai_w: -+ case LoongArch::BI__builtin_lasx_xvsrari_w: -+ case LoongArch::BI__builtin_lasx_xvsrli_w: -+ case LoongArch::BI__builtin_lasx_xvsllwil_d_w: -+ case LoongArch::BI__builtin_lasx_xvsllwil_du_wu: -+ case LoongArch::BI__builtin_lasx_xvsrlri_w: -+ case 
LoongArch::BI__builtin_lasx_xvrotri_w: -+ case LoongArch::BI__builtin_lasx_xvsubi_bu: -+ case LoongArch::BI__builtin_lasx_xvsubi_hu: -+ case LoongArch::BI__builtin_lasx_xvsubi_wu: -+ case LoongArch::BI__builtin_lasx_xvsubi_du: -+ case LoongArch::BI__builtin_lasx_xvbsrl_v: -+ case LoongArch::BI__builtin_lasx_xvbsll_v: -+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); -+ case LoongArch::BI__builtin_lasx_xvssrarni_h_w: -+ case LoongArch::BI__builtin_lasx_xvssrarni_hu_w: -+ case LoongArch::BI__builtin_lasx_xvssrani_h_w: -+ case LoongArch::BI__builtin_lasx_xvssrani_hu_w: -+ case LoongArch::BI__builtin_lasx_xvsrarni_h_w: -+ case LoongArch::BI__builtin_lasx_xvsrani_h_w: -+ case LoongArch::BI__builtin_lasx_xvfrstpi_b: -+ case LoongArch::BI__builtin_lasx_xvfrstpi_h: -+ case LoongArch::BI__builtin_lasx_xvsrlni_h_w: -+ case LoongArch::BI__builtin_lasx_xvsrlrni_h_w: -+ case LoongArch::BI__builtin_lasx_xvssrlni_h_w: -+ case LoongArch::BI__builtin_lasx_xvssrlni_hu_w: -+ case LoongArch::BI__builtin_lasx_xvssrlrni_h_w: -+ case LoongArch::BI__builtin_lasx_xvssrlrni_hu_w: -+ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); -+ case LoongArch::BI__builtin_lasx_xvbitclri_d: -+ case LoongArch::BI__builtin_lasx_xvbitrevi_d: -+ case LoongArch::BI__builtin_lasx_xvbitseti_d: -+ case LoongArch::BI__builtin_lasx_xvsat_d: -+ case LoongArch::BI__builtin_lasx_xvsat_du: -+ case LoongArch::BI__builtin_lasx_xvslli_d: -+ case LoongArch::BI__builtin_lasx_xvsrai_d: -+ case LoongArch::BI__builtin_lasx_xvsrli_d: -+ case LoongArch::BI__builtin_lasx_xvsrari_d: -+ case LoongArch::BI__builtin_lasx_xvrotri_d: -+ case LoongArch::BI__builtin_lasx_xvsrlri_d: -+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 63); -+ case LoongArch::BI__builtin_lasx_xvssrarni_w_d: -+ case LoongArch::BI__builtin_lasx_xvssrarni_wu_d: -+ case LoongArch::BI__builtin_lasx_xvssrani_w_d: -+ case LoongArch::BI__builtin_lasx_xvssrani_wu_d: -+ case LoongArch::BI__builtin_lasx_xvsrarni_w_d: -+ case LoongArch::BI__builtin_lasx_xvsrlni_w_d: -+ case LoongArch::BI__builtin_lasx_xvsrlrni_w_d: -+ case LoongArch::BI__builtin_lasx_xvssrlni_w_d: -+ case LoongArch::BI__builtin_lasx_xvssrlni_wu_d: -+ case LoongArch::BI__builtin_lasx_xvssrlrni_w_d: -+ case LoongArch::BI__builtin_lasx_xvssrlrni_wu_d: -+ case LoongArch::BI__builtin_lasx_xvsrani_w_d: -+ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63); -+ case LoongArch::BI__builtin_lasx_xvssrarni_d_q: -+ case LoongArch::BI__builtin_lasx_xvssrarni_du_q: -+ case LoongArch::BI__builtin_lasx_xvssrani_d_q: -+ case LoongArch::BI__builtin_lasx_xvssrani_du_q: -+ case LoongArch::BI__builtin_lasx_xvsrarni_d_q: -+ case LoongArch::BI__builtin_lasx_xvssrlni_d_q: -+ case LoongArch::BI__builtin_lasx_xvssrlni_du_q: -+ case LoongArch::BI__builtin_lasx_xvssrlrni_d_q: -+ case LoongArch::BI__builtin_lasx_xvssrlrni_du_q: -+ case LoongArch::BI__builtin_lasx_xvsrani_d_q: -+ case LoongArch::BI__builtin_lasx_xvsrlni_d_q: -+ case LoongArch::BI__builtin_lasx_xvsrlrni_d_q: -+ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 127); -+ case LoongArch::BI__builtin_lasx_xvseqi_b: -+ case LoongArch::BI__builtin_lasx_xvseqi_h: -+ case LoongArch::BI__builtin_lasx_xvseqi_w: -+ case LoongArch::BI__builtin_lasx_xvseqi_d: -+ case LoongArch::BI__builtin_lasx_xvslti_b: -+ case LoongArch::BI__builtin_lasx_xvslti_h: -+ case LoongArch::BI__builtin_lasx_xvslti_w: -+ case LoongArch::BI__builtin_lasx_xvslti_d: -+ case LoongArch::BI__builtin_lasx_xvslei_b: -+ case LoongArch::BI__builtin_lasx_xvslei_h: -+ case LoongArch::BI__builtin_lasx_xvslei_w: -+ case 
LoongArch::BI__builtin_lasx_xvslei_d: -+ case LoongArch::BI__builtin_lasx_xvmaxi_b: -+ case LoongArch::BI__builtin_lasx_xvmaxi_h: -+ case LoongArch::BI__builtin_lasx_xvmaxi_w: -+ case LoongArch::BI__builtin_lasx_xvmaxi_d: -+ case LoongArch::BI__builtin_lasx_xvmini_b: -+ case LoongArch::BI__builtin_lasx_xvmini_h: -+ case LoongArch::BI__builtin_lasx_xvmini_w: -+ case LoongArch::BI__builtin_lasx_xvmini_d: -+ return SemaBuiltinConstantArgRange(TheCall, 1, -16, 15); -+ case LoongArch::BI__builtin_lasx_xvandi_b: -+ case LoongArch::BI__builtin_lasx_xvnori_b: -+ case LoongArch::BI__builtin_lasx_xvori_b: -+ case LoongArch::BI__builtin_lasx_xvshuf4i_b: -+ case LoongArch::BI__builtin_lasx_xvshuf4i_h: -+ case LoongArch::BI__builtin_lasx_xvshuf4i_w: -+ case LoongArch::BI__builtin_lasx_xvxori_b: -+ case LoongArch::BI__builtin_lasx_xvpermi_d: -+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 255); -+ case LoongArch::BI__builtin_lasx_xvbitseli_b: -+ case LoongArch::BI__builtin_lasx_xvshuf4i_d: -+ case LoongArch::BI__builtin_lasx_xvextrins_b: -+ case LoongArch::BI__builtin_lasx_xvextrins_h: -+ case LoongArch::BI__builtin_lasx_xvextrins_w: -+ case LoongArch::BI__builtin_lasx_xvextrins_d: -+ case LoongArch::BI__builtin_lasx_xvpermi_q: -+ case LoongArch::BI__builtin_lasx_xvpermi_w: -+ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 255); -+ case LoongArch::BI__builtin_lasx_xvrepl128vei_b: -+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); -+ case LoongArch::BI__builtin_lasx_xvrepl128vei_h: -+ case LoongArch::BI__builtin_lasx_xvpickve2gr_w: -+ case LoongArch::BI__builtin_lasx_xvpickve2gr_wu: -+ case LoongArch::BI__builtin_lasx_xvpickve_w_f: -+ case LoongArch::BI__builtin_lasx_xvpickve_w: -+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); -+ case LoongArch::BI__builtin_lasx_xvinsgr2vr_w: -+ case LoongArch::BI__builtin_lasx_xvinsve0_w: -+ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); -+ case LoongArch::BI__builtin_lasx_xvrepl128vei_w: -+ case LoongArch::BI__builtin_lasx_xvpickve2gr_d: -+ case LoongArch::BI__builtin_lasx_xvpickve2gr_du: -+ case LoongArch::BI__builtin_lasx_xvpickve_d_f: -+ case LoongArch::BI__builtin_lasx_xvpickve_d: -+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); -+ case LoongArch::BI__builtin_lasx_xvinsve0_d: -+ case LoongArch::BI__builtin_lasx_xvinsgr2vr_d: -+ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); -+ case LoongArch::BI__builtin_lasx_xvstelm_b: -+ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || -+ SemaBuiltinConstantArgRange(TheCall, 3, 0, 31); -+ case LoongArch::BI__builtin_lasx_xvstelm_h: -+ return SemaBuiltinConstantArgRange(TheCall, 2, -256, 254) || -+ SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); -+ case LoongArch::BI__builtin_lasx_xvstelm_w: -+ return SemaBuiltinConstantArgRange(TheCall, 2, -512, 508) || -+ SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); -+ case LoongArch::BI__builtin_lasx_xvstelm_d: -+ return SemaBuiltinConstantArgRange(TheCall, 2, -1024, 1016) || -+ SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); -+ case LoongArch::BI__builtin_lasx_xvrepl128vei_d: -+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); -+ case LoongArch::BI__builtin_lasx_xvldrepl_b: -+ case LoongArch::BI__builtin_lasx_xvld: -+ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2047); -+ case LoongArch::BI__builtin_lasx_xvldrepl_h: -+ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2046); -+ case LoongArch::BI__builtin_lasx_xvldrepl_w: -+ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2044); -+ case 
LoongArch::BI__builtin_lasx_xvldrepl_d:
-+ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2040);
-+ case LoongArch::BI__builtin_lasx_xvst:
-+ return SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047);
-+ case LoongArch::BI__builtin_lasx_xvldi:
-+ return SemaBuiltinConstantArgRange(TheCall, 0, -4096, 4095);
-+ case LoongArch::BI__builtin_lasx_xvrepli_b:
-+ case LoongArch::BI__builtin_lasx_xvrepli_h:
-+ case LoongArch::BI__builtin_lasx_xvrepli_w:
-+ case LoongArch::BI__builtin_lasx_xvrepli_d:
-+ return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511);
- }
- return false;
- }
---
-2.20.1
-
-
-From 7d01676815acf376a054bbe9b10586dbc5f80d49 Mon Sep 17 00:00:00 2001
-From: chenli
-Date: Fri, 27 Oct 2023 15:57:30 +0800
-Subject: [PATCH 6/8] [LoongArch][CodeGen] Add LSX builtin testcases
-
-(cherry picked from commit 673c530837faa5ddb45769ddee01d09e1f73d406)
----
- .../LoongArch/lsx/builtin-alias-error.c | 1359 +++++
- .../CodeGen/LoongArch/lsx/builtin-alias.c | 4451 ++++++++++++++
- .../CodeGen/LoongArch/lsx/builtin-error.c | 1382 +++++
- clang/test/CodeGen/LoongArch/lsx/builtin.c | 5193 +++++++++++++++++
- 4 files changed, 12385 insertions(+)
- create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c
- create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-alias.c
- create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-error.c
- create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin.c
-
-diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c
-new file mode 100644
-index 000000000000..69cf2254fdd7
---- /dev/null
-+++ b/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c
-@@ -0,0 +1,1359 @@
-+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s
-+
-+#include <lsxintrin.h>
-+
-+v16i8 vslli_b(v16i8 _1, int var) {
-+ v16i8 res = __lsx_vslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __lsx_vslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __lsx_vslli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i16 vslli_h(v8i16 _1, int var) {
-+ v8i16 res = __lsx_vslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __lsx_vslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __lsx_vslli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i32 vslli_w(v4i32 _1, int var) {
-+ v4i32 res = __lsx_vslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lsx_vslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lsx_vslli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v2i64 vslli_d(v2i64 _1, int var) {
-+ v2i64 res = __lsx_vslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __lsx_vslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __lsx_vslli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i8 vsrai_b(v16i8 _1, int var) {
-+ v16i8 res = __lsx_vsrai_b(_1, -1); // expected-error {{argument value
4294967295 is outside the valid range [0, 7]}} -+ res |= __lsx_vsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lsx_vsrai_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vsrai_h(v8i16 _1, int var) { -+ v8i16 res = __lsx_vsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vsrai_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vsrai_w(v4i32 _1, int var) { -+ v4i32 res = __lsx_vsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vsrai_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vsrai_d(v2i64 _1, int var) { -+ v2i64 res = __lsx_vsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vsrai_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vsrari_b(v16i8 _1, int var) { -+ v16i8 res = __lsx_vsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lsx_vsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lsx_vsrari_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vsrari_h(v8i16 _1, int var) { -+ v8i16 res = __lsx_vsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vsrari_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vsrari_w(v4i32 _1, int var) { -+ v4i32 res = __lsx_vsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vsrari_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vsrari_d(v2i64 _1, int var) { -+ v2i64 res = __lsx_vsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vsrari_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vsrli_b(v16i8 _1, int var) { -+ v16i8 res = __lsx_vsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lsx_vsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lsx_vsrli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_b' must be a constant integer}} -+ return res; 
-+} -+ -+v8i16 vsrli_h(v8i16 _1, int var) { -+ v8i16 res = __lsx_vsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vsrli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vsrli_w(v4i32 _1, int var) { -+ v4i32 res = __lsx_vsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vsrli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vsrli_d(v2i64 _1, int var) { -+ v2i64 res = __lsx_vsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vsrli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vsrlri_b(v16i8 _1, int var) { -+ v16i8 res = __lsx_vsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lsx_vsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lsx_vsrlri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vsrlri_h(v8i16 _1, int var) { -+ v8i16 res = __lsx_vsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vsrlri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vsrlri_w(v4i32 _1, int var) { -+ v4i32 res = __lsx_vsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vsrlri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vsrlri_d(v2i64 _1, int var) { -+ v2i64 res = __lsx_vsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vsrlri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_d' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vbitclri_b(v16u8 _1, int var) { -+ v16u8 res = __lsx_vbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lsx_vbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lsx_vbitclri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_b' must be a constant integer}} -+ return res; -+} -+ -+v8u16 vbitclri_h(v8u16 _1, int var) { -+ v8u16 res = __lsx_vbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 
[0, 15]}} -+ res |= __lsx_vbitclri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_h' must be a constant integer}} -+ return res; -+} -+ -+v4u32 vbitclri_w(v4u32 _1, int var) { -+ v4u32 res = __lsx_vbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vbitclri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_w' must be a constant integer}} -+ return res; -+} -+ -+v2u64 vbitclri_d(v2u64 _1, int var) { -+ v2u64 res = __lsx_vbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vbitclri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_d' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vbitseti_b(v16u8 _1, int var) { -+ v16u8 res = __lsx_vbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lsx_vbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lsx_vbitseti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_b' must be a constant integer}} -+ return res; -+} -+ -+v8u16 vbitseti_h(v8u16 _1, int var) { -+ v8u16 res = __lsx_vbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vbitseti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_h' must be a constant integer}} -+ return res; -+} -+ -+v4u32 vbitseti_w(v4u32 _1, int var) { -+ v4u32 res = __lsx_vbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vbitseti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_w' must be a constant integer}} -+ return res; -+} -+ -+v2u64 vbitseti_d(v2u64 _1, int var) { -+ v2u64 res = __lsx_vbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vbitseti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_d' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vbitrevi_b(v16u8 _1, int var) { -+ v16u8 res = __lsx_vbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lsx_vbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lsx_vbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_b' must be a constant integer}} -+ return res; -+} -+ -+v8u16 vbitrevi_h(v8u16 _1, int var) { -+ v8u16 res = __lsx_vbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_h' must be a constant integer}} -+ return res; -+} -+ -+v4u32 vbitrevi_w(v4u32 _1, 
int var) { -+ v4u32 res = __lsx_vbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_w' must be a constant integer}} -+ return res; -+} -+ -+v2u64 vbitrevi_d(v2u64 _1, int var) { -+ v2u64 res = __lsx_vbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vaddi_bu(v16i8 _1, int var) { -+ v16i8 res = __lsx_vaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vaddi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_bu' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vaddi_hu(v8i16 _1, int var) { -+ v8i16 res = __lsx_vaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vaddi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_hu' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vaddi_wu(v4i32 _1, int var) { -+ v4i32 res = __lsx_vaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vaddi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_wu' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vaddi_du(v2i64 _1, int var) { -+ v2i64 res = __lsx_vaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vaddi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_du' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vsubi_bu(v16i8 _1, int var) { -+ v16i8 res = __lsx_vsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vsubi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_bu' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vsubi_hu(v8i16 _1, int var) { -+ v8i16 res = __lsx_vsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vsubi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_hu' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vsubi_wu(v4i32 _1, int var) { -+ v4i32 res = __lsx_vsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 
31]}} -+ res |= __lsx_vsubi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_wu' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vsubi_du(v2i64 _1, int var) { -+ v2i64 res = __lsx_vsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vsubi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_du' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vmaxi_b(v16i8 _1, int var) { -+ v16i8 res = __lsx_vmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __lsx_vmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __lsx_vmaxi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vmaxi_h(v8i16 _1, int var) { -+ v8i16 res = __lsx_vmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __lsx_vmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __lsx_vmaxi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vmaxi_w(v4i32 _1, int var) { -+ v4i32 res = __lsx_vmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __lsx_vmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __lsx_vmaxi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vmaxi_d(v2i64 _1, int var) { -+ v2i64 res = __lsx_vmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __lsx_vmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __lsx_vmaxi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_d' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vmaxi_bu(v16u8 _1, int var) { -+ v16u8 res = __lsx_vmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_bu' must be a constant integer}} -+ return res; -+} -+ -+v8u16 vmaxi_hu(v8u16 _1, int var) { -+ v8u16 res = __lsx_vmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_hu' must be a constant integer}} -+ return res; -+} -+ -+v4u32 vmaxi_wu(v4u32 _1, int var) { -+ v4u32 res = __lsx_vmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_wu' must be a constant integer}} -+ return res; -+} -+ -+v2u64 vmaxi_du(v2u64 _1, int var) { -+ v2u64 res = __lsx_vmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the 
valid range [0, 31]}}
-+ res |= __lsx_vmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lsx_vmaxi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_du' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i8 vmini_b(v16i8 _1, int var) {
-+ v16i8 res = __lsx_vmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lsx_vmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lsx_vmini_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i16 vmini_h(v8i16 _1, int var) {
-+ v8i16 res = __lsx_vmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lsx_vmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lsx_vmini_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i32 vmini_w(v4i32 _1, int var) {
-+ v4i32 res = __lsx_vmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lsx_vmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lsx_vmini_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v2i64 vmini_d(v2i64 _1, int var) {
-+ v2i64 res = __lsx_vmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lsx_vmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lsx_vmini_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u8 vmini_bu(v16u8 _1, int var) {
-+ v16u8 res = __lsx_vmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lsx_vmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lsx_vmini_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_bu' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u16 vmini_hu(v8u16 _1, int var) {
-+ v8u16 res = __lsx_vmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lsx_vmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lsx_vmini_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_hu' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u32 vmini_wu(v4u32 _1, int var) {
-+ v4u32 res = __lsx_vmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lsx_vmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lsx_vmini_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_wu' must be a constant integer}}
-+ return res;
-+}
-+
-+v2u64 vmini_du(v2u64 _1, int var) {
-+ v2u64 res = __lsx_vmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lsx_vmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lsx_vmini_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_du' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i8
vseqi_b(v16i8 _1, int var) { -+ v16i8 res = __lsx_vseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __lsx_vseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __lsx_vseqi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vseqi_h(v8i16 _1, int var) { -+ v8i16 res = __lsx_vseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __lsx_vseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __lsx_vseqi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vseqi_w(v4i32 _1, int var) { -+ v4i32 res = __lsx_vseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __lsx_vseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __lsx_vseqi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vseqi_d(v2i64 _1, int var) { -+ v2i64 res = __lsx_vseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __lsx_vseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __lsx_vseqi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vslti_b(v16i8 _1, int var) { -+ v16i8 res = __lsx_vslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __lsx_vslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __lsx_vslti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vslti_h(v8i16 _1, int var) { -+ v8i16 res = __lsx_vslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __lsx_vslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __lsx_vslti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vslti_w(v4i32 _1, int var) { -+ v4i32 res = __lsx_vslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __lsx_vslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __lsx_vslti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vslti_d(v2i64 _1, int var) { -+ v2i64 res = __lsx_vslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __lsx_vslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __lsx_vslti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vslti_bu(v16u8 _1, int var) { -+ v16i8 res = __lsx_vslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vslti_bu(_1, var); // expected-error 
{{argument to '__builtin_lsx_vslti_bu' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vslti_hu(v8u16 _1, int var) { -+ v8i16 res = __lsx_vslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vslti_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_hu' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vslti_wu(v4u32 _1, int var) { -+ v4i32 res = __lsx_vslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vslti_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_wu' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vslti_du(v2u64 _1, int var) { -+ v2i64 res = __lsx_vslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vslti_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_du' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vslei_b(v16i8 _1, int var) { -+ v16i8 res = __lsx_vslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __lsx_vslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __lsx_vslei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vslei_h(v8i16 _1, int var) { -+ v8i16 res = __lsx_vslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __lsx_vslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __lsx_vslei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vslei_w(v4i32 _1, int var) { -+ v4i32 res = __lsx_vslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __lsx_vslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __lsx_vslei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vslei_d(v2i64 _1, int var) { -+ v2i64 res = __lsx_vslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __lsx_vslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __lsx_vslei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vslei_bu(v16u8 _1, int var) { -+ v16i8 res = __lsx_vslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vslei_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_bu' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vslei_hu(v8u16 _1, int var) { -+ v8i16 res = __lsx_vslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vslei_hu(_1, 32); // 
expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vslei_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_hu' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vslei_wu(v4u32 _1, int var) { -+ v4i32 res = __lsx_vslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vslei_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_wu' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vslei_du(v2u64 _1, int var) { -+ v2i64 res = __lsx_vslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vslei_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_du' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vsat_b(v16i8 _1, int var) { -+ v16i8 res = __lsx_vsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lsx_vsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lsx_vsat_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vsat_h(v8i16 _1, int var) { -+ v8i16 res = __lsx_vsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vsat_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vsat_w(v4i32 _1, int var) { -+ v4i32 res = __lsx_vsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vsat_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vsat_d(v2i64 _1, int var) { -+ v2i64 res = __lsx_vsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vsat_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_d' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vsat_bu(v16u8 _1, int var) { -+ v16u8 res = __lsx_vsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lsx_vsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lsx_vsat_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_bu' must be a constant integer}} -+ return res; -+} -+ -+v8u16 vsat_hu(v8u16 _1, int var) { -+ v8u16 res = __lsx_vsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vsat_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_hu' must be a constant integer}} -+ return res; -+} -+ -+v4u32 vsat_wu(v4u32 _1, int var) { -+ v4u32 res = __lsx_vsat_wu(_1, -1); // expected-error 
{{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vsat_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_wu' must be a constant integer}} -+ return res; -+} -+ -+v2u64 vsat_du(v2u64 _1, int var) { -+ v2u64 res = __lsx_vsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vsat_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_du' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vreplvei_b(v16i8 _1, int var) { -+ v16i8 res = __lsx_vreplvei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vreplvei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vreplvei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vreplvei_h(v8i16 _1, int var) { -+ v8i16 res = __lsx_vreplvei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lsx_vreplvei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lsx_vreplvei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vreplvei_w(v4i32 _1, int var) { -+ v4i32 res = __lsx_vreplvei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} -+ res |= __lsx_vreplvei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} -+ res |= __lsx_vreplvei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vreplvei_d(v2i64 _1, int var) { -+ v2i64 res = __lsx_vreplvei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} -+ res |= __lsx_vreplvei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} -+ res |= __lsx_vreplvei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_d' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vandi_b(v16u8 _1, int var) { -+ v16u8 res = __lsx_vandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __lsx_vandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __lsx_vandi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vandi_b' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vori_b(v16u8 _1, int var) { -+ v16u8 res = __lsx_vori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __lsx_vori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __lsx_vori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vori_b' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vnori_b(v16u8 _1, int var) { -+ v16u8 res = __lsx_vnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __lsx_vnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __lsx_vnori_b(_1, var); // expected-error {{argument to 
'__builtin_lsx_vnori_b' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vxori_b(v16u8 _1, int var) { -+ v16u8 res = __lsx_vxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __lsx_vxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __lsx_vxori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vxori_b' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vbitseli_b(v16u8 _1, v16u8 _2, int var) { -+ v16u8 res = __lsx_vbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __lsx_vbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __lsx_vbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vbitseli_b' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vshuf4i_b(v16i8 _1, int var) { -+ v16i8 res = __lsx_vshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __lsx_vshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __lsx_vshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vshuf4i_h(v8i16 _1, int var) { -+ v8i16 res = __lsx_vshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __lsx_vshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __lsx_vshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vshuf4i_w(v4i32 _1, int var) { -+ v4i32 res = __lsx_vshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __lsx_vshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __lsx_vshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_w' must be a constant integer}} -+ return res; -+} -+ -+int vpickve2gr_b(v16i8 _1, int var) { -+ int res = __lsx_vpickve2gr_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vpickve2gr_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vpickve2gr_b(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_b' must be a constant integer}} -+ return res; -+} -+ -+int vpickve2gr_h(v8i16 _1, int var) { -+ int res = __lsx_vpickve2gr_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lsx_vpickve2gr_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lsx_vpickve2gr_h(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_h' must be a constant integer}} -+ return res; -+} -+ -+int vpickve2gr_w(v4i32 _1, int var) { -+ int res = __lsx_vpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} -+ res |= __lsx_vpickve2gr_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} -+ res |= __lsx_vpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_w' must be a constant integer}} -+ return res; -+} -+ -+long vpickve2gr_d(v2i64 _1, int var) { -+ long res = __lsx_vpickve2gr_d(_1, 
-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} -+ res |= __lsx_vpickve2gr_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} -+ res |= __lsx_vpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_d' must be a constant integer}} -+ return res; -+} -+ -+unsigned int vpickve2gr_bu(v16i8 _1, int var) { -+ unsigned int res = __lsx_vpickve2gr_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vpickve2gr_bu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vpickve2gr_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_bu' must be a constant integer}} -+ return res; -+} -+ -+unsigned int vpickve2gr_hu(v8i16 _1, int var) { -+ unsigned int res = __lsx_vpickve2gr_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lsx_vpickve2gr_hu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lsx_vpickve2gr_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_hu' must be a constant integer}} -+ return res; -+} -+ -+unsigned int vpickve2gr_wu(v4i32 _1, int var) { -+ unsigned int res = __lsx_vpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} -+ res |= __lsx_vpickve2gr_wu(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} -+ res |= __lsx_vpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_wu' must be a constant integer}} -+ return res; -+} -+ -+unsigned long int vpickve2gr_du(v2i64 _1, int var) { -+ unsigned long int res = __lsx_vpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} -+ res |= __lsx_vpickve2gr_du(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} -+ res |= __lsx_vpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_du' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vinsgr2vr_b(v16i8 _1, int var) { -+ v16i8 res = __lsx_vinsgr2vr_b(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vinsgr2vr_b(_1, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vinsgr2vr_b(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vinsgr2vr_h(v8i16 _1, int var) { -+ v8i16 res = __lsx_vinsgr2vr_h(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lsx_vinsgr2vr_h(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lsx_vinsgr2vr_h(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vinsgr2vr_w(v4i32 _1, int var) { -+ v4i32 res = __lsx_vinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} -+ res |= __lsx_vinsgr2vr_w(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} -+ res |= __lsx_vinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vinsgr2vr_d(v2i64 _1, int var) { -+ v2i64 res = __lsx_vinsgr2vr_d(_1, 1, -1); // expected-error 
{{argument value 4294967295 is outside the valid range [0, 1]}} -+ res |= __lsx_vinsgr2vr_d(_1, 1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} -+ res |= __lsx_vinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_d' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vsllwil_h_b(v16i8 _1, int var) { -+ v8i16 res = __lsx_vsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lsx_vsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lsx_vsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_h_b' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vsllwil_w_h(v8i16 _1, int var) { -+ v4i32 res = __lsx_vsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_w_h' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vsllwil_d_w(v4i32 _1, int var) { -+ v2i64 res = __lsx_vsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_d_w' must be a constant integer}} -+ return res; -+} -+ -+v8u16 vsllwil_hu_bu(v16u8 _1, int var) { -+ v8u16 res = __lsx_vsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lsx_vsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lsx_vsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_hu_bu' must be a constant integer}} -+ return res; -+} -+ -+v4u32 vsllwil_wu_hu(v8u16 _1, int var) { -+ v4u32 res = __lsx_vsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_wu_hu' must be a constant integer}} -+ return res; -+} -+ -+v2u64 vsllwil_du_wu(v4u32 _1, int var) { -+ v2u64 res = __lsx_vsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_du_wu' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vfrstpi_b(v16i8 _1, v16i8 _2, int var) { -+ v16i8 res = __lsx_vfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vfrstpi_h(v8i16 _1, v8i16 _2, int var) { -+ v8i16 res = __lsx_vfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= 
__lsx_vfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_h' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vshuf4i_d(v2i64 _1, v2i64 _2, int var) { -+ v2i64 res = __lsx_vshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __lsx_vshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __lsx_vshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vbsrl_v(v16i8 _1, int var) { -+ v16i8 res = __lsx_vbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vbsrl_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsrl_v' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vbsll_v(v16i8 _1, int var) { -+ v16i8 res = __lsx_vbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vbsll_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsll_v' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vextrins_b(v16i8 _1, v16i8 _2, int var) { -+ v16i8 res = __lsx_vextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __lsx_vextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __lsx_vextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vextrins_h(v8i16 _1, v8i16 _2, int var) { -+ v8i16 res = __lsx_vextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __lsx_vextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __lsx_vextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vextrins_w(v4i32 _1, v4i32 _2, int var) { -+ v4i32 res = __lsx_vextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __lsx_vextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __lsx_vextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vextrins_d(v2i64 _1, v2i64 _2, int var) { -+ v2i64 res = __lsx_vextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __lsx_vextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __lsx_vextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_d' must be a constant integer}} -+ return res; -+} -+ -+void vstelm_b_idx(v16i8 _1, void *_2, int var) { -+ __lsx_vstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ __lsx_vstelm_b(_1, _2, 1, 16); // expected-error {{argument value 16 is outside 
the valid range [0, 15]}} -+ __lsx_vstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} -+} -+ -+void vstelm_h_idx(v8i16 _1, void *_2, int var) { -+ __lsx_vstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ __lsx_vstelm_h(_1, _2, 2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ __lsx_vstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} -+} -+ -+void vstelm_w_idx(v4i32 _1, void *_2, int var) { -+ __lsx_vstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} -+ __lsx_vstelm_w(_1, _2, 4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} -+ __lsx_vstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} -+} -+ -+void vstelm_d_idx(v2i64 _1, void *_2, int var) { -+ __lsx_vstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} -+ __lsx_vstelm_d(_1, _2, 8, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} -+ __lsx_vstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} -+} -+ -+void vstelm_b(v16i8 _1, void *_2, int var) { -+ __lsx_vstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} -+ __lsx_vstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} -+ __lsx_vstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} -+} -+ -+void vstelm_h(v8i16 _1, void *_2, int var) { -+ __lsx_vstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} -+ __lsx_vstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} -+ __lsx_vstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} -+} -+ -+void vstelm_w(v4i32 _1, void *_2, int var) { -+ __lsx_vstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} -+ __lsx_vstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} -+ __lsx_vstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} -+} -+ -+void vstelm_d(v2i64 _1, void *_2, int var) { -+ __lsx_vstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} -+ __lsx_vstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} -+ __lsx_vstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} -+} -+ -+v16i8 vldrepl_b(void *_1, int var) { -+ v16i8 res = __lsx_vldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} -+ res |= __lsx_vldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} -+ res |= __lsx_vldrepl_b(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vldrepl_h(void *_1, int var) { -+ v8i16 res = __lsx_vldrepl_h(_1, -2050); // expected-error {{argument value -2050 is 
outside the valid range [-2048, 2046]}} -+ res |= __lsx_vldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} -+ res |= __lsx_vldrepl_h(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vldrepl_w(void *_1, int var) { -+ v4i32 res = __lsx_vldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} -+ res |= __lsx_vldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} -+ res |= __lsx_vldrepl_w(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vldrepl_d(void *_1, int var) { -+ v2i64 res = __lsx_vldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} -+ res |= __lsx_vldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} -+ res |= __lsx_vldrepl_d(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vrotri_b(v16i8 _1, int var) { -+ v16i8 res = __lsx_vrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lsx_vrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lsx_vrotri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vrotri_h(v8i16 _1, int var) { -+ v8i16 res = __lsx_vrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vrotri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vrotri_w(v4i32 _1, int var) { -+ v4i32 res = __lsx_vrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vrotri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vrotri_d(v2i64 _1, int var) { -+ v2i64 res = __lsx_vrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vrotri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2, int var) { -+ v16i8 res = __lsx_vsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_b_h' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2, int var) { -+ v8i16 res = __lsx_vsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res 
|= __lsx_vsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_h_w' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2, int var) { -+ v4i32 res = __lsx_vsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_w_d' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2, int var) { -+ v2i64 res = __lsx_vsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lsx_vsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lsx_vsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_d_q' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2, int var) { -+ v16i8 res = __lsx_vsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_b_h' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2, int var) { -+ v8i16 res = __lsx_vsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_h_w' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2, int var) { -+ v4i32 res = __lsx_vsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_w_d' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2, int var) { -+ v2i64 res = __lsx_vsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lsx_vsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lsx_vsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_d_q' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2, int var) { -+ v16i8 res = __lsx_vssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_b_h' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2, int var) { -+ v8i16 res = __lsx_vssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside 
the valid range [0, 31]}} -+ res |= __lsx_vssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_h_w' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2, int var) { -+ v4i32 res = __lsx_vssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_w_d' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2, int var) { -+ v2i64 res = __lsx_vssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lsx_vssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lsx_vssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_d_q' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2, int var) { -+ v16u8 res = __lsx_vssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_bu_h' must be a constant integer}} -+ return res; -+} -+ -+v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2, int var) { -+ v8u16 res = __lsx_vssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_hu_w' must be a constant integer}} -+ return res; -+} -+ -+v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2, int var) { -+ v4u32 res = __lsx_vssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_wu_d' must be a constant integer}} -+ return res; -+} -+ -+v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2, int var) { -+ v2u64 res = __lsx_vssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lsx_vssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lsx_vssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_du_q' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2, int var) { -+ v16i8 res = __lsx_vssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_b_h' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2, int var) { -+ v8i16 res = __lsx_vssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= 
__lsx_vssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_h_w' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2, int var) { -+ v4i32 res = __lsx_vssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_w_d' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2, int var) { -+ v2i64 res = __lsx_vssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lsx_vssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lsx_vssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_d_q' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2, int var) { -+ v16u8 res = __lsx_vssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_bu_h' must be a constant integer}} -+ return res; -+} -+ -+v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2, int var) { -+ v8u16 res = __lsx_vssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_hu_w' must be a constant integer}} -+ return res; -+} -+ -+v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2, int var) { -+ v4u32 res = __lsx_vssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_wu_d' must be a constant integer}} -+ return res; -+} -+ -+v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2, int var) { -+ v2u64 res = __lsx_vssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lsx_vssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lsx_vssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_du_q' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vsrani_b_h(v16i8 _1, v16i8 _2, int var) { -+ v16i8 res = __lsx_vsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_b_h' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vsrani_h_w(v8i16 _1, v8i16 _2, int var) { -+ v8i16 res = __lsx_vsrani_h_w(_1, _2, -1); 
// expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_h_w' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vsrani_w_d(v4i32 _1, v4i32 _2, int var) { -+ v4i32 res = __lsx_vsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_w_d' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vsrani_d_q(v2i64 _1, v2i64 _2, int var) { -+ v2i64 res = __lsx_vsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lsx_vsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lsx_vsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_d_q' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2, int var) { -+ v16i8 res = __lsx_vsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_b_h' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2, int var) { -+ v8i16 res = __lsx_vsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_h_w' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2, int var) { -+ v4i32 res = __lsx_vsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_w_d' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2, int var) { -+ v2i64 res = __lsx_vsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lsx_vsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lsx_vsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_d_q' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vssrani_b_h(v16i8 _1, v16i8 _2, int var) { -+ v16i8 res = __lsx_vssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_b_h' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vssrani_h_w(v8i16 _1, v8i16 _2, int var) { -+ v8i16 res 
= __lsx_vssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_h_w' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vssrani_w_d(v4i32 _1, v4i32 _2, int var) { -+ v4i32 res = __lsx_vssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_w_d' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vssrani_d_q(v2i64 _1, v2i64 _2, int var) { -+ v2i64 res = __lsx_vssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lsx_vssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lsx_vssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_d_q' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2, int var) { -+ v16u8 res = __lsx_vssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_bu_h' must be a constant integer}} -+ return res; -+} -+ -+v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2, int var) { -+ v8u16 res = __lsx_vssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_hu_w' must be a constant integer}} -+ return res; -+} -+ -+v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2, int var) { -+ v4u32 res = __lsx_vssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_wu_d' must be a constant integer}} -+ return res; -+} -+ -+v2u64 vssrani_du_q(v2u64 _1, v2i64 _2, int var) { -+ v2u64 res = __lsx_vssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lsx_vssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lsx_vssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_du_q' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2, int var) { -+ v16i8 res = __lsx_vssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_b_h' must be a constant integer}} -+ return res; 
-+} -+ -+v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2, int var) { -+ v8i16 res = __lsx_vssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_h_w' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2, int var) { -+ v4i32 res = __lsx_vssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_w_d' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2, int var) { -+ v2i64 res = __lsx_vssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lsx_vssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lsx_vssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_d_q' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2, int var) { -+ v16u8 res = __lsx_vssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lsx_vssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lsx_vssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_bu_h' must be a constant integer}} -+ return res; -+} -+ -+v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2, int var) { -+ v8u16 res = __lsx_vssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lsx_vssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lsx_vssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_hu_w' must be a constant integer}} -+ return res; -+} -+ -+v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2, int var) { -+ v4u32 res = __lsx_vssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lsx_vssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lsx_vssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_wu_d' must be a constant integer}} -+ return res; -+} -+ -+v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2, int var) { -+ v2u64 res = __lsx_vssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lsx_vssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lsx_vssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_du_q' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vpermi_w(v4i32 _1, v4i32 _2, int var) { -+ v4i32 res = __lsx_vpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __lsx_vpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __lsx_vpermi_w(_1, _2, var); // 
expected-error {{argument to '__builtin_lsx_vpermi_w' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vld(void *_1, int var) { -+ v16i8 res = __lsx_vld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} -+ res |= __lsx_vld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} -+ res |= __lsx_vld(_1, var); // expected-error {{argument to '__builtin_lsx_vld' must be a constant integer}} -+ return res; -+} -+ -+void vst(v16i8 _1, void *_2, int var) { -+ __lsx_vst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} -+ __lsx_vst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} -+ __lsx_vst(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vst' must be a constant integer}} -+} -+ -+v2i64 vldi(int var) { -+ v2i64 res = __lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} -+ res |= __lsx_vldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} -+ res |= __lsx_vldi(var); // expected-error {{argument to '__builtin_lsx_vldi' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vrepli_b(int var) { -+ v16i8 res = __lsx_vrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} -+ res |= __lsx_vrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} -+ res |= __lsx_vrepli_b(var); // expected-error {{argument to '__builtin_lsx_vrepli_b' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vrepli_d(int var) { -+ v2i64 res = __lsx_vrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} -+ res |= __lsx_vrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} -+ res |= __lsx_vrepli_d(var); // expected-error {{argument to '__builtin_lsx_vrepli_d' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vrepli_h(int var) { -+ v8i16 res = __lsx_vrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} -+ res |= __lsx_vrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} -+ res |= __lsx_vrepli_h(var); // expected-error {{argument to '__builtin_lsx_vrepli_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vrepli_w(int var) { -+ v4i32 res = __lsx_vrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} -+ res |= __lsx_vrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} -+ res |= __lsx_vrepli_w(var); // expected-error {{argument to '__builtin_lsx_vrepli_w' must be a constant integer}} -+ return res; -+} -diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c -new file mode 100644 -index 000000000000..331e29fb7d17 ---- /dev/null -+++ b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c -@@ -0,0 +1,4451 @@ -+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s -+ -+#include <lsxintrin.h> -+ -+// CHECK-LABEL: @vsll_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsll_b(v16i8 _1,
v16i8 _2) { return __lsx_vsll_b(_1, _2); } -+// CHECK-LABEL: @vsll_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __lsx_vsll_h(_1, _2); } -+// CHECK-LABEL: @vsll_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __lsx_vsll_w(_1, _2); } -+// CHECK-LABEL: @vsll_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __lsx_vsll_d(_1, _2); } -+// CHECK-LABEL: @vslli_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vslli_b(v16i8 _1) { return __lsx_vslli_b(_1, 1); } -+// CHECK-LABEL: @vslli_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vslli_h(v8i16 _1) { return __lsx_vslli_h(_1, 1); } -+// CHECK-LABEL: @vslli_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vslli_w(v4i32 _1) { return __lsx_vslli_w(_1, 1); } -+// CHECK-LABEL: @vslli_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vslli_d(v2i64 _1) { return __lsx_vslli_d(_1, 1); } -+// CHECK-LABEL: @vsra_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __lsx_vsra_b(_1, _2); } -+// CHECK-LABEL: @vsra_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __lsx_vsra_h(_1, _2); } -+// CHECK-LABEL: @vsra_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __lsx_vsra_w(_1, _2); } -+// CHECK-LABEL: @vsra_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __lsx_vsra_d(_1, _2); } -+// CHECK-LABEL: @vsrai_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsrai_b(v16i8 _1) { return __lsx_vsrai_b(_1, 1); } -+// CHECK-LABEL: @vsrai_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: 
ret <8 x i16> [[TMP0]] -+// -+v8i16 vsrai_h(v8i16 _1) { return __lsx_vsrai_h(_1, 1); } -+// CHECK-LABEL: @vsrai_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsrai_w(v4i32 _1) { return __lsx_vsrai_w(_1, 1); } -+// CHECK-LABEL: @vsrai_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsrai_d(v2i64 _1) { return __lsx_vsrai_d(_1, 1); } -+// CHECK-LABEL: @vsrar_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsrar_b(v16i8 _1, v16i8 _2) { return __lsx_vsrar_b(_1, _2); } -+// CHECK-LABEL: @vsrar_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsrar_h(v8i16 _1, v8i16 _2) { return __lsx_vsrar_h(_1, _2); } -+// CHECK-LABEL: @vsrar_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsrar_w(v4i32 _1, v4i32 _2) { return __lsx_vsrar_w(_1, _2); } -+// CHECK-LABEL: @vsrar_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsrar_d(v2i64 _1, v2i64 _2) { return __lsx_vsrar_d(_1, _2); } -+// CHECK-LABEL: @vsrari_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsrari_b(v16i8 _1) { return __lsx_vsrari_b(_1, 1); } -+// CHECK-LABEL: @vsrari_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsrari_h(v8i16 _1) { return __lsx_vsrari_h(_1, 1); } -+// CHECK-LABEL: @vsrari_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsrari_w(v4i32 _1) { return __lsx_vsrari_w(_1, 1); } -+// CHECK-LABEL: @vsrari_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsrari_d(v2i64 _1) { return __lsx_vsrari_d(_1, 1); } -+// CHECK-LABEL: @vsrl_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __lsx_vsrl_b(_1, _2); } -+// CHECK-LABEL: @vsrl_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __lsx_vsrl_h(_1, _2); } -+// CHECK-LABEL: @vsrl_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vsrl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __lsx_vsrl_w(_1, _2); } -+// CHECK-LABEL: @vsrl_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __lsx_vsrl_d(_1, _2); } -+// CHECK-LABEL: @vsrli_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsrli_b(v16i8 _1) { return __lsx_vsrli_b(_1, 1); } -+// CHECK-LABEL: @vsrli_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsrli_h(v8i16 _1) { return __lsx_vsrli_h(_1, 1); } -+// CHECK-LABEL: @vsrli_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsrli_w(v4i32 _1) { return __lsx_vsrli_w(_1, 1); } -+// CHECK-LABEL: @vsrli_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsrli_d(v2i64 _1) { return __lsx_vsrli_d(_1, 1); } -+// CHECK-LABEL: @vsrlr_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { return __lsx_vsrlr_b(_1, _2); } -+// CHECK-LABEL: @vsrlr_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlr_h(_1, _2); } -+// CHECK-LABEL: @vsrlr_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlr_w(_1, _2); } -+// CHECK-LABEL: @vsrlr_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlr_d(_1, _2); } -+// CHECK-LABEL: @vsrlri_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsrlri_b(v16i8 _1) { return __lsx_vsrlri_b(_1, 1); } -+// CHECK-LABEL: @vsrlri_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsrlri_h(v8i16 _1) { return __lsx_vsrlri_h(_1, 1); } -+// CHECK-LABEL: @vsrlri_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsrlri_w(v4i32 _1) { return __lsx_vsrlri_w(_1, 1); } -+// CHECK-LABEL: @vsrlri_d( -+// CHECK-NEXT: entry: 
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsrlri_d(v2i64 _1) { return __lsx_vsrlri_d(_1, 1); } -+// CHECK-LABEL: @vbitclr_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { return __lsx_vbitclr_b(_1, _2); } -+// CHECK-LABEL: @vbitclr_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { return __lsx_vbitclr_h(_1, _2); } -+// CHECK-LABEL: @vbitclr_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { return __lsx_vbitclr_w(_1, _2); } -+// CHECK-LABEL: @vbitclr_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { return __lsx_vbitclr_d(_1, _2); } -+// CHECK-LABEL: @vbitclri_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vbitclri_b(v16u8 _1) { return __lsx_vbitclri_b(_1, 1); } -+// CHECK-LABEL: @vbitclri_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vbitclri_h(v8u16 _1) { return __lsx_vbitclri_h(_1, 1); } -+// CHECK-LABEL: @vbitclri_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vbitclri_w(v4u32 _1) { return __lsx_vbitclri_w(_1, 1); } -+// CHECK-LABEL: @vbitclri_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vbitclri_d(v2u64 _1) { return __lsx_vbitclri_d(_1, 1); } -+// CHECK-LABEL: @vbitset_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vbitset_b(v16u8 _1, v16u8 _2) { return __lsx_vbitset_b(_1, _2); } -+// CHECK-LABEL: @vbitset_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vbitset_h(v8u16 _1, v8u16 _2) { return __lsx_vbitset_h(_1, _2); } -+// CHECK-LABEL: @vbitset_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vbitset_w(v4u32 _1, v4u32 _2) { return __lsx_vbitset_w(_1, _2); } -+// CHECK-LABEL: @vbitset_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vbitset.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vbitset_d(v2u64 _1, v2u64 _2) { return __lsx_vbitset_d(_1, _2); } -+// CHECK-LABEL: @vbitseti_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vbitseti_b(v16u8 _1) { return __lsx_vbitseti_b(_1, 1); } -+// CHECK-LABEL: @vbitseti_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vbitseti_h(v8u16 _1) { return __lsx_vbitseti_h(_1, 1); } -+// CHECK-LABEL: @vbitseti_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vbitseti_w(v4u32 _1) { return __lsx_vbitseti_w(_1, 1); } -+// CHECK-LABEL: @vbitseti_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vbitseti_d(v2u64 _1) { return __lsx_vbitseti_d(_1, 1); } -+// CHECK-LABEL: @vbitrev_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { return __lsx_vbitrev_b(_1, _2); } -+// CHECK-LABEL: @vbitrev_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { return __lsx_vbitrev_h(_1, _2); } -+// CHECK-LABEL: @vbitrev_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { return __lsx_vbitrev_w(_1, _2); } -+// CHECK-LABEL: @vbitrev_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { return __lsx_vbitrev_d(_1, _2); } -+// CHECK-LABEL: @vbitrevi_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vbitrevi_b(v16u8 _1) { return __lsx_vbitrevi_b(_1, 1); } -+// CHECK-LABEL: @vbitrevi_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vbitrevi_h(v8u16 _1) { return __lsx_vbitrevi_h(_1, 1); } -+// CHECK-LABEL: @vbitrevi_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vbitrevi_w(v4u32 _1) { return __lsx_vbitrevi_w(_1, 1); } -+// CHECK-LABEL: @vbitrevi_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 
vbitrevi_d(v2u64 _1) { return __lsx_vbitrevi_d(_1, 1); } -+// CHECK-LABEL: @vadd_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __lsx_vadd_b(_1, _2); } -+// CHECK-LABEL: @vadd_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __lsx_vadd_h(_1, _2); } -+// CHECK-LABEL: @vadd_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __lsx_vadd_w(_1, _2); } -+// CHECK-LABEL: @vadd_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __lsx_vadd_d(_1, _2); } -+// CHECK-LABEL: @vaddi_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vaddi_bu(v16i8 _1) { return __lsx_vaddi_bu(_1, 1); } -+// CHECK-LABEL: @vaddi_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vaddi_hu(v8i16 _1) { return __lsx_vaddi_hu(_1, 1); } -+// CHECK-LABEL: @vaddi_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vaddi_wu(v4i32 _1) { return __lsx_vaddi_wu(_1, 1); } -+// CHECK-LABEL: @vaddi_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vaddi_du(v2i64 _1) { return __lsx_vaddi_du(_1, 1); } -+// CHECK-LABEL: @vsub_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __lsx_vsub_b(_1, _2); } -+// CHECK-LABEL: @vsub_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __lsx_vsub_h(_1, _2); } -+// CHECK-LABEL: @vsub_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __lsx_vsub_w(_1, _2); } -+// CHECK-LABEL: @vsub_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __lsx_vsub_d(_1, _2); } -+// CHECK-LABEL: @vsubi_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsubi_bu(v16i8 _1) { return __lsx_vsubi_bu(_1, 1); }
-+// CHECK-LABEL: @vsubi_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsubi_hu(v8i16 _1) { return __lsx_vsubi_hu(_1, 1); }
-+// CHECK-LABEL: @vsubi_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsubi_wu(v4i32 _1) { return __lsx_vsubi_wu(_1, 1); }
-+// CHECK-LABEL: @vsubi_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsubi_du(v2i64 _1) { return __lsx_vsubi_du(_1, 1); }
-+// CHECK-LABEL: @vmax_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __lsx_vmax_b(_1, _2); }
-+// CHECK-LABEL: @vmax_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __lsx_vmax_h(_1, _2); }
-+// CHECK-LABEL: @vmax_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __lsx_vmax_w(_1, _2); }
-+// CHECK-LABEL: @vmax_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __lsx_vmax_d(_1, _2); }
-+// CHECK-LABEL: @vmaxi_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vmaxi_b(v16i8 _1) { return __lsx_vmaxi_b(_1, 1); }
-+// CHECK-LABEL: @vmaxi_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmaxi_h(v8i16 _1) { return __lsx_vmaxi_h(_1, 1); }
-+// CHECK-LABEL: @vmaxi_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmaxi_w(v4i32 _1) { return __lsx_vmaxi_w(_1, 1); }
-+// CHECK-LABEL: @vmaxi_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmaxi_d(v2i64 _1) { return __lsx_vmaxi_d(_1, 1); }
-+// CHECK-LABEL: @vmax_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vmax_bu(v16u8 _1, v16u8 _2) { return __lsx_vmax_bu(_1, _2); }
-+// CHECK-LABEL: @vmax_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vmax_hu(v8u16 _1, v8u16 _2) { return __lsx_vmax_hu(_1, _2); }
-+// CHECK-LABEL: @vmax_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vmax_wu(v4u32 _1, v4u32 _2) { return __lsx_vmax_wu(_1, _2); }
-+// CHECK-LABEL: @vmax_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vmax_du(v2u64 _1, v2u64 _2) { return __lsx_vmax_du(_1, _2); }
-+// CHECK-LABEL: @vmaxi_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vmaxi_bu(v16u8 _1) { return __lsx_vmaxi_bu(_1, 1); }
-+// CHECK-LABEL: @vmaxi_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vmaxi_hu(v8u16 _1) { return __lsx_vmaxi_hu(_1, 1); }
-+// CHECK-LABEL: @vmaxi_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vmaxi_wu(v4u32 _1) { return __lsx_vmaxi_wu(_1, 1); }
-+// CHECK-LABEL: @vmaxi_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vmaxi_du(v2u64 _1) { return __lsx_vmaxi_du(_1, 1); }
-+// CHECK-LABEL: @vmin_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __lsx_vmin_b(_1, _2); }
-+// CHECK-LABEL: @vmin_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __lsx_vmin_h(_1, _2); }
-+// CHECK-LABEL: @vmin_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __lsx_vmin_w(_1, _2); }
-+// CHECK-LABEL: @vmin_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __lsx_vmin_d(_1, _2); }
-+// CHECK-LABEL: @vmini_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vmini_b(v16i8 _1) { return __lsx_vmini_b(_1, 1); }
-+// CHECK-LABEL: @vmini_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmini_h(v8i16 _1) { return __lsx_vmini_h(_1, 1); }
-+// CHECK-LABEL: @vmini_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmini_w(v4i32 _1) { return __lsx_vmini_w(_1, 1); }
-+// CHECK-LABEL: @vmini_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmini_d(v2i64 _1) { return __lsx_vmini_d(_1, 1); }
-+// CHECK-LABEL: @vmin_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vmin_bu(v16u8 _1, v16u8 _2) { return __lsx_vmin_bu(_1, _2); }
-+// CHECK-LABEL: @vmin_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vmin_hu(v8u16 _1, v8u16 _2) { return __lsx_vmin_hu(_1, _2); }
-+// CHECK-LABEL: @vmin_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vmin_wu(v4u32 _1, v4u32 _2) { return __lsx_vmin_wu(_1, _2); }
-+// CHECK-LABEL: @vmin_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vmin_du(v2u64 _1, v2u64 _2) { return __lsx_vmin_du(_1, _2); }
-+// CHECK-LABEL: @vmini_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vmini_bu(v16u8 _1) { return __lsx_vmini_bu(_1, 1); }
-+// CHECK-LABEL: @vmini_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vmini_hu(v8u16 _1) { return __lsx_vmini_hu(_1, 1); }
-+// CHECK-LABEL: @vmini_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vmini_wu(v4u32 _1) { return __lsx_vmini_wu(_1, 1); }
-+// CHECK-LABEL: @vmini_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vmini_du(v2u64 _1) { return __lsx_vmini_du(_1, 1); }
-+// CHECK-LABEL: @vseq_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __lsx_vseq_b(_1, _2); }
-+// CHECK-LABEL: @vseq_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __lsx_vseq_h(_1, _2); }
-+// CHECK-LABEL: @vseq_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __lsx_vseq_w(_1, _2); }
-+// CHECK-LABEL: @vseq_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __lsx_vseq_d(_1, _2); }
-+// CHECK-LABEL: @vseqi_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vseqi_b(v16i8 _1) { return __lsx_vseqi_b(_1, 1); }
-+// CHECK-LABEL: @vseqi_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vseqi_h(v8i16 _1) { return __lsx_vseqi_h(_1, 1); }
-+// CHECK-LABEL: @vseqi_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vseqi_w(v4i32 _1) { return __lsx_vseqi_w(_1, 1); }
-+// CHECK-LABEL: @vseqi_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vseqi_d(v2i64 _1) { return __lsx_vseqi_d(_1, 1); }
-+// CHECK-LABEL: @vslti_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vslti_b(v16i8 _1) { return __lsx_vslti_b(_1, 1); }
-+// CHECK-LABEL: @vslt_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __lsx_vslt_b(_1, _2); }
-+// CHECK-LABEL: @vslt_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __lsx_vslt_h(_1, _2); }
-+// CHECK-LABEL: @vslt_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __lsx_vslt_w(_1, _2); }
-+// CHECK-LABEL: @vslt_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __lsx_vslt_d(_1, _2); }
-+// CHECK-LABEL: @vslti_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vslti_h(v8i16 _1) { return __lsx_vslti_h(_1, 1); }
-+// CHECK-LABEL: @vslti_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vslti_w(v4i32 _1) { return __lsx_vslti_w(_1, 1); }
-+// CHECK-LABEL: @vslti_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vslti_d(v2i64 _1) { return __lsx_vslti_d(_1, 1); }
-+// CHECK-LABEL: @vslt_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vslt_bu(v16u8 _1, v16u8 _2) { return __lsx_vslt_bu(_1, _2); }
-+// CHECK-LABEL: @vslt_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vslt_hu(v8u16 _1, v8u16 _2) { return __lsx_vslt_hu(_1, _2); }
-+// CHECK-LABEL: @vslt_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vslt_wu(v4u32 _1, v4u32 _2) { return __lsx_vslt_wu(_1, _2); }
-+// CHECK-LABEL: @vslt_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vslt_du(v2u64 _1, v2u64 _2) { return __lsx_vslt_du(_1, _2); }
-+// CHECK-LABEL: @vslti_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vslti_bu(v16u8 _1) { return __lsx_vslti_bu(_1, 1); }
-+// CHECK-LABEL: @vslti_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vslti_hu(v8u16 _1) { return __lsx_vslti_hu(_1, 1); }
-+// CHECK-LABEL: @vslti_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vslti_wu(v4u32 _1) { return __lsx_vslti_wu(_1, 1); }
-+// CHECK-LABEL: @vslti_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vslti_du(v2u64 _1) { return __lsx_vslti_du(_1, 1); }
-+// CHECK-LABEL: @vsle_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __lsx_vsle_b(_1, _2); }
-+// CHECK-LABEL: @vsle_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __lsx_vsle_h(_1, _2); }
-+// CHECK-LABEL: @vsle_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __lsx_vsle_w(_1, _2); }
-+// CHECK-LABEL: @vsle_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __lsx_vsle_d(_1, _2); }
-+// CHECK-LABEL: @vslei_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vslei_b(v16i8 _1) { return __lsx_vslei_b(_1, 1); }
-+// CHECK-LABEL: @vslei_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vslei_h(v8i16 _1) { return __lsx_vslei_h(_1, 1); }
-+// CHECK-LABEL: @vslei_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vslei_w(v4i32 _1) { return __lsx_vslei_w(_1, 1); }
-+// CHECK-LABEL: @vslei_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vslei_d(v2i64 _1) { return __lsx_vslei_d(_1, 1); }
-+// CHECK-LABEL: @vsle_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsle_bu(v16u8 _1, v16u8 _2) { return __lsx_vsle_bu(_1, _2); }
-+// CHECK-LABEL: @vsle_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsle_hu(v8u16 _1, v8u16 _2) { return __lsx_vsle_hu(_1, _2); }
-+// CHECK-LABEL: @vsle_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsle_wu(v4u32 _1, v4u32 _2) { return __lsx_vsle_wu(_1, _2); }
-+// CHECK-LABEL: @vsle_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsle_du(v2u64 _1, v2u64 _2) { return __lsx_vsle_du(_1, _2); }
-+// CHECK-LABEL: @vslei_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vslei_bu(v16u8 _1) { return __lsx_vslei_bu(_1, 1); }
-+// CHECK-LABEL: @vslei_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vslei_hu(v8u16 _1) { return __lsx_vslei_hu(_1, 1); }
-+// CHECK-LABEL: @vslei_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vslei_wu(v4u32 _1) { return __lsx_vslei_wu(_1, 1); }
-+// CHECK-LABEL: @vslei_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vslei_du(v2u64 _1) { return __lsx_vslei_du(_1, 1); }
-+// CHECK-LABEL: @vsat_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsat_b(v16i8 _1) { return __lsx_vsat_b(_1, 1); }
-+// CHECK-LABEL: @vsat_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsat_h(v8i16 _1) { return __lsx_vsat_h(_1, 1); }
-+// CHECK-LABEL: @vsat_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsat_w(v4i32 _1) { return __lsx_vsat_w(_1, 1); }
-+// CHECK-LABEL: @vsat_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsat_d(v2i64 _1) { return __lsx_vsat_d(_1, 1); }
-+// CHECK-LABEL: @vsat_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vsat_bu(v16u8 _1) { return __lsx_vsat_bu(_1, 1); }
-+// CHECK-LABEL: @vsat_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vsat_hu(v8u16 _1) { return __lsx_vsat_hu(_1, 1); }
-+// CHECK-LABEL: @vsat_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vsat_wu(v4u32 _1) { return __lsx_vsat_wu(_1, 1); }
-+// CHECK-LABEL: @vsat_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vsat_du(v2u64 _1) { return __lsx_vsat_du(_1, 1); }
-+// CHECK-LABEL: @vadda_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vadda_b(v16i8 _1, v16i8 _2) { return __lsx_vadda_b(_1, _2); }
-+// CHECK-LABEL: @vadda_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vadda_h(v8i16 _1, v8i16 _2) { return __lsx_vadda_h(_1, _2); }
-+// CHECK-LABEL: @vadda_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vadda_w(v4i32 _1, v4i32 _2) { return __lsx_vadda_w(_1, _2); }
-+// CHECK-LABEL: @vadda_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vadda_d(v2i64 _1, v2i64 _2) { return __lsx_vadda_d(_1, _2); }
-+// CHECK-LABEL: @vsadd_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsadd_b(v16i8 _1, v16i8 _2) { return __lsx_vsadd_b(_1, _2); }
-+// CHECK-LABEL: @vsadd_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsadd_h(v8i16 _1, v8i16 _2) { return __lsx_vsadd_h(_1, _2); }
-+// CHECK-LABEL: @vsadd_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsadd_w(v4i32 _1, v4i32 _2) { return __lsx_vsadd_w(_1, _2); }
-+// CHECK-LABEL: @vsadd_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsadd_d(v2i64 _1, v2i64 _2) { return __lsx_vsadd_d(_1, _2); }
-+// CHECK-LABEL: @vsadd_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { return __lsx_vsadd_bu(_1, _2); }
-+// CHECK-LABEL: @vsadd_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { return __lsx_vsadd_hu(_1, _2); }
-+// CHECK-LABEL: @vsadd_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { return __lsx_vsadd_wu(_1, _2); }
-+// CHECK-LABEL: @vsadd_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vsadd_du(v2u64 _1, v2u64 _2) { return __lsx_vsadd_du(_1, _2); }
-+// CHECK-LABEL: @vavg_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __lsx_vavg_b(_1, _2); }
-+// CHECK-LABEL: @vavg_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __lsx_vavg_h(_1, _2); }
-+// CHECK-LABEL: @vavg_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __lsx_vavg_w(_1, _2); }
-+// CHECK-LABEL: @vavg_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __lsx_vavg_d(_1, _2); }
-+// CHECK-LABEL: @vavg_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vavg_bu(v16u8 _1, v16u8 _2) { return __lsx_vavg_bu(_1, _2); }
-+// CHECK-LABEL: @vavg_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vavg_hu(v8u16 _1, v8u16 _2) { return __lsx_vavg_hu(_1, _2); }
-+// CHECK-LABEL: @vavg_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vavg_wu(v4u32 _1, v4u32 _2) { return __lsx_vavg_wu(_1, _2); }
-+// CHECK-LABEL: @vavg_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vavg_du(v2u64 _1, v2u64 _2) { return __lsx_vavg_du(_1, _2); }
-+// CHECK-LABEL: @vavgr_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vavgr_b(v16i8 _1, v16i8 _2) { return __lsx_vavgr_b(_1, _2); }
-+// CHECK-LABEL: @vavgr_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vavgr_h(v8i16 _1, v8i16 _2) { return __lsx_vavgr_h(_1, _2); }
-+// CHECK-LABEL: @vavgr_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vavgr_w(v4i32 _1, v4i32 _2) { return __lsx_vavgr_w(_1, _2); }
-+// CHECK-LABEL: @vavgr_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vavgr_d(v2i64 _1, v2i64 _2) { return __lsx_vavgr_d(_1, _2); }
-+// CHECK-LABEL: @vavgr_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { return __lsx_vavgr_bu(_1, _2); }
-+// CHECK-LABEL: @vavgr_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { return __lsx_vavgr_hu(_1, _2); }
-+// CHECK-LABEL: @vavgr_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { return __lsx_vavgr_wu(_1, _2); }
-+// CHECK-LABEL: @vavgr_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vavgr_du(v2u64 _1, v2u64 _2) { return __lsx_vavgr_du(_1, _2); }
-+// CHECK-LABEL: @vssub_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vssub_b(v16i8 _1, v16i8 _2) { return __lsx_vssub_b(_1, _2); }
-+// CHECK-LABEL: @vssub_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vssub_h(v8i16 _1, v8i16 _2) { return __lsx_vssub_h(_1, _2); }
-+// CHECK-LABEL: @vssub_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vssub_w(v4i32 _1, v4i32 _2) { return __lsx_vssub_w(_1, _2); }
-+// CHECK-LABEL: @vssub_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vssub_d(v2i64 _1, v2i64 _2) { return __lsx_vssub_d(_1, _2); }
-+// CHECK-LABEL: @vssub_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vssub_bu(v16u8 _1, v16u8 _2) { return __lsx_vssub_bu(_1, _2); }
-+// CHECK-LABEL: @vssub_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vssub_hu(v8u16 _1, v8u16 _2) { return __lsx_vssub_hu(_1, _2); }
-+// CHECK-LABEL: @vssub_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vssub_wu(v4u32 _1, v4u32 _2) { return __lsx_vssub_wu(_1, _2); }
-+// CHECK-LABEL: @vssub_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vssub_du(v2u64 _1, v2u64 _2) { return __lsx_vssub_du(_1, _2); }
-+// CHECK-LABEL: @vabsd_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vabsd_b(v16i8 _1, v16i8 _2) { return __lsx_vabsd_b(_1, _2); }
-+// CHECK-LABEL: @vabsd_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vabsd_h(v8i16 _1, v8i16 _2) { return __lsx_vabsd_h(_1, _2); }
-+// CHECK-LABEL: @vabsd_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vabsd_w(v4i32 _1, v4i32 _2) { return __lsx_vabsd_w(_1, _2); }
-+// CHECK-LABEL: @vabsd_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vabsd_d(v2i64 _1, v2i64 _2) { return __lsx_vabsd_d(_1, _2); }
-+// CHECK-LABEL: @vabsd_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { return __lsx_vabsd_bu(_1, _2); }
-+// CHECK-LABEL: @vabsd_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { return __lsx_vabsd_hu(_1, _2); }
-+// CHECK-LABEL: @vabsd_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { return __lsx_vabsd_wu(_1, _2); }
-+// CHECK-LABEL: @vabsd_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vabsd_du(v2u64 _1, v2u64 _2) { return __lsx_vabsd_du(_1, _2); }
-+// CHECK-LABEL: @vmul_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __lsx_vmul_b(_1, _2); }
-+// CHECK-LABEL: @vmul_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __lsx_vmul_h(_1, _2); }
-+// CHECK-LABEL: @vmul_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __lsx_vmul_w(_1, _2); }
-+// CHECK-LABEL: @vmul_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __lsx_vmul_d(_1, _2); }
-+// CHECK-LABEL: @vmadd_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) {
-+ return __lsx_vmadd_b(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmadd_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) {
-+ return __lsx_vmadd_h(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmadd_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) {
-+ return __lsx_vmadd_w(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmadd_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) {
-+ return __lsx_vmadd_d(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmsub_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) {
-+ return __lsx_vmsub_b(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmsub_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) {
-+ return __lsx_vmsub_h(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmsub_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) {
-+ return __lsx_vmsub_w(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmsub_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) {
-+ return __lsx_vmsub_d(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vdiv_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __lsx_vdiv_b(_1, _2); }
-+// CHECK-LABEL: @vdiv_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __lsx_vdiv_h(_1, _2); }
-+// CHECK-LABEL: @vdiv_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __lsx_vdiv_w(_1, _2); }
-+// CHECK-LABEL: @vdiv_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __lsx_vdiv_d(_1, _2); }
-+// CHECK-LABEL: @vdiv_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { return __lsx_vdiv_bu(_1, _2); }
-+// CHECK-LABEL: @vdiv_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { return __lsx_vdiv_hu(_1, _2); }
-+// CHECK-LABEL: @vdiv_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { return __lsx_vdiv_wu(_1, _2); }
-+// CHECK-LABEL: @vdiv_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vdiv_du(v2u64 _1, v2u64 _2) { return __lsx_vdiv_du(_1, _2); }
-+// CHECK-LABEL: @vhaddw_h_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhaddw_h_b(_1, _2); }
-+// CHECK-LABEL: @vhaddw_w_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhaddw_w_h(_1, _2); }
-+// CHECK-LABEL: @vhaddw_d_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhaddw_d_w(_1, _2); }
-+// CHECK-LABEL: @vhaddw_hu_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhaddw_hu_bu(_1, _2); }
-+// CHECK-LABEL: @vhaddw_wu_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhaddw_wu_hu(_1, _2); }
-+// CHECK-LABEL: @vhaddw_du_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhaddw_du_wu(_1, _2); }
-+// CHECK-LABEL: @vhsubw_h_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhsubw_h_b(_1, _2); }
-+// CHECK-LABEL: @vhsubw_w_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhsubw_w_h(_1, _2); }
-+// CHECK-LABEL: @vhsubw_d_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhsubw_d_w(_1, _2); }
-+// CHECK-LABEL: @vhsubw_hu_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhsubw_hu_bu(_1, _2); }
-+// CHECK-LABEL: @vhsubw_wu_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhsubw_wu_hu(_1, _2); }
-+// CHECK-LABEL: @vhsubw_du_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhsubw_du_wu(_1, _2); }
-+// CHECK-LABEL: @vmod_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __lsx_vmod_b(_1, _2); }
-+// CHECK-LABEL: @vmod_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __lsx_vmod_h(_1, _2); }
-+// CHECK-LABEL: @vmod_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __lsx_vmod_w(_1, _2); }
-+// CHECK-LABEL: @vmod_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __lsx_vmod_d(_1, _2); }
-+// CHECK-LABEL: @vmod_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vmod_bu(v16u8 _1, v16u8 _2) { return __lsx_vmod_bu(_1, _2); }
-+// CHECK-LABEL: @vmod_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vmod_hu(v8u16 _1, v8u16 _2) { return __lsx_vmod_hu(_1, _2); }
-+// CHECK-LABEL: @vmod_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vmod_wu(v4u32 _1, v4u32 _2) { return __lsx_vmod_wu(_1, _2); }
-+// CHECK-LABEL: @vmod_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vmod_du(v2u64 _1, v2u64 _2) { return __lsx_vmod_du(_1, _2); }
-+// CHECK-LABEL: @vreplve_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[_1:%.*]], i32 [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vreplve_b(v16i8 _1, int _2) { return __lsx_vreplve_b(_1, _2); }
-+// CHECK-LABEL: @vreplve_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[_1:%.*]], i32 [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vreplve_h(v8i16 _1, int _2) { return __lsx_vreplve_h(_1, _2); }
-+// CHECK-LABEL: @vreplve_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[_1:%.*]], i32 [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vreplve_w(v4i32 _1, int _2) { return __lsx_vreplve_w(_1, _2); }
-+// CHECK-LABEL: @vreplve_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[_1:%.*]], i32 [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vreplve_d(v2i64 _1, int _2) { return __lsx_vreplve_d(_1, _2); }
-+// CHECK-LABEL: @vreplvei_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vreplvei_b(v16i8 _1) { return __lsx_vreplvei_b(_1, 1); }
-+// CHECK-LABEL: @vreplvei_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vreplvei_h(v8i16 _1) { return __lsx_vreplvei_h(_1, 1); }
-+// CHECK-LABEL: @vreplvei_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vreplvei_w(v4i32 _1) { return __lsx_vreplvei_w(_1, 1); }
-+// CHECK-LABEL: @vreplvei_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vreplvei_d(v2i64 _1) { return __lsx_vreplvei_d(_1, 1); }
-+// CHECK-LABEL: @vpickev_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vpickev_b(v16i8 _1, v16i8 _2) { return __lsx_vpickev_b(_1, _2); }
-+// CHECK-LABEL: @vpickev_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vpickev_h(v8i16 _1, v8i16 _2) { return __lsx_vpickev_h(_1, _2); }
-+// CHECK-LABEL: @vpickev_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vpickev_w(v4i32 _1, v4i32 _2) { return __lsx_vpickev_w(_1, _2); }
-+// CHECK-LABEL: @vpickev_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vpickev_d(v2i64 _1, v2i64 _2) { return __lsx_vpickev_d(_1, _2); }
-+// CHECK-LABEL: @vpickod_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vpickod_b(v16i8 _1, v16i8 _2) { return __lsx_vpickod_b(_1, _2); }
-+// CHECK-LABEL: @vpickod_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vpickod_h(v8i16 _1, v8i16 _2) { return __lsx_vpickod_h(_1, _2); }
-+// CHECK-LABEL: @vpickod_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vpickod_w(v4i32 _1, v4i32 _2) { return __lsx_vpickod_w(_1, _2); }
-+// CHECK-LABEL: @vpickod_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vpickod_d(v2i64 _1, v2i64 _2) { return __lsx_vpickod_d(_1, _2); }
-+// CHECK-LABEL: @vilvh_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vilvh_b(v16i8 _1, v16i8 _2) { return __lsx_vilvh_b(_1, _2); }
-+// CHECK-LABEL: @vilvh_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vilvh_h(v8i16 _1, v8i16 _2) { return __lsx_vilvh_h(_1, _2); }
-+// CHECK-LABEL: @vilvh_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vilvh_w(v4i32 _1, v4i32 _2) { return __lsx_vilvh_w(_1, _2); }
-+// CHECK-LABEL: @vilvh_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vilvh_d(v2i64 _1, v2i64 _2) { return __lsx_vilvh_d(_1, _2); }
-+// CHECK-LABEL: @vilvl_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vilvl_b(v16i8 _1, v16i8 _2) { return __lsx_vilvl_b(_1, _2); }
-+// CHECK-LABEL: @vilvl_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vilvl_h(v8i16 _1, v8i16 _2) { return __lsx_vilvl_h(_1, _2); }
-+// CHECK-LABEL: @vilvl_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vilvl_w(v4i32 _1, v4i32 _2) { return __lsx_vilvl_w(_1, _2); }
-+// CHECK-LABEL: @vilvl_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vilvl_d(v2i64 _1, v2i64 _2) { return __lsx_vilvl_d(_1, _2); }
-+// CHECK-LABEL: @vpackev_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vpackev_b(v16i8 _1, v16i8 _2) { return __lsx_vpackev_b(_1, _2); }
-+// CHECK-LABEL: @vpackev_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vpackev_h(v8i16 _1, v8i16 _2) { return __lsx_vpackev_h(_1, _2); }
-+// CHECK-LABEL: @vpackev_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vpackev_w(v4i32 _1, v4i32 _2) { return __lsx_vpackev_w(_1, _2); }
-+// CHECK-LABEL: @vpackev_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vpackev_d(v2i64 _1, v2i64 _2) { return __lsx_vpackev_d(_1, _2); }
-+// CHECK-LABEL: @vpackod_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vpackod_b(v16i8 _1, v16i8 _2) { return __lsx_vpackod_b(_1, _2); }
-+// CHECK-LABEL: @vpackod_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vpackod_h(v8i16 _1, v8i16 _2) { return __lsx_vpackod_h(_1, _2); }
-+// CHECK-LABEL: @vpackod_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vpackod_w(v4i32 _1, v4i32 _2) { return __lsx_vpackod_w(_1, _2); }
-+// CHECK-LABEL: @vpackod_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vpackod_d(v2i64 _1, v2i64 _2) { return __lsx_vpackod_d(_1, _2); }
-+// CHECK-LABEL: @vshuf_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) {
-+ return __lsx_vshuf_h(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vshuf_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) {
-+ return __lsx_vshuf_w(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vshuf_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) {
-+ return __lsx_vshuf_d(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vand_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vand_v(v16u8 _1, v16u8 _2) { return __lsx_vand_v(_1, _2); }
-+// CHECK-LABEL: @vandi_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vandi_b(v16u8 _1) { return __lsx_vandi_b(_1, 1); }
-+// CHECK-LABEL: @vor_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vor_v(v16u8 _1, v16u8 _2) { return __lsx_vor_v(_1, _2); }
-+// CHECK-LABEL: @vori_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vori_b(v16u8 _1) { return __lsx_vori_b(_1, 1); }
-+// CHECK-LABEL: @vnor_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __lsx_vnor_v(_1, _2); }
-+// CHECK-LABEL: @vnori_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vnori_b(v16u8 _1) { return __lsx_vnori_b(_1, 1); }
-+// CHECK-LABEL: @vxor_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __lsx_vxor_v(_1, _2); }
-+// CHECK-LABEL: @vxori_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vxori_b(v16u8 _1) { return __lsx_vxori_b(_1, 1); }
-+// CHECK-LABEL: @vbitsel_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) {
-+ return __lsx_vbitsel_v(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vbitseli_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { return __lsx_vbitseli_b(_1, _2, 1); }
-+// CHECK-LABEL: @vshuf4i_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vshuf4i_b(v16i8 _1) { return __lsx_vshuf4i_b(_1, 1); }
-+// CHECK-LABEL: @vshuf4i_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vshuf4i_h(v8i16 _1) { return __lsx_vshuf4i_h(_1, 1); }
-+// CHECK-LABEL: @vshuf4i_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vshuf4i_w(v4i32 _1) { return __lsx_vshuf4i_w(_1, 1); }
-+// CHECK-LABEL: @vreplgr2vr_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vreplgr2vr_b(int _1) { return __lsx_vreplgr2vr_b(_1); }
-+// CHECK-LABEL: @vreplgr2vr_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vreplgr2vr_h(int _1) { return __lsx_vreplgr2vr_h(_1); }
-+// CHECK-LABEL: @vreplgr2vr_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vreplgr2vr_w(int _1) { return __lsx_vreplgr2vr_w(_1); }
-+// CHECK-LABEL: @vreplgr2vr_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vreplgr2vr_d(long _1) { return __lsx_vreplgr2vr_d(_1); }
-+// CHECK-LABEL: @vpcnt_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vpcnt_b(v16i8 _1) { return __lsx_vpcnt_b(_1); }
-+// CHECK-LABEL: @vpcnt_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vpcnt_h(v8i16 _1) { return __lsx_vpcnt_h(_1); }
-+// CHECK-LABEL: @vpcnt_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vpcnt_w(v4i32 _1) { return __lsx_vpcnt_w(_1); }
-+// CHECK-LABEL: @vpcnt_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vpcnt_d(v2i64 _1) { return __lsx_vpcnt_d(_1); }
-+// CHECK-LABEL: @vclo_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vclo_b(v16i8 _1) { return __lsx_vclo_b(_1); }
-+// CHECK-LABEL: @vclo_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vclo_h(v8i16 _1) { return __lsx_vclo_h(_1); }
-+// CHECK-LABEL: @vclo_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vclo_w(v4i32 _1) { return __lsx_vclo_w(_1); }
-+// CHECK-LABEL: @vclo_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vclo_d(v2i64 _1) { return __lsx_vclo_d(_1); }
-+// CHECK-LABEL: @vclz_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vclz_b(v16i8 _1) { return __lsx_vclz_b(_1); }
-+// CHECK-LABEL: @vclz_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vclz_h(v8i16 _1) { return __lsx_vclz_h(_1); }
-+// CHECK-LABEL: @vclz_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vclz_w(v4i32 _1) { return __lsx_vclz_w(_1); }
-+// CHECK-LABEL: @vclz_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vclz_d(v2i64 _1) { return __lsx_vclz_d(_1); }
-+// CHECK-LABEL: @vpickve2gr_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int vpickve2gr_b(v16i8 _1) { return __lsx_vpickve2gr_b(_1, 1); }
-+// CHECK-LABEL: @vpickve2gr_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int vpickve2gr_h(v8i16 _1) { return __lsx_vpickve2gr_h(_1, 1); }
-+// CHECK-LABEL: @vpickve2gr_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int vpickve2gr_w(v4i32 _1) { return __lsx_vpickve2gr_w(_1, 1); }
-+// CHECK-LABEL: @vpickve2gr_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret i64 [[TMP0]]
-+//
-+long vpickve2gr_d(v2i64 _1) { return __lsx_vpickve2gr_d(_1, 1); }
-+// CHECK-LABEL: @vpickve2gr_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+unsigned int vpickve2gr_bu(v16i8 _1) { return __lsx_vpickve2gr_bu(_1, 1); } -+// CHECK-LABEL: @vpickve2gr_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+unsigned int vpickve2gr_hu(v8i16 _1) { return __lsx_vpickve2gr_hu(_1, 1); } -+// CHECK-LABEL: @vpickve2gr_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+unsigned int vpickve2gr_wu(v4i32 _1) { return __lsx_vpickve2gr_wu(_1, 1); } -+// CHECK-LABEL: @vpickve2gr_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i64 [[TMP0]] -+// -+unsigned long int vpickve2gr_du(v2i64 _1) { return __lsx_vpickve2gr_du(_1, 1); } -+// CHECK-LABEL: @vinsgr2vr_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[_1:%.*]], i32 1, i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vinsgr2vr_b(v16i8 _1) { return __lsx_vinsgr2vr_b(_1, 1, 1); } -+// CHECK-LABEL: @vinsgr2vr_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[_1:%.*]], i32 1, i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vinsgr2vr_h(v8i16 _1) { return __lsx_vinsgr2vr_h(_1, 1, 1); } -+// CHECK-LABEL: @vinsgr2vr_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[_1:%.*]], i32 1, i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vinsgr2vr_w(v4i32 _1) { return __lsx_vinsgr2vr_w(_1, 1, 1); } -+// CHECK-LABEL: @vinsgr2vr_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[_1:%.*]], i64 1, i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vinsgr2vr_d(v2i64 _1) { return __lsx_vinsgr2vr_d(_1, 1, 1); } -+// CHECK-LABEL: @vfadd_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfadd_s(v4f32 _1, v4f32 _2) { return __lsx_vfadd_s(_1, _2); } -+// CHECK-LABEL: @vfadd_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfadd_d(v2f64 _1, v2f64 _2) { return __lsx_vfadd_d(_1, _2); } -+// CHECK-LABEL: @vfsub_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfsub_s(v4f32 _1, v4f32 _2) { return __lsx_vfsub_s(_1, _2); } -+// CHECK-LABEL: @vfsub_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfsub_d(v2f64 _1, v2f64 _2) { return __lsx_vfsub_d(_1, _2); } -+// CHECK-LABEL: @vfmul_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfmul_s(v4f32 
_1, v4f32 _2) { return __lsx_vfmul_s(_1, _2); } -+// CHECK-LABEL: @vfmul_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfmul_d(v2f64 _1, v2f64 _2) { return __lsx_vfmul_d(_1, _2); } -+// CHECK-LABEL: @vfdiv_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { return __lsx_vfdiv_s(_1, _2); } -+// CHECK-LABEL: @vfdiv_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { return __lsx_vfdiv_d(_1, _2); } -+// CHECK-LABEL: @vfcvt_h_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { return __lsx_vfcvt_h_s(_1, _2); } -+// CHECK-LABEL: @vfcvt_s_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { return __lsx_vfcvt_s_d(_1, _2); } -+// CHECK-LABEL: @vfmin_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfmin_s(v4f32 _1, v4f32 _2) { return __lsx_vfmin_s(_1, _2); } -+// CHECK-LABEL: @vfmin_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfmin_d(v2f64 _1, v2f64 _2) { return __lsx_vfmin_d(_1, _2); } -+// CHECK-LABEL: @vfmina_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfmina_s(v4f32 _1, v4f32 _2) { return __lsx_vfmina_s(_1, _2); } -+// CHECK-LABEL: @vfmina_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfmina_d(v2f64 _1, v2f64 _2) { return __lsx_vfmina_d(_1, _2); } -+// CHECK-LABEL: @vfmax_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfmax_s(v4f32 _1, v4f32 _2) { return __lsx_vfmax_s(_1, _2); } -+// CHECK-LABEL: @vfmax_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfmax_d(v2f64 _1, v2f64 _2) { return __lsx_vfmax_d(_1, _2); } -+// CHECK-LABEL: @vfmaxa_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[_1:%.*]], <4 x float> 
[[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { return __lsx_vfmaxa_s(_1, _2); } -+// CHECK-LABEL: @vfmaxa_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { return __lsx_vfmaxa_d(_1, _2); } -+// CHECK-LABEL: @vfclass_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vfclass_s(v4f32 _1) { return __lsx_vfclass_s(_1); } -+// CHECK-LABEL: @vfclass_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vfclass_d(v2f64 _1) { return __lsx_vfclass_d(_1); } -+// CHECK-LABEL: @vfsqrt_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfsqrt_s(v4f32 _1) { return __lsx_vfsqrt_s(_1); } -+// CHECK-LABEL: @vfsqrt_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfsqrt_d(v2f64 _1) { return __lsx_vfsqrt_d(_1); } -+// CHECK-LABEL: @vfrecip_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfrecip_s(v4f32 _1) { return __lsx_vfrecip_s(_1); } -+// CHECK-LABEL: @vfrecip_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfrecip_d(v2f64 _1) { return __lsx_vfrecip_d(_1); } -+// CHECK-LABEL: @vfrint_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfrint_s(v4f32 _1) { return __lsx_vfrint_s(_1); } -+// CHECK-LABEL: @vfrint_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfrint_d(v2f64 _1) { return __lsx_vfrint_d(_1); } -+// CHECK-LABEL: @vfrsqrt_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfrsqrt_s(v4f32 _1) { return __lsx_vfrsqrt_s(_1); } -+// CHECK-LABEL: @vfrsqrt_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfrsqrt_d(v2f64 _1) { return __lsx_vfrsqrt_d(_1); } -+// CHECK-LABEL: @vflogb_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vflogb_s(v4f32 _1) { return __lsx_vflogb_s(_1); } -+// CHECK-LABEL: @vflogb_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[_1:%.*]]) -+// 
CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vflogb_d(v2f64 _1) { return __lsx_vflogb_d(_1); } -+// CHECK-LABEL: @vfcvth_s_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfcvth_s_h(v8i16 _1) { return __lsx_vfcvth_s_h(_1); } -+// CHECK-LABEL: @vfcvth_d_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfcvth_d_s(v4f32 _1) { return __lsx_vfcvth_d_s(_1); } -+// CHECK-LABEL: @vfcvtl_s_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfcvtl_s_h(v8i16 _1) { return __lsx_vfcvtl_s_h(_1); } -+// CHECK-LABEL: @vfcvtl_d_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfcvtl_d_s(v4f32 _1) { return __lsx_vfcvtl_d_s(_1); } -+// CHECK-LABEL: @vftint_w_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftint_w_s(v4f32 _1) { return __lsx_vftint_w_s(_1); } -+// CHECK-LABEL: @vftint_l_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftint_l_d(v2f64 _1) { return __lsx_vftint_l_d(_1); } -+// CHECK-LABEL: @vftint_wu_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vftint_wu_s(v4f32 _1) { return __lsx_vftint_wu_s(_1); } -+// CHECK-LABEL: @vftint_lu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vftint_lu_d(v2f64 _1) { return __lsx_vftint_lu_d(_1); } -+// CHECK-LABEL: @vftintrz_w_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftintrz_w_s(v4f32 _1) { return __lsx_vftintrz_w_s(_1); } -+// CHECK-LABEL: @vftintrz_l_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrz_l_d(v2f64 _1) { return __lsx_vftintrz_l_d(_1); } -+// CHECK-LABEL: @vftintrz_wu_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vftintrz_wu_s(v4f32 _1) { return __lsx_vftintrz_wu_s(_1); } -+// CHECK-LABEL: @vftintrz_lu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vftintrz_lu_d(v2f64 _1) { return __lsx_vftintrz_lu_d(_1); } -+// CHECK-LABEL: @vffint_s_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> 
[[_1:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vffint_s_w(v4i32 _1) { return __lsx_vffint_s_w(_1); } -+// CHECK-LABEL: @vffint_d_l( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vffint_d_l(v2i64 _1) { return __lsx_vffint_d_l(_1); } -+// CHECK-LABEL: @vffint_s_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vffint_s_wu(v4u32 _1) { return __lsx_vffint_s_wu(_1); } -+// CHECK-LABEL: @vffint_d_lu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vffint_d_lu(v2u64 _1) { return __lsx_vffint_d_lu(_1); } -+// CHECK-LABEL: @vandn_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vandn_v(v16u8 _1, v16u8 _2) { return __lsx_vandn_v(_1, _2); } -+// CHECK-LABEL: @vneg_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vneg_b(v16i8 _1) { return __lsx_vneg_b(_1); } -+// CHECK-LABEL: @vneg_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vneg_h(v8i16 _1) { return __lsx_vneg_h(_1); } -+// CHECK-LABEL: @vneg_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vneg_w(v4i32 _1) { return __lsx_vneg_w(_1); } -+// CHECK-LABEL: @vneg_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vneg_d(v2i64 _1) { return __lsx_vneg_d(_1); } -+// CHECK-LABEL: @vmuh_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __lsx_vmuh_b(_1, _2); } -+// CHECK-LABEL: @vmuh_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __lsx_vmuh_h(_1, _2); } -+// CHECK-LABEL: @vmuh_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __lsx_vmuh_w(_1, _2); } -+// CHECK-LABEL: @vmuh_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __lsx_vmuh_d(_1, _2); } -+// CHECK-LABEL: @vmuh_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[_1:%.*]], <16 x i8> 
[[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { return __lsx_vmuh_bu(_1, _2); } -+// CHECK-LABEL: @vmuh_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { return __lsx_vmuh_hu(_1, _2); } -+// CHECK-LABEL: @vmuh_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { return __lsx_vmuh_wu(_1, _2); } -+// CHECK-LABEL: @vmuh_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vmuh_du(v2u64 _1, v2u64 _2) { return __lsx_vmuh_du(_1, _2); } -+// CHECK-LABEL: @vsllwil_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsllwil_h_b(v16i8 _1) { return __lsx_vsllwil_h_b(_1, 1); } -+// CHECK-LABEL: @vsllwil_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsllwil_w_h(v8i16 _1) { return __lsx_vsllwil_w_h(_1, 1); } -+// CHECK-LABEL: @vsllwil_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsllwil_d_w(v4i32 _1) { return __lsx_vsllwil_d_w(_1, 1); } -+// CHECK-LABEL: @vsllwil_hu_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vsllwil_hu_bu(v16u8 _1) { return __lsx_vsllwil_hu_bu(_1, 1); } -+// CHECK-LABEL: @vsllwil_wu_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vsllwil_wu_hu(v8u16 _1) { return __lsx_vsllwil_wu_hu(_1, 1); } -+// CHECK-LABEL: @vsllwil_du_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vsllwil_du_wu(v4u32 _1) { return __lsx_vsllwil_du_wu(_1, 1); } -+// CHECK-LABEL: @vsran_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsran_b_h(_1, _2); } -+// CHECK-LABEL: @vsran_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsran_h_w(_1, _2); } -+// CHECK-LABEL: @vsran_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 
vsran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsran_w_d(_1, _2); } -+// CHECK-LABEL: @vssran_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssran_b_h(_1, _2); } -+// CHECK-LABEL: @vssran_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssran_h_w(_1, _2); } -+// CHECK-LABEL: @vssran_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssran_w_d(_1, _2); } -+// CHECK-LABEL: @vssran_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssran_bu_h(_1, _2); } -+// CHECK-LABEL: @vssran_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssran_hu_w(_1, _2); } -+// CHECK-LABEL: @vssran_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssran_wu_d(_1, _2); } -+// CHECK-LABEL: @vsrarn_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrarn_b_h(_1, _2); } -+// CHECK-LABEL: @vsrarn_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrarn_h_w(_1, _2); } -+// CHECK-LABEL: @vsrarn_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrarn_w_d(_1, _2); } -+// CHECK-LABEL: @vssrarn_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrarn_b_h(_1, _2); } -+// CHECK-LABEL: @vssrarn_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrarn_h_w(_1, _2); } -+// CHECK-LABEL: @vssrarn_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrarn_w_d(_1, _2); } -+// CHECK-LABEL: @vssrarn_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrarn_bu_h(_1, _2); } -+// CHECK-LABEL: @vssrarn_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrarn_hu_w(_1, _2); } -+// CHECK-LABEL: @vssrarn_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrarn_wu_d(_1, _2); } -+// CHECK-LABEL: @vsrln_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrln_b_h(_1, _2); } -+// CHECK-LABEL: @vsrln_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrln_h_w(_1, _2); } -+// CHECK-LABEL: @vsrln_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrln_w_d(_1, _2); } -+// CHECK-LABEL: @vssrln_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrln_bu_h(_1, _2); } -+// CHECK-LABEL: @vssrln_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrln_hu_w(_1, _2); } -+// CHECK-LABEL: @vssrln_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrln_wu_d(_1, _2); } -+// CHECK-LABEL: @vsrlrn_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlrn_b_h(_1, _2); } -+// CHECK-LABEL: @vsrlrn_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlrn_h_w(_1, _2); 
} -+// CHECK-LABEL: @vsrlrn_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlrn_w_d(_1, _2); } -+// CHECK-LABEL: @vssrlrn_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrlrn_bu_h(_1, _2); } -+// CHECK-LABEL: @vssrlrn_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrlrn_hu_w(_1, _2); } -+// CHECK-LABEL: @vssrlrn_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrlrn_wu_d(_1, _2); } -+// CHECK-LABEL: @vfrstpi_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { return __lsx_vfrstpi_b(_1, _2, 1); } -+// CHECK-LABEL: @vfrstpi_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { return __lsx_vfrstpi_h(_1, _2, 1); } -+// CHECK-LABEL: @vfrstp_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) { -+ return __lsx_vfrstp_b(_1, _2, _3); -+} -+// CHECK-LABEL: @vfrstp_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) { -+ return __lsx_vfrstp_h(_1, _2, _3); -+} -+// CHECK-LABEL: @vshuf4i_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { return __lsx_vshuf4i_d(_1, _2, 1); } -+// CHECK-LABEL: @vbsrl_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vbsrl_v(v16i8 _1) { return __lsx_vbsrl_v(_1, 1); } -+// CHECK-LABEL: @vbsll_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vbsll_v(v16i8 _1) { return __lsx_vbsll_v(_1, 1); } -+// CHECK-LABEL: @vextrins_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 
1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vextrins_b(v16i8 _1, v16i8 _2) { return __lsx_vextrins_b(_1, _2, 1); } -+// CHECK-LABEL: @vextrins_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vextrins_h(v8i16 _1, v8i16 _2) { return __lsx_vextrins_h(_1, _2, 1); } -+// CHECK-LABEL: @vextrins_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vextrins_w(v4i32 _1, v4i32 _2) { return __lsx_vextrins_w(_1, _2, 1); } -+// CHECK-LABEL: @vextrins_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vextrins_d(v2i64 _1, v2i64 _2) { return __lsx_vextrins_d(_1, _2, 1); } -+// CHECK-LABEL: @vmskltz_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vmskltz_b(v16i8 _1) { return __lsx_vmskltz_b(_1); } -+// CHECK-LABEL: @vmskltz_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmskltz_h(v8i16 _1) { return __lsx_vmskltz_h(_1); } -+// CHECK-LABEL: @vmskltz_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmskltz_w(v4i32 _1) { return __lsx_vmskltz_w(_1); } -+// CHECK-LABEL: @vmskltz_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmskltz_d(v2i64 _1) { return __lsx_vmskltz_d(_1); } -+// CHECK-LABEL: @vsigncov_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { return __lsx_vsigncov_b(_1, _2); } -+// CHECK-LABEL: @vsigncov_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { return __lsx_vsigncov_h(_1, _2); } -+// CHECK-LABEL: @vsigncov_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { return __lsx_vsigncov_w(_1, _2); } -+// CHECK-LABEL: @vsigncov_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { return __lsx_vsigncov_d(_1, _2); } -+// CHECK-LABEL: @vfmadd_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) -+// CHECK-NEXT: 
ret <4 x float> [[TMP0]] -+// -+v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { -+ return __lsx_vfmadd_s(_1, _2, _3); -+} -+// CHECK-LABEL: @vfmadd_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { -+ return __lsx_vfmadd_d(_1, _2, _3); -+} -+// CHECK-LABEL: @vfmsub_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { -+ return __lsx_vfmsub_s(_1, _2, _3); -+} -+// CHECK-LABEL: @vfmsub_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { -+ return __lsx_vfmsub_d(_1, _2, _3); -+} -+// CHECK-LABEL: @vfnmadd_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { -+ return __lsx_vfnmadd_s(_1, _2, _3); -+} -+// CHECK-LABEL: @vfnmadd_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { -+ return __lsx_vfnmadd_d(_1, _2, _3); -+} -+// CHECK-LABEL: @vfnmsub_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { -+ return __lsx_vfnmsub_s(_1, _2, _3); -+} -+// CHECK-LABEL: @vfnmsub_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { -+ return __lsx_vfnmsub_d(_1, _2, _3); -+} -+// CHECK-LABEL: @vftintrne_w_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftintrne_w_s(v4f32 _1) { return __lsx_vftintrne_w_s(_1); } -+// CHECK-LABEL: @vftintrne_l_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrne_l_d(v2f64 _1) { return __lsx_vftintrne_l_d(_1); } -+// CHECK-LABEL: @vftintrp_w_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftintrp_w_s(v4f32 _1) { return __lsx_vftintrp_w_s(_1); } -+// CHECK-LABEL: @vftintrp_l_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrp_l_d(v2f64 _1) { return __lsx_vftintrp_l_d(_1); } -+// CHECK-LABEL: @vftintrm_w_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftintrm_w_s(v4f32 _1) { return __lsx_vftintrm_w_s(_1); } -+// CHECK-LABEL: @vftintrm_l_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrm_l_d(v2f64 _1) { return __lsx_vftintrm_l_d(_1); } -+// CHECK-LABEL: @vftint_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftint_w_d(_1, _2); } -+// CHECK-LABEL: @vffint_s_l( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { return __lsx_vffint_s_l(_1, _2); } -+// CHECK-LABEL: @vftintrz_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrz_w_d(_1, _2); } -+// CHECK-LABEL: @vftintrp_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrp_w_d(_1, _2); } -+// CHECK-LABEL: @vftintrm_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrm_w_d(_1, _2); } -+// CHECK-LABEL: @vftintrne_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrne_w_d(_1, _2); } -+// CHECK-LABEL: @vftintl_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintl_l_s(v4f32 _1) { return __lsx_vftintl_l_s(_1); } -+// CHECK-LABEL: @vftinth_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftinth_l_s(v4f32 _1) { return __lsx_vftinth_l_s(_1); } -+// CHECK-LABEL: @vffinth_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vffinth_d_w(v4i32 _1) { return __lsx_vffinth_d_w(_1); } -+// CHECK-LABEL: @vffintl_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> 
@llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vffintl_d_w(v4i32 _1) { return __lsx_vffintl_d_w(_1); } -+// CHECK-LABEL: @vftintrzl_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrzl_l_s(v4f32 _1) { return __lsx_vftintrzl_l_s(_1); } -+// CHECK-LABEL: @vftintrzh_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrzh_l_s(v4f32 _1) { return __lsx_vftintrzh_l_s(_1); } -+// CHECK-LABEL: @vftintrpl_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrpl_l_s(v4f32 _1) { return __lsx_vftintrpl_l_s(_1); } -+// CHECK-LABEL: @vftintrph_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrph_l_s(v4f32 _1) { return __lsx_vftintrph_l_s(_1); } -+// CHECK-LABEL: @vftintrml_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrml_l_s(v4f32 _1) { return __lsx_vftintrml_l_s(_1); } -+// CHECK-LABEL: @vftintrmh_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrmh_l_s(v4f32 _1) { return __lsx_vftintrmh_l_s(_1); } -+// CHECK-LABEL: @vftintrnel_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrnel_l_s(v4f32 _1) { return __lsx_vftintrnel_l_s(_1); } -+// CHECK-LABEL: @vftintrneh_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrneh_l_s(v4f32 _1) { return __lsx_vftintrneh_l_s(_1); } -+// CHECK-LABEL: @vfrintrne_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> -+// CHECK-NEXT: ret <4 x i32> [[TMP1]] -+// -+v4i32 vfrintrne_s(v4f32 _1) { return __lsx_vfrintrne_s(_1); } -+// CHECK-LABEL: @vfrintrne_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> -+// CHECK-NEXT: ret <2 x i64> [[TMP1]] -+// -+v2i64 vfrintrne_d(v2f64 _1) { return __lsx_vfrintrne_d(_1); } -+// CHECK-LABEL: @vfrintrz_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> -+// CHECK-NEXT: ret <4 x i32> [[TMP1]] -+// -+v4i32 vfrintrz_s(v4f32 _1) { return __lsx_vfrintrz_s(_1); } -+// CHECK-LABEL: @vfrintrz_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> -+// CHECK-NEXT: ret <2 x i64> [[TMP1]] -+// -+v2i64 vfrintrz_d(v2f64 _1) { return __lsx_vfrintrz_d(_1); } -+// CHECK-LABEL: @vfrintrp_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> -+// CHECK-NEXT: ret <4 x i32> [[TMP1]] -+// -+v4i32 vfrintrp_s(v4f32 _1) { return __lsx_vfrintrp_s(_1); } -+// CHECK-LABEL: @vfrintrp_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> -+// CHECK-NEXT: ret <2 x i64> [[TMP1]] -+// -+v2i64 vfrintrp_d(v2f64 _1) { return __lsx_vfrintrp_d(_1); } -+// CHECK-LABEL: @vfrintrm_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> -+// CHECK-NEXT: ret <4 x i32> [[TMP1]] -+// -+v4i32 vfrintrm_s(v4f32 _1) { return __lsx_vfrintrm_s(_1); } -+// CHECK-LABEL: @vfrintrm_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> -+// CHECK-NEXT: ret <2 x i64> [[TMP1]] -+// -+v2i64 vfrintrm_d(v2f64 _1) { return __lsx_vfrintrm_d(_1); } -+// CHECK-LABEL: @vstelm_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) -+// CHECK-NEXT: ret void -+// -+void vstelm_b(v16i8 _1, void *_2) { return __lsx_vstelm_b(_1, _2, 1, 1); } -+// CHECK-LABEL: @vstelm_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) -+// CHECK-NEXT: ret void -+// -+void vstelm_h(v8i16 _1, void *_2) { return __lsx_vstelm_h(_1, _2, 2, 1); } -+// CHECK-LABEL: @vstelm_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) -+// CHECK-NEXT: ret void -+// -+void vstelm_w(v4i32 _1, void *_2) { return __lsx_vstelm_w(_1, _2, 4, 1); } -+// CHECK-LABEL: @vstelm_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) -+// CHECK-NEXT: ret void -+// -+void vstelm_d(v2i64 _1, void *_2) { return __lsx_vstelm_d(_1, _2, 8, 1); } -+// CHECK-LABEL: @vaddwev_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwev_d_w(_1, _2); } -+// CHECK-LABEL: @vaddwev_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwev_w_h(_1, _2); } -+// CHECK-LABEL: @vaddwev_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// 
CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwev_h_b(_1, _2); } -+// CHECK-LABEL: @vaddwod_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwod_d_w(_1, _2); } -+// CHECK-LABEL: @vaddwod_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwod_w_h(_1, _2); } -+// CHECK-LABEL: @vaddwod_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwod_h_b(_1, _2); } -+// CHECK-LABEL: @vaddwev_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwev_d_wu(_1, _2); } -+// CHECK-LABEL: @vaddwev_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwev_w_hu(_1, _2); } -+// CHECK-LABEL: @vaddwev_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwev_h_bu(_1, _2); } -+// CHECK-LABEL: @vaddwod_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwod_d_wu(_1, _2); } -+// CHECK-LABEL: @vaddwod_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwod_w_hu(_1, _2); } -+// CHECK-LABEL: @vaddwod_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwod_h_bu(_1, _2); } -+// CHECK-LABEL: @vaddwev_d_wu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) { -+ return __lsx_vaddwev_d_wu_w(_1, _2); -+} -+// CHECK-LABEL: @vaddwev_w_hu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { -+ return __lsx_vaddwev_w_hu_h(_1, _2); -+} 
-+// CHECK-LABEL: @vaddwev_h_bu_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) {
-+ return __lsx_vaddwev_h_bu_b(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwod_d_wu_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) {
-+ return __lsx_vaddwod_d_wu_w(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwod_w_hu_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) {
-+ return __lsx_vaddwod_w_hu_h(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwod_h_bu_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) {
-+ return __lsx_vaddwod_h_bu_b(_1, _2);
-+}
-+// CHECK-LABEL: @vsubwev_d_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwev_d_w(_1, _2); }
-+// CHECK-LABEL: @vsubwev_w_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwev_w_h(_1, _2); }
-+// CHECK-LABEL: @vsubwev_h_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwev_h_b(_1, _2); }
-+// CHECK-LABEL: @vsubwod_d_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwod_d_w(_1, _2); }
-+// CHECK-LABEL: @vsubwod_w_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwod_w_h(_1, _2); }
-+// CHECK-LABEL: @vsubwod_h_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwod_h_b(_1, _2); }
-+// CHECK-LABEL: @vsubwev_d_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwev_d_wu(_1, _2); }
-+// CHECK-LABEL: @vsubwev_w_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwev_w_hu(_1, _2); }
-+// CHECK-LABEL: @vsubwev_h_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwev_h_bu(_1, _2); }
-+// CHECK-LABEL: @vsubwod_d_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwod_d_wu(_1, _2); }
-+// CHECK-LABEL: @vsubwod_w_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwod_w_hu(_1, _2); }
-+// CHECK-LABEL: @vsubwod_h_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwod_h_bu(_1, _2); }
-+// CHECK-LABEL: @vaddwev_q_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwev_q_d(_1, _2); }
-+// CHECK-LABEL: @vaddwod_q_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwod_q_d(_1, _2); }
-+// CHECK-LABEL: @vaddwev_q_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwev_q_du(_1, _2); }
-+// CHECK-LABEL: @vaddwod_q_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwod_q_du(_1, _2); }
-+// CHECK-LABEL: @vsubwev_q_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwev_q_d(_1, _2); }
-+// CHECK-LABEL: @vsubwod_q_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwod_q_d(_1, _2); }
-+// CHECK-LABEL: @vsubwev_q_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwev_q_du(_1, _2); }
-+// CHECK-LABEL: @vsubwod_q_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwod_q_du(_1, _2); }
-+// CHECK-LABEL: @vaddwev_q_du_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) {
-+ return __lsx_vaddwev_q_du_d(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwod_q_du_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) {
-+ return __lsx_vaddwod_q_du_d(_1, _2);
-+}
-+// CHECK-LABEL: @vmulwev_d_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwev_d_w(_1, _2); }
-+// CHECK-LABEL: @vmulwev_w_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwev_w_h(_1, _2); }
-+// CHECK-LABEL: @vmulwev_h_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwev_h_b(_1, _2); }
-+// CHECK-LABEL: @vmulwod_d_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwod_d_w(_1, _2); }
-+// CHECK-LABEL: @vmulwod_w_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwod_w_h(_1, _2); }
-+// CHECK-LABEL: @vmulwod_h_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwod_h_b(_1, _2); }
-+// CHECK-LABEL: @vmulwev_d_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwev_d_wu(_1, _2); }
-+// CHECK-LABEL: @vmulwev_w_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwev_w_hu(_1, _2); }
-+// CHECK-LABEL: @vmulwev_h_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwev_h_bu(_1, _2); }
-+// CHECK-LABEL: @vmulwod_d_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwod_d_wu(_1, _2); }
-+// CHECK-LABEL: @vmulwod_w_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwod_w_hu(_1, _2); }
-+// CHECK-LABEL: @vmulwod_h_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwod_h_bu(_1, _2); }
-+// CHECK-LABEL: @vmulwev_d_wu_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) {
-+ return __lsx_vmulwev_d_wu_w(_1, _2);
-+}
-+// CHECK-LABEL: @vmulwev_w_hu_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) {
-+ return __lsx_vmulwev_w_hu_h(_1, _2);
-+}
-+// CHECK-LABEL: @vmulwev_h_bu_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) {
-+ return __lsx_vmulwev_h_bu_b(_1, _2);
-+}
-+// CHECK-LABEL: @vmulwod_d_wu_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) {
-+ return __lsx_vmulwod_d_wu_w(_1, _2);
-+}
-+// CHECK-LABEL: @vmulwod_w_hu_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) {
-+ return __lsx_vmulwod_w_hu_h(_1, _2);
-+}
-+// CHECK-LABEL: @vmulwod_h_bu_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) {
-+ return __lsx_vmulwod_h_bu_b(_1, _2);
-+}
-+// CHECK-LABEL: @vmulwev_q_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwev_q_d(_1, _2); }
-+// CHECK-LABEL: @vmulwod_q_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwod_q_d(_1, _2); }
-+// CHECK-LABEL: @vmulwev_q_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwev_q_du(_1, _2); }
-+// CHECK-LABEL: @vmulwod_q_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwod_q_du(_1, _2); }
-+// CHECK-LABEL: @vmulwev_q_du_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) {
-+ return __lsx_vmulwev_q_du_d(_1, _2);
-+}
-+// CHECK-LABEL: @vmulwod_q_du_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) {
-+ return __lsx_vmulwod_q_du_d(_1, _2);
-+}
-+// CHECK-LABEL: @vhaddw_q_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhaddw_q_d(_1, _2); }
-+// CHECK-LABEL: @vhaddw_qu_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhaddw_qu_du(_1, _2); }
-+// CHECK-LABEL: @vhsubw_q_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhsubw_q_d(_1, _2); }
-+// CHECK-LABEL: @vhsubw_qu_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhsubw_qu_du(_1, _2); }
-+// CHECK-LABEL: @vmaddwev_d_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
-+ return __lsx_vmaddwev_d_w(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwev_w_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
-+ return __lsx_vmaddwev_w_h(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwev_h_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
-+ return __lsx_vmaddwev_h_b(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwev_d_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
-+ return __lsx_vmaddwev_d_wu(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwev_w_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
-+ return __lsx_vmaddwev_w_hu(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwev_h_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
-+ return __lsx_vmaddwev_h_bu(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwod_d_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
-+ return __lsx_vmaddwod_d_w(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwod_w_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
-+ return __lsx_vmaddwod_w_h(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwod_h_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
-+ return __lsx_vmaddwod_h_b(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwod_d_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
-+ return __lsx_vmaddwod_d_wu(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwod_w_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
-+ return __lsx_vmaddwod_w_hu(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwod_h_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
-+ return __lsx_vmaddwod_h_bu(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwev_d_wu_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
-+ return __lsx_vmaddwev_d_wu_w(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwev_w_hu_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
-+ return __lsx_vmaddwev_w_hu_h(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwev_h_bu_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
-+ return __lsx_vmaddwev_h_bu_b(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwod_d_wu_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
-+ return __lsx_vmaddwod_d_wu_w(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwod_w_hu_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
-+ return __lsx_vmaddwod_w_hu_h(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwod_h_bu_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
-+ return __lsx_vmaddwod_h_bu_b(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwev_q_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
-+ return __lsx_vmaddwev_q_d(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwod_q_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
-+ return __lsx_vmaddwod_q_d(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwev_q_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
-+ return __lsx_vmaddwev_q_du(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwod_q_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
-+ return __lsx_vmaddwod_q_du(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwev_q_du_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
-+ return __lsx_vmaddwev_q_du_d(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vmaddwod_q_du_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
-+ return __lsx_vmaddwod_q_du_d(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vrotr_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vrotr_b(v16i8 _1, v16i8 _2) { return __lsx_vrotr_b(_1, _2); }
-+// CHECK-LABEL: @vrotr_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vrotr_h(v8i16 _1, v8i16 _2) { return __lsx_vrotr_h(_1, _2); }
-+// CHECK-LABEL: @vrotr_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vrotr_w(v4i32 _1, v4i32 _2) { return __lsx_vrotr_w(_1, _2); }
-+// CHECK-LABEL: @vrotr_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vrotr_d(v2i64 _1, v2i64 _2) { return __lsx_vrotr_d(_1, _2); }
-+// CHECK-LABEL: @vadd_q(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __lsx_vadd_q(_1, _2); }
-+// CHECK-LABEL: @vsub_q(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __lsx_vsub_q(_1, _2); }
-+// CHECK-LABEL: @vldrepl_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vldrepl_b(void *_1) { return __lsx_vldrepl_b(_1, 1); }
-+// CHECK-LABEL: @vldrepl_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vldrepl_h(void *_1) { return __lsx_vldrepl_h(_1, 2); }
-+// CHECK-LABEL: @vldrepl_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vldrepl_w(void *_1) { return __lsx_vldrepl_w(_1, 4); }
-+// CHECK-LABEL: @vldrepl_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vldrepl_d(void *_1) { return __lsx_vldrepl_d(_1, 8); }
-+// CHECK-LABEL: @vmskgez_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vmskgez_b(v16i8 _1) { return __lsx_vmskgez_b(_1); }
-+// CHECK-LABEL: @vmsknz_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vmsknz_b(v16i8 _1) { return __lsx_vmsknz_b(_1); }
-+// CHECK-LABEL: @vexth_h_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vexth_h_b(v16i8 _1) { return __lsx_vexth_h_b(_1); }
-+// CHECK-LABEL: @vexth_w_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vexth_w_h(v8i16 _1) { return __lsx_vexth_w_h(_1); }
-+// CHECK-LABEL: @vexth_d_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vexth_d_w(v4i32 _1) { return __lsx_vexth_d_w(_1); }
-+// CHECK-LABEL: @vexth_q_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vexth_q_d(v2i64 _1) { return __lsx_vexth_q_d(_1); }
-+// CHECK-LABEL: @vexth_hu_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vexth_hu_bu(v16u8 _1) { return __lsx_vexth_hu_bu(_1); }
-+// CHECK-LABEL: @vexth_wu_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vexth_wu_hu(v8u16 _1) { return __lsx_vexth_wu_hu(_1); }
-+// CHECK-LABEL: @vexth_du_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vexth_du_wu(v4u32 _1) { return __lsx_vexth_du_wu(_1); }
-+// CHECK-LABEL: @vexth_qu_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vexth_qu_du(v2u64 _1) { return __lsx_vexth_qu_du(_1); }
-+// CHECK-LABEL: @vrotri_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vrotri_b(v16i8 _1) { return __lsx_vrotri_b(_1, 1); }
-+// CHECK-LABEL: @vrotri_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vrotri_h(v8i16 _1) { return __lsx_vrotri_h(_1, 1); }
-+// CHECK-LABEL: @vrotri_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vrotri_w(v4i32 _1) { return __lsx_vrotri_w(_1, 1); }
-+// CHECK-LABEL: @vrotri_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vrotri_d(v2i64 _1) { return __lsx_vrotri_d(_1, 1); }
-+// CHECK-LABEL: @vextl_q_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vextl_q_d(v2i64 _1) { return __lsx_vextl_q_d(_1); }
-+// CHECK-LABEL: @vsrlni_b_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlni_b_h(_1, _2, 1); }
-+// CHECK-LABEL: @vsrlni_h_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlni_h_w(_1, _2, 1); }
-+// CHECK-LABEL: @vsrlni_w_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlni_w_d(_1, _2, 1); }
-+// CHECK-LABEL: @vsrlni_d_q(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlni_d_q(_1, _2, 1); }
-+// CHECK-LABEL: @vsrlrni_b_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlrni_b_h(_1, _2, 1); }
-+// CHECK-LABEL: @vsrlrni_h_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlrni_h_w(_1, _2, 1); }
-+// CHECK-LABEL: @vsrlrni_w_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlrni_w_d(_1, _2, 1); }
-+// CHECK-LABEL: @vsrlrni_d_q(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlrni_d_q(_1, _2, 1); }
-+// CHECK-LABEL: @vssrlni_b_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlni_b_h(_1, _2, 1); }
-+// CHECK-LABEL: @vssrlni_h_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlni_h_w(_1, _2, 1); }
-+// CHECK-LABEL: @vssrlni_w_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlni_w_d(_1, _2, 1); }
-+// CHECK-LABEL: @vssrlni_d_q(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlni_d_q(_1, _2, 1); }
-+// CHECK-LABEL: @vssrlni_bu_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrlni_bu_h(_1, _2, 1); }
-+// CHECK-LABEL: @vssrlni_hu_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrlni_hu_w(_1, _2, 1); }
-+// CHECK-LABEL: @vssrlni_wu_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrlni_wu_d(_1, _2, 1); }
-+// CHECK-LABEL: @vssrlni_du_q(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrlni_du_q(_1, _2, 1); }
-+// CHECK-LABEL: @vssrlrni_b_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlrni_b_h(_1, _2, 1); }
-+// CHECK-LABEL: @vssrlrni_h_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlrni_h_w(_1, _2, 1); }
-+// CHECK-LABEL: @vssrlrni_w_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlrni_w_d(_1, _2, 1); }
-+// CHECK-LABEL: @vssrlrni_d_q(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlrni_d_q(_1, _2, 1); }
-+// CHECK-LABEL: @vssrlrni_bu_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) {
-+ return __lsx_vssrlrni_bu_h(_1, _2, 1);
-+}
-+// CHECK-LABEL: @vssrlrni_hu_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) {
-+ return __lsx_vssrlrni_hu_w(_1, _2, 1);
-+}
-+// CHECK-LABEL: @vssrlrni_wu_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) {
-+ return __lsx_vssrlrni_wu_d(_1, _2, 1);
-+}
-+// CHECK-LABEL: @vssrlrni_du_q(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) {
-+ return __lsx_vssrlrni_du_q(_1, _2, 1);
-+}
-+// CHECK-LABEL: @vsrani_b_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrani_b_h(_1, _2, 1); }
-+// CHECK-LABEL: @vsrani_h_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrani_h_w(_1, _2, 1); }
-+// CHECK-LABEL: @vsrani_w_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrani_w_d(_1, _2, 1); }
-+// CHECK-LABEL: @vsrani_d_q(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrani_d_q(_1, _2, 1); }
-+// CHECK-LABEL: @vsrarni_b_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrarni_b_h(_1, _2, 1); }
-+// CHECK-LABEL: @vsrarni_h_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrarni_h_w(_1, _2, 1); }
-+// CHECK-LABEL: @vsrarni_w_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrarni_w_d(_1, _2, 1); }
-+// CHECK-LABEL: @vsrarni_d_q(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrarni_d_q(_1, _2, 1); }
-+// CHECK-LABEL: @vssrani_b_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrani_b_h(_1, _2, 1); }
-+// CHECK-LABEL: @vssrani_h_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrani_h_w(_1, _2, 1); }
-+// CHECK-LABEL: @vssrani_w_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrani_w_d(_1, _2, 1); }
-+// CHECK-LABEL: @vssrani_d_q(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrani_d_q(_1, _2, 1); }
-+// CHECK-LABEL: @vssrani_bu_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrani_bu_h(_1, _2, 1); }
-+// CHECK-LABEL: @vssrani_hu_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrani_hu_w(_1, _2, 1); }
-+// CHECK-LABEL: @vssrani_wu_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrani_wu_d(_1, _2, 1); }
-+// CHECK-LABEL: @vssrani_du_q(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrani_du_q(_1, _2, 1); }
-+// CHECK-LABEL: @vssrarni_b_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrarni_b_h(_1, _2, 1); }
-+// CHECK-LABEL: @vssrarni_h_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrarni_h_w(_1, _2, 1); }
-+// CHECK-LABEL: @vssrarni_w_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrarni_w_d(_1, _2, 1); }
-+// CHECK-LABEL: @vssrarni_d_q(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrarni_d_q(_1, _2, 1); }
-+// CHECK-LABEL: @vssrarni_bu_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) {
-+ return __lsx_vssrarni_bu_h(_1, _2, 1);
-+}
-+// CHECK-LABEL: @vssrarni_hu_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) {
-+ return __lsx_vssrarni_hu_w(_1, _2, 1);
-+}
-+// CHECK-LABEL: @vssrarni_wu_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) {
-+ return __lsx_vssrarni_wu_d(_1, _2, 1);
-+}
-+// CHECK-LABEL: @vssrarni_du_q(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) {
-+ return __lsx_vssrarni_du_q(_1, _2, 1);
-+}
-+// CHECK-LABEL: @vpermi_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vpermi_w(v4i32 _1, v4i32 _2) { return __lsx_vpermi_w(_1, _2, 1); }
-+// CHECK-LABEL: @vld(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vld(void *_1) { return __lsx_vld(_1, 1); }
-+// CHECK-LABEL: @vst(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret void
-+//
-+void vst(v16i8 _1, void *_2) { return __lsx_vst(_1, _2, 1); }
-+// CHECK-LABEL: @vssrlrn_b_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrlrn_b_h(_1, _2); }
-+// CHECK-LABEL: @vssrlrn_h_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrlrn_h_w(_1, _2); }
-+// CHECK-LABEL: @vssrlrn_w_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrlrn_w_d(_1, _2); }
-+// CHECK-LABEL: @vssrln_b_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrln_b_h(_1, _2); }
-+// CHECK-LABEL: @vssrln_h_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrln_h_w(_1, _2); }
-+// CHECK-LABEL: @vssrln_w_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrln_w_d(_1, _2); }
-+// CHECK-LABEL: @vorn_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __lsx_vorn_v(_1, _2); }
-+// CHECK-LABEL: @vldi(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vldi() { return __lsx_vldi(1); }
-+// CHECK-LABEL: @vshuf_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) {
-+ return __lsx_vshuf_b(_1, _2, _3);
-+}
-+// CHECK-LABEL: @vldx(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vldx(void *_1) { return __lsx_vldx(_1, 1); }
-+// CHECK-LABEL: @vstx(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1)
-+// CHECK-NEXT: ret void
-+//
-+void vstx(v16i8 _1, void *_2) { return __lsx_vstx(_1, _2, 1); }
-+// CHECK-LABEL: @vextl_qu_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vextl_qu_du(v2u64 _1) { return __lsx_vextl_qu_du(_1); }
-+// CHECK-LABEL: @bnz_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int bnz_b(v16u8 _1) { return __lsx_bnz_b(_1); }
-+// CHECK-LABEL: @bnz_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int bnz_d(v2u64 _1) { return __lsx_bnz_d(_1); }
-+// CHECK-LABEL: @bnz_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int bnz_h(v8u16 _1) { return __lsx_bnz_h(_1); }
-+// CHECK-LABEL: @bnz_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int bnz_v(v16u8 _1) { return __lsx_bnz_v(_1); }
-+// CHECK-LABEL: @bnz_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int bnz_w(v4u32 _1) { return __lsx_bnz_w(_1); }
-+// CHECK-LABEL: @bz_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int bz_b(v16u8 _1) { return __lsx_bz_b(_1); }
-+// CHECK-LABEL: @bz_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int bz_d(v2u64 _1) { return __lsx_bz_d(_1); }
-+// CHECK-LABEL: @bz_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int bz_h(v8u16 _1) { return __lsx_bz_h(_1); }
-+// CHECK-LABEL: @bz_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int bz_v(v16u8 _1) { return __lsx_bz_v(_1); }
-+// CHECK-LABEL: @bz_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int bz_w(v4u32 _1) { return __lsx_bz_w(_1); }
-+// CHECK-LABEL: @vfcmp_caf_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_caf_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_caf_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_caf_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_ceq_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_ceq_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_ceq_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_ceq_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_cle_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cle_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_cle_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cle_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_clt_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_clt_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_clt_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_clt_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_cne_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cne_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_cne_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cne_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_cor_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cor_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_cor_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cor_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_cueq_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cueq_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_cueq_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cueq_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_cule_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cule_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_cule_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cule_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_cult_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cult_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_cult_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cult_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_cun_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cun_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_cune_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cune_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_cune_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cune_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_cun_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cun_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_saf_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_saf_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_saf_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_saf_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_seq_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_seq_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_seq_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_seq_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_sle_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sle_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_sle_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sle_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_slt_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_slt_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_slt_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_slt_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_sne_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sne_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_sne_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sne_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_sor_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sor_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_sor_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sor_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_sueq_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sueq_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_sueq_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sueq_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_sule_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sule_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_sule_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sule_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_sult_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sult_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_sult_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sult_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_sun_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sun_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_sune_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sune_d(_1, _2); }
-+// CHECK-LABEL: @vfcmp_sune_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sune_s(_1, _2); }
-+// CHECK-LABEL: @vfcmp_sun_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sun_s(_1, _2); }
-+// CHECK-LABEL: @vrepli_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vrepli_b() { return __lsx_vrepli_b(1); }
-+// CHECK-LABEL: @vrepli_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vrepli_d() { return __lsx_vrepli_d(1); }
-+// CHECK-LABEL: @vrepli_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vrepli_h() { return __lsx_vrepli_h(1); }
-+// CHECK-LABEL: @vrepli_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vrepli_w() { return __lsx_vrepli_w(1); }
-diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-error.c b/clang/test/CodeGen/LoongArch/lsx/builtin-error.c
-new file mode 100644
-index 000000000000..3fc5f73f1193
---- /dev/null
-+++ b/clang/test/CodeGen/LoongArch/lsx/builtin-error.c
-@@ -0,0 +1,1382 @@
-+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s
-+
-+typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));
-+typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1)));
-+typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16)));
-+typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1)));
-+typedef short v8i16 __attribute__((vector_size(16), aligned(16)));
-+typedef short v8i16_h __attribute__((vector_size(16), aligned(2)));
-+typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16)));
-+typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2)));
-+typedef int v4i32 __attribute__((vector_size(16), aligned(16)));
-+typedef int v4i32_w __attribute__((vector_size(16), aligned(4)));
-+typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16)));
-+typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4)));
-+typedef long long v2i64 __attribute__((vector_size(16), aligned(16)));
-+typedef long long v2i64_d __attribute__((vector_size(16), aligned(8)));
-+typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16)));
-+typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8)));
-+typedef float v4f32 __attribute__((vector_size(16), aligned(16)));
-+typedef float v4f32_w __attribute__((vector_size(16), aligned(4)));
-+typedef double v2f64 __attribute__((vector_size(16), aligned(16)));
-+typedef double v2f64_d __attribute__((vector_size(16), aligned(8)));
-+
-+typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__));
-+typedef float __m128 __attribute__((__vector_size__(16), __may_alias__));
-+typedef double __m128d __attribute__((__vector_size__(16), __may_alias__));
-+
-+v16i8 vslli_b(v16i8
_1, int var) { -+ v16i8 res = __builtin_lsx_vslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vslli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vslli_h(v8i16 _1, int var) { -+ v8i16 res = __builtin_lsx_vslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vslli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vslli_w(v4i32 _1, int var) { -+ v4i32 res = __builtin_lsx_vslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vslli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vslli_d(v2i64 _1, int var) { -+ v2i64 res = __builtin_lsx_vslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vslli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vsrai_b(v16i8 _1, int var) { -+ v16i8 res = __builtin_lsx_vsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vsrai_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vsrai_h(v8i16 _1, int var) { -+ v8i16 res = __builtin_lsx_vsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vsrai_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vsrai_w(v4i32 _1, int var) { -+ v4i32 res = __builtin_lsx_vsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsrai_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vsrai_d(v2i64 _1, int var) { -+ v2i64 res = __builtin_lsx_vsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vsrai_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vsrari_b(v16i8 _1, int var) { -+ v16i8 res = __builtin_lsx_vsrari_b(_1, -1); // expected-error {{argument value 4294967295 is 
outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vsrari_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vsrari_h(v8i16 _1, int var) { -+ v8i16 res = __builtin_lsx_vsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vsrari_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vsrari_w(v4i32 _1, int var) { -+ v4i32 res = __builtin_lsx_vsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsrari_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vsrari_d(v2i64 _1, int var) { -+ v2i64 res = __builtin_lsx_vsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vsrari_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vsrli_b(v16i8 _1, int var) { -+ v16i8 res = __builtin_lsx_vsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vsrli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vsrli_h(v8i16 _1, int var) { -+ v8i16 res = __builtin_lsx_vsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vsrli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vsrli_w(v4i32 _1, int var) { -+ v4i32 res = __builtin_lsx_vsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsrli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vsrli_d(v2i64 _1, int var) { -+ v2i64 res = __builtin_lsx_vsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vsrli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vsrlri_b(v16i8 _1, int var) { -+ v16i8 res = __builtin_lsx_vsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vsrlri_b(_1, 8); // expected-error 
{{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vsrlri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vsrlri_h(v8i16 _1, int var) { -+ v8i16 res = __builtin_lsx_vsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vsrlri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vsrlri_w(v4i32 _1, int var) { -+ v4i32 res = __builtin_lsx_vsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsrlri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vsrlri_d(v2i64 _1, int var) { -+ v2i64 res = __builtin_lsx_vsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vsrlri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_d' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vbitclri_b(v16u8 _1, int var) { -+ v16u8 res = __builtin_lsx_vbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vbitclri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_b' must be a constant integer}} -+ return res; -+} -+ -+v8u16 vbitclri_h(v8u16 _1, int var) { -+ v8u16 res = __builtin_lsx_vbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vbitclri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_h' must be a constant integer}} -+ return res; -+} -+ -+v4u32 vbitclri_w(v4u32 _1, int var) { -+ v4u32 res = __builtin_lsx_vbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vbitclri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_w' must be a constant integer}} -+ return res; -+} -+ -+v2u64 vbitclri_d(v2u64 _1, int var) { -+ v2u64 res = __builtin_lsx_vbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vbitclri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_d' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vbitseti_b(v16u8 _1, int var) { -+ v16u8 res = __builtin_lsx_vbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vbitseti_b(_1, 8); // expected-error {{argument value 8 is 
outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vbitseti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_b' must be a constant integer}} -+ return res; -+} -+ -+v8u16 vbitseti_h(v8u16 _1, int var) { -+ v8u16 res = __builtin_lsx_vbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vbitseti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_h' must be a constant integer}} -+ return res; -+} -+ -+v4u32 vbitseti_w(v4u32 _1, int var) { -+ v4u32 res = __builtin_lsx_vbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vbitseti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_w' must be a constant integer}} -+ return res; -+} -+ -+v2u64 vbitseti_d(v2u64 _1, int var) { -+ v2u64 res = __builtin_lsx_vbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vbitseti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_d' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vbitrevi_b(v16u8 _1, int var) { -+ v16u8 res = __builtin_lsx_vbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_b' must be a constant integer}} -+ return res; -+} -+ -+v8u16 vbitrevi_h(v8u16 _1, int var) { -+ v8u16 res = __builtin_lsx_vbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_h' must be a constant integer}} -+ return res; -+} -+ -+v4u32 vbitrevi_w(v4u32 _1, int var) { -+ v4u32 res = __builtin_lsx_vbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_w' must be a constant integer}} -+ return res; -+} -+ -+v2u64 vbitrevi_d(v2u64 _1, int var) { -+ v2u64 res = __builtin_lsx_vbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vaddi_bu(v16i8 _1, int var) { -+ v16i8 res = __builtin_lsx_vaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vaddi_bu(_1, 32); // expected-error {{argument value 32 
is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vaddi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_bu' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vaddi_hu(v8i16 _1, int var) { -+ v8i16 res = __builtin_lsx_vaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vaddi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_hu' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vaddi_wu(v4i32 _1, int var) { -+ v4i32 res = __builtin_lsx_vaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vaddi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_wu' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vaddi_du(v2i64 _1, int var) { -+ v2i64 res = __builtin_lsx_vaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vaddi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_du' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vsubi_bu(v16i8 _1, int var) { -+ v16i8 res = __builtin_lsx_vsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsubi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_bu' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vsubi_hu(v8i16 _1, int var) { -+ v8i16 res = __builtin_lsx_vsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsubi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_hu' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vsubi_wu(v4i32 _1, int var) { -+ v4i32 res = __builtin_lsx_vsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsubi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_wu' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vsubi_du(v2i64 _1, int var) { -+ v2i64 res = __builtin_lsx_vsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsubi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_du' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vmaxi_b(v16i8 _1, int var) { -+ v16i8 res = __builtin_lsx_vmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= 
__builtin_lsx_vmaxi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vmaxi_h(v8i16 _1, int var) { -+ v8i16 res = __builtin_lsx_vmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vmaxi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vmaxi_w(v4i32 _1, int var) { -+ v4i32 res = __builtin_lsx_vmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vmaxi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vmaxi_d(v2i64 _1, int var) { -+ v2i64 res = __builtin_lsx_vmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vmaxi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_d' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vmaxi_bu(v16u8 _1, int var) { -+ v16u8 res = __builtin_lsx_vmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_bu' must be a constant integer}} -+ return res; -+} -+ -+v8u16 vmaxi_hu(v8u16 _1, int var) { -+ v8u16 res = __builtin_lsx_vmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_hu' must be a constant integer}} -+ return res; -+} -+ -+v4u32 vmaxi_wu(v4u32 _1, int var) { -+ v4u32 res = __builtin_lsx_vmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_wu' must be a constant integer}} -+ return res; -+} -+ -+v2u64 vmaxi_du(v2u64 _1, int var) { -+ v2u64 res = __builtin_lsx_vmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vmaxi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_du' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vmini_b(v16i8 _1, int var) { -+ v16i8 res = __builtin_lsx_vmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vmini_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_b' must 
be a constant integer}} -+ return res; -+} -+ -+v8i16 vmini_h(v8i16 _1, int var) { -+ v8i16 res = __builtin_lsx_vmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vmini_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vmini_w(v4i32 _1, int var) { -+ v4i32 res = __builtin_lsx_vmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vmini_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vmini_d(v2i64 _1, int var) { -+ v2i64 res = __builtin_lsx_vmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vmini_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_d' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vmini_bu(v16u8 _1, int var) { -+ v16u8 res = __builtin_lsx_vmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vmini_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_bu' must be a constant integer}} -+ return res; -+} -+ -+v8u16 vmini_hu(v8u16 _1, int var) { -+ v8u16 res = __builtin_lsx_vmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vmini_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_hu' must be a constant integer}} -+ return res; -+} -+ -+v4u32 vmini_wu(v4u32 _1, int var) { -+ v4u32 res = __builtin_lsx_vmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vmini_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_wu' must be a constant integer}} -+ return res; -+} -+ -+v2u64 vmini_du(v2u64 _1, int var) { -+ v2u64 res = __builtin_lsx_vmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vmini_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_du' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vseqi_b(v16i8 _1, int var) { -+ v16i8 res = __builtin_lsx_vseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vseqi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vseqi_h(v8i16 _1, int var) { -+ v8i16 
res = __builtin_lsx_vseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vseqi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vseqi_w(v4i32 _1, int var) { -+ v4i32 res = __builtin_lsx_vseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vseqi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vseqi_d(v2i64 _1, int var) { -+ v2i64 res = __builtin_lsx_vseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vseqi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vslti_b(v16i8 _1, int var) { -+ v16i8 res = __builtin_lsx_vslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vslti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vslti_h(v8i16 _1, int var) { -+ v8i16 res = __builtin_lsx_vslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vslti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vslti_w(v4i32 _1, int var) { -+ v4i32 res = __builtin_lsx_vslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vslti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vslti_d(v2i64 _1, int var) { -+ v2i64 res = __builtin_lsx_vslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vslti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vslti_bu(v16u8 _1, int var) { -+ v16i8 res = __builtin_lsx_vslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vslti_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_bu' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vslti_hu(v8u16 _1, int var) { -+ v8i16 res = __builtin_lsx_vslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 
31]}} -+ res |= __builtin_lsx_vslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vslti_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_hu' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vslti_wu(v4u32 _1, int var) { -+ v4i32 res = __builtin_lsx_vslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vslti_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_wu' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vslti_du(v2u64 _1, int var) { -+ v2i64 res = __builtin_lsx_vslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vslti_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_du' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vslei_b(v16i8 _1, int var) { -+ v16i8 res = __builtin_lsx_vslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vslei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vslei_h(v8i16 _1, int var) { -+ v8i16 res = __builtin_lsx_vslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vslei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vslei_w(v4i32 _1, int var) { -+ v4i32 res = __builtin_lsx_vslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vslei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vslei_d(v2i64 _1, int var) { -+ v2i64 res = __builtin_lsx_vslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lsx_vslei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vslei_bu(v16u8 _1, int var) { -+ v16i8 res = __builtin_lsx_vslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vslei_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_bu' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vslei_hu(v8u16 _1, int var) { -+ v8i16 res = __builtin_lsx_vslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vslei_hu(_1, 32); // expected-error {{argument value 32 is 
outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vslei_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_hu' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vslei_wu(v4u32 _1, int var) { -+ v4i32 res = __builtin_lsx_vslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vslei_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_wu' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vslei_du(v2u64 _1, int var) { -+ v2i64 res = __builtin_lsx_vslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vslei_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_du' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vsat_b(v16i8 _1, int var) { -+ v16i8 res = __builtin_lsx_vsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vsat_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vsat_h(v8i16 _1, int var) { -+ v8i16 res = __builtin_lsx_vsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vsat_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vsat_w(v4i32 _1, int var) { -+ v4i32 res = __builtin_lsx_vsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsat_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vsat_d(v2i64 _1, int var) { -+ v2i64 res = __builtin_lsx_vsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vsat_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_d' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vsat_bu(v16u8 _1, int var) { -+ v16u8 res = __builtin_lsx_vsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vsat_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_bu' must be a constant integer}} -+ return res; -+} -+ -+v8u16 vsat_hu(v8u16 _1, int var) { -+ v8u16 res = __builtin_lsx_vsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vsat_hu(_1, var); // expected-error {{argument to 
'__builtin_lsx_vsat_hu' must be a constant integer}} -+ return res; -+} -+ -+v4u32 vsat_wu(v4u32 _1, int var) { -+ v4u32 res = __builtin_lsx_vsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsat_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_wu' must be a constant integer}} -+ return res; -+} -+ -+v2u64 vsat_du(v2u64 _1, int var) { -+ v2u64 res = __builtin_lsx_vsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __builtin_lsx_vsat_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_du' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vreplvei_b(v16i8 _1, int var) { -+ v16i8 res = __builtin_lsx_vreplvei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vreplvei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vreplvei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vreplvei_h(v8i16 _1, int var) { -+ v8i16 res = __builtin_lsx_vreplvei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vreplvei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vreplvei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vreplvei_w(v4i32 _1, int var) { -+ v4i32 res = __builtin_lsx_vreplvei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} -+ res |= __builtin_lsx_vreplvei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} -+ res |= __builtin_lsx_vreplvei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vreplvei_d(v2i64 _1, int var) { -+ v2i64 res = __builtin_lsx_vreplvei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} -+ res |= __builtin_lsx_vreplvei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} -+ res |= __builtin_lsx_vreplvei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_d' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vandi_b(v16u8 _1, int var) { -+ v16u8 res = __builtin_lsx_vandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vandi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vandi_b' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vori_b(v16u8 _1, int var) { -+ v16u8 res = __builtin_lsx_vori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vori_b' must be a constant integer}} -+ 
return res; -+} -+ -+v16u8 vnori_b(v16u8 _1, int var) { -+ v16u8 res = __builtin_lsx_vnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vnori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vnori_b' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vxori_b(v16u8 _1, int var) { -+ v16u8 res = __builtin_lsx_vxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vxori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vxori_b' must be a constant integer}} -+ return res; -+} -+ -+v16u8 vbitseli_b(v16u8 _1, v16u8 _2, int var) { -+ v16u8 res = __builtin_lsx_vbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vbitseli_b' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vshuf4i_b(v16i8 _1, int var) { -+ v16i8 res = __builtin_lsx_vshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vshuf4i_h(v8i16 _1, int var) { -+ v8i16 res = __builtin_lsx_vshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vshuf4i_w(v4i32 _1, int var) { -+ v4i32 res = __builtin_lsx_vshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_w' must be a constant integer}} -+ return res; -+} -+ -+int vpickve2gr_b(v16i8 _1, int var) { -+ int res = __builtin_lsx_vpickve2gr_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vpickve2gr_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vpickve2gr_b(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_b' must be a constant integer}} -+ return res; -+} -+ -+int vpickve2gr_h(v8i16 _1, int var) { -+ int res = __builtin_lsx_vpickve2gr_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vpickve2gr_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vpickve2gr_h(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_h' must be a 
constant integer}} -+ return res; -+} -+ -+int vpickve2gr_w(v4i32 _1, int var) { -+ int res = __builtin_lsx_vpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} -+ res |= __builtin_lsx_vpickve2gr_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} -+ res |= __builtin_lsx_vpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_w' must be a constant integer}} -+ return res; -+} -+ -+long vpickve2gr_d(v2i64 _1, int var) { -+ long res = __builtin_lsx_vpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} -+ res |= __builtin_lsx_vpickve2gr_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} -+ res |= __builtin_lsx_vpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_d' must be a constant integer}} -+ return res; -+} -+ -+unsigned int vpickve2gr_bu(v16i8 _1, int var) { -+ unsigned int res = __builtin_lsx_vpickve2gr_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vpickve2gr_bu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vpickve2gr_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_bu' must be a constant integer}} -+ return res; -+} -+ -+unsigned int vpickve2gr_hu(v8i16 _1, int var) { -+ unsigned int res = __builtin_lsx_vpickve2gr_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vpickve2gr_hu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vpickve2gr_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_hu' must be a constant integer}} -+ return res; -+} -+ -+unsigned int vpickve2gr_wu(v4i32 _1, int var) { -+ unsigned int res = __builtin_lsx_vpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} -+ res |= __builtin_lsx_vpickve2gr_wu(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} -+ res |= __builtin_lsx_vpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_wu' must be a constant integer}} -+ return res; -+} -+ -+unsigned long int vpickve2gr_du(v2i64 _1, int var) { -+ unsigned long int res = __builtin_lsx_vpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} -+ res |= __builtin_lsx_vpickve2gr_du(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} -+ res |= __builtin_lsx_vpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_du' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vinsgr2vr_b(v16i8 _1, int var) { -+ v16i8 res = __builtin_lsx_vinsgr2vr_b(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vinsgr2vr_b(_1, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vinsgr2vr_b(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vinsgr2vr_h(v8i16 _1, int var) { -+ v8i16 res = __builtin_lsx_vinsgr2vr_h(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vinsgr2vr_h(_1, 1, 8); // expected-error {{argument value 
8 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vinsgr2vr_h(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vinsgr2vr_w(v4i32 _1, int var) { -+ v4i32 res = __builtin_lsx_vinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} -+ res |= __builtin_lsx_vinsgr2vr_w(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} -+ res |= __builtin_lsx_vinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_w' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vinsgr2vr_d(v2i64 _1, int var) { -+ v2i64 res = __builtin_lsx_vinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} -+ res |= __builtin_lsx_vinsgr2vr_d(_1, 1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} -+ res |= __builtin_lsx_vinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_d' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vsllwil_h_b(v16i8 _1, int var) { -+ v8i16 res = __builtin_lsx_vsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_h_b' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vsllwil_w_h(v8i16 _1, int var) { -+ v4i32 res = __builtin_lsx_vsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_w_h' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vsllwil_d_w(v4i32 _1, int var) { -+ v2i64 res = __builtin_lsx_vsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_d_w' must be a constant integer}} -+ return res; -+} -+ -+v8u16 vsllwil_hu_bu(v16u8 _1, int var) { -+ v8u16 res = __builtin_lsx_vsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lsx_vsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_hu_bu' must be a constant integer}} -+ return res; -+} -+ -+v4u32 vsllwil_wu_hu(v8u16 _1, int var) { -+ v4u32 res = __builtin_lsx_vsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lsx_vsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_wu_hu' must be a constant integer}} -+ return res; -+} -+ -+v2u64 vsllwil_du_wu(v4u32 _1, int var) { -+ v2u64 res = __builtin_lsx_vsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} 
-+ res |= __builtin_lsx_vsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_du_wu' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vfrstpi_b(v16i8 _1, v16i8 _2, int var) { -+ v16i8 res = __builtin_lsx_vfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vfrstpi_h(v8i16 _1, v8i16 _2, int var) { -+ v8i16 res = __builtin_lsx_vfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_h' must be a constant integer}} -+ return res; -+} -+ -+v2i64 vshuf4i_d(v2i64 _1, v2i64 _2, int var) { -+ v2i64 res = __builtin_lsx_vshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_d' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vbsrl_v(v16i8 _1, int var) { -+ v16i8 res = __builtin_lsx_vbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vbsrl_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsrl_v' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vbsll_v(v16i8 _1, int var) { -+ v16i8 res = __builtin_lsx_vbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lsx_vbsll_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsll_v' must be a constant integer}} -+ return res; -+} -+ -+v16i8 vextrins_b(v16i8 _1, v16i8 _2, int var) { -+ v16i8 res = __builtin_lsx_vextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_b' must be a constant integer}} -+ return res; -+} -+ -+v8i16 vextrins_h(v8i16 _1, v8i16 _2, int var) { -+ v8i16 res = __builtin_lsx_vextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} -+ res |= __builtin_lsx_vextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_h' must be a constant integer}} -+ return res; -+} -+ -+v4i32 vextrins_w(v4i32 _1, v4i32 _2, int var) { -+ v4i32 res = 
__builtin_lsx_vextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lsx_vextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lsx_vextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v2i64 vextrins_d(v2i64 _1, v2i64 _2, int var) {
-+ v2i64 res = __builtin_lsx_vextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lsx_vextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lsx_vextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_d' must be a constant integer}}
-+ return res;
-+}
-+
-+void vstelm_b_idx(v16i8 _1, void *_2, int var) {
-+ __builtin_lsx_vstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ __builtin_lsx_vstelm_b(_1, _2, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ __builtin_lsx_vstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}}
-+}
-+
-+void vstelm_h_idx(v8i16 _1, void *_2, int var) {
-+ __builtin_lsx_vstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ __builtin_lsx_vstelm_h(_1, _2, 2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ __builtin_lsx_vstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}}
-+}
-+
-+void vstelm_w_idx(v4i32 _1, void *_2, int var) {
-+ __builtin_lsx_vstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
-+ __builtin_lsx_vstelm_w(_1, _2, 4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-+ __builtin_lsx_vstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}}
-+}
-+
-+void vstelm_d_idx(v2i64 _1, void *_2, int var) {
-+ __builtin_lsx_vstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}}
-+ __builtin_lsx_vstelm_d(_1, _2, 8, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-+ __builtin_lsx_vstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}}
-+}
-+
-+void vstelm_b(v16i8 _1, void *_2, int var) {
-+ __builtin_lsx_vstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}}
-+ __builtin_lsx_vstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}}
-+ __builtin_lsx_vstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}}
-+}
-+
-+void vstelm_h(v8i16 _1, void *_2, int var) {
-+ __builtin_lsx_vstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}}
-+ __builtin_lsx_vstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}}
-+ __builtin_lsx_vstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}}
-+}
-+
-+void vstelm_w(v4i32 _1, void *_2, int var) {
-+ __builtin_lsx_vstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}}
-+ __builtin_lsx_vstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}}
-+ __builtin_lsx_vstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}}
-+}
-+
-+void vstelm_d(v2i64 _1, void *_2, int var) {
-+ __builtin_lsx_vstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}}
-+ __builtin_lsx_vstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}}
-+ __builtin_lsx_vstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}}
-+}
-+
-+v16i8 vldrepl_b(void *_1, int var) {
-+ v16i8 res = __builtin_lsx_vldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
-+ res |= __builtin_lsx_vldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}}
-+ res |= __builtin_lsx_vldrepl_b(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i16 vldrepl_h(void *_1, int var) {
-+ v8i16 res = __builtin_lsx_vldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}}
-+ res |= __builtin_lsx_vldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}}
-+ res |= __builtin_lsx_vldrepl_h(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i32 vldrepl_w(void *_1, int var) {
-+ v4i32 res = __builtin_lsx_vldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}}
-+ res |= __builtin_lsx_vldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}}
-+ res |= __builtin_lsx_vldrepl_w(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v2i64 vldrepl_d(void *_1, int var) {
-+ v2i64 res = __builtin_lsx_vldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}}
-+ res |= __builtin_lsx_vldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}}
-+ res |= __builtin_lsx_vldrepl_d(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i8 vrotri_b(v16i8 _1, int var) {
-+ v16i8 res = __builtin_lsx_vrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __builtin_lsx_vrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __builtin_lsx_vrotri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i16 vrotri_h(v8i16 _1, int var) {
-+ v8i16 res = __builtin_lsx_vrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vrotri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i32 vrotri_w(v4i32 _1, int var) {
-+ v4i32 res = __builtin_lsx_vrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vrotri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v2i64 vrotri_d(v2i64 _1, int var) {
-+ v2i64 res = __builtin_lsx_vrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vrotri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2, int var) {
-+ v16i8 res = __builtin_lsx_vsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_b_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2, int var) {
-+ v8i16 res = __builtin_lsx_vsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_h_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2, int var) {
-+ v4i32 res = __builtin_lsx_vsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_w_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2, int var) {
-+ v2i64 res = __builtin_lsx_vsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_d_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2, int var) {
-+ v16i8 res = __builtin_lsx_vsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_b_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2, int var) {
-+ v8i16 res = __builtin_lsx_vsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_h_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2, int var) {
-+ v4i32 res = __builtin_lsx_vsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_w_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2, int var) {
-+ v2i64 res = __builtin_lsx_vsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_d_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2, int var) {
-+ v16i8 res = __builtin_lsx_vssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_b_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2, int var) {
-+ v8i16 res = __builtin_lsx_vssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_h_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2, int var) {
-+ v4i32 res = __builtin_lsx_vssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_w_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2, int var) {
-+ v2i64 res = __builtin_lsx_vssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_d_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2, int var) {
-+ v16u8 res = __builtin_lsx_vssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_bu_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2, int var) {
-+ v8u16 res = __builtin_lsx_vssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_hu_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2, int var) {
-+ v4u32 res = __builtin_lsx_vssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_wu_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2, int var) {
-+ v2u64 res = __builtin_lsx_vssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_du_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2, int var) {
-+ v16i8 res = __builtin_lsx_vssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_b_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2, int var) {
-+ v8i16 res = __builtin_lsx_vssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_h_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2, int var) {
-+ v4i32 res = __builtin_lsx_vssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_w_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2, int var) {
-+ v2i64 res = __builtin_lsx_vssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_d_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2, int var) {
-+ v16u8 res = __builtin_lsx_vssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_bu_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2, int var) {
-+ v8u16 res = __builtin_lsx_vssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_hu_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2, int var) {
-+ v4u32 res = __builtin_lsx_vssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_wu_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2, int var) {
-+ v2u64 res = __builtin_lsx_vssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_du_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i8 vsrani_b_h(v16i8 _1, v16i8 _2, int var) {
-+ v16i8 res = __builtin_lsx_vsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_b_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i16 vsrani_h_w(v8i16 _1, v8i16 _2, int var) {
-+ v8i16 res = __builtin_lsx_vsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_h_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i32 vsrani_w_d(v4i32 _1, v4i32 _2, int var) {
-+ v4i32 res = __builtin_lsx_vsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_w_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v2i64 vsrani_d_q(v2i64 _1, v2i64 _2, int var) {
-+ v2i64 res = __builtin_lsx_vsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_d_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2, int var) {
-+ v16i8 res = __builtin_lsx_vsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_b_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2, int var) {
-+ v8i16 res = __builtin_lsx_vsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_h_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2, int var) {
-+ v4i32 res = __builtin_lsx_vsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_w_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2, int var) {
-+ v2i64 res = __builtin_lsx_vsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_d_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i8 vssrani_b_h(v16i8 _1, v16i8 _2, int var) {
-+ v16i8 res = __builtin_lsx_vssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_b_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i16 vssrani_h_w(v8i16 _1, v8i16 _2, int var) {
-+ v8i16 res = __builtin_lsx_vssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_h_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i32 vssrani_w_d(v4i32 _1, v4i32 _2, int var) {
-+ v4i32 res = __builtin_lsx_vssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_w_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v2i64 vssrani_d_q(v2i64 _1, v2i64 _2, int var) {
-+ v2i64 res = __builtin_lsx_vssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_d_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2, int var) {
-+ v16u8 res = __builtin_lsx_vssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_bu_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2, int var) {
-+ v8u16 res = __builtin_lsx_vssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_hu_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2, int var) {
-+ v4u32 res = __builtin_lsx_vssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_wu_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v2u64 vssrani_du_q(v2u64 _1, v2i64 _2, int var) {
-+ v2u64 res = __builtin_lsx_vssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_du_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2, int var) {
-+ v16i8 res = __builtin_lsx_vssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_b_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2, int var) {
-+ v8i16 res = __builtin_lsx_vssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_h_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2, int var) {
-+ v4i32 res = __builtin_lsx_vssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_w_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2, int var) {
-+ v2i64 res = __builtin_lsx_vssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_d_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2, int var) {
-+ v16u8 res = __builtin_lsx_vssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lsx_vssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_bu_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2, int var) {
-+ v8u16 res = __builtin_lsx_vssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lsx_vssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_hu_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2, int var) {
-+ v4u32 res = __builtin_lsx_vssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lsx_vssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_wu_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2, int var) {
-+ v2u64 res = __builtin_lsx_vssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lsx_vssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_du_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i32 vpermi_w(v4i32 _1, v4i32 _2, int var) {
-+ v4i32 res = __builtin_lsx_vpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lsx_vpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lsx_vpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vpermi_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i8 vld(void *_1, int var) {
-+ v16i8 res = __builtin_lsx_vld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
-+ res |= __builtin_lsx_vld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}}
-+ res |= __builtin_lsx_vld(_1, var); // expected-error {{argument to '__builtin_lsx_vld' must be a constant integer}}
-+ return res;
-+}
-+
-+void vst(v16i8 _1, void *_2, int var) {
-+ __builtin_lsx_vst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
-+ __builtin_lsx_vst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}}
-+ __builtin_lsx_vst(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vst' must be a constant integer}}
-+}
-+
-+v2i64 vldi(int var) {
-+ v2i64 res = __builtin_lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}}
-+ res |= __builtin_lsx_vldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}}
-+ res |= __builtin_lsx_vldi(var); // expected-error {{argument to '__builtin_lsx_vldi' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i8 vrepli_b(int var) {
-+ v16i8 res = __builtin_lsx_vrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
-+ res |= __builtin_lsx_vrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
-+ res |= __builtin_lsx_vrepli_b(var); // expected-error {{argument to '__builtin_lsx_vrepli_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v2i64 vrepli_d(int var) {
-+ v2i64 res = __builtin_lsx_vrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
-+ res |= __builtin_lsx_vrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
-+ res |= __builtin_lsx_vrepli_d(var); // expected-error {{argument to '__builtin_lsx_vrepli_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i16 vrepli_h(int var) {
-+ v8i16 res = __builtin_lsx_vrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
-+ res |= __builtin_lsx_vrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
-+ res |= __builtin_lsx_vrepli_h(var); // expected-error {{argument to '__builtin_lsx_vrepli_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i32 vrepli_w(int var) {
-+ v4i32 res = __builtin_lsx_vrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
-+ res |= __builtin_lsx_vrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
-+ res |= __builtin_lsx_vrepli_w(var); // expected-error {{argument to '__builtin_lsx_vrepli_w' must be a constant integer}}
-+ return res;
-+}
-diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin.c b/clang/test/CodeGen/LoongArch/lsx/builtin.c
-new file mode 100644
-index 000000000000..ef5a390e1838
---- /dev/null
-+++ b/clang/test/CodeGen/LoongArch/lsx/builtin.c
-@@ -0,0 +1,5193 @@
-+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s
-+
-+typedef signed char v16i8 __attribute__ ((vector_size(16), aligned(16)));
-+typedef signed char v16i8_b __attribute__ ((vector_size(16), aligned(1)));
-+typedef unsigned char v16u8 __attribute__ ((vector_size(16), aligned(16)));
-+typedef unsigned char v16u8_b __attribute__ ((vector_size(16), aligned(1)));
-+typedef short v8i16 __attribute__ ((vector_size(16), aligned(16)));
-+typedef short v8i16_h __attribute__ ((vector_size(16), aligned(2)));
-+typedef unsigned short v8u16 __attribute__ ((vector_size(16), aligned(16)));
-+typedef unsigned short v8u16_h __attribute__ ((vector_size(16), aligned(2)));
-+typedef int v4i32 __attribute__ ((vector_size(16), aligned(16)));
-+typedef int v4i32_w __attribute__ ((vector_size(16), aligned(4)));
-+typedef unsigned int v4u32 __attribute__ ((vector_size(16), aligned(16)));
-+typedef unsigned int v4u32_w __attribute__ ((vector_size(16), aligned(4)));
-+typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16)));
-+typedef long long v2i64_d __attribute__ ((vector_size(16), aligned(8)));
-+typedef unsigned long long v2u64 __attribute__ ((vector_size(16), aligned(16)));
-+typedef unsigned long long v2u64_d __attribute__ ((vector_size(16), aligned(8)));
-+typedef float v4f32 __attribute__ ((vector_size(16), aligned(16)));
-+typedef float v4f32_w __attribute__ ((vector_size(16), aligned(4)));
-+typedef double v2f64 __attribute__ ((vector_size(16), aligned(16)));
-+typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8)));
-+
-+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
-+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
-+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
-+
-+
-+// CHECK-LABEL: @vsll_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsll_b(_1, _2); }
-+// CHECK-LABEL: @vsll_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsll_h(_1, _2); }
-+// CHECK-LABEL: @vsll_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsll_w(_1, _2); }
-+// CHECK-LABEL: @vsll_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsll_d(_1, _2); }
-+// CHECK-LABEL: @vslli_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vslli_b(v16i8 _1) { return __builtin_lsx_vslli_b(_1, 1); }
-+// CHECK-LABEL: @vslli_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vslli_h(v8i16 _1) { return __builtin_lsx_vslli_h(_1, 1); }
-+// CHECK-LABEL: @vslli_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vslli_w(v4i32 _1) { return __builtin_lsx_vslli_w(_1, 1); }
-+// CHECK-LABEL: @vslli_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vslli_d(v2i64 _1) { return __builtin_lsx_vslli_d(_1, 1); }
-+// CHECK-LABEL: @vsra_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsra_b(_1, _2); }
-+// CHECK-LABEL: @vsra_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsra_h(_1, _2); }
-+// CHECK-LABEL: @vsra_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsra_w(_1, _2); }
-+// CHECK-LABEL: @vsra_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsra_d(_1, _2); }
-+// CHECK-LABEL: @vsrai_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsrai_b(v16i8 _1) { return __builtin_lsx_vsrai_b(_1, 1); }
-+// CHECK-LABEL: @vsrai_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsrai_h(v8i16 _1) { return __builtin_lsx_vsrai_h(_1, 1); }
-+// CHECK-LABEL: @vsrai_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsrai_w(v4i32 _1) { return __builtin_lsx_vsrai_w(_1, 1); }
-+// CHECK-LABEL: @vsrai_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsrai_d(v2i64 _1) { return __builtin_lsx_vsrai_d(_1, 1); }
-+// CHECK-LABEL: @vsrar_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsrar_b(v16i8 _1, v16i8 _2) {
-+ return __builtin_lsx_vsrar_b(_1, _2);
-+}
-+// CHECK-LABEL: @vsrar_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsrar_h(v8i16 _1, v8i16 _2) {
-+ return __builtin_lsx_vsrar_h(_1, _2);
-+}
-+// CHECK-LABEL: @vsrar_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsrar_w(v4i32 _1, v4i32 _2) {
-+ return __builtin_lsx_vsrar_w(_1, _2);
-+}
-+// CHECK-LABEL: @vsrar_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsrar_d(v2i64 _1, v2i64 _2) {
-+ return __builtin_lsx_vsrar_d(_1, _2);
-+}
-+// CHECK-LABEL: @vsrari_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsrari_b(v16i8 _1) { return __builtin_lsx_vsrari_b(_1, 1); }
-+// CHECK-LABEL: @vsrari_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsrari_h(v8i16 _1) { return __builtin_lsx_vsrari_h(_1, 1); }
-+// CHECK-LABEL: @vsrari_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsrari_w(v4i32 _1) { return __builtin_lsx_vsrari_w(_1, 1); }
-+// CHECK-LABEL: @vsrari_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsrari_d(v2i64 _1) { return __builtin_lsx_vsrari_d(_1, 1); }
-+// CHECK-LABEL: @vsrl_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsrl_b(_1, _2); }
-+// CHECK-LABEL: @vsrl_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsrl_h(_1, _2); }
-+// CHECK-LABEL: @vsrl_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsrl_w(_1, _2); }
-+// CHECK-LABEL: @vsrl_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsrl_d(_1, _2); }
-+// CHECK-LABEL: @vsrli_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsrli_b(v16i8 _1) { return __builtin_lsx_vsrli_b(_1, 1); }
-+// CHECK-LABEL: @vsrli_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsrli_h(v8i16 _1) { return __builtin_lsx_vsrli_h(_1, 1); }
-+// CHECK-LABEL: @vsrli_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsrli_w(v4i32 _1) { return __builtin_lsx_vsrli_w(_1, 1); }
-+// CHECK-LABEL: @vsrli_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsrli_d(v2i64 _1) { return __builtin_lsx_vsrli_d(_1, 1); }
-+// CHECK-LABEL: @vsrlr_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsrlr_b(v16i8 _1, v16i8 _2) {
-+ return __builtin_lsx_vsrlr_b(_1, _2);
-+}
-+// CHECK-LABEL: @vsrlr_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsrlr_h(v8i16 _1, v8i16 _2) {
-+ return __builtin_lsx_vsrlr_h(_1, _2);
-+}
-+// CHECK-LABEL: @vsrlr_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsrlr_w(v4i32 _1, v4i32 _2) {
-+ return __builtin_lsx_vsrlr_w(_1, _2);
-+}
-+// CHECK-LABEL: @vsrlr_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsrlr_d(v2i64 _1, v2i64 _2) {
-+ return __builtin_lsx_vsrlr_d(_1, _2);
-+}
-+// CHECK-LABEL: @vsrlri_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsrlri_b(v16i8 _1) { return __builtin_lsx_vsrlri_b(_1, 1); }
-+// CHECK-LABEL: @vsrlri_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsrlri_h(v8i16 _1) { return __builtin_lsx_vsrlri_h(_1, 1); }
-+// CHECK-LABEL: @vsrlri_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsrlri_w(v4i32 _1) { return __builtin_lsx_vsrlri_w(_1, 1); }
-+// CHECK-LABEL: @vsrlri_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsrlri_d(v2i64 _1) { return __builtin_lsx_vsrlri_d(_1, 1); }
-+// CHECK-LABEL: @vbitclr_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vbitclr_b(v16u8 _1, v16u8 _2) {
-+ return __builtin_lsx_vbitclr_b(_1, _2);
-+}
-+// CHECK-LABEL: @vbitclr_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vbitclr_h(v8u16 _1, v8u16 _2) {
-+ return __builtin_lsx_vbitclr_h(_1, _2);
-+}
-+// CHECK-LABEL: @vbitclr_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vbitclr_w(v4u32 _1, v4u32 _2) {
-+ return __builtin_lsx_vbitclr_w(_1, _2);
-+}
-+// CHECK-LABEL: @vbitclr_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vbitclr_d(v2u64 _1, v2u64 _2) {
-+ return __builtin_lsx_vbitclr_d(_1, _2);
-+}
-+// CHECK-LABEL: @vbitclri_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vbitclri_b(v16u8 _1) { return __builtin_lsx_vbitclri_b(_1, 1); }
-+// CHECK-LABEL: @vbitclri_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vbitclri_h(v8u16 _1) { return __builtin_lsx_vbitclri_h(_1, 1); }
-+// CHECK-LABEL: @vbitclri_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vbitclri_w(v4u32 _1) { return __builtin_lsx_vbitclri_w(_1, 1); }
-+// CHECK-LABEL: @vbitclri_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vbitclri_d(v2u64 _1) { return __builtin_lsx_vbitclri_d(_1, 1); }
-+// CHECK-LABEL: @vbitset_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vbitset_b(v16u8 _1, v16u8 _2) {
-+ return __builtin_lsx_vbitset_b(_1, _2);
-+}
-+// CHECK-LABEL: @vbitset_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vbitset_h(v8u16 _1, v8u16 _2) {
-+ return __builtin_lsx_vbitset_h(_1, _2);
-+}
-+// CHECK-LABEL: @vbitset_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vbitset_w(v4u32 _1, v4u32 _2) {
-+ return __builtin_lsx_vbitset_w(_1, _2);
-+}
-+// CHECK-LABEL: @vbitset_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vbitset_d(v2u64 _1, v2u64 _2) {
-+ return __builtin_lsx_vbitset_d(_1, _2);
-+}
-+// CHECK-LABEL: @vbitseti_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vbitseti_b(v16u8 _1) { return __builtin_lsx_vbitseti_b(_1, 1); }
-+// CHECK-LABEL: @vbitseti_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vbitseti_h(v8u16 _1) { return __builtin_lsx_vbitseti_h(_1, 1); }
-+// CHECK-LABEL: @vbitseti_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vbitseti_w(v4u32 _1) { return __builtin_lsx_vbitseti_w(_1, 1); }
-+// CHECK-LABEL: @vbitseti_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vbitseti_d(v2u64 _1) { return __builtin_lsx_vbitseti_d(_1, 1); }
-+// CHECK-LABEL: @vbitrev_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vbitrev_b(v16u8 _1, v16u8 _2) {
-+ return __builtin_lsx_vbitrev_b(_1, _2);
-+}
-+// CHECK-LABEL: @vbitrev_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vbitrev_h(v8u16 _1, v8u16 _2) {
-+ return __builtin_lsx_vbitrev_h(_1, _2);
-+}
-+// CHECK-LABEL: @vbitrev_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vbitrev_w(v4u32 _1, v4u32 _2) {
-+ return __builtin_lsx_vbitrev_w(_1, _2);
-+}
-+// CHECK-LABEL: @vbitrev_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vbitrev_d(v2u64 _1, v2u64 _2) {
-+ return __builtin_lsx_vbitrev_d(_1, _2);
-+}
-+// CHECK-LABEL: @vbitrevi_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vbitrevi_b(v16u8 _1) { return __builtin_lsx_vbitrevi_b(_1, 1); }
-+// CHECK-LABEL: @vbitrevi_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vbitrevi_h(v8u16 _1) { return __builtin_lsx_vbitrevi_h(_1, 1); }
-+// CHECK-LABEL: @vbitrevi_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vbitrevi_w(v4u32 _1) { return __builtin_lsx_vbitrevi_w(_1, 1); }
-+// CHECK-LABEL: @vbitrevi_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vbitrevi_d(v2u64 _1) { return __builtin_lsx_vbitrevi_d(_1, 1); }
-+// CHECK-LABEL: @vadd_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vadd_b(_1, _2); }
-+// CHECK-LABEL: @vadd_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vadd_h(_1, _2); }
-+// CHECK-LABEL: @vadd_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vadd_w(_1, _2); }
-+// CHECK-LABEL: @vadd_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_d(_1, _2); }
-+// CHECK-LABEL: @vaddi_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vaddi_bu(v16i8 _1) { return __builtin_lsx_vaddi_bu(_1, 1); }
-+// CHECK-LABEL: @vaddi_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vaddi_hu(v8i16 _1) { return __builtin_lsx_vaddi_hu(_1, 1); }
-+// CHECK-LABEL: @vaddi_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vaddi_wu(v4i32 _1) { return __builtin_lsx_vaddi_wu(_1, 1); }
-+// CHECK-LABEL: @vaddi_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vaddi_du(v2i64 _1) { return __builtin_lsx_vaddi_du(_1, 1); }
-+// CHECK-LABEL: @vsub_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsub_b(_1, _2); }
-+// CHECK-LABEL: @vsub_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsub_h(_1, _2); }
-+// CHECK-LABEL: @vsub_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsub_w(_1, _2); }
-+// CHECK-LABEL: @vsub_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_d(_1, _2); }
-+// CHECK-LABEL: @vsubi_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vsubi_bu(v16i8 _1) { return __builtin_lsx_vsubi_bu(_1, 1); }
-+// CHECK-LABEL: @vsubi_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsubi_hu(v8i16 _1) { return __builtin_lsx_vsubi_hu(_1, 1); }
-+// CHECK-LABEL: @vsubi_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsubi_wu(v4i32 _1) { return __builtin_lsx_vsubi_wu(_1, 1); }
-+// CHECK-LABEL: @vsubi_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsubi_du(v2i64 _1) { return __builtin_lsx_vsubi_du(_1, 1); }
-+// CHECK-LABEL: @vmax_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmax_b(_1, _2); }
-+// CHECK-LABEL: @vmax_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmax_h(_1, _2); }
-+// CHECK-LABEL: @vmax_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmax_w(_1, _2); }
-+// CHECK-LABEL: @vmax_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmax_d(_1, _2); }
-+// CHECK-LABEL: @vmaxi_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vmaxi_b(v16i8 _1) { return __builtin_lsx_vmaxi_b(_1, 1); }
-+// CHECK-LABEL: @vmaxi_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vmaxi_h(v8i16 _1) { return __builtin_lsx_vmaxi_h(_1, 1); }
-+// CHECK-LABEL: @vmaxi_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vmaxi_w(v4i32 _1) { return __builtin_lsx_vmaxi_w(_1, 1); }
-+// CHECK-LABEL: @vmaxi_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vmaxi_d(v2i64 _1) { return __builtin_lsx_vmaxi_d(_1, 1); }
-+// CHECK-LABEL: @vmax_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vmax_bu(v16u8 _1, v16u8 _2) {
-+ return __builtin_lsx_vmax_bu(_1, _2);
-+}
-+// CHECK-LABEL: @vmax_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vmax_hu(v8u16 _1, v8u16 _2) {
-+ return __builtin_lsx_vmax_hu(_1, _2);
-+}
-+// CHECK-LABEL: @vmax_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vmax_wu(v4u32 _1, v4u32 _2) {
-+ return __builtin_lsx_vmax_wu(_1, _2);
-+}
-+// CHECK-LABEL: @vmax_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2u64 vmax_du(v2u64 _1, v2u64 _2) {
-+ return __builtin_lsx_vmax_du(_1, _2);
-+}
-+// CHECK-LABEL: @vmaxi_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16u8 vmaxi_bu(v16u8 _1) { return __builtin_lsx_vmaxi_bu(_1, 1); }
-+// CHECK-LABEL: @vmaxi_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8u16 vmaxi_hu(v8u16 _1) { return __builtin_lsx_vmaxi_hu(_1, 1); }
-+// CHECK-LABEL: @vmaxi_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4u32 vmaxi_wu(v4u32 _1) { return __builtin_lsx_vmaxi_wu(_1, 1); }
-+// CHECK-LABEL: @vmaxi_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vmaxi_du(v2u64 _1) { return __builtin_lsx_vmaxi_du(_1, 1); } -+// CHECK-LABEL: @vmin_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmin_b(_1, _2); } -+// CHECK-LABEL: @vmin_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmin_h(_1, _2); } -+// CHECK-LABEL: @vmin_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmin_w(_1, _2); } -+// CHECK-LABEL: @vmin_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmin_d(_1, _2); } -+// CHECK-LABEL: @vmini_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vmini_b(v16i8 _1) { return __builtin_lsx_vmini_b(_1, 1); } -+// CHECK-LABEL: @vmini_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmini_h(v8i16 _1) { return __builtin_lsx_vmini_h(_1, 1); } -+// CHECK-LABEL: @vmini_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmini_w(v4i32 _1) { return __builtin_lsx_vmini_w(_1, 1); } -+// CHECK-LABEL: @vmini_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmini_d(v2i64 _1) { return __builtin_lsx_vmini_d(_1, 1); } -+// CHECK-LABEL: @vmin_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vmin_bu(v16u8 _1, v16u8 _2) { -+ return __builtin_lsx_vmin_bu(_1, _2); -+} -+// CHECK-LABEL: @vmin_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vmin_hu(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vmin_hu(_1, _2); -+} -+// CHECK-LABEL: @vmin_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vmin_wu(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vmin_wu(_1, _2); -+} -+// CHECK-LABEL: @vmin_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> 
[[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vmin_du(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vmin_du(_1, _2); -+} -+// CHECK-LABEL: @vmini_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vmini_bu(v16u8 _1) { return __builtin_lsx_vmini_bu(_1, 1); } -+// CHECK-LABEL: @vmini_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vmini_hu(v8u16 _1) { return __builtin_lsx_vmini_hu(_1, 1); } -+// CHECK-LABEL: @vmini_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vmini_wu(v4u32 _1) { return __builtin_lsx_vmini_wu(_1, 1); } -+// CHECK-LABEL: @vmini_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vmini_du(v2u64 _1) { return __builtin_lsx_vmini_du(_1, 1); } -+// CHECK-LABEL: @vseq_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vseq_b(_1, _2); } -+// CHECK-LABEL: @vseq_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vseq_h(_1, _2); } -+// CHECK-LABEL: @vseq_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vseq_w(_1, _2); } -+// CHECK-LABEL: @vseq_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vseq_d(_1, _2); } -+// CHECK-LABEL: @vseqi_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vseqi_b(v16i8 _1) { return __builtin_lsx_vseqi_b(_1, 1); } -+// CHECK-LABEL: @vseqi_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vseqi_h(v8i16 _1) { return __builtin_lsx_vseqi_h(_1, 1); } -+// CHECK-LABEL: @vseqi_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vseqi_w(v4i32 _1) { return __builtin_lsx_vseqi_w(_1, 1); } -+// CHECK-LABEL: @vseqi_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vseqi_d(v2i64 _1) { return __builtin_lsx_vseqi_d(_1, 1); } -+// 
CHECK-LABEL: @vslti_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vslti_b(v16i8 _1) { return __builtin_lsx_vslti_b(_1, 1); } -+// CHECK-LABEL: @vslt_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vslt_b(_1, _2); } -+// CHECK-LABEL: @vslt_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vslt_h(_1, _2); } -+// CHECK-LABEL: @vslt_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vslt_w(_1, _2); } -+// CHECK-LABEL: @vslt_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vslt_d(_1, _2); } -+// CHECK-LABEL: @vslti_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vslti_h(v8i16 _1) { return __builtin_lsx_vslti_h(_1, 1); } -+// CHECK-LABEL: @vslti_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vslti_w(v4i32 _1) { return __builtin_lsx_vslti_w(_1, 1); } -+// CHECK-LABEL: @vslti_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vslti_d(v2i64 _1) { return __builtin_lsx_vslti_d(_1, 1); } -+// CHECK-LABEL: @vslt_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vslt_bu(v16u8 _1, v16u8 _2) { -+ return __builtin_lsx_vslt_bu(_1, _2); -+} -+// CHECK-LABEL: @vslt_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vslt_hu(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vslt_hu(_1, _2); -+} -+// CHECK-LABEL: @vslt_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vslt_wu(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vslt_wu(_1, _2); -+} -+// CHECK-LABEL: @vslt_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vslt_du(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vslt_du(_1, _2); -+} -+// CHECK-LABEL: @vslti_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vslti_bu(v16u8 _1) { return __builtin_lsx_vslti_bu(_1, 1); } -+// CHECK-LABEL: @vslti_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vslti_hu(v8u16 _1) { return __builtin_lsx_vslti_hu(_1, 1); } -+// CHECK-LABEL: @vslti_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vslti_wu(v4u32 _1) { return __builtin_lsx_vslti_wu(_1, 1); } -+// CHECK-LABEL: @vslti_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vslti_du(v2u64 _1) { return __builtin_lsx_vslti_du(_1, 1); } -+// CHECK-LABEL: @vsle_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsle_b(_1, _2); } -+// CHECK-LABEL: @vsle_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsle_h(_1, _2); } -+// CHECK-LABEL: @vsle_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsle_w(_1, _2); } -+// CHECK-LABEL: @vsle_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsle_d(_1, _2); } -+// CHECK-LABEL: @vslei_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vslei_b(v16i8 _1) { return __builtin_lsx_vslei_b(_1, 1); } -+// CHECK-LABEL: @vslei_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vslei_h(v8i16 _1) { return __builtin_lsx_vslei_h(_1, 1); } -+// CHECK-LABEL: @vslei_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vslei_w(v4i32 _1) { return __builtin_lsx_vslei_w(_1, 1); } -+// CHECK-LABEL: @vslei_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vslei_d(v2i64 _1) { return __builtin_lsx_vslei_d(_1, 1); } -+// CHECK-LABEL: @vsle_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsle_bu(v16u8 
_1, v16u8 _2) { -+ return __builtin_lsx_vsle_bu(_1, _2); -+} -+// CHECK-LABEL: @vsle_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsle_hu(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vsle_hu(_1, _2); -+} -+// CHECK-LABEL: @vsle_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsle_wu(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vsle_wu(_1, _2); -+} -+// CHECK-LABEL: @vsle_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsle_du(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vsle_du(_1, _2); -+} -+// CHECK-LABEL: @vslei_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vslei_bu(v16u8 _1) { return __builtin_lsx_vslei_bu(_1, 1); } -+// CHECK-LABEL: @vslei_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vslei_hu(v8u16 _1) { return __builtin_lsx_vslei_hu(_1, 1); } -+// CHECK-LABEL: @vslei_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vslei_wu(v4u32 _1) { return __builtin_lsx_vslei_wu(_1, 1); } -+// CHECK-LABEL: @vslei_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vslei_du(v2u64 _1) { return __builtin_lsx_vslei_du(_1, 1); } -+// CHECK-LABEL: @vsat_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsat_b(v16i8 _1) { return __builtin_lsx_vsat_b(_1, 1); } -+// CHECK-LABEL: @vsat_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsat_h(v8i16 _1) { return __builtin_lsx_vsat_h(_1, 1); } -+// CHECK-LABEL: @vsat_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsat_w(v4i32 _1) { return __builtin_lsx_vsat_w(_1, 1); } -+// CHECK-LABEL: @vsat_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsat_d(v2i64 _1) { return __builtin_lsx_vsat_d(_1, 1); } -+// CHECK-LABEL: @vsat_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vsat_bu(v16u8 _1) { return __builtin_lsx_vsat_bu(_1, 1); } -+// CHECK-LABEL: @vsat_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> 
@llvm.loongarch.lsx.vsat.hu(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vsat_hu(v8u16 _1) { return __builtin_lsx_vsat_hu(_1, 1); } -+// CHECK-LABEL: @vsat_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vsat_wu(v4u32 _1) { return __builtin_lsx_vsat_wu(_1, 1); } -+// CHECK-LABEL: @vsat_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vsat_du(v2u64 _1) { return __builtin_lsx_vsat_du(_1, 1); } -+// CHECK-LABEL: @vadda_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vadda_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vadda_b(_1, _2); -+} -+// CHECK-LABEL: @vadda_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vadda_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vadda_h(_1, _2); -+} -+// CHECK-LABEL: @vadda_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vadda_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vadda_w(_1, _2); -+} -+// CHECK-LABEL: @vadda_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vadda_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vadda_d(_1, _2); -+} -+// CHECK-LABEL: @vsadd_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsadd_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vsadd_b(_1, _2); -+} -+// CHECK-LABEL: @vsadd_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsadd_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vsadd_h(_1, _2); -+} -+// CHECK-LABEL: @vsadd_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsadd_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vsadd_w(_1, _2); -+} -+// CHECK-LABEL: @vsadd_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsadd_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vsadd_d(_1, _2); -+} -+// CHECK-LABEL: @vsadd_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { -+ return __builtin_lsx_vsadd_bu(_1, _2); -+} -+// CHECK-LABEL: @vsadd_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x 
i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vsadd_hu(_1, _2); -+} -+// CHECK-LABEL: @vsadd_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vsadd_wu(_1, _2); -+} -+// CHECK-LABEL: @vsadd_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vsadd_du(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vsadd_du(_1, _2); -+} -+// CHECK-LABEL: @vavg_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vavg_b(_1, _2); } -+// CHECK-LABEL: @vavg_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vavg_h(_1, _2); } -+// CHECK-LABEL: @vavg_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vavg_w(_1, _2); } -+// CHECK-LABEL: @vavg_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vavg_d(_1, _2); } -+// CHECK-LABEL: @vavg_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vavg_bu(v16u8 _1, v16u8 _2) { -+ return __builtin_lsx_vavg_bu(_1, _2); -+} -+// CHECK-LABEL: @vavg_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vavg_hu(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vavg_hu(_1, _2); -+} -+// CHECK-LABEL: @vavg_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vavg_wu(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vavg_wu(_1, _2); -+} -+// CHECK-LABEL: @vavg_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vavg_du(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vavg_du(_1, _2); -+} -+// CHECK-LABEL: @vavgr_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vavgr_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vavgr_b(_1, _2); -+} -+// CHECK-LABEL: @vavgr_h( 
-+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vavgr_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vavgr_h(_1, _2); -+} -+// CHECK-LABEL: @vavgr_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vavgr_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vavgr_w(_1, _2); -+} -+// CHECK-LABEL: @vavgr_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vavgr_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vavgr_d(_1, _2); -+} -+// CHECK-LABEL: @vavgr_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { -+ return __builtin_lsx_vavgr_bu(_1, _2); -+} -+// CHECK-LABEL: @vavgr_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vavgr_hu(_1, _2); -+} -+// CHECK-LABEL: @vavgr_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vavgr_wu(_1, _2); -+} -+// CHECK-LABEL: @vavgr_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vavgr_du(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vavgr_du(_1, _2); -+} -+// CHECK-LABEL: @vssub_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vssub_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vssub_b(_1, _2); -+} -+// CHECK-LABEL: @vssub_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vssub_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vssub_h(_1, _2); -+} -+// CHECK-LABEL: @vssub_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vssub_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vssub_w(_1, _2); -+} -+// CHECK-LABEL: @vssub_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vssub_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vssub_d(_1, _2); -+} -+// CHECK-LABEL: @vssub_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// 
-+v16u8 vssub_bu(v16u8 _1, v16u8 _2) { -+ return __builtin_lsx_vssub_bu(_1, _2); -+} -+// CHECK-LABEL: @vssub_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vssub_hu(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vssub_hu(_1, _2); -+} -+// CHECK-LABEL: @vssub_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vssub_wu(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vssub_wu(_1, _2); -+} -+// CHECK-LABEL: @vssub_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vssub_du(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vssub_du(_1, _2); -+} -+// CHECK-LABEL: @vabsd_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vabsd_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vabsd_b(_1, _2); -+} -+// CHECK-LABEL: @vabsd_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vabsd_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vabsd_h(_1, _2); -+} -+// CHECK-LABEL: @vabsd_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vabsd_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vabsd_w(_1, _2); -+} -+// CHECK-LABEL: @vabsd_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vabsd_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vabsd_d(_1, _2); -+} -+// CHECK-LABEL: @vabsd_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { -+ return __builtin_lsx_vabsd_bu(_1, _2); -+} -+// CHECK-LABEL: @vabsd_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vabsd_hu(_1, _2); -+} -+// CHECK-LABEL: @vabsd_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vabsd_wu(_1, _2); -+} -+// CHECK-LABEL: @vabsd_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vabsd_du(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vabsd_du(_1, _2); -+} -+// CHECK-LABEL: @vmul_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 
x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmul_b(_1, _2); } -+// CHECK-LABEL: @vmul_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmul_h(_1, _2); } -+// CHECK-LABEL: @vmul_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmul_w(_1, _2); } -+// CHECK-LABEL: @vmul_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmul_d(_1, _2); } -+// CHECK-LABEL: @vmadd_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { -+ return __builtin_lsx_vmadd_b(_1, _2, _3); -+} -+// CHECK-LABEL: @vmadd_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { -+ return __builtin_lsx_vmadd_h(_1, _2, _3); -+} -+// CHECK-LABEL: @vmadd_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { -+ return __builtin_lsx_vmadd_w(_1, _2, _3); -+} -+// CHECK-LABEL: @vmadd_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { -+ return __builtin_lsx_vmadd_d(_1, _2, _3); -+} -+// CHECK-LABEL: @vmsub_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { -+ return __builtin_lsx_vmsub_b(_1, _2, _3); -+} -+// CHECK-LABEL: @vmsub_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { -+ return __builtin_lsx_vmsub_h(_1, _2, _3); -+} -+// CHECK-LABEL: @vmsub_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { -+ return __builtin_lsx_vmsub_w(_1, _2, _3); -+} -+// CHECK-LABEL: @vmsub_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { -+ return __builtin_lsx_vmsub_d(_1, _2, _3); -+} -+// CHECK-LABEL: @vdiv_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vdiv_b(_1, _2); } -+// CHECK-LABEL: @vdiv_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vdiv_h(_1, _2); } -+// CHECK-LABEL: @vdiv_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vdiv_w(_1, _2); } -+// CHECK-LABEL: @vdiv_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vdiv_d(_1, _2); } -+// CHECK-LABEL: @vdiv_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { -+ return __builtin_lsx_vdiv_bu(_1, _2); -+} -+// CHECK-LABEL: @vdiv_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vdiv_hu(_1, _2); -+} -+// CHECK-LABEL: @vdiv_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vdiv_wu(_1, _2); -+} -+// CHECK-LABEL: @vdiv_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vdiv_du(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vdiv_du(_1, _2); -+} -+// CHECK-LABEL: @vhaddw_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vhaddw_h_b(_1, _2); -+} -+// CHECK-LABEL: @vhaddw_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vhaddw_w_h(_1, _2); -+} -+// CHECK-LABEL: @vhaddw_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { -+ 
return __builtin_lsx_vhaddw_d_w(_1, _2); -+} -+// CHECK-LABEL: @vhaddw_hu_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { -+ return __builtin_lsx_vhaddw_hu_bu(_1, _2); -+} -+// CHECK-LABEL: @vhaddw_wu_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vhaddw_wu_hu(_1, _2); -+} -+// CHECK-LABEL: @vhaddw_du_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vhaddw_du_wu(_1, _2); -+} -+// CHECK-LABEL: @vhsubw_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vhsubw_h_b(_1, _2); -+} -+// CHECK-LABEL: @vhsubw_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vhsubw_w_h(_1, _2); -+} -+// CHECK-LABEL: @vhsubw_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vhsubw_d_w(_1, _2); -+} -+// CHECK-LABEL: @vhsubw_hu_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { -+ return __builtin_lsx_vhsubw_hu_bu(_1, _2); -+} -+// CHECK-LABEL: @vhsubw_wu_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vhsubw_wu_hu(_1, _2); -+} -+// CHECK-LABEL: @vhsubw_du_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vhsubw_du_wu(_1, _2); -+} -+// CHECK-LABEL: @vmod_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmod_b(_1, _2); } -+// CHECK-LABEL: @vmod_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmod_h(_1, _2); } -+// CHECK-LABEL: @vmod_w( 
-+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmod_w(_1, _2); } -+// CHECK-LABEL: @vmod_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmod_d(_1, _2); } -+// CHECK-LABEL: @vmod_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vmod_bu(v16u8 _1, v16u8 _2) { -+ return __builtin_lsx_vmod_bu(_1, _2); -+} -+// CHECK-LABEL: @vmod_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vmod_hu(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vmod_hu(_1, _2); -+} -+// CHECK-LABEL: @vmod_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vmod_wu(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vmod_wu(_1, _2); -+} -+// CHECK-LABEL: @vmod_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vmod_du(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vmod_du(_1, _2); -+} -+// CHECK-LABEL: @vreplve_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[_1:%.*]], i32 [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vreplve_b(v16i8 _1, int _2) { -+ return __builtin_lsx_vreplve_b(_1, _2); -+} -+// CHECK-LABEL: @vreplve_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[_1:%.*]], i32 [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vreplve_h(v8i16 _1, int _2) { -+ return __builtin_lsx_vreplve_h(_1, _2); -+} -+// CHECK-LABEL: @vreplve_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[_1:%.*]], i32 [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vreplve_w(v4i32 _1, int _2) { -+ return __builtin_lsx_vreplve_w(_1, _2); -+} -+// CHECK-LABEL: @vreplve_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[_1:%.*]], i32 [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vreplve_d(v2i64 _1, int _2) { -+ return __builtin_lsx_vreplve_d(_1, _2); -+} -+// CHECK-LABEL: @vreplvei_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vreplvei_b(v16i8 _1) { return __builtin_lsx_vreplvei_b(_1, 1); } -+// CHECK-LABEL: @vreplvei_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vreplvei_h(v8i16 _1) { return 
__builtin_lsx_vreplvei_h(_1, 1); } -+// CHECK-LABEL: @vreplvei_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vreplvei_w(v4i32 _1) { return __builtin_lsx_vreplvei_w(_1, 1); } -+// CHECK-LABEL: @vreplvei_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vreplvei_d(v2i64 _1) { return __builtin_lsx_vreplvei_d(_1, 1); } -+// CHECK-LABEL: @vpickev_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vpickev_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vpickev_b(_1, _2); -+} -+// CHECK-LABEL: @vpickev_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vpickev_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vpickev_h(_1, _2); -+} -+// CHECK-LABEL: @vpickev_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vpickev_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vpickev_w(_1, _2); -+} -+// CHECK-LABEL: @vpickev_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vpickev_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vpickev_d(_1, _2); -+} -+// CHECK-LABEL: @vpickod_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vpickod_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vpickod_b(_1, _2); -+} -+// CHECK-LABEL: @vpickod_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vpickod_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vpickod_h(_1, _2); -+} -+// CHECK-LABEL: @vpickod_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vpickod_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vpickod_w(_1, _2); -+} -+// CHECK-LABEL: @vpickod_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vpickod_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vpickod_d(_1, _2); -+} -+// CHECK-LABEL: @vilvh_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vilvh_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vilvh_b(_1, _2); -+} -+// CHECK-LABEL: @vilvh_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> 
[[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vilvh_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vilvh_h(_1, _2); -+} -+// CHECK-LABEL: @vilvh_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vilvh_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vilvh_w(_1, _2); -+} -+// CHECK-LABEL: @vilvh_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vilvh_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vilvh_d(_1, _2); -+} -+// CHECK-LABEL: @vilvl_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vilvl_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vilvl_b(_1, _2); -+} -+// CHECK-LABEL: @vilvl_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vilvl_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vilvl_h(_1, _2); -+} -+// CHECK-LABEL: @vilvl_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vilvl_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vilvl_w(_1, _2); -+} -+// CHECK-LABEL: @vilvl_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vilvl_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vilvl_d(_1, _2); -+} -+// CHECK-LABEL: @vpackev_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vpackev_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vpackev_b(_1, _2); -+} -+// CHECK-LABEL: @vpackev_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vpackev_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vpackev_h(_1, _2); -+} -+// CHECK-LABEL: @vpackev_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vpackev_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vpackev_w(_1, _2); -+} -+// CHECK-LABEL: @vpackev_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vpackev_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vpackev_d(_1, _2); -+} -+// CHECK-LABEL: @vpackod_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vpackod_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vpackod_b(_1, _2); -+} -+// 
CHECK-LABEL: @vpackod_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vpackod_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vpackod_h(_1, _2); -+} -+// CHECK-LABEL: @vpackod_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vpackod_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vpackod_w(_1, _2); -+} -+// CHECK-LABEL: @vpackod_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vpackod_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vpackod_d(_1, _2); -+} -+// CHECK-LABEL: @vshuf_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { -+ return __builtin_lsx_vshuf_h(_1, _2, _3); -+} -+// CHECK-LABEL: @vshuf_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { -+ return __builtin_lsx_vshuf_w(_1, _2, _3); -+} -+// CHECK-LABEL: @vshuf_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { -+ return __builtin_lsx_vshuf_d(_1, _2, _3); -+} -+// CHECK-LABEL: @vand_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vand_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vand_v(_1, _2); } -+// CHECK-LABEL: @vandi_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vandi_b(v16u8 _1) { return __builtin_lsx_vandi_b(_1, 1); } -+// CHECK-LABEL: @vor_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vor_v(_1, _2); } -+// CHECK-LABEL: @vori_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vori_b(v16u8 _1) { return __builtin_lsx_vori_b(_1, 1); } -+// CHECK-LABEL: @vnor_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vnor_v(_1, _2); } -+// CHECK-LABEL: @vnori_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: 
ret <16 x i8> [[TMP0]] -+// -+v16u8 vnori_b(v16u8 _1) { return __builtin_lsx_vnori_b(_1, 1); } -+// CHECK-LABEL: @vxor_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vxor_v(_1, _2); } -+// CHECK-LABEL: @vxori_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vxori_b(v16u8 _1) { return __builtin_lsx_vxori_b(_1, 1); } -+// CHECK-LABEL: @vbitsel_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { -+ return __builtin_lsx_vbitsel_v(_1, _2, _3); -+} -+// CHECK-LABEL: @vbitseli_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { -+ return __builtin_lsx_vbitseli_b(_1, _2, 1); -+} -+// CHECK-LABEL: @vshuf4i_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vshuf4i_b(v16i8 _1) { return __builtin_lsx_vshuf4i_b(_1, 1); } -+// CHECK-LABEL: @vshuf4i_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vshuf4i_h(v8i16 _1) { return __builtin_lsx_vshuf4i_h(_1, 1); } -+// CHECK-LABEL: @vshuf4i_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vshuf4i_w(v4i32 _1) { return __builtin_lsx_vshuf4i_w(_1, 1); } -+// CHECK-LABEL: @vreplgr2vr_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vreplgr2vr_b(int _1) { return __builtin_lsx_vreplgr2vr_b(_1); } -+// CHECK-LABEL: @vreplgr2vr_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vreplgr2vr_h(int _1) { return __builtin_lsx_vreplgr2vr_h(_1); } -+// CHECK-LABEL: @vreplgr2vr_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vreplgr2vr_w(int _1) { return __builtin_lsx_vreplgr2vr_w(_1); } -+// CHECK-LABEL: @vreplgr2vr_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vreplgr2vr_d(long _1) { return __builtin_lsx_vreplgr2vr_d(_1); } -+// CHECK-LABEL: @vpcnt_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vpcnt_b(v16i8 _1) { return __builtin_lsx_vpcnt_b(_1); } -+// 
CHECK-LABEL: @vpcnt_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vpcnt_h(v8i16 _1) { return __builtin_lsx_vpcnt_h(_1); } -+// CHECK-LABEL: @vpcnt_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vpcnt_w(v4i32 _1) { return __builtin_lsx_vpcnt_w(_1); } -+// CHECK-LABEL: @vpcnt_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vpcnt_d(v2i64 _1) { return __builtin_lsx_vpcnt_d(_1); } -+// CHECK-LABEL: @vclo_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vclo_b(v16i8 _1) { return __builtin_lsx_vclo_b(_1); } -+// CHECK-LABEL: @vclo_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vclo_h(v8i16 _1) { return __builtin_lsx_vclo_h(_1); } -+// CHECK-LABEL: @vclo_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vclo_w(v4i32 _1) { return __builtin_lsx_vclo_w(_1); } -+// CHECK-LABEL: @vclo_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vclo_d(v2i64 _1) { return __builtin_lsx_vclo_d(_1); } -+// CHECK-LABEL: @vclz_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vclz_b(v16i8 _1) { return __builtin_lsx_vclz_b(_1); } -+// CHECK-LABEL: @vclz_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vclz_h(v8i16 _1) { return __builtin_lsx_vclz_h(_1); } -+// CHECK-LABEL: @vclz_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vclz_w(v4i32 _1) { return __builtin_lsx_vclz_w(_1); } -+// CHECK-LABEL: @vclz_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vclz_d(v2i64 _1) { return __builtin_lsx_vclz_d(_1); } -+// CHECK-LABEL: @vpickve2gr_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int vpickve2gr_b(v16i8 _1) { return __builtin_lsx_vpickve2gr_b(_1, 1); } -+// CHECK-LABEL: @vpickve2gr_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int vpickve2gr_h(v8i16 _1) { return __builtin_lsx_vpickve2gr_h(_1, 1); } -+// CHECK-LABEL: @vpickve2gr_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x 
i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int vpickve2gr_w(v4i32 _1) { return __builtin_lsx_vpickve2gr_w(_1, 1); } -+// CHECK-LABEL: @vpickve2gr_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i64 [[TMP0]] -+// -+long vpickve2gr_d(v2i64 _1) { return __builtin_lsx_vpickve2gr_d(_1, 1); } -+// CHECK-LABEL: @vpickve2gr_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+unsigned int vpickve2gr_bu(v16i8 _1) { -+ return __builtin_lsx_vpickve2gr_bu(_1, 1); -+} -+// CHECK-LABEL: @vpickve2gr_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+unsigned int vpickve2gr_hu(v8i16 _1) { -+ return __builtin_lsx_vpickve2gr_hu(_1, 1); -+} -+// CHECK-LABEL: @vpickve2gr_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+unsigned int vpickve2gr_wu(v4i32 _1) { -+ return __builtin_lsx_vpickve2gr_wu(_1, 1); -+} -+// CHECK-LABEL: @vpickve2gr_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i64 [[TMP0]] -+// -+unsigned long int vpickve2gr_du(v2i64 _1) { -+ return __builtin_lsx_vpickve2gr_du(_1, 1); -+} -+// CHECK-LABEL: @vinsgr2vr_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[_1:%.*]], i32 1, i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vinsgr2vr_b(v16i8 _1) { -+ return __builtin_lsx_vinsgr2vr_b(_1, 1, 1); -+} -+// CHECK-LABEL: @vinsgr2vr_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[_1:%.*]], i32 1, i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vinsgr2vr_h(v8i16 _1) { -+ return __builtin_lsx_vinsgr2vr_h(_1, 1, 1); -+} -+// CHECK-LABEL: @vinsgr2vr_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[_1:%.*]], i32 1, i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vinsgr2vr_w(v4i32 _1) { -+ return __builtin_lsx_vinsgr2vr_w(_1, 1, 1); -+} -+// CHECK-LABEL: @vinsgr2vr_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[_1:%.*]], i64 1, i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vinsgr2vr_d(v2i64 _1) { -+ return __builtin_lsx_vinsgr2vr_d(_1, 1, 1); -+} -+// CHECK-LABEL: @vfadd_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfadd_s(v4f32 _1, v4f32 _2) { -+ return __builtin_lsx_vfadd_s(_1, _2); -+} -+// CHECK-LABEL: @vfadd_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfadd_d(v2f64 _1, v2f64 _2) { -+ return __builtin_lsx_vfadd_d(_1, _2); -+} -+// CHECK-LABEL: @vfsub_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x 
float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfsub_s(v4f32 _1, v4f32 _2) { -+ return __builtin_lsx_vfsub_s(_1, _2); -+} -+// CHECK-LABEL: @vfsub_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfsub_d(v2f64 _1, v2f64 _2) { -+ return __builtin_lsx_vfsub_d(_1, _2); -+} -+// CHECK-LABEL: @vfmul_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfmul_s(v4f32 _1, v4f32 _2) { -+ return __builtin_lsx_vfmul_s(_1, _2); -+} -+// CHECK-LABEL: @vfmul_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfmul_d(v2f64 _1, v2f64 _2) { -+ return __builtin_lsx_vfmul_d(_1, _2); -+} -+// CHECK-LABEL: @vfdiv_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { -+ return __builtin_lsx_vfdiv_s(_1, _2); -+} -+// CHECK-LABEL: @vfdiv_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { -+ return __builtin_lsx_vfdiv_d(_1, _2); -+} -+// CHECK-LABEL: @vfcvt_h_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { -+ return __builtin_lsx_vfcvt_h_s(_1, _2); -+} -+// CHECK-LABEL: @vfcvt_s_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { -+ return __builtin_lsx_vfcvt_s_d(_1, _2); -+} -+// CHECK-LABEL: @vfmin_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfmin_s(v4f32 _1, v4f32 _2) { -+ return __builtin_lsx_vfmin_s(_1, _2); -+} -+// CHECK-LABEL: @vfmin_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfmin_d(v2f64 _1, v2f64 _2) { -+ return __builtin_lsx_vfmin_d(_1, _2); -+} -+// CHECK-LABEL: @vfmina_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfmina_s(v4f32 _1, v4f32 _2) { -+ return __builtin_lsx_vfmina_s(_1, _2); -+} -+// CHECK-LABEL: @vfmina_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) 
-+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfmina_d(v2f64 _1, v2f64 _2) { -+ return __builtin_lsx_vfmina_d(_1, _2); -+} -+// CHECK-LABEL: @vfmax_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfmax_s(v4f32 _1, v4f32 _2) { -+ return __builtin_lsx_vfmax_s(_1, _2); -+} -+// CHECK-LABEL: @vfmax_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfmax_d(v2f64 _1, v2f64 _2) { -+ return __builtin_lsx_vfmax_d(_1, _2); -+} -+// CHECK-LABEL: @vfmaxa_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { -+ return __builtin_lsx_vfmaxa_s(_1, _2); -+} -+// CHECK-LABEL: @vfmaxa_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { -+ return __builtin_lsx_vfmaxa_d(_1, _2); -+} -+// CHECK-LABEL: @vfclass_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vfclass_s(v4f32 _1) { return __builtin_lsx_vfclass_s(_1); } -+// CHECK-LABEL: @vfclass_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vfclass_d(v2f64 _1) { return __builtin_lsx_vfclass_d(_1); } -+// CHECK-LABEL: @vfsqrt_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfsqrt_s(v4f32 _1) { return __builtin_lsx_vfsqrt_s(_1); } -+// CHECK-LABEL: @vfsqrt_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfsqrt_d(v2f64 _1) { return __builtin_lsx_vfsqrt_d(_1); } -+// CHECK-LABEL: @vfrecip_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfrecip_s(v4f32 _1) { return __builtin_lsx_vfrecip_s(_1); } -+// CHECK-LABEL: @vfrecip_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfrecip_d(v2f64 _1) { return __builtin_lsx_vfrecip_d(_1); } -+// CHECK-LABEL: @vfrint_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfrint_s(v4f32 _1) { return __builtin_lsx_vfrint_s(_1); } -+// CHECK-LABEL: @vfrint_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 
vfrint_d(v2f64 _1) { return __builtin_lsx_vfrint_d(_1); } -+// CHECK-LABEL: @vfrsqrt_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfrsqrt_s(v4f32 _1) { return __builtin_lsx_vfrsqrt_s(_1); } -+// CHECK-LABEL: @vfrsqrt_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfrsqrt_d(v2f64 _1) { return __builtin_lsx_vfrsqrt_d(_1); } -+// CHECK-LABEL: @vflogb_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vflogb_s(v4f32 _1) { return __builtin_lsx_vflogb_s(_1); } -+// CHECK-LABEL: @vflogb_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vflogb_d(v2f64 _1) { return __builtin_lsx_vflogb_d(_1); } -+// CHECK-LABEL: @vfcvth_s_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfcvth_s_h(v8i16 _1) { return __builtin_lsx_vfcvth_s_h(_1); } -+// CHECK-LABEL: @vfcvth_d_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfcvth_d_s(v4f32 _1) { return __builtin_lsx_vfcvth_d_s(_1); } -+// CHECK-LABEL: @vfcvtl_s_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfcvtl_s_h(v8i16 _1) { return __builtin_lsx_vfcvtl_s_h(_1); } -+// CHECK-LABEL: @vfcvtl_d_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfcvtl_d_s(v4f32 _1) { return __builtin_lsx_vfcvtl_d_s(_1); } -+// CHECK-LABEL: @vftint_w_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftint_w_s(v4f32 _1) { return __builtin_lsx_vftint_w_s(_1); } -+// CHECK-LABEL: @vftint_l_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftint_l_d(v2f64 _1) { return __builtin_lsx_vftint_l_d(_1); } -+// CHECK-LABEL: @vftint_wu_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vftint_wu_s(v4f32 _1) { return __builtin_lsx_vftint_wu_s(_1); } -+// CHECK-LABEL: @vftint_lu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vftint_lu_d(v2f64 _1) { return __builtin_lsx_vftint_lu_d(_1); } -+// CHECK-LABEL: @vftintrz_w_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x 
float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftintrz_w_s(v4f32 _1) { return __builtin_lsx_vftintrz_w_s(_1); } -+// CHECK-LABEL: @vftintrz_l_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrz_l_d(v2f64 _1) { return __builtin_lsx_vftintrz_l_d(_1); } -+// CHECK-LABEL: @vftintrz_wu_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vftintrz_wu_s(v4f32 _1) { return __builtin_lsx_vftintrz_wu_s(_1); } -+// CHECK-LABEL: @vftintrz_lu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vftintrz_lu_d(v2f64 _1) { return __builtin_lsx_vftintrz_lu_d(_1); } -+// CHECK-LABEL: @vffint_s_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vffint_s_w(v4i32 _1) { return __builtin_lsx_vffint_s_w(_1); } -+// CHECK-LABEL: @vffint_d_l( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vffint_d_l(v2i64 _1) { return __builtin_lsx_vffint_d_l(_1); } -+// CHECK-LABEL: @vffint_s_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vffint_s_wu(v4u32 _1) { return __builtin_lsx_vffint_s_wu(_1); } -+// CHECK-LABEL: @vffint_d_lu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vffint_d_lu(v2u64 _1) { return __builtin_lsx_vffint_d_lu(_1); } -+// CHECK-LABEL: @vandn_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vandn_v(v16u8 _1, v16u8 _2) { -+ return __builtin_lsx_vandn_v(_1, _2); -+} -+// CHECK-LABEL: @vneg_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vneg_b(v16i8 _1) { return __builtin_lsx_vneg_b(_1); } -+// CHECK-LABEL: @vneg_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vneg_h(v8i16 _1) { return __builtin_lsx_vneg_h(_1); } -+// CHECK-LABEL: @vneg_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vneg_w(v4i32 _1) { return __builtin_lsx_vneg_w(_1); } -+// CHECK-LABEL: @vneg_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vneg_d(v2i64 _1) { return __builtin_lsx_vneg_d(_1); } -+// CHECK-LABEL: @vmuh_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmuh_b(_1, _2); } -+// CHECK-LABEL: @vmuh_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmuh_h(_1, _2); } -+// CHECK-LABEL: @vmuh_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmuh_w(_1, _2); } -+// CHECK-LABEL: @vmuh_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmuh_d(_1, _2); } -+// CHECK-LABEL: @vmuh_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { -+ return __builtin_lsx_vmuh_bu(_1, _2); -+} -+// CHECK-LABEL: @vmuh_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vmuh_hu(_1, _2); -+} -+// CHECK-LABEL: @vmuh_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vmuh_wu(_1, _2); -+} -+// CHECK-LABEL: @vmuh_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vmuh_du(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vmuh_du(_1, _2); -+} -+// CHECK-LABEL: @vsllwil_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsllwil_h_b(v16i8 _1) { return __builtin_lsx_vsllwil_h_b(_1, 1); } -+// CHECK-LABEL: @vsllwil_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsllwil_w_h(v8i16 _1) { return __builtin_lsx_vsllwil_w_h(_1, 1); } -+// CHECK-LABEL: @vsllwil_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsllwil_d_w(v4i32 _1) { return __builtin_lsx_vsllwil_d_w(_1, 1); } -+// CHECK-LABEL: @vsllwil_hu_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vsllwil_hu_bu(v16u8 _1) { -+ return __builtin_lsx_vsllwil_hu_bu(_1, 1); -+} -+// CHECK-LABEL: @vsllwil_wu_hu( -+// CHECK-NEXT: entry: -+// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vsllwil_wu_hu(v8u16 _1) { -+ return __builtin_lsx_vsllwil_wu_hu(_1, 1); -+} -+// CHECK-LABEL: @vsllwil_du_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vsllwil_du_wu(v4u32 _1) { -+ return __builtin_lsx_vsllwil_du_wu(_1, 1); -+} -+// CHECK-LABEL: @vsran_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vsran_b_h(_1, _2); -+} -+// CHECK-LABEL: @vsran_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vsran_h_w(_1, _2); -+} -+// CHECK-LABEL: @vsran_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vsran_w_d(_1, _2); -+} -+// CHECK-LABEL: @vssran_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vssran_b_h(_1, _2); -+} -+// CHECK-LABEL: @vssran_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vssran_h_w(_1, _2); -+} -+// CHECK-LABEL: @vssran_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vssran_w_d(_1, _2); -+} -+// CHECK-LABEL: @vssran_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vssran_bu_h(_1, _2); -+} -+// CHECK-LABEL: @vssran_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vssran_hu_w(_1, _2); -+} -+// CHECK-LABEL: @vssran_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vssran_wu_d(_1, _2); -+} -+// CHECK-LABEL: @vsrarn_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> 
[[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vsrarn_b_h(_1, _2); -+} -+// CHECK-LABEL: @vsrarn_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vsrarn_h_w(_1, _2); -+} -+// CHECK-LABEL: @vsrarn_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vsrarn_w_d(_1, _2); -+} -+// CHECK-LABEL: @vssrarn_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vssrarn_b_h(_1, _2); -+} -+// CHECK-LABEL: @vssrarn_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vssrarn_h_w(_1, _2); -+} -+// CHECK-LABEL: @vssrarn_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vssrarn_w_d(_1, _2); -+} -+// CHECK-LABEL: @vssrarn_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vssrarn_bu_h(_1, _2); -+} -+// CHECK-LABEL: @vssrarn_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vssrarn_hu_w(_1, _2); -+} -+// CHECK-LABEL: @vssrarn_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vssrarn_wu_d(_1, _2); -+} -+// CHECK-LABEL: @vsrln_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vsrln_b_h(_1, _2); -+} -+// CHECK-LABEL: @vsrln_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vsrln_h_w(_1, _2); -+} -+// CHECK-LABEL: @vsrln_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] 
-+// -+v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vsrln_w_d(_1, _2); -+} -+// CHECK-LABEL: @vssrln_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vssrln_bu_h(_1, _2); -+} -+// CHECK-LABEL: @vssrln_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vssrln_hu_w(_1, _2); -+} -+// CHECK-LABEL: @vssrln_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vssrln_wu_d(_1, _2); -+} -+// CHECK-LABEL: @vsrlrn_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vsrlrn_b_h(_1, _2); -+} -+// CHECK-LABEL: @vsrlrn_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vsrlrn_h_w(_1, _2); -+} -+// CHECK-LABEL: @vsrlrn_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vsrlrn_w_d(_1, _2); -+} -+// CHECK-LABEL: @vssrlrn_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vssrlrn_bu_h(_1, _2); -+} -+// CHECK-LABEL: @vssrlrn_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vssrlrn_hu_w(_1, _2); -+} -+// CHECK-LABEL: @vssrlrn_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vssrlrn_wu_d(_1, _2); -+} -+// CHECK-LABEL: @vfrstpi_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vfrstpi_b(_1, _2, 1); -+} -+// CHECK-LABEL: @vfrstpi_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vfrstpi_h(v8i16 _1, 
v8i16 _2) { -+ return __builtin_lsx_vfrstpi_h(_1, _2, 1); -+} -+// CHECK-LABEL: @vfrstp_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) { -+ return __builtin_lsx_vfrstp_b(_1, _2, _3); -+} -+// CHECK-LABEL: @vfrstp_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) { -+ return __builtin_lsx_vfrstp_h(_1, _2, _3); -+} -+// CHECK-LABEL: @vshuf4i_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vshuf4i_d(_1, _2, 1); -+} -+// CHECK-LABEL: @vbsrl_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vbsrl_v(v16i8 _1) { return __builtin_lsx_vbsrl_v(_1, 1); } -+// CHECK-LABEL: @vbsll_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vbsll_v(v16i8 _1) { return __builtin_lsx_vbsll_v(_1, 1); } -+// CHECK-LABEL: @vextrins_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vextrins_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vextrins_b(_1, _2, 1); -+} -+// CHECK-LABEL: @vextrins_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vextrins_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vextrins_h(_1, _2, 1); -+} -+// CHECK-LABEL: @vextrins_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vextrins_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vextrins_w(_1, _2, 1); -+} -+// CHECK-LABEL: @vextrins_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vextrins_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vextrins_d(_1, _2, 1); -+} -+// CHECK-LABEL: @vmskltz_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vmskltz_b(v16i8 _1) { return __builtin_lsx_vmskltz_b(_1); } -+// CHECK-LABEL: @vmskltz_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmskltz_h(v8i16 _1) { return __builtin_lsx_vmskltz_h(_1); } -+// CHECK-LABEL: @vmskltz_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmskltz_w(v4i32 _1) { return __builtin_lsx_vmskltz_w(_1); } -+// CHECK-LABEL: @vmskltz_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmskltz_d(v2i64 _1) { return __builtin_lsx_vmskltz_d(_1); } -+// CHECK-LABEL: @vsigncov_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vsigncov_b(_1, _2); -+} -+// CHECK-LABEL: @vsigncov_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vsigncov_h(_1, _2); -+} -+// CHECK-LABEL: @vsigncov_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vsigncov_w(_1, _2); -+} -+// CHECK-LABEL: @vsigncov_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vsigncov_d(_1, _2); -+} -+// CHECK-LABEL: @vfmadd_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { -+ return __builtin_lsx_vfmadd_s(_1, _2, _3); -+} -+// CHECK-LABEL: @vfmadd_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { -+ return __builtin_lsx_vfmadd_d(_1, _2, _3); -+} -+// CHECK-LABEL: @vfmsub_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { -+ return __builtin_lsx_vfmsub_s(_1, _2, _3); -+} -+// CHECK-LABEL: @vfmsub_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { -+ return __builtin_lsx_vfmsub_d(_1, _2, _3); -+} -+// CHECK-LABEL: @vfnmadd_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { -+ return __builtin_lsx_vfnmadd_s(_1, _2, _3); -+} -+// CHECK-LABEL: @vfnmadd_d( -+// CHECK-NEXT: entry: 
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { -+ return __builtin_lsx_vfnmadd_d(_1, _2, _3); -+} -+// CHECK-LABEL: @vfnmsub_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { -+ return __builtin_lsx_vfnmsub_s(_1, _2, _3); -+} -+// CHECK-LABEL: @vfnmsub_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { -+ return __builtin_lsx_vfnmsub_d(_1, _2, _3); -+} -+// CHECK-LABEL: @vftintrne_w_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftintrne_w_s(v4f32 _1) { return __builtin_lsx_vftintrne_w_s(_1); } -+// CHECK-LABEL: @vftintrne_l_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrne_l_d(v2f64 _1) { return __builtin_lsx_vftintrne_l_d(_1); } -+// CHECK-LABEL: @vftintrp_w_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftintrp_w_s(v4f32 _1) { return __builtin_lsx_vftintrp_w_s(_1); } -+// CHECK-LABEL: @vftintrp_l_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrp_l_d(v2f64 _1) { return __builtin_lsx_vftintrp_l_d(_1); } -+// CHECK-LABEL: @vftintrm_w_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftintrm_w_s(v4f32 _1) { return __builtin_lsx_vftintrm_w_s(_1); } -+// CHECK-LABEL: @vftintrm_l_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrm_l_d(v2f64 _1) { return __builtin_lsx_vftintrm_l_d(_1); } -+// CHECK-LABEL: @vftint_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { -+ return __builtin_lsx_vftint_w_d(_1, _2); -+} -+// CHECK-LABEL: @vffint_s_l( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// -+v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vffint_s_l(_1, _2); -+} -+// CHECK-LABEL: @vftintrz_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> 
[[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { -+ return __builtin_lsx_vftintrz_w_d(_1, _2); -+} -+// CHECK-LABEL: @vftintrp_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { -+ return __builtin_lsx_vftintrp_w_d(_1, _2); -+} -+// CHECK-LABEL: @vftintrm_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { -+ return __builtin_lsx_vftintrm_w_d(_1, _2); -+} -+// CHECK-LABEL: @vftintrne_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { -+ return __builtin_lsx_vftintrne_w_d(_1, _2); -+} -+// CHECK-LABEL: @vftintl_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintl_l_s(v4f32 _1) { return __builtin_lsx_vftintl_l_s(_1); } -+// CHECK-LABEL: @vftinth_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftinth_l_s(v4f32 _1) { return __builtin_lsx_vftinth_l_s(_1); } -+// CHECK-LABEL: @vffinth_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vffinth_d_w(v4i32 _1) { return __builtin_lsx_vffinth_d_w(_1); } -+// CHECK-LABEL: @vffintl_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// -+v2f64 vffintl_d_w(v4i32 _1) { return __builtin_lsx_vffintl_d_w(_1); } -+// CHECK-LABEL: @vftintrzl_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrzl_l_s(v4f32 _1) { return __builtin_lsx_vftintrzl_l_s(_1); } -+// CHECK-LABEL: @vftintrzh_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrzh_l_s(v4f32 _1) { return __builtin_lsx_vftintrzh_l_s(_1); } -+// CHECK-LABEL: @vftintrpl_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrpl_l_s(v4f32 _1) { return __builtin_lsx_vftintrpl_l_s(_1); } -+// CHECK-LABEL: @vftintrph_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrph_l_s(v4f32 _1) { return __builtin_lsx_vftintrph_l_s(_1); } -+// CHECK-LABEL: @vftintrml_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrml_l_s(v4f32 _1) { return __builtin_lsx_vftintrml_l_s(_1); } -+// CHECK-LABEL: @vftintrmh_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrmh_l_s(v4f32 _1) { return __builtin_lsx_vftintrmh_l_s(_1); } -+// CHECK-LABEL: @vftintrnel_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrnel_l_s(v4f32 _1) { -+ return __builtin_lsx_vftintrnel_l_s(_1); -+} -+// CHECK-LABEL: @vftintrneh_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vftintrneh_l_s(v4f32 _1) { -+ return __builtin_lsx_vftintrneh_l_s(_1); -+} -+// CHECK-LABEL: @vfrintrne_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> -+// CHECK-NEXT: ret <4 x i32> [[TMP1]] -+// -+v4i32 vfrintrne_s(v4f32 _1) { return __builtin_lsx_vfrintrne_s(_1); } -+// CHECK-LABEL: @vfrintrne_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> -+// CHECK-NEXT: ret <2 x i64> [[TMP1]] -+// -+v2i64 vfrintrne_d(v2f64 _1) { return __builtin_lsx_vfrintrne_d(_1); } -+// CHECK-LABEL: @vfrintrz_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> -+// CHECK-NEXT: ret <4 x i32> [[TMP1]] -+// -+v4i32 vfrintrz_s(v4f32 _1) { return __builtin_lsx_vfrintrz_s(_1); } -+// CHECK-LABEL: @vfrintrz_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> -+// CHECK-NEXT: ret <2 x i64> [[TMP1]] -+// -+v2i64 vfrintrz_d(v2f64 _1) { return __builtin_lsx_vfrintrz_d(_1); } -+// CHECK-LABEL: @vfrintrp_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> -+// CHECK-NEXT: ret <4 x i32> [[TMP1]] -+// -+v4i32 vfrintrp_s(v4f32 _1) { return __builtin_lsx_vfrintrp_s(_1); } -+// CHECK-LABEL: @vfrintrp_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> -+// CHECK-NEXT: ret <2 x i64> [[TMP1]] -+// -+v2i64 vfrintrp_d(v2f64 _1) { return __builtin_lsx_vfrintrp_d(_1); } -+// CHECK-LABEL: @vfrintrm_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> -+// CHECK-NEXT: ret <4 x i32> [[TMP1]] -+// -+v4i32 vfrintrm_s(v4f32 _1) { return 
__builtin_lsx_vfrintrm_s(_1); }
-+// CHECK-LABEL: @vfrintrm_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[_1:%.*]])
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64>
-+// CHECK-NEXT: ret <2 x i64> [[TMP1]]
-+//
-+v2i64 vfrintrm_d(v2f64 _1) { return __builtin_lsx_vfrintrm_d(_1); }
-+// CHECK-LABEL: @vstelm_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1)
-+// CHECK-NEXT: ret void
-+//
-+void vstelm_b(v16i8 _1, void *_2) {
-+ return __builtin_lsx_vstelm_b(_1, _2, 1, 1);
-+}
-+// CHECK-LABEL: @vstelm_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1)
-+// CHECK-NEXT: ret void
-+//
-+void vstelm_h(v8i16 _1, void *_2) {
-+ return __builtin_lsx_vstelm_h(_1, _2, 2, 1);
-+}
-+// CHECK-LABEL: @vstelm_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1)
-+// CHECK-NEXT: ret void
-+//
-+void vstelm_w(v4i32 _1, void *_2) {
-+ return __builtin_lsx_vstelm_w(_1, _2, 4, 1);
-+}
-+// CHECK-LABEL: @vstelm_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1)
-+// CHECK-NEXT: ret void
-+//
-+void vstelm_d(v2i64 _1, void *_2) {
-+ return __builtin_lsx_vstelm_d(_1, _2, 8, 1);
-+}
-+// CHECK-LABEL: @vaddwev_d_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) {
-+ return __builtin_lsx_vaddwev_d_w(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwev_w_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) {
-+ return __builtin_lsx_vaddwev_w_h(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwev_h_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) {
-+ return __builtin_lsx_vaddwev_h_b(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwod_d_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) {
-+ return __builtin_lsx_vaddwod_d_w(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwod_w_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) {
-+ return __builtin_lsx_vaddwod_w_h(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwod_h_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) {
-+ return __builtin_lsx_vaddwod_h_b(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwev_d_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) {
-+ return __builtin_lsx_vaddwev_d_wu(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwev_w_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) {
-+ return __builtin_lsx_vaddwev_w_hu(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwev_h_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) {
-+ return __builtin_lsx_vaddwev_h_bu(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwod_d_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) {
-+ return __builtin_lsx_vaddwod_d_wu(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwod_w_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) {
-+ return __builtin_lsx_vaddwod_w_hu(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwod_h_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) {
-+ return __builtin_lsx_vaddwod_h_bu(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwev_d_wu_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) {
-+ return __builtin_lsx_vaddwev_d_wu_w(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwev_w_hu_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) {
-+ return __builtin_lsx_vaddwev_w_hu_h(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwev_h_bu_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) {
-+ return __builtin_lsx_vaddwev_h_bu_b(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwod_d_wu_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) {
-+ return __builtin_lsx_vaddwod_d_wu_w(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwod_w_hu_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) {
-+ return __builtin_lsx_vaddwod_w_hu_h(_1, _2);
-+}
-+// CHECK-LABEL: @vaddwod_h_bu_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) {
-+ return __builtin_lsx_vaddwod_h_bu_b(_1, _2);
-+}
-+// CHECK-LABEL: @vsubwev_d_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) {
-+ return __builtin_lsx_vsubwev_d_w(_1, _2);
-+}
-+// CHECK-LABEL: @vsubwev_w_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) {
-+ return __builtin_lsx_vsubwev_w_h(_1, _2);
-+}
-+// CHECK-LABEL: @vsubwev_h_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) {
-+ return __builtin_lsx_vsubwev_h_b(_1, _2);
-+}
-+// CHECK-LABEL: @vsubwod_d_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) {
-+ return __builtin_lsx_vsubwod_d_w(_1, _2);
-+}
-+// CHECK-LABEL: @vsubwod_w_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) {
-+ return __builtin_lsx_vsubwod_w_h(_1, _2);
-+}
-+// CHECK-LABEL: @vsubwod_h_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) {
-+ return __builtin_lsx_vsubwod_h_b(_1, _2);
-+}
-+// CHECK-LABEL: @vsubwev_d_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) {
-+ return __builtin_lsx_vsubwev_d_wu(_1, _2);
-+}
-+// CHECK-LABEL: @vsubwev_w_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) {
-+ return __builtin_lsx_vsubwev_w_hu(_1, _2);
-+}
-+// CHECK-LABEL: @vsubwev_h_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) {
-+ return __builtin_lsx_vsubwev_h_bu(_1, _2);
-+}
-+// CHECK-LABEL: @vsubwod_d_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) {
-+ return __builtin_lsx_vsubwod_d_wu(_1, _2);
-+}
-+// CHECK-LABEL: @vsubwod_w_hu(
-+// 
CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vsubwod_w_hu(_1, _2); -+} -+// CHECK-LABEL: @vsubwod_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { -+ return __builtin_lsx_vsubwod_h_bu(_1, _2); -+} -+// CHECK-LABEL: @vaddwev_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vaddwev_q_d(_1, _2); -+} -+// CHECK-LABEL: @vaddwod_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vaddwod_q_d(_1, _2); -+} -+// CHECK-LABEL: @vaddwev_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vaddwev_q_du(_1, _2); -+} -+// CHECK-LABEL: @vaddwod_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vaddwod_q_du(_1, _2); -+} -+// CHECK-LABEL: @vsubwev_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vsubwev_q_d(_1, _2); -+} -+// CHECK-LABEL: @vsubwod_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vsubwod_q_d(_1, _2); -+} -+// CHECK-LABEL: @vsubwev_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vsubwev_q_du(_1, _2); -+} -+// CHECK-LABEL: @vsubwod_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vsubwod_q_du(_1, _2); -+} -+// CHECK-LABEL: @vaddwev_q_du_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) { -+ return __builtin_lsx_vaddwev_q_du_d(_1, _2); -+} -+// CHECK-LABEL: @vaddwod_q_du_d( 
-+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) { -+ return __builtin_lsx_vaddwod_q_du_d(_1, _2); -+} -+// CHECK-LABEL: @vmulwev_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vmulwev_d_w(_1, _2); -+} -+// CHECK-LABEL: @vmulwev_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vmulwev_w_h(_1, _2); -+} -+// CHECK-LABEL: @vmulwev_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vmulwev_h_b(_1, _2); -+} -+// CHECK-LABEL: @vmulwod_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vmulwod_d_w(_1, _2); -+} -+// CHECK-LABEL: @vmulwod_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vmulwod_w_h(_1, _2); -+} -+// CHECK-LABEL: @vmulwod_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vmulwod_h_b(_1, _2); -+} -+// CHECK-LABEL: @vmulwev_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vmulwev_d_wu(_1, _2); -+} -+// CHECK-LABEL: @vmulwev_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vmulwev_w_hu(_1, _2); -+} -+// CHECK-LABEL: @vmulwev_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { -+ return __builtin_lsx_vmulwev_h_bu(_1, _2); -+} -+// CHECK-LABEL: @vmulwod_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { -+ return __builtin_lsx_vmulwod_d_wu(_1, _2); -+} -+// CHECK-LABEL: @vmulwod_w_hu( -+// 
CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { -+ return __builtin_lsx_vmulwod_w_hu(_1, _2); -+} -+// CHECK-LABEL: @vmulwod_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { -+ return __builtin_lsx_vmulwod_h_bu(_1, _2); -+} -+// CHECK-LABEL: @vmulwev_d_wu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) { -+ return __builtin_lsx_vmulwev_d_wu_w(_1, _2); -+} -+// CHECK-LABEL: @vmulwev_w_hu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) { -+ return __builtin_lsx_vmulwev_w_hu_h(_1, _2); -+} -+// CHECK-LABEL: @vmulwev_h_bu_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) { -+ return __builtin_lsx_vmulwev_h_bu_b(_1, _2); -+} -+// CHECK-LABEL: @vmulwod_d_wu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) { -+ return __builtin_lsx_vmulwod_d_wu_w(_1, _2); -+} -+// CHECK-LABEL: @vmulwod_w_hu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) { -+ return __builtin_lsx_vmulwod_w_hu_h(_1, _2); -+} -+// CHECK-LABEL: @vmulwod_h_bu_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) { -+ return __builtin_lsx_vmulwod_h_bu_b(_1, _2); -+} -+// CHECK-LABEL: @vmulwev_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vmulwev_q_d(_1, _2); -+} -+// CHECK-LABEL: @vmulwod_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vmulwod_q_d(_1, _2); -+} -+// CHECK-LABEL: @vmulwev_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { -+ return 
__builtin_lsx_vmulwev_q_du(_1, _2); -+} -+// CHECK-LABEL: @vmulwod_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vmulwod_q_du(_1, _2); -+} -+// CHECK-LABEL: @vmulwev_q_du_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) { -+ return __builtin_lsx_vmulwev_q_du_d(_1, _2); -+} -+// CHECK-LABEL: @vmulwod_q_du_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) { -+ return __builtin_lsx_vmulwod_q_du_d(_1, _2); -+} -+// CHECK-LABEL: @vhaddw_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vhaddw_q_d(_1, _2); -+} -+// CHECK-LABEL: @vhaddw_qu_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vhaddw_qu_du(_1, _2); -+} -+// CHECK-LABEL: @vhsubw_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vhsubw_q_d(_1, _2); -+} -+// CHECK-LABEL: @vhsubw_qu_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { -+ return __builtin_lsx_vhsubw_qu_du(_1, _2); -+} -+// CHECK-LABEL: @vmaddwev_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { -+ return __builtin_lsx_vmaddwev_d_w(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwev_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { -+ return __builtin_lsx_vmaddwev_w_h(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwev_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { -+ return __builtin_lsx_vmaddwev_h_b(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwev_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[_1:%.*]], 
<4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { -+ return __builtin_lsx_vmaddwev_d_wu(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwev_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { -+ return __builtin_lsx_vmaddwev_w_hu(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwev_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { -+ return __builtin_lsx_vmaddwev_h_bu(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwod_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { -+ return __builtin_lsx_vmaddwod_d_w(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwod_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { -+ return __builtin_lsx_vmaddwod_w_h(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwod_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { -+ return __builtin_lsx_vmaddwod_h_b(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwod_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { -+ return __builtin_lsx_vmaddwod_d_wu(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwod_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { -+ return __builtin_lsx_vmaddwod_w_hu(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwod_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { -+ return __builtin_lsx_vmaddwod_h_bu(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwev_d_wu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { -+ return __builtin_lsx_vmaddwev_d_wu_w(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwev_w_hu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { -+ return __builtin_lsx_vmaddwev_w_hu_h(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwev_h_bu_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { -+ return __builtin_lsx_vmaddwev_h_bu_b(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwod_d_wu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { -+ return __builtin_lsx_vmaddwod_d_wu_w(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwod_w_hu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { -+ return __builtin_lsx_vmaddwod_w_hu_h(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwod_h_bu_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { -+ return __builtin_lsx_vmaddwod_h_bu_b(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwev_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { -+ return __builtin_lsx_vmaddwev_q_d(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwod_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { -+ return __builtin_lsx_vmaddwod_q_d(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwev_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { -+ return __builtin_lsx_vmaddwev_q_du(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwod_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { -+ return __builtin_lsx_vmaddwod_q_du(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwev_q_du_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { -+ 
return __builtin_lsx_vmaddwev_q_du_d(_1, _2, _3); -+} -+// CHECK-LABEL: @vmaddwod_q_du_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { -+ return __builtin_lsx_vmaddwod_q_du_d(_1, _2, _3); -+} -+// CHECK-LABEL: @vrotr_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vrotr_b(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vrotr_b(_1, _2); -+} -+// CHECK-LABEL: @vrotr_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vrotr_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vrotr_h(_1, _2); -+} -+// CHECK-LABEL: @vrotr_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vrotr_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vrotr_w(_1, _2); -+} -+// CHECK-LABEL: @vrotr_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vrotr_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vrotr_d(_1, _2); -+} -+// CHECK-LABEL: @vadd_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_q(_1, _2); } -+// CHECK-LABEL: @vsub_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_q(_1, _2); } -+// CHECK-LABEL: @vldrepl_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vldrepl_b(void *_1) { return __builtin_lsx_vldrepl_b(_1, 1); } -+// CHECK-LABEL: @vldrepl_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vldrepl_h(void *_1) { return __builtin_lsx_vldrepl_h(_1, 2); } -+// CHECK-LABEL: @vldrepl_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vldrepl_w(void *_1) { return __builtin_lsx_vldrepl_w(_1, 4); } -+// CHECK-LABEL: @vldrepl_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vldrepl_d(void *_1) { return __builtin_lsx_vldrepl_d(_1, 8); } -+// CHECK-LABEL: @vmskgez_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 
vmskgez_b(v16i8 _1) { return __builtin_lsx_vmskgez_b(_1); } -+// CHECK-LABEL: @vmsknz_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vmsknz_b(v16i8 _1) { return __builtin_lsx_vmsknz_b(_1); } -+// CHECK-LABEL: @vexth_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vexth_h_b(v16i8 _1) { return __builtin_lsx_vexth_h_b(_1); } -+// CHECK-LABEL: @vexth_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vexth_w_h(v8i16 _1) { return __builtin_lsx_vexth_w_h(_1); } -+// CHECK-LABEL: @vexth_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vexth_d_w(v4i32 _1) { return __builtin_lsx_vexth_d_w(_1); } -+// CHECK-LABEL: @vexth_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vexth_q_d(v2i64 _1) { return __builtin_lsx_vexth_q_d(_1); } -+// CHECK-LABEL: @vexth_hu_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vexth_hu_bu(v16u8 _1) { return __builtin_lsx_vexth_hu_bu(_1); } -+// CHECK-LABEL: @vexth_wu_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vexth_wu_hu(v8u16 _1) { return __builtin_lsx_vexth_wu_hu(_1); } -+// CHECK-LABEL: @vexth_du_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vexth_du_wu(v4u32 _1) { return __builtin_lsx_vexth_du_wu(_1); } -+// CHECK-LABEL: @vexth_qu_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vexth_qu_du(v2u64 _1) { return __builtin_lsx_vexth_qu_du(_1); } -+// CHECK-LABEL: @vrotri_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vrotri_b(v16i8 _1) { return __builtin_lsx_vrotri_b(_1, 1); } -+// CHECK-LABEL: @vrotri_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vrotri_h(v8i16 _1) { return __builtin_lsx_vrotri_h(_1, 1); } -+// CHECK-LABEL: @vrotri_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vrotri_w(v4i32 _1) { return __builtin_lsx_vrotri_w(_1, 1); } -+// CHECK-LABEL: @vrotri_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> 
[[TMP0]] -+// -+v2i64 vrotri_d(v2i64 _1) { return __builtin_lsx_vrotri_d(_1, 1); } -+// CHECK-LABEL: @vextl_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vextl_q_d(v2i64 _1) { return __builtin_lsx_vextl_q_d(_1); } -+// CHECK-LABEL: @vsrlni_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vsrlni_b_h(_1, _2, 1); -+} -+// CHECK-LABEL: @vsrlni_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vsrlni_h_w(_1, _2, 1); -+} -+// CHECK-LABEL: @vsrlni_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vsrlni_w_d(_1, _2, 1); -+} -+// CHECK-LABEL: @vsrlni_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vsrlni_d_q(_1, _2, 1); -+} -+// CHECK-LABEL: @vsrlrni_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vsrlrni_b_h(_1, _2, 1); -+} -+// CHECK-LABEL: @vsrlrni_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vsrlrni_h_w(_1, _2, 1); -+} -+// CHECK-LABEL: @vsrlrni_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vsrlrni_w_d(_1, _2, 1); -+} -+// CHECK-LABEL: @vsrlrni_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vsrlrni_d_q(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrlni_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vssrlni_b_h(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrlni_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 
vssrlni_h_w(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vssrlni_h_w(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrlni_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vssrlni_w_d(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrlni_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vssrlni_d_q(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrlni_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { -+ return __builtin_lsx_vssrlni_bu_h(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrlni_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { -+ return __builtin_lsx_vssrlni_hu_w(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrlni_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { -+ return __builtin_lsx_vssrlni_wu_d(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrlni_du_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { -+ return __builtin_lsx_vssrlni_du_q(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrlrni_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vssrlrni_b_h(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrlrni_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vssrlrni_h_w(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrlrni_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vssrlrni_w_d(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrlrni_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vssrlrni_d_q(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrlrni_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { -+ return __builtin_lsx_vssrlrni_bu_h(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrlrni_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { -+ return __builtin_lsx_vssrlrni_hu_w(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrlrni_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { -+ return __builtin_lsx_vssrlrni_wu_d(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrlrni_du_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { -+ return __builtin_lsx_vssrlrni_du_q(_1, _2, 1); -+} -+// CHECK-LABEL: @vsrani_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vsrani_b_h(_1, _2, 1); -+} -+// CHECK-LABEL: @vsrani_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vsrani_h_w(_1, _2, 1); -+} -+// CHECK-LABEL: @vsrani_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vsrani_w_d(_1, _2, 1); -+} -+// CHECK-LABEL: @vsrani_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vsrani_d_q(_1, _2, 1); -+} -+// CHECK-LABEL: @vsrarni_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vsrarni_b_h(_1, _2, 1); -+} -+// CHECK-LABEL: @vsrarni_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vsrarni_h_w(_1, _2, 1); -+} -+// CHECK-LABEL: @vsrarni_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vsrarni_w_d(_1, _2, 1); -+} -+// CHECK-LABEL: 
@vsrarni_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vsrarni_d_q(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrani_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vssrani_b_h(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrani_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vssrani_h_w(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrani_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vssrani_w_d(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrani_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vssrani_d_q(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrani_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { -+ return __builtin_lsx_vssrani_bu_h(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrani_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { -+ return __builtin_lsx_vssrani_hu_w(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrani_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { -+ return __builtin_lsx_vssrani_wu_d(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrani_du_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { -+ return __builtin_lsx_vssrani_du_q(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrarni_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { -+ return __builtin_lsx_vssrarni_b_h(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrarni_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 
vssrarni_h_w(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vssrarni_h_w(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrarni_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vssrarni_w_d(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrarni_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vssrarni_d_q(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrarni_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { -+ return __builtin_lsx_vssrarni_bu_h(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrarni_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { -+ return __builtin_lsx_vssrarni_hu_w(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrarni_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { -+ return __builtin_lsx_vssrarni_wu_d(_1, _2, 1); -+} -+// CHECK-LABEL: @vssrarni_du_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { -+ return __builtin_lsx_vssrarni_du_q(_1, _2, 1); -+} -+// CHECK-LABEL: @vpermi_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vpermi_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vpermi_w(_1, _2, 1); -+} -+// CHECK-LABEL: @vld( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vld(void *_1) { return __builtin_lsx_vld(_1, 1); } -+// CHECK-LABEL: @vst( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret void -+// -+void vst(v16i8 _1, void *_2) { return __builtin_lsx_vst(_1, _2, 1); } -+// CHECK-LABEL: @vssrlrn_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vssrlrn_b_h(_1, _2); -+} -+// CHECK-LABEL: @vssrlrn_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { -+ return 
__builtin_lsx_vssrlrn_h_w(_1, _2); -+} -+// CHECK-LABEL: @vssrlrn_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vssrlrn_w_d(_1, _2); -+} -+// CHECK-LABEL: @vssrln_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { -+ return __builtin_lsx_vssrln_b_h(_1, _2); -+} -+// CHECK-LABEL: @vssrln_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// -+v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { -+ return __builtin_lsx_vssrln_h_w(_1, _2); -+} -+// CHECK-LABEL: @vssrln_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// -+v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { -+ return __builtin_lsx_vssrln_w_d(_1, _2); -+} -+// CHECK-LABEL: @vorn_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __builtin_lsx_vorn_v(_1, _2); } -+// CHECK-LABEL: @vldi( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2i64 vldi() { return __builtin_lsx_vldi(1); } -+// CHECK-LABEL: @vshuf_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { -+ return __builtin_lsx_vshuf_b(_1, _2, _3); -+} -+// CHECK-LABEL: @vldx( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1) -+// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// -+v16i8 vldx(void *_1) { return __builtin_lsx_vldx(_1, 1); } -+// CHECK-LABEL: @vstx( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) -+// CHECK-NEXT: ret void -+// -+void vstx(v16i8 _1, void *_2) { return __builtin_lsx_vstx(_1, _2, 1); } -+// CHECK-LABEL: @vextl_qu_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// -+v2u64 vextl_qu_du(v2u64 _1) { return __builtin_lsx_vextl_qu_du(_1); } -+// CHECK-LABEL: @bnz_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int bnz_b(v16u8 _1) { return __builtin_lsx_bnz_b(_1); } -+// CHECK-LABEL: @bnz_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int bnz_d(v2u64 _1) { return __builtin_lsx_bnz_d(_1); } -+// CHECK-LABEL: @bnz_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 
@llvm.loongarch.lsx.bnz.h(<8 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int bnz_h(v8u16 _1) { return __builtin_lsx_bnz_h(_1); }
-+// CHECK-LABEL: @bnz_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int bnz_v(v16u8 _1) { return __builtin_lsx_bnz_v(_1); }
-+// CHECK-LABEL: @bnz_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int bnz_w(v4u32 _1) { return __builtin_lsx_bnz_w(_1); }
-+// CHECK-LABEL: @bz_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int bz_b(v16u8 _1) { return __builtin_lsx_bz_b(_1); }
-+// CHECK-LABEL: @bz_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int bz_d(v2u64 _1) { return __builtin_lsx_bz_d(_1); }
-+// CHECK-LABEL: @bz_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int bz_h(v8u16 _1) { return __builtin_lsx_bz_h(_1); }
-+// CHECK-LABEL: @bz_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int bz_v(v16u8 _1) { return __builtin_lsx_bz_v(_1); }
-+// CHECK-LABEL: @bz_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret i32 [[TMP0]]
-+//
-+int bz_w(v4u32 _1) { return __builtin_lsx_bz_w(_1); }
-+// CHECK-LABEL: @vfcmp_caf_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_caf_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_caf_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_caf_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_ceq_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_ceq_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_ceq_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_ceq_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_cle_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_cle_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_cle_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_cle_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_clt_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_clt_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_clt_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_clt_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_cne_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_cne_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_cne_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_cne_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_cor_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_cor_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_cor_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_cor_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_cueq_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_cueq_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_cueq_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_cueq_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_cule_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_cule_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_cule_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_cule_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_cult_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_cult_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_cult_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_cult_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_cun_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_cun_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_cune_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_cune_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_cune_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_cune_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_cun_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_cun_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_saf_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_saf_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_saf_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_saf_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_seq_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_seq_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_seq_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_seq_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_sle_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_sle_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_sle_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_sle_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_slt_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_slt_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_slt_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_slt_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_sne_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_sne_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_sne_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_sne_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_sor_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_sor_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_sor_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_sor_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_sueq_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_sueq_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_sueq_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_sueq_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_sule_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_sule_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_sule_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_sule_s(_1, _2);
-+}
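-+// Editorial note (not part of the upstream test): every vfcmp_* builtin
-+// above follows one pattern -- each lane comparison yields an all-ones or
-+// all-zeros integer lane, so the result is directly usable as a bit mask.
-+// A minimal usage sketch, with a hypothetical helper select_le():
-+//
-+//   v4i32 select_le(v4f32 a, v4f32 b, v4i32 x, v4i32 y) {
-+//     v4i32 m = __builtin_lsx_vfcmp_cle_s(a, b); // lane: a <= b ? -1 : 0
-+//     return (x & m) | (y & ~m);                 // per-lane select
-+//   }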
-+// CHECK-LABEL: @vfcmp_sult_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_sult_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_sult_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_sult_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_sun_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_sun_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_sune_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) {
-+ return __builtin_lsx_vfcmp_sune_d(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_sune_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_sune_s(_1, _2);
-+}
-+// CHECK-LABEL: @vfcmp_sun_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) {
-+ return __builtin_lsx_vfcmp_sun_s(_1, _2);
-+}
-+// CHECK-LABEL: @vrepli_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1)
-+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+//
-+v16i8 vrepli_b() { return __builtin_lsx_vrepli_b(1); }
-+// CHECK-LABEL: @vrepli_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1)
-+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+//
-+v2i64 vrepli_d() { return __builtin_lsx_vrepli_d(1); }
-+// CHECK-LABEL: @vrepli_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1)
-+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+//
-+v8i16 vrepli_h() { return __builtin_lsx_vrepli_h(1); }
-+// CHECK-LABEL: @vrepli_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1)
-+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+//
-+v4i32 vrepli_w() { return __builtin_lsx_vrepli_w(1); }
---
-2.20.1
-
-
-From c403023799745d743eb3aa8d8719318c58e395d4 Mon Sep 17 00:00:00 2001
-From: chenli
-Date: Fri, 27 Oct 2023 15:58:55 +0800
-Subject: [PATCH 7/8] [LoongArch][CodeGen] Add LASX builtin testcases
-
-(cherry picked from commit 535408eedbf812d9038bd40a0faae5001d2256cf)
---
- .../LoongArch/lasx/builtin-alias-error.c | 1373 +++++
- .../CodeGen/LoongArch/lasx/builtin-alias.c | 4430 ++++++++++++++++
- .../CodeGen/LoongArch/lasx/builtin-error.c | 1392 ++++++
- clang/test/CodeGen/LoongArch/lasx/builtin.c | 4452 +++++++++++++++++
- 4 files changed, 11647 insertions(+)
- create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c
- create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-alias.c
- create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-error.c
- create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin.c
-
-diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c
-new file mode 100644
-index 000000000000..2a3862bbe3c1
---- /dev/null
-+++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c
-@@ -0,0 +1,1373 @@
-+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -verify %s
-+
-+#include <lasxintrin.h>
-+
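-+// Editorial note (not part of the upstream test): each function below probes
-+// one immediate-operand builtin three ways -- an immediate below its range,
-+// one above it, and a non-constant 'var' -- and pins the exact Sema
-+// diagnostic with an expected-error annotation under -verify. A sketch of
-+// the shape being checked, for a hypothetical ui3-immediate builtin:
-+//
-+//   v32i8 r = __lasx_example(v, 8);   // rejected: 8 exceeds the ui3 max of 7
-+//   r = __lasx_example(v, var);       // rejected: not a constant expression
-+//   r = __lasx_example(v, 5);         // accepted: constant within [0, 7]
-+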
-+v32i8 xvslli_b(v32i8 _1, int var) {
-+ v32i8 res = __lasx_xvslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvslli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvslli_h(v16i16 _1, int var) {
-+ v16i16 res = __lasx_xvslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvslli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvslli_w(v8i32 _1, int var) {
-+ v8i32 res = __lasx_xvslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvslli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvslli_d(v4i64 _1, int var) {
-+ v4i64 res = __lasx_xvslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvslli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvsrai_b(v32i8 _1, int var) {
-+ v32i8 res = __lasx_xvsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvsrai_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvsrai_h(v16i16 _1, int var) {
-+ v16i16 res = __lasx_xvsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvsrai_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvsrai_w(v8i32 _1, int var) {
-+ v8i32 res = __lasx_xvsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsrai_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvsrai_d(v4i64 _1, int var) {
-+ v4i64 res = __lasx_xvsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvsrai_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvsrari_b(v32i8 _1, int var) {
-+ v32i8 res = __lasx_xvsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvsrari_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvsrari_h(v16i16 _1, int var) {
-+ v16i16 res = __lasx_xvsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvsrari_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvsrari_w(v8i32 _1, int var) {
-+ v8i32 res = __lasx_xvsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsrari_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvsrari_d(v4i64 _1, int var) {
-+ v4i64 res = __lasx_xvsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvsrari_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvsrli_b(v32i8 _1, int var) {
-+ v32i8 res = __lasx_xvsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvsrli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvsrli_h(v16i16 _1, int var) {
-+ v16i16 res = __lasx_xvsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvsrli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvsrli_w(v8i32 _1, int var) {
-+ v8i32 res = __lasx_xvsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsrli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvsrli_d(v4i64 _1, int var) {
-+ v4i64 res = __lasx_xvsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvsrli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvsrlri_b(v32i8 _1, int var) {
-+ v32i8 res = __lasx_xvsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvsrlri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvsrlri_h(v16i16 _1, int var) {
-+ v16i16 res = __lasx_xvsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvsrlri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvsrlri_w(v8i32 _1, int var) {
-+ v8i32 res = __lasx_xvsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsrlri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvsrlri_d(v4i64 _1, int var) {
-+ v4i64 res = __lasx_xvsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvsrlri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvbitclri_b(v32u8 _1, int var) {
-+ v32u8 res = __lasx_xvbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvbitclri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u16 xvbitclri_h(v16u16 _1, int var) {
-+ v16u16 res = __lasx_xvbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvbitclri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u32 xvbitclri_w(v8u32 _1, int var) {
-+ v8u32 res = __lasx_xvbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvbitclri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u64 xvbitclri_d(v4u64 _1, int var) {
-+ v4u64 res = __lasx_xvbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvbitclri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvbitseti_b(v32u8 _1, int var) {
-+ v32u8 res = __lasx_xvbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvbitseti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u16 xvbitseti_h(v16u16 _1, int var) {
-+ v16u16 res = __lasx_xvbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvbitseti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u32 xvbitseti_w(v8u32 _1, int var) {
-+ v8u32 res = __lasx_xvbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvbitseti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u64 xvbitseti_d(v4u64 _1, int var) {
-+ v4u64 res = __lasx_xvbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvbitseti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvbitrevi_b(v32u8 _1, int var) {
-+ v32u8 res = __lasx_xvbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u16 xvbitrevi_h(v16u16 _1, int var) {
-+ v16u16 res = __lasx_xvbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u32 xvbitrevi_w(v8u32 _1, int var) {
-+ v8u32 res = __lasx_xvbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u64 xvbitrevi_d(v4u64 _1, int var) {
-+ v4u64 res = __lasx_xvbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvaddi_bu(v32i8 _1, int var) {
-+ v32i8 res = __lasx_xvaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvaddi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_bu' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvaddi_hu(v16i16 _1, int var) {
-+ v16i16 res = __lasx_xvaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvaddi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_hu' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvaddi_wu(v8i32 _1, int var) {
-+ v8i32 res = __lasx_xvaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvaddi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_wu' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvaddi_du(v4i64 _1, int var) {
-+ v4i64 res = __lasx_xvaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvaddi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_du' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvsubi_bu(v32i8 _1, int var) {
-+ v32i8 res = __lasx_xvsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsubi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_bu' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvsubi_hu(v16i16 _1, int var) {
-+ v16i16 res = __lasx_xvsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsubi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_hu' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvsubi_wu(v8i32 _1, int var) {
-+ v8i32 res = __lasx_xvsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsubi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_wu' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvsubi_du(v4i64 _1, int var) {
-+ v4i64 res = __lasx_xvsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsubi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_du' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvmaxi_b(v32i8 _1, int var) {
-+ v32i8 res = __lasx_xvmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvmaxi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvmaxi_h(v16i16 _1, int var) {
-+ v16i16 res = __lasx_xvmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvmaxi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvmaxi_w(v8i32 _1, int var) {
-+ v8i32 res = __lasx_xvmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvmaxi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvmaxi_d(v4i64 _1, int var) {
-+ v4i64 res = __lasx_xvmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvmaxi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvmaxi_bu(v32u8 _1, int var) {
-+ v32u8 res = __lasx_xvmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_bu' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u16 xvmaxi_hu(v16u16 _1, int var) {
-+ v16u16 res = __lasx_xvmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_hu' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u32 xvmaxi_wu(v8u32 _1, int var) {
-+ v8u32 res = __lasx_xvmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_wu' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u64 xvmaxi_du(v4u64 _1, int var) {
-+ v4u64 res = __lasx_xvmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvmaxi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_du' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvmini_b(v32i8 _1, int var) {
-+ v32i8 res = __lasx_xvmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvmini_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvmini_h(v16i16 _1, int var) {
-+ v16i16 res = __lasx_xvmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvmini_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvmini_w(v8i32 _1, int var) {
-+ v8i32 res = __lasx_xvmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvmini_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvmini_d(v4i64 _1, int var) {
-+ v4i64 res = __lasx_xvmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvmini_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvmini_bu(v32u8 _1, int var) {
-+ v32u8 res = __lasx_xvmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvmini_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_bu' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u16 xvmini_hu(v16u16 _1, int var) {
-+ v16u16 res = __lasx_xvmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvmini_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_hu' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u32 xvmini_wu(v8u32 _1, int var) {
-+ v8u32 res = __lasx_xvmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvmini_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_wu' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u64 xvmini_du(v4u64 _1, int var) {
-+ v4u64 res = __lasx_xvmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvmini_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_du' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvseqi_b(v32i8 _1, int var) {
-+ v32i8 res = __lasx_xvseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvseqi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvseqi_h(v16i16 _1, int var) {
-+ v16i16 res = __lasx_xvseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvseqi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvseqi_w(v8i32 _1, int var) {
-+ v8i32 res = __lasx_xvseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvseqi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvseqi_d(v4i64 _1, int var) {
-+ v4i64 res = __lasx_xvseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvseqi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvslti_b(v32i8 _1, int var) {
-+ v32i8 res = __lasx_xvslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvslti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvslti_h(v16i16 _1, int var) {
-+ v16i16 res = __lasx_xvslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvslti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvslti_w(v8i32 _1, int var) {
-+ v8i32 res = __lasx_xvslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvslti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvslti_d(v4i64 _1, int var) {
-+ v4i64 res = __lasx_xvslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvslti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvslti_bu(v32u8 _1, int var) {
-+ v32i8 res = __lasx_xvslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvslti_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_bu' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvslti_hu(v16u16 _1, int var) {
-+ v16i16 res = __lasx_xvslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvslti_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_hu' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvslti_wu(v8u32 _1, int var) {
-+ v8i32 res = __lasx_xvslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvslti_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_wu' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvslti_du(v4u64 _1, int var) {
-+ v4i64 res = __lasx_xvslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvslti_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_du' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvslei_b(v32i8 _1, int var) {
-+ v32i8 res = __lasx_xvslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvslei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvslei_h(v16i16 _1, int var) {
-+ v16i16 res = __lasx_xvslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvslei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvslei_w(v8i32 _1, int var) {
-+ v8i32 res = __lasx_xvslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvslei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvslei_d(v4i64 _1, int var) {
-+ v4i64 res = __lasx_xvslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __lasx_xvslei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvslei_bu(v32u8 _1, int var) {
-+ v32i8 res = __lasx_xvslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvslei_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_bu' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvslei_hu(v16u16 _1, int var) {
-+ v16i16 res = __lasx_xvslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvslei_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_hu' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvslei_wu(v8u32 _1, int var) {
-+ v8i32 res = __lasx_xvslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvslei_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_wu' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvslei_du(v4u64 _1, int var) {
-+ v4i64 res = __lasx_xvslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvslei_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_du' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvsat_b(v32i8 _1, int var) {
-+ v32i8 res = __lasx_xvsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvsat_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvsat_h(v16i16 _1, int var) {
-+ v16i16 res = __lasx_xvsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvsat_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvsat_w(v8i32 _1, int var) {
-+ v8i32 res = __lasx_xvsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsat_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvsat_d(v4i64 _1, int var) {
-+ v4i64 res = __lasx_xvsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvsat_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvsat_bu(v32u8 _1, int var) {
-+ v32u8 res = __lasx_xvsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvsat_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_bu' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u16 xvsat_hu(v16u16 _1, int var) {
-+ v16u16 res = __lasx_xvsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvsat_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_hu' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u32 xvsat_wu(v8u32 _1, int var) {
-+ v8u32 res = __lasx_xvsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsat_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_wu' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u64 xvsat_du(v4u64 _1, int var) {
-+ v4u64 res = __lasx_xvsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __lasx_xvsat_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_du' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvrepl128vei_b(v32i8 _1, int var) {
-+ v32i8 res = __lasx_xvrepl128vei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvrepl128vei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvrepl128vei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvrepl128vei_h(v16i16 _1, int var) {
-+ v16i16 res = __lasx_xvrepl128vei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvrepl128vei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvrepl128vei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvrepl128vei_w(v8i32 _1, int var) {
-+ v8i32 res = __lasx_xvrepl128vei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
-+ res |= __lasx_xvrepl128vei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-+ res |= __lasx_xvrepl128vei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvrepl128vei_d(v4i64 _1, int var) {
-+ v4i64 res = __lasx_xvrepl128vei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}}
-+ res |= __lasx_xvrepl128vei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-+ res |= __lasx_xvrepl128vei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvandi_b(v32u8 _1, int var) {
-+ v32u8 res = __lasx_xvandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvandi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvandi_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvori_b(v32u8 _1, int var) {
-+ v32u8 res = __lasx_xvori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvori_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvnori_b(v32u8 _1, int var) {
-+ v32u8 res = __lasx_xvnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvnori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvnori_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvxori_b(v32u8 _1, int var) {
-+ v32u8 res = __lasx_xvxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvxori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvxori_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvbitseli_b(v32u8 _1, v32u8 _2, int var) {
-+ v32u8 res = __lasx_xvbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvbitseli_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvshuf4i_b(v32i8 _1, int var) {
-+ v32i8 res = __lasx_xvshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvshuf4i_h(v16i16 _1, int var) {
-+ v16i16 res = __lasx_xvshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvshuf4i_w(v8i32 _1, int var) {
-+ v8i32 res = __lasx_xvshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2, int var) {
-+ v4i64 res = __lasx_xvshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvpermi_w(v8i32 _1, v8i32 _2, int var) {
-+ v8i32 res = __lasx_xvpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvpermi_d(v4i64 _1, int var) {
-+ v4i64 res = __lasx_xvpermi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvpermi_d(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvpermi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpermi_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvpermi_q(v32i8 _1, v32i8 _2, int var) {
-+ v32i8 res = __lasx_xvpermi_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvpermi_q(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvpermi_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvsllwil_h_b(v32i8 _1, int var) {
-+ v16i16 res = __lasx_xvsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_h_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvsllwil_w_h(v16i16 _1, int var) {
-+ v8i32 res = __lasx_xvsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_w_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvsllwil_d_w(v8i32 _1, int var) {
-+ v4i64 res = __lasx_xvsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_d_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u16 xvsllwil_hu_bu(v32u8 _1, int var) {
-+ v16u16 res = __lasx_xvsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __lasx_xvsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_hu_bu' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u32 xvsllwil_wu_hu(v16u16 _1, int var) {
-+ v8u32 res = __lasx_xvsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __lasx_xvsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_wu_hu' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u64 xvsllwil_du_wu(v8u32 _1, int var) {
-+ v4u64 res = __lasx_xvsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_du_wu' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2, int var) {
-+ v32i8 res = __lasx_xvfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2, int var) {
-+ v16i16 res = __lasx_xvfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvbsrl_v(v32i8 _1, int var) {
-+ v32i8 res = __lasx_xvbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvbsrl_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsrl_v' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvbsll_v(v32i8 _1, int var) {
-+ v32i8 res = __lasx_xvbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __lasx_xvbsll_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsll_v' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvextrins_b(v32i8 _1, v32i8 _2, int var) {
-+ v32i8 res = __lasx_xvextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvextrins_h(v16i16 _1, v16i16 _2, int var) {
-+ v16i16 res = __lasx_xvextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvextrins_w(v8i32 _1, v8i32 _2, int var) {
-+ v8i32 res = __lasx_xvextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvextrins_d(v4i64 _1, v4i64 _2, int var) {
-+ v4i64 res = __lasx_xvextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __lasx_xvextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvld(void *_1, int var) {
-+ v32i8 res = __lasx_xvld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
-+ res |= __lasx_xvld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}}
-+ res |= __lasx_xvld(_1, var); // expected-error {{argument to '__builtin_lasx_xvld' must be a constant integer}}
-+ return res;
-+}
-+
-+void xvst(v32i8 _1, void *_2, int var) {
-+ __lasx_xvst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
-+ __lasx_xvst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}}
-+ __lasx_xvst(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvst' must be a constant integer}}
-+}
-+
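-+// Editorial note (not part of the upstream test): xvld/xvst above take a
-+// signed 12-bit byte offset ([-2048, 2047]), while the xvstelm_* stores
-+// below scale the offset by element size (hence ranges such as [-256, 254]
-+// for 16-bit elements) and add a lane index bounded by the element count.
-+// A sketch, assuming hypothetical 32-byte buffers a and b:
-+//
-+//   v32i8 t = __lasx_xvld(a, 0);      // load 256 bits from a + 0
-+//   __lasx_xvst(t, b, 0);             // store all 256 bits to b + 0
-+//   __lasx_xvstelm_b(t, b, 1, 0);     // store byte lane 0 to b + 1
-+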
expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} -+ __lasx_xvst(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvst' must be a constant integer}} -+} -+ -+void xvstelm_b(v32i8 _1, void * _2, int var) { -+ __lasx_xvstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} -+ __lasx_xvstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} -+ __lasx_xvstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} -+} -+ -+void xvstelm_h(v16i16 _1, void * _2, int var) { -+ __lasx_xvstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} -+ __lasx_xvstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} -+ __lasx_xvstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} -+} -+ -+void xvstelm_w(v8i32 _1, void * _2, int var) { -+ __lasx_xvstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} -+ __lasx_xvstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} -+ __lasx_xvstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} -+} -+ -+void xvstelm_d(v4i64 _1, void * _2, int var) { -+ __lasx_xvstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} -+ __lasx_xvstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} -+ __lasx_xvstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} -+} -+ -+void xvstelm_b_idx(v32i8 _1, void * _2, int var) { -+ __lasx_xvstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ __lasx_xvstelm_b(_1, _2, 1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ __lasx_xvstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} -+} -+ -+void xvstelm_h_idx(v16i16 _1, void * _2, int var) { -+ __lasx_xvstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ __lasx_xvstelm_h(_1, _2, 2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ __lasx_xvstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} -+} -+ -+void xvstelm_w_idx(v8i32 _1, void * _2, int var) { -+ __lasx_xvstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ __lasx_xvstelm_w(_1, _2, 4, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ __lasx_xvstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} -+} -+ -+void xvstelm_d_idx(v4i64 _1, void * _2, int var) { -+ __lasx_xvstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} -+ __lasx_xvstelm_d(_1, _2, 8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} -+ __lasx_xvstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} -+} -+ -+v8i32 xvinsve0_w(v8i32 
_1, v8i32 _2, int var) { -+ v8i32 res = __lasx_xvinsve0_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lasx_xvinsve0_w(_1, _2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lasx_xvinsve0_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_w' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvinsve0_d(v4i64 _1, v4i64 _2, int var) { -+ v4i64 res = __lasx_xvinsve0_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} -+ res |= __lasx_xvinsve0_d(_1, _2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} -+ res |= __lasx_xvinsve0_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_d' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvpickve_w(v8i32 _1, int var) { -+ v8i32 res = __lasx_xvpickve_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lasx_xvpickve_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lasx_xvpickve_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvpickve_d(v4i64 _1, int var) { -+ v4i64 res = __lasx_xvpickve_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} -+ res |= __lasx_xvpickve_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} -+ res |= __lasx_xvpickve_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvldi(int var) { -+ v4i64 res = __lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} -+ res |= __lasx_xvldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} -+ res |= __lasx_xvldi(var); // expected-error {{argument to '__builtin_lasx_xvldi' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvinsgr2vr_w(v8i32 _1, int var) { -+ v8i32 res = __lasx_xvinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lasx_xvinsgr2vr_w(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lasx_xvinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_w' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvinsgr2vr_d(v4i64 _1, int var) { -+ v4i64 res = __lasx_xvinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} -+ res |= __lasx_xvinsgr2vr_d(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} -+ res |= __lasx_xvinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_d' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvldrepl_b(void *_1, int var) { -+ v32i8 res = __lasx_xvldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} -+ res |= __lasx_xvldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} -+ res |= __lasx_xvldrepl_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_b' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvldrepl_h(void *_1, int var) { -+ v16i16 res = __lasx_xvldrepl_h(_1, -2050); // expected-error {{argument value -2050 is 
outside the valid range [-2048, 2046]}} -+ res |= __lasx_xvldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} -+ res |= __lasx_xvldrepl_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_h' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvldrepl_w(void *_1, int var) { -+ v8i32 res = __lasx_xvldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} -+ res |= __lasx_xvldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} -+ res |= __lasx_xvldrepl_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_w' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvldrepl_d(void *_1, int var) { -+ v4i64 res = __lasx_xvldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} -+ res |= __lasx_xvldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} -+ res |= __lasx_xvldrepl_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_d' must be a constant integer}} -+ return res; -+} -+ -+int xvpickve2gr_w(v8i32 _1, int var) { -+ int res = __lasx_xvpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lasx_xvpickve2gr_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lasx_xvpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_w' must be a constant integer}} -+ return res; -+} -+ -+unsigned int xvpickve2gr_wu(v8i32 _1, int var) { -+ unsigned int res = __lasx_xvpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lasx_xvpickve2gr_wu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lasx_xvpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_wu' must be a constant integer}} -+ return res; -+} -+ -+long xvpickve2gr_d(v4i64 _1, int var) { -+ long res = __lasx_xvpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} -+ res |= __lasx_xvpickve2gr_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} -+ res |= __lasx_xvpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_d' must be a constant integer}} -+ return res; -+} -+ -+unsigned long int xvpickve2gr_du(v4i64 _1, int var) { -+ unsigned long int res = __lasx_xvpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} -+ res |= __lasx_xvpickve2gr_du(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} -+ res |= __lasx_xvpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_du' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvrotri_b(v32i8 _1, int var) { -+ v32i8 res = __lasx_xvrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __lasx_xvrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __lasx_xvrotri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_b' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvrotri_h(v16i16 _1, int var) { -+ v16i16 res = __lasx_xvrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 
[0, 15]}} -+ res |= __lasx_xvrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lasx_xvrotri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_h' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvrotri_w(v8i32 _1, int var) { -+ v8i32 res = __lasx_xvrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lasx_xvrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lasx_xvrotri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_w' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvrotri_d(v4i64 _1, int var) { -+ v4i64 res = __lasx_xvrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lasx_xvrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lasx_xvrotri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_d' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2, int var) { -+ v32i8 res = __lasx_xvsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lasx_xvsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lasx_xvsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_b_h' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2, int var) { -+ v16i16 res = __lasx_xvsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lasx_xvsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lasx_xvsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_h_w' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2, int var) { -+ v8i32 res = __lasx_xvsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lasx_xvsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lasx_xvsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_w_d' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2, int var) { -+ v4i64 res = __lasx_xvsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lasx_xvsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lasx_xvsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_d_q' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2, int var) { -+ v32i8 res = __lasx_xvsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lasx_xvsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lasx_xvsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_b_h' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2, int var) { -+ v16i16 res = __lasx_xvsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the 
valid range [0, 31]}} -+ res |= __lasx_xvsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lasx_xvsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_h_w' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2, int var) { -+ v8i32 res = __lasx_xvsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lasx_xvsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lasx_xvsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_w_d' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2, int var) { -+ v4i64 res = __lasx_xvsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lasx_xvsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lasx_xvsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_d_q' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2, int var) { -+ v32i8 res = __lasx_xvssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lasx_xvssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lasx_xvssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_b_h' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2, int var) { -+ v16i16 res = __lasx_xvssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lasx_xvssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lasx_xvssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_h_w' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2, int var) { -+ v8i32 res = __lasx_xvssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lasx_xvssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lasx_xvssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_w_d' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2, int var) { -+ v4i64 res = __lasx_xvssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lasx_xvssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lasx_xvssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_d_q' must be a constant integer}} -+ return res; -+} -+ -+v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2, int var) { -+ v32u8 res = __lasx_xvssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lasx_xvssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lasx_xvssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_bu_h' must be a constant integer}} -+ return res; -+} -+ -+v16u16 xvssrlni_hu_w(v16u16 _1, 
v16i16 _2, int var) { -+ v16u16 res = __lasx_xvssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lasx_xvssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lasx_xvssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_hu_w' must be a constant integer}} -+ return res; -+} -+ -+v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2, int var) { -+ v8u32 res = __lasx_xvssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lasx_xvssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lasx_xvssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_wu_d' must be a constant integer}} -+ return res; -+} -+ -+v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2, int var) { -+ v4u64 res = __lasx_xvssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lasx_xvssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lasx_xvssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_du_q' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2, int var) { -+ v32i8 res = __lasx_xvssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lasx_xvssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lasx_xvssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_b_h' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2, int var) { -+ v16i16 res = __lasx_xvssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lasx_xvssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lasx_xvssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_h_w' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2, int var) { -+ v8i32 res = __lasx_xvssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lasx_xvssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lasx_xvssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_w_d' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2, int var) { -+ v4i64 res = __lasx_xvssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lasx_xvssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lasx_xvssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_d_q' must be a constant integer}} -+ return res; -+} -+ -+v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2, int var) { -+ v32u8 res = __lasx_xvssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lasx_xvssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= 
__lasx_xvssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_bu_h' must be a constant integer}} -+ return res; -+} -+ -+v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2, int var) { -+ v16u16 res = __lasx_xvssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lasx_xvssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lasx_xvssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_hu_w' must be a constant integer}} -+ return res; -+} -+ -+v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2, int var) { -+ v8u32 res = __lasx_xvssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lasx_xvssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lasx_xvssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_wu_d' must be a constant integer}} -+ return res; -+} -+ -+v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2, int var) { -+ v4u64 res = __lasx_xvssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lasx_xvssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lasx_xvssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_du_q' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2, int var) { -+ v32i8 res = __lasx_xvsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lasx_xvsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lasx_xvsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_b_h' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2, int var) { -+ v16i16 res = __lasx_xvsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lasx_xvsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lasx_xvsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_h_w' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2, int var) { -+ v8i32 res = __lasx_xvsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lasx_xvsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lasx_xvsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_w_d' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2, int var) { -+ v4i64 res = __lasx_xvsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lasx_xvsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lasx_xvsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_d_q' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2, int var) { -+ v32i8 res = __lasx_xvsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid 
range [0, 15]}} -+ res |= __lasx_xvsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lasx_xvsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_b_h' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2, int var) { -+ v16i16 res = __lasx_xvsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lasx_xvsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lasx_xvsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_h_w' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2, int var) { -+ v8i32 res = __lasx_xvsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lasx_xvsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lasx_xvsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_w_d' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2, int var) { -+ v4i64 res = __lasx_xvsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lasx_xvsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lasx_xvsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_d_q' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2, int var) { -+ v32i8 res = __lasx_xvssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lasx_xvssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lasx_xvssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_b_h' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2, int var) { -+ v16i16 res = __lasx_xvssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lasx_xvssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lasx_xvssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_h_w' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2, int var) { -+ v8i32 res = __lasx_xvssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lasx_xvssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lasx_xvssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_w_d' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2, int var) { -+ v4i64 res = __lasx_xvssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lasx_xvssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lasx_xvssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_d_q' must be a constant integer}} -+ return res; -+} -+ -+v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2, 
int var) { -+ v32u8 res = __lasx_xvssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lasx_xvssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lasx_xvssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_bu_h' must be a constant integer}} -+ return res; -+} -+ -+v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2, int var) { -+ v16u16 res = __lasx_xvssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lasx_xvssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lasx_xvssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_hu_w' must be a constant integer}} -+ return res; -+} -+ -+v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2, int var) { -+ v8u32 res = __lasx_xvssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lasx_xvssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lasx_xvssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_wu_d' must be a constant integer}} -+ return res; -+} -+ -+v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2, int var) { -+ v4u64 res = __lasx_xvssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lasx_xvssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lasx_xvssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_du_q' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2, int var) { -+ v32i8 res = __lasx_xvssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lasx_xvssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lasx_xvssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_b_h' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2, int var) { -+ v16i16 res = __lasx_xvssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lasx_xvssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lasx_xvssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_h_w' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2, int var) { -+ v8i32 res = __lasx_xvssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lasx_xvssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lasx_xvssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_w_d' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2, int var) { -+ v4i64 res = __lasx_xvssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lasx_xvssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lasx_xvssrarni_d_q(_1, 
_2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_d_q' must be a constant integer}} -+ return res; -+} -+ -+v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2, int var) { -+ v32u8 res = __lasx_xvssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __lasx_xvssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __lasx_xvssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_bu_h' must be a constant integer}} -+ return res; -+} -+ -+v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2, int var) { -+ v16u16 res = __lasx_xvssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __lasx_xvssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __lasx_xvssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_hu_w' must be a constant integer}} -+ return res; -+} -+ -+v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2, int var) { -+ v8u32 res = __lasx_xvssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __lasx_xvssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __lasx_xvssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_wu_d' must be a constant integer}} -+ return res; -+} -+ -+v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2, int var) { -+ v4u64 res = __lasx_xvssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} -+ res |= __lasx_xvssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} -+ res |= __lasx_xvssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_du_q' must be a constant integer}} -+ return res; -+} -+ -+v4f64 xvpickve_d_f(v4f64 _1, int var) { -+ v4f64 res = __lasx_xvpickve_d_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} -+ res += __lasx_xvpickve_d_f(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} -+ res += __lasx_xvpickve_d_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d_f' must be a constant integer}} -+ return res; -+} -+ -+v8f32 xvpickve_w_f(v8f32 _1, int var) { -+ v8f32 res = __lasx_xvpickve_w_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res += __lasx_xvpickve_w_f(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res += __lasx_xvpickve_w_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w_f' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvrepli_b(int var) { -+ v32i8 res = __lasx_xvrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} -+ res |= __lasx_xvrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} -+ res |= __lasx_xvrepli_b(var); // expected-error {{argument to '__builtin_lasx_xvrepli_b' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvrepli_d(int var) { -+ v4i64 res = __lasx_xvrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} -+ res |= __lasx_xvrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} -+ res |= 
__lasx_xvrepli_d(var); // expected-error {{argument to '__builtin_lasx_xvrepli_d' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvrepli_h(int var) { -+ v16i16 res = __lasx_xvrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} -+ res |= __lasx_xvrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} -+ res |= __lasx_xvrepli_h(var); // expected-error {{argument to '__builtin_lasx_xvrepli_h' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvrepli_w(int var) { -+ v8i32 res = __lasx_xvrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} -+ res |= __lasx_xvrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} -+ res |= __lasx_xvrepli_w(var); // expected-error {{argument to '__builtin_lasx_xvrepli_w' must be a constant integer}} -+ return res; -+} -diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c -new file mode 100644 -index 000000000000..09b2d5fcacf5 ---- /dev/null -+++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c -@@ -0,0 +1,4430 @@ -+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s -+ -+#include <lasxintrin.h> -+ -+// CHECK-LABEL: @xvsll_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __lasx_xvsll_b(_1, _2); } -+// CHECK-LABEL: @xvsll_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __lasx_xvsll_h(_1, _2); } -+// CHECK-LABEL: @xvsll_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __lasx_xvsll_w(_1, _2); } -+// CHECK-LABEL: @xvsll_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __lasx_xvsll_d(_1, _2); } -+// CHECK-LABEL: @xvslli_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvslli_b(v32i8 _1) { return __lasx_xvslli_b(_1, 1); } -+// CHECK-LABEL: @xvslli_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvslli_h(v16i16 _1) { return __lasx_xvslli_h(_1, 1); } -+// CHECK-LABEL: @xvslli_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvslli_w(v8i32 _1) { return __lasx_xvslli_w(_1, 1); } -+// CHECK-LABEL: @xvslli_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64>
[[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvslli_d(v4i64 _1) { return __lasx_xvslli_d(_1, 1); } -+// CHECK-LABEL: @xvsra_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __lasx_xvsra_b(_1, _2); } -+// CHECK-LABEL: @xvsra_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __lasx_xvsra_h(_1, _2); } -+// CHECK-LABEL: @xvsra_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __lasx_xvsra_w(_1, _2); } -+// CHECK-LABEL: @xvsra_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __lasx_xvsra_d(_1, _2); } -+// CHECK-LABEL: @xvsrai_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsrai_b(v32i8 _1) { return __lasx_xvsrai_b(_1, 1); } -+// CHECK-LABEL: @xvsrai_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsrai_h(v16i16 _1) { return __lasx_xvsrai_h(_1, 1); } -+// CHECK-LABEL: @xvsrai_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsrai_w(v8i32 _1) { return __lasx_xvsrai_w(_1, 1); } -+// CHECK-LABEL: @xvsrai_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsrai_d(v4i64 _1) { return __lasx_xvsrai_d(_1, 1); } -+// CHECK-LABEL: @xvsrar_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrar_b(_1, _2); } -+// CHECK-LABEL: @xvsrar_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrar_h(_1, _2); } -+// CHECK-LABEL: @xvsrar_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrar_w(_1, _2); } -+// CHECK-LABEL: @xvsrar_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { 
return __lasx_xvsrar_d(_1, _2); } -+// CHECK-LABEL: @xvsrari_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsrari_b(v32i8 _1) { return __lasx_xvsrari_b(_1, 1); } -+// CHECK-LABEL: @xvsrari_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsrari_h(v16i16 _1) { return __lasx_xvsrari_h(_1, 1); } -+// CHECK-LABEL: @xvsrari_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsrari_w(v8i32 _1) { return __lasx_xvsrari_w(_1, 1); } -+// CHECK-LABEL: @xvsrari_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsrari_d(v4i64 _1) { return __lasx_xvsrari_d(_1, 1); } -+// CHECK-LABEL: @xvsrl_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrl_b(_1, _2); } -+// CHECK-LABEL: @xvsrl_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrl_h(_1, _2); } -+// CHECK-LABEL: @xvsrl_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrl_w(_1, _2); } -+// CHECK-LABEL: @xvsrl_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrl_d(_1, _2); } -+// CHECK-LABEL: @xvsrli_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsrli_b(v32i8 _1) { return __lasx_xvsrli_b(_1, 1); } -+// CHECK-LABEL: @xvsrli_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsrli_h(v16i16 _1) { return __lasx_xvsrli_h(_1, 1); } -+// CHECK-LABEL: @xvsrli_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsrli_w(v8i32 _1) { return __lasx_xvsrli_w(_1, 1); } -+// CHECK-LABEL: @xvsrli_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsrli_d(v4i64 _1) { return __lasx_xvsrli_d(_1, 1); } -+// CHECK-LABEL: @xvsrlr_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> 
[[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrlr_b(_1, _2); } -+// CHECK-LABEL: @xvsrlr_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlr_h(_1, _2); } -+// CHECK-LABEL: @xvsrlr_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlr_w(_1, _2); } -+// CHECK-LABEL: @xvsrlr_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlr_d(_1, _2); } -+// CHECK-LABEL: @xvsrlri_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsrlri_b(v32i8 _1) { return __lasx_xvsrlri_b(_1, 1); } -+// CHECK-LABEL: @xvsrlri_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsrlri_h(v16i16 _1) { return __lasx_xvsrlri_h(_1, 1); } -+// CHECK-LABEL: @xvsrlri_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsrlri_w(v8i32 _1) { return __lasx_xvsrlri_w(_1, 1); } -+// CHECK-LABEL: @xvsrlri_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsrlri_d(v4i64 _1) { return __lasx_xvsrlri_d(_1, 1); } -+// CHECK-LABEL: @xvbitclr_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitclr_b(_1, _2); } -+// CHECK-LABEL: @xvbitclr_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitclr_h(_1, _2); } -+// CHECK-LABEL: @xvbitclr_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitclr_w(_1, _2); } -+// CHECK-LABEL: @xvbitclr_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitclr_d(_1, _2); } -+// CHECK-LABEL: @xvbitclri_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1:%.*]], i32 1) 
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvbitclri_b(v32u8 _1) { return __lasx_xvbitclri_b(_1, 1); } -+// CHECK-LABEL: @xvbitclri_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvbitclri_h(v16u16 _1) { return __lasx_xvbitclri_h(_1, 1); } -+// CHECK-LABEL: @xvbitclri_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvbitclri_w(v8u32 _1) { return __lasx_xvbitclri_w(_1, 1); } -+// CHECK-LABEL: @xvbitclri_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvbitclri_d(v4u64 _1) { return __lasx_xvbitclri_d(_1, 1); } -+// CHECK-LABEL: @xvbitset_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitset_b(_1, _2); } -+// CHECK-LABEL: @xvbitset_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitset_h(_1, _2); } -+// CHECK-LABEL: @xvbitset_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitset_w(_1, _2); } -+// CHECK-LABEL: @xvbitset_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitset_d(_1, _2); } -+// CHECK-LABEL: @xvbitseti_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvbitseti_b(v32u8 _1) { return __lasx_xvbitseti_b(_1, 1); } -+// CHECK-LABEL: @xvbitseti_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvbitseti_h(v16u16 _1) { return __lasx_xvbitseti_h(_1, 1); } -+// CHECK-LABEL: @xvbitseti_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvbitseti_w(v8u32 _1) { return __lasx_xvbitseti_w(_1, 1); } -+// CHECK-LABEL: @xvbitseti_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvbitseti_d(v4u64 _1) { return __lasx_xvbitseti_d(_1, 1); } -+// CHECK-LABEL: @xvbitrev_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] 
-+//
-+v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitrev_b(_1, _2); }
-+// CHECK-LABEL: @xvbitrev_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitrev_h(_1, _2); }
-+// CHECK-LABEL: @xvbitrev_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitrev_w(_1, _2); }
-+// CHECK-LABEL: @xvbitrev_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitrev_d(_1, _2); }
-+// CHECK-LABEL: @xvbitrevi_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvbitrevi_b(v32u8 _1) { return __lasx_xvbitrevi_b(_1, 1); }
-+// CHECK-LABEL: @xvbitrevi_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvbitrevi_h(v16u16 _1) { return __lasx_xvbitrevi_h(_1, 1); }
-+// CHECK-LABEL: @xvbitrevi_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvbitrevi_w(v8u32 _1) { return __lasx_xvbitrevi_w(_1, 1); }
-+// CHECK-LABEL: @xvbitrevi_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvbitrevi_d(v4u64 _1) { return __lasx_xvbitrevi_d(_1, 1); }
-+// CHECK-LABEL: @xvadd_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvadd_b(_1, _2); }
-+// CHECK-LABEL: @xvadd_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvadd_h(_1, _2); }
-+// CHECK-LABEL: @xvadd_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvadd_w(_1, _2); }
-+// CHECK-LABEL: @xvadd_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvadd_d(_1, _2); }
-+// CHECK-LABEL: @xvaddi_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvaddi_bu(v32i8 _1) { return __lasx_xvaddi_bu(_1, 1); }
-+// CHECK-LABEL: @xvaddi_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvaddi_hu(v16i16 _1) { return __lasx_xvaddi_hu(_1, 1); }
-+// CHECK-LABEL: @xvaddi_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvaddi_wu(v8i32 _1) { return __lasx_xvaddi_wu(_1, 1); }
-+// CHECK-LABEL: @xvaddi_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvaddi_du(v4i64 _1) { return __lasx_xvaddi_du(_1, 1); }
-+// CHECK-LABEL: @xvsub_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __lasx_xvsub_b(_1, _2); }
-+// CHECK-LABEL: @xvsub_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __lasx_xvsub_h(_1, _2); }
-+// CHECK-LABEL: @xvsub_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __lasx_xvsub_w(_1, _2); }
-+// CHECK-LABEL: @xvsub_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __lasx_xvsub_d(_1, _2); }
-+// CHECK-LABEL: @xvsubi_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsubi_bu(v32i8 _1) { return __lasx_xvsubi_bu(_1, 1); }
-+// CHECK-LABEL: @xvsubi_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsubi_hu(v16i16 _1) { return __lasx_xvsubi_hu(_1, 1); }
-+// CHECK-LABEL: @xvsubi_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsubi_wu(v8i32 _1) { return __lasx_xvsubi_wu(_1, 1); }
-+// CHECK-LABEL: @xvsubi_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvsubi_du(v4i64 _1) { return __lasx_xvsubi_du(_1, 1); }
-+// CHECK-LABEL: @xvmax_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __lasx_xvmax_b(_1, _2); }
-+// CHECK-LABEL: @xvmax_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __lasx_xvmax_h(_1, _2); }
-+// CHECK-LABEL: @xvmax_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __lasx_xvmax_w(_1, _2); }
-+// CHECK-LABEL: @xvmax_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __lasx_xvmax_d(_1, _2); }
-+// CHECK-LABEL: @xvmaxi_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvmaxi_b(v32i8 _1) { return __lasx_xvmaxi_b(_1, 1); }
-+// CHECK-LABEL: @xvmaxi_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvmaxi_h(v16i16 _1) { return __lasx_xvmaxi_h(_1, 1); }
-+// CHECK-LABEL: @xvmaxi_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvmaxi_w(v8i32 _1) { return __lasx_xvmaxi_w(_1, 1); }
-+// CHECK-LABEL: @xvmaxi_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvmaxi_d(v4i64 _1) { return __lasx_xvmaxi_d(_1, 1); }
-+// CHECK-LABEL: @xvmax_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmax_bu(_1, _2); }
-+// CHECK-LABEL: @xvmax_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmax_hu(_1, _2); }
-+// CHECK-LABEL: @xvmax_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmax_wu(_1, _2); }
-+// CHECK-LABEL: @xvmax_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __lasx_xvmax_du(_1, _2); }
-+// CHECK-LABEL: @xvmaxi_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvmaxi_bu(v32u8 _1) { return __lasx_xvmaxi_bu(_1, 1); }
-+// CHECK-LABEL: @xvmaxi_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvmaxi_hu(v16u16 _1) { return __lasx_xvmaxi_hu(_1, 1); }
-+// CHECK-LABEL: @xvmaxi_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvmaxi_wu(v8u32 _1) { return __lasx_xvmaxi_wu(_1, 1); }
-+// CHECK-LABEL: @xvmaxi_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvmaxi_du(v4u64 _1) { return __lasx_xvmaxi_du(_1, 1); }
-+// CHECK-LABEL: @xvmin_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __lasx_xvmin_b(_1, _2); }
-+// CHECK-LABEL: @xvmin_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __lasx_xvmin_h(_1, _2); }
-+// CHECK-LABEL: @xvmin_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __lasx_xvmin_w(_1, _2); }
-+// CHECK-LABEL: @xvmin_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __lasx_xvmin_d(_1, _2); }
-+// CHECK-LABEL: @xvmini_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvmini_b(v32i8 _1) { return __lasx_xvmini_b(_1, 1); }
-+// CHECK-LABEL: @xvmini_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvmini_h(v16i16 _1) { return __lasx_xvmini_h(_1, 1); }
-+// CHECK-LABEL: @xvmini_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvmini_w(v8i32 _1) { return __lasx_xvmini_w(_1, 1); }
-+// CHECK-LABEL: @xvmini_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvmini_d(v4i64 _1) { return __lasx_xvmini_d(_1, 1); }
-+// CHECK-LABEL: @xvmin_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmin_bu(_1, _2); }
-+// CHECK-LABEL: @xvmin_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmin_hu(_1, _2); }
-+// CHECK-LABEL: @xvmin_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmin_wu(_1, _2); }
-+// CHECK-LABEL: @xvmin_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __lasx_xvmin_du(_1, _2); }
-+// CHECK-LABEL: @xvmini_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvmini_bu(v32u8 _1) { return __lasx_xvmini_bu(_1, 1); }
-+// CHECK-LABEL: @xvmini_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvmini_hu(v16u16 _1) { return __lasx_xvmini_hu(_1, 1); }
-+// CHECK-LABEL: @xvmini_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvmini_wu(v8u32 _1) { return __lasx_xvmini_wu(_1, 1); }
-+// CHECK-LABEL: @xvmini_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvmini_du(v4u64 _1) { return __lasx_xvmini_du(_1, 1); }
-+// CHECK-LABEL: @xvseq_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __lasx_xvseq_b(_1, _2); }
-+// CHECK-LABEL: @xvseq_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __lasx_xvseq_h(_1, _2); }
-+// CHECK-LABEL: @xvseq_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __lasx_xvseq_w(_1, _2); }
-+// CHECK-LABEL: @xvseq_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __lasx_xvseq_d(_1, _2); }
-+// CHECK-LABEL: @xvseqi_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvseqi_b(v32i8 _1) { return __lasx_xvseqi_b(_1, 1); }
-+// CHECK-LABEL: @xvseqi_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvseqi_h(v16i16 _1) { return __lasx_xvseqi_h(_1, 1); }
-+// CHECK-LABEL: @xvseqi_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvseqi_w(v8i32 _1) { return __lasx_xvseqi_w(_1, 1); }
-+// CHECK-LABEL: @xvseqi_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvseqi_d(v4i64 _1) { return __lasx_xvseqi_d(_1, 1); }
-+// CHECK-LABEL: @xvslt_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __lasx_xvslt_b(_1, _2); }
-+// CHECK-LABEL: @xvslt_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __lasx_xvslt_h(_1, _2); }
-+// CHECK-LABEL: @xvslt_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __lasx_xvslt_w(_1, _2); }
-+// CHECK-LABEL: @xvslt_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __lasx_xvslt_d(_1, _2); }
-+// CHECK-LABEL: @xvslti_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvslti_b(v32i8 _1) { return __lasx_xvslti_b(_1, 1); }
-+// CHECK-LABEL: @xvslti_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvslti_h(v16i16 _1) { return __lasx_xvslti_h(_1, 1); }
-+// CHECK-LABEL: @xvslti_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvslti_w(v8i32 _1) { return __lasx_xvslti_w(_1, 1); }
-+// CHECK-LABEL: @xvslti_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvslti_d(v4i64 _1) { return __lasx_xvslti_d(_1, 1); }
-+// CHECK-LABEL: @xvslt_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __lasx_xvslt_bu(_1, _2); }
-+// CHECK-LABEL: @xvslt_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __lasx_xvslt_hu(_1, _2); }
-+// CHECK-LABEL: @xvslt_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __lasx_xvslt_wu(_1, _2); }
-+// CHECK-LABEL: @xvslt_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __lasx_xvslt_du(_1, _2); }
-+// CHECK-LABEL: @xvslti_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvslti_bu(v32u8 _1) { return __lasx_xvslti_bu(_1, 1); }
-+// CHECK-LABEL: @xvslti_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvslti_hu(v16u16 _1) { return __lasx_xvslti_hu(_1, 1); }
-+// CHECK-LABEL: @xvslti_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvslti_wu(v8u32 _1) { return __lasx_xvslti_wu(_1, 1); }
-+// CHECK-LABEL: @xvslti_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvslti_du(v4u64 _1) { return __lasx_xvslti_du(_1, 1); }
-+// CHECK-LABEL: @xvsle_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __lasx_xvsle_b(_1, _2); }
-+// CHECK-LABEL: @xvsle_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __lasx_xvsle_h(_1, _2); }
-+// CHECK-LABEL: @xvsle_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __lasx_xvsle_w(_1, _2); }
-+// CHECK-LABEL: @xvsle_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __lasx_xvsle_d(_1, _2); }
-+// CHECK-LABEL: @xvslei_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvslei_b(v32i8 _1) { return __lasx_xvslei_b(_1, 1); }
-+// CHECK-LABEL: @xvslei_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvslei_h(v16i16 _1) { return __lasx_xvslei_h(_1, 1); }
-+// CHECK-LABEL: @xvslei_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvslei_w(v8i32 _1) { return __lasx_xvslei_w(_1, 1); }
-+// CHECK-LABEL: @xvslei_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvslei_d(v4i64 _1) { return __lasx_xvslei_d(_1, 1); }
-+// CHECK-LABEL: @xvsle_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsle_bu(_1, _2); }
-+// CHECK-LABEL: @xvsle_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsle_hu(_1, _2); }
-+// CHECK-LABEL: @xvsle_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsle_wu(_1, _2); }
-+// CHECK-LABEL: @xvsle_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __lasx_xvsle_du(_1, _2); }
-+// CHECK-LABEL: @xvslei_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvslei_bu(v32u8 _1) { return __lasx_xvslei_bu(_1, 1); }
-+// CHECK-LABEL: @xvslei_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvslei_hu(v16u16 _1) { return __lasx_xvslei_hu(_1, 1); }
-+// CHECK-LABEL: @xvslei_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvslei_wu(v8u32 _1) { return __lasx_xvslei_wu(_1, 1); }
-+// CHECK-LABEL: @xvslei_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvslei_du(v4u64 _1) { return __lasx_xvslei_du(_1, 1); }
-+// CHECK-LABEL: @xvsat_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsat_b(v32i8 _1) { return __lasx_xvsat_b(_1, 1); }
-+// CHECK-LABEL: @xvsat_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsat_h(v16i16 _1) { return __lasx_xvsat_h(_1, 1); }
-+// CHECK-LABEL: @xvsat_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsat_w(v8i32 _1) { return __lasx_xvsat_w(_1, 1); }
-+// CHECK-LABEL: @xvsat_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvsat_d(v4i64 _1) { return __lasx_xvsat_d(_1, 1); }
-+// CHECK-LABEL: @xvsat_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvsat_bu(v32u8 _1) { return __lasx_xvsat_bu(_1, 1); }
-+// CHECK-LABEL: @xvsat_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvsat_hu(v16u16 _1) { return __lasx_xvsat_hu(_1, 1); }
-+// CHECK-LABEL: @xvsat_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvsat_wu(v8u32 _1) { return __lasx_xvsat_wu(_1, 1); }
-+// CHECK-LABEL: @xvsat_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvsat_du(v4u64 _1) { return __lasx_xvsat_du(_1, 1); }
-+// CHECK-LABEL: @xvadda_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __lasx_xvadda_b(_1, _2); }
-+// CHECK-LABEL: @xvadda_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __lasx_xvadda_h(_1, _2); }
-+// CHECK-LABEL: @xvadda_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __lasx_xvadda_w(_1, _2); }
-+// CHECK-LABEL: @xvadda_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __lasx_xvadda_d(_1, _2); }
-+// CHECK-LABEL: @xvsadd_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvsadd_b(_1, _2); }
-+// CHECK-LABEL: @xvsadd_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvsadd_h(_1, _2); }
-+// CHECK-LABEL: @xvsadd_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvsadd_w(_1, _2); }
-+// CHECK-LABEL: @xvsadd_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvsadd_d(_1, _2); }
-+// CHECK-LABEL: @xvsadd_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsadd_bu(_1, _2); }
-+// CHECK-LABEL: @xvsadd_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsadd_hu(_1, _2); }
-+// CHECK-LABEL: @xvsadd_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsadd_wu(_1, _2); }
-+// CHECK-LABEL: @xvsadd_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __lasx_xvsadd_du(_1, _2); }
-+// CHECK-LABEL: @xvavg_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __lasx_xvavg_b(_1, _2); }
-+// CHECK-LABEL: @xvavg_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __lasx_xvavg_h(_1, _2); }
-+// CHECK-LABEL: @xvavg_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __lasx_xvavg_w(_1, _2); }
-+// CHECK-LABEL: @xvavg_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __lasx_xvavg_d(_1, _2); }
-+// CHECK-LABEL: @xvavg_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavg_bu(_1, _2); }
-+// CHECK-LABEL: @xvavg_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavg_hu(_1, _2); }
-+// CHECK-LABEL: @xvavg_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavg_wu(_1, _2); }
-+// CHECK-LABEL: @xvavg_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __lasx_xvavg_du(_1, _2); }
-+// CHECK-LABEL: @xvavgr_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __lasx_xvavgr_b(_1, _2); }
-+// CHECK-LABEL: @xvavgr_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __lasx_xvavgr_h(_1, _2); }
-+// CHECK-LABEL: @xvavgr_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __lasx_xvavgr_w(_1, _2); }
-+// CHECK-LABEL: @xvavgr_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __lasx_xvavgr_d(_1, _2); }
-+// CHECK-LABEL: @xvavgr_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavgr_bu(_1, _2); }
-+// CHECK-LABEL: @xvavgr_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavgr_hu(_1, _2); }
-+// CHECK-LABEL: @xvavgr_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavgr_wu(_1, _2); }
-+// CHECK-LABEL: @xvavgr_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __lasx_xvavgr_du(_1, _2); }
-+// CHECK-LABEL: @xvssub_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __lasx_xvssub_b(_1, _2); }
-+// CHECK-LABEL: @xvssub_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __lasx_xvssub_h(_1, _2); }
-+// CHECK-LABEL: @xvssub_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __lasx_xvssub_w(_1, _2); }
-+// CHECK-LABEL: @xvssub_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __lasx_xvssub_d(_1, _2); }
-+// CHECK-LABEL: @xvssub_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __lasx_xvssub_bu(_1, _2); }
-+// CHECK-LABEL: @xvssub_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __lasx_xvssub_hu(_1, _2); }
-+// CHECK-LABEL: @xvssub_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __lasx_xvssub_wu(_1, _2); }
-+// CHECK-LABEL: @xvssub_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __lasx_xvssub_du(_1, _2); }
-+// CHECK-LABEL: @xvabsd_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __lasx_xvabsd_b(_1, _2); }
-+// CHECK-LABEL: @xvabsd_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __lasx_xvabsd_h(_1, _2); }
-+// CHECK-LABEL: @xvabsd_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __lasx_xvabsd_w(_1, _2); }
-+// CHECK-LABEL: @xvabsd_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __lasx_xvabsd_d(_1, _2); }
-+// CHECK-LABEL: @xvabsd_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvabsd_bu(_1, _2); }
-+// CHECK-LABEL: @xvabsd_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvabsd_hu(_1, _2); }
-+// CHECK-LABEL: @xvabsd_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvabsd_wu(_1, _2); }
-+// CHECK-LABEL: @xvabsd_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __lasx_xvabsd_du(_1, _2); }
-+// CHECK-LABEL: @xvmul_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __lasx_xvmul_b(_1, _2); }
-+// CHECK-LABEL: @xvmul_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __lasx_xvmul_h(_1, _2); }
-+// CHECK-LABEL: @xvmul_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __lasx_xvmul_w(_1, _2); }
-+// CHECK-LABEL: @xvmul_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __lasx_xvmul_d(_1, _2); }
-+// CHECK-LABEL: @xvmadd_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmadd_b(_1, _2, _3); }
-+// CHECK-LABEL: @xvmadd_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmadd_h(_1, _2, _3); }
-+// CHECK-LABEL: @xvmadd_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmadd_w(_1, _2, _3); }
-+// CHECK-LABEL: @xvmadd_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmadd_d(_1, _2, _3); }
-+// CHECK-LABEL: @xvmsub_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmsub_b(_1, _2, _3); }
-+// CHECK-LABEL: @xvmsub_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmsub_h(_1, _2, _3); }
-+// CHECK-LABEL: @xvmsub_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmsub_w(_1, _2, _3); }
-+// CHECK-LABEL: @xvmsub_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmsub_d(_1, _2, _3); }
-+// CHECK-LABEL: @xvdiv_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __lasx_xvdiv_b(_1, _2); }
-+// CHECK-LABEL: @xvdiv_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __lasx_xvdiv_h(_1, _2); }
-+// CHECK-LABEL: @xvdiv_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __lasx_xvdiv_w(_1, _2); }
-+// CHECK-LABEL: @xvdiv_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __lasx_xvdiv_d(_1, _2); }
-+// CHECK-LABEL: @xvdiv_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __lasx_xvdiv_bu(_1, _2); }
-+// CHECK-LABEL: @xvdiv_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __lasx_xvdiv_hu(_1, _2); }
-+// CHECK-LABEL: @xvdiv_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __lasx_xvdiv_wu(_1, _2); }
-+// CHECK-LABEL: @xvdiv_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __lasx_xvdiv_du(_1, _2); }
-+// CHECK-LABEL: @xvhaddw_h_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhaddw_h_b(_1, _2); }
-+// CHECK-LABEL: @xvhaddw_w_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhaddw_w_h(_1, _2); }
-+// CHECK-LABEL: @xvhaddw_d_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhaddw_d_w(_1, _2); }
-+// CHECK-LABEL: @xvhaddw_hu_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhaddw_hu_bu(_1, _2); }
-+// CHECK-LABEL: @xvhaddw_wu_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhaddw_wu_hu(_1, _2); }
-+// CHECK-LABEL: @xvhaddw_du_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhaddw_du_wu(_1, _2); }
-+// CHECK-LABEL: @xvhsubw_h_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhsubw_h_b(_1, _2); }
-+// CHECK-LABEL: @xvhsubw_w_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhsubw_w_h(_1, _2); }
-+// CHECK-LABEL: @xvhsubw_d_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhsubw_d_w(_1, _2); }
-+// CHECK-LABEL: @xvhsubw_hu_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhsubw_hu_bu(_1, _2); }
-+// CHECK-LABEL: @xvhsubw_wu_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhsubw_wu_hu(_1, _2); }
-+// CHECK-LABEL: @xvhsubw_du_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhsubw_du_wu(_1, _2); }
-+// CHECK-LABEL: @xvmod_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __lasx_xvmod_b(_1, _2); }
-+// CHECK-LABEL: @xvmod_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __lasx_xvmod_h(_1, _2); }
-+// CHECK-LABEL: @xvmod_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __lasx_xvmod_w(_1, _2); }
-+// CHECK-LABEL: @xvmod_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __lasx_xvmod_d(_1, _2); }
-+// CHECK-LABEL: @xvmod_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmod_bu(_1, _2); }
-+// CHECK-LABEL: @xvmod_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmod_hu(_1, _2); }
-+// CHECK-LABEL: @xvmod_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmod_wu(_1, _2); }
-+// CHECK-LABEL: @xvmod_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __lasx_xvmod_du(_1, _2); }
-+// CHECK-LABEL: @xvrepl128vei_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvrepl128vei_b(v32i8 _1) { return __lasx_xvrepl128vei_b(_1, 1); }
-+// CHECK-LABEL: @xvrepl128vei_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvrepl128vei_h(v16i16 _1) { return __lasx_xvrepl128vei_h(_1, 1); }
-+// CHECK-LABEL: @xvrepl128vei_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvrepl128vei_w(v8i32 _1) { return __lasx_xvrepl128vei_w(_1, 1); }
-+// CHECK-LABEL: @xvrepl128vei_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvrepl128vei_d(v4i64 _1) { return __lasx_xvrepl128vei_d(_1, 1); }
-+// CHECK-LABEL: @xvpickev_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickev_b(_1, _2); }
-+// CHECK-LABEL: @xvpickev_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickev_h(_1, _2); }
-+// CHECK-LABEL: @xvpickev_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickev_w(_1, _2); }
-+// CHECK-LABEL: @xvpickev_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickev_d(_1, _2); }
-+// CHECK-LABEL: @xvpickod_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickod_b(_1, _2); }
-+// CHECK-LABEL: @xvpickod_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickod_h(_1, _2); }
-+// CHECK-LABEL: @xvpickod_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickod_w(_1, _2); }
-+// CHECK-LABEL: @xvpickod_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickod_d(_1, _2); }
-+// CHECK-LABEL: @xvilvh_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvh_b(_1, _2); }
-+// CHECK-LABEL: @xvilvh_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvh_h(_1, _2); }
-+// CHECK-LABEL: @xvilvh_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvh_w(_1, _2); }
-+// CHECK-LABEL: @xvilvh_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvh_d(_1, _2); }
-+// CHECK-LABEL: @xvilvl_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvl_b(_1, _2); }
-+// CHECK-LABEL: @xvilvl_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvl_h(_1, _2); }
-+// CHECK-LABEL: @xvilvl_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvl_w(_1, _2); }
-+// CHECK-LABEL: @xvilvl_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvl_d(_1, _2); }
-+// CHECK-LABEL: @xvpackev_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackev_b(_1, _2); }
-+// CHECK-LABEL: @xvpackev_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackev_h(_1, _2); }
-+// CHECK-LABEL: @xvpackev_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackev_w(_1, _2); }
-+// CHECK-LABEL: @xvpackev_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackev_d(_1, _2); }
-+// CHECK-LABEL: @xvpackod_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackod_b(_1, _2); }
-+// CHECK-LABEL: @xvpackod_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackod_h(_1, _2); }
-+// CHECK-LABEL: @xvpackod_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackod_w(_1, _2); }
-+// CHECK-LABEL: @xvpackod_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackod_d(_1, _2); }
-+// CHECK-LABEL: @xvshuf_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvshuf_b(_1, _2, _3); }
-+// CHECK-LABEL: @xvshuf_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvshuf_h(_1, _2, _3); }
-+// CHECK-LABEL: @xvshuf_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvshuf_w(_1, _2, _3); }
-+// CHECK-LABEL: @xvshuf_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvshuf_d(_1, _2, _3); }
-+// CHECK-LABEL: @xvand_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __lasx_xvand_v(_1, _2); }
-+// CHECK-LABEL: @xvandi_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvandi_b(v32u8 _1) { return __lasx_xvandi_b(_1, 1); }
-+// CHECK-LABEL: @xvor_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __lasx_xvor_v(_1, _2); }
-+// CHECK-LABEL: @xvori_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvori_b(v32u8 _1) { return __lasx_xvori_b(_1, 1); }
-+// CHECK-LABEL: @xvnor_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __lasx_xvnor_v(_1, _2); }
-+// CHECK-LABEL: @xvnori_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvnori_b(v32u8 _1) { return __lasx_xvnori_b(_1, 1); }
-+// CHECK-LABEL: @xvxor_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __lasx_xvxor_v(_1, _2); }
-+// CHECK-LABEL: @xvxori_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvxori_b(v32u8 _1) { return __lasx_xvxori_b(_1, 1); }
-+// CHECK-LABEL: @xvbitsel_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __lasx_xvbitsel_v(_1, _2, _3); }
-+// CHECK-LABEL: @xvbitseli_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitseli_b(_1, _2, 1); }
-+// CHECK-LABEL: @xvshuf4i_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvshuf4i_b(v32i8 _1) { return __lasx_xvshuf4i_b(_1, 1); }
-+// CHECK-LABEL: @xvshuf4i_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvshuf4i_h(v16i16 _1) { return __lasx_xvshuf4i_h(_1, 1); }
-+// CHECK-LABEL: @xvshuf4i_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvshuf4i_w(v8i32 _1) { return __lasx_xvshuf4i_w(_1, 1); }
-+// CHECK-LABEL: @xvreplgr2vr_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvreplgr2vr_b(int _1) { return __lasx_xvreplgr2vr_b(_1); }
-+// CHECK-LABEL: @xvreplgr2vr_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvreplgr2vr_h(int _1) { return __lasx_xvreplgr2vr_h(_1); }
-+// CHECK-LABEL: @xvreplgr2vr_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvreplgr2vr_w(int _1) { return __lasx_xvreplgr2vr_w(_1); }
-+// CHECK-LABEL: @xvreplgr2vr_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvreplgr2vr_d(int _1) { return __lasx_xvreplgr2vr_d(_1); }
-+// CHECK-LABEL: @xvpcnt_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvpcnt_b(v32i8 _1) { return __lasx_xvpcnt_b(_1); }
-+// CHECK-LABEL: @xvpcnt_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvpcnt_h(v16i16 _1) { return __lasx_xvpcnt_h(_1); }
-+// CHECK-LABEL: @xvpcnt_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvpcnt_w(v8i32 _1) { return __lasx_xvpcnt_w(_1); }
-+// CHECK-LABEL: @xvpcnt_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvpcnt_d(v4i64 _1) { return __lasx_xvpcnt_d(_1); }
-+// CHECK-LABEL: @xvclo_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvclo_b(v32i8 _1) { return __lasx_xvclo_b(_1); }
-+// CHECK-LABEL: @xvclo_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvclo_h(v16i16 _1) { return __lasx_xvclo_h(_1); }
-+// CHECK-LABEL: @xvclo_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvclo_w(v8i32 _1) { return __lasx_xvclo_w(_1); }
-+// CHECK-LABEL: @xvclo_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvclo_d(v4i64 _1) { return __lasx_xvclo_d(_1); }
-+// CHECK-LABEL: @xvclz_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvclz_b(v32i8 _1) { return __lasx_xvclz_b(_1); }
-+// CHECK-LABEL: @xvclz_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvclz_h(v16i16 _1) { return __lasx_xvclz_h(_1); }
-+// CHECK-LABEL: @xvclz_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvclz_w(v8i32 _1) { return __lasx_xvclz_w(_1); }
-+// CHECK-LABEL: @xvclz_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvclz_d(v4i64 _1) { return __lasx_xvclz_d(_1); }
-+// CHECK-LABEL: @xvfadd_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __lasx_xvfadd_s(_1, _2); }
-+// CHECK-LABEL: @xvfadd_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __lasx_xvfadd_d(_1, _2); }
-+// CHECK-LABEL: @xvfsub_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __lasx_xvfsub_s(_1, _2); }
-+// CHECK-LABEL: @xvfsub_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __lasx_xvfsub_d(_1, _2); }
-+// CHECK-LABEL: @xvfmul_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmul_s(_1, _2); }
-+// CHECK-LABEL: @xvfmul_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmul_d(_1, _2); } -+// CHECK-LABEL: @xvfdiv_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __lasx_xvfdiv_s(_1, _2); } -+// CHECK-LABEL: @xvfdiv_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __lasx_xvfdiv_d(_1, _2); } -+// CHECK-LABEL: @xvfcvt_h_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcvt_h_s(_1, _2); } -+// CHECK-LABEL: @xvfcvt_s_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcvt_s_d(_1, _2); } -+// CHECK-LABEL: @xvfmin_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmin_s(_1, _2); } -+// CHECK-LABEL: @xvfmin_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmin_d(_1, _2); } -+// CHECK-LABEL: @xvfmina_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmina_s(_1, _2); } -+// CHECK-LABEL: @xvfmina_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmina_d(_1, _2); } -+// CHECK-LABEL: @xvfmax_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmax_s(_1, _2); } -+// CHECK-LABEL: @xvfmax_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmax_d(_1, _2); } -+// CHECK-LABEL: @xvfmaxa_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmaxa_s(_1, _2); } -+// CHECK-LABEL: @xvfmaxa_d( 
-+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmaxa_d(_1, _2); } -+// CHECK-LABEL: @xvfclass_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfclass_s(v8f32 _1) { return __lasx_xvfclass_s(_1); } -+// CHECK-LABEL: @xvfclass_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfclass_d(v4f64 _1) { return __lasx_xvfclass_d(_1); } -+// CHECK-LABEL: @xvfsqrt_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvfsqrt_s(v8f32 _1) { return __lasx_xvfsqrt_s(_1); } -+// CHECK-LABEL: @xvfsqrt_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvfsqrt_d(v4f64 _1) { return __lasx_xvfsqrt_d(_1); } -+// CHECK-LABEL: @xvfrecip_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvfrecip_s(v8f32 _1) { return __lasx_xvfrecip_s(_1); } -+// CHECK-LABEL: @xvfrecip_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvfrecip_d(v4f64 _1) { return __lasx_xvfrecip_d(_1); } -+// CHECK-LABEL: @xvfrint_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvfrint_s(v8f32 _1) { return __lasx_xvfrint_s(_1); } -+// CHECK-LABEL: @xvfrint_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvfrint_d(v4f64 _1) { return __lasx_xvfrint_d(_1); } -+// CHECK-LABEL: @xvfrsqrt_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvfrsqrt_s(v8f32 _1) { return __lasx_xvfrsqrt_s(_1); } -+// CHECK-LABEL: @xvfrsqrt_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvfrsqrt_d(v4f64 _1) { return __lasx_xvfrsqrt_d(_1); } -+// CHECK-LABEL: @xvflogb_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvflogb_s(v8f32 _1) { return __lasx_xvflogb_s(_1); } -+// CHECK-LABEL: @xvflogb_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvflogb_d(v4f64 _1) { return 
__lasx_xvflogb_d(_1); } -+// CHECK-LABEL: @xvfcvth_s_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvfcvth_s_h(v16i16 _1) { return __lasx_xvfcvth_s_h(_1); } -+// CHECK-LABEL: @xvfcvth_d_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvfcvth_d_s(v8f32 _1) { return __lasx_xvfcvth_d_s(_1); } -+// CHECK-LABEL: @xvfcvtl_s_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvfcvtl_s_h(v16i16 _1) { return __lasx_xvfcvtl_s_h(_1); } -+// CHECK-LABEL: @xvfcvtl_d_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvfcvtl_d_s(v8f32 _1) { return __lasx_xvfcvtl_d_s(_1); } -+// CHECK-LABEL: @xvftint_w_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvftint_w_s(v8f32 _1) { return __lasx_xvftint_w_s(_1); } -+// CHECK-LABEL: @xvftint_l_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftint_l_d(v4f64 _1) { return __lasx_xvftint_l_d(_1); } -+// CHECK-LABEL: @xvftint_wu_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvftint_wu_s(v8f32 _1) { return __lasx_xvftint_wu_s(_1); } -+// CHECK-LABEL: @xvftint_lu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvftint_lu_d(v4f64 _1) { return __lasx_xvftint_lu_d(_1); } -+// CHECK-LABEL: @xvftintrz_w_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvftintrz_w_s(v8f32 _1) { return __lasx_xvftintrz_w_s(_1); } -+// CHECK-LABEL: @xvftintrz_l_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrz_l_d(v4f64 _1) { return __lasx_xvftintrz_l_d(_1); } -+// CHECK-LABEL: @xvftintrz_wu_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvftintrz_wu_s(v8f32 _1) { return __lasx_xvftintrz_wu_s(_1); } -+// CHECK-LABEL: @xvftintrz_lu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvftintrz_lu_d(v4f64 _1) { return __lasx_xvftintrz_lu_d(_1); } -+// CHECK-LABEL: @xvffint_s_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> 
[[_1:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvffint_s_w(v8i32 _1) { return __lasx_xvffint_s_w(_1); } -+// CHECK-LABEL: @xvffint_d_l( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvffint_d_l(v4i64 _1) { return __lasx_xvffint_d_l(_1); } -+// CHECK-LABEL: @xvffint_s_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvffint_s_wu(v8u32 _1) { return __lasx_xvffint_s_wu(_1); } -+// CHECK-LABEL: @xvffint_d_lu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvffint_d_lu(v4u64 _1) { return __lasx_xvffint_d_lu(_1); } -+// CHECK-LABEL: @xvreplve_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1:%.*]], i32 [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvreplve_b(v32i8 _1, int _2) { return __lasx_xvreplve_b(_1, _2); } -+// CHECK-LABEL: @xvreplve_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1:%.*]], i32 [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvreplve_h(v16i16 _1, int _2) { return __lasx_xvreplve_h(_1, _2); } -+// CHECK-LABEL: @xvreplve_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1:%.*]], i32 [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvreplve_w(v8i32 _1, int _2) { return __lasx_xvreplve_w(_1, _2); } -+// CHECK-LABEL: @xvreplve_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1:%.*]], i32 [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvreplve_d(v4i64 _1, int _2) { return __lasx_xvreplve_d(_1, _2); } -+// CHECK-LABEL: @xvpermi_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __lasx_xvpermi_w(_1, _2, 1); } -+// CHECK-LABEL: @xvandn_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __lasx_xvandn_v(_1, _2); } -+// CHECK-LABEL: @xvneg_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvneg_b(v32i8 _1) { return __lasx_xvneg_b(_1); } -+// CHECK-LABEL: @xvneg_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvneg_h(v16i16 _1) { return __lasx_xvneg_h(_1); } -+// CHECK-LABEL: @xvneg_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvneg_w(v8i32 _1) { return __lasx_xvneg_w(_1); } -+// 
CHECK-LABEL: @xvneg_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvneg_d(v4i64 _1) { return __lasx_xvneg_d(_1); } -+// CHECK-LABEL: @xvmuh_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __lasx_xvmuh_b(_1, _2); } -+// CHECK-LABEL: @xvmuh_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __lasx_xvmuh_h(_1, _2); } -+// CHECK-LABEL: @xvmuh_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __lasx_xvmuh_w(_1, _2); } -+// CHECK-LABEL: @xvmuh_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __lasx_xvmuh_d(_1, _2); } -+// CHECK-LABEL: @xvmuh_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmuh_bu(_1, _2); } -+// CHECK-LABEL: @xvmuh_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmuh_hu(_1, _2); } -+// CHECK-LABEL: @xvmuh_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmuh_wu(_1, _2); } -+// CHECK-LABEL: @xvmuh_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __lasx_xvmuh_du(_1, _2); } -+// CHECK-LABEL: @xvsllwil_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsllwil_h_b(v32i8 _1) { return __lasx_xvsllwil_h_b(_1, 1); } -+// CHECK-LABEL: @xvsllwil_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsllwil_w_h(v16i16 _1) { return __lasx_xvsllwil_w_h(_1, 1); } -+// CHECK-LABEL: @xvsllwil_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsllwil_d_w(v8i32 _1) { return __lasx_xvsllwil_d_w(_1, 1); } -+// CHECK-LABEL: @xvsllwil_hu_bu( -+// CHECK-NEXT: entry: 
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvsllwil_hu_bu(v32u8 _1) { return __lasx_xvsllwil_hu_bu(_1, 1); } -+// CHECK-LABEL: @xvsllwil_wu_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvsllwil_wu_hu(v16u16 _1) { return __lasx_xvsllwil_wu_hu(_1, 1); } -+// CHECK-LABEL: @xvsllwil_du_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvsllwil_du_wu(v8u32 _1) { return __lasx_xvsllwil_du_wu(_1, 1); } -+// CHECK-LABEL: @xvsran_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsran_b_h(_1, _2); } -+// CHECK-LABEL: @xvsran_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsran_h_w(_1, _2); } -+// CHECK-LABEL: @xvsran_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsran_w_d(_1, _2); } -+// CHECK-LABEL: @xvssran_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssran_b_h(_1, _2); } -+// CHECK-LABEL: @xvssran_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssran_h_w(_1, _2); } -+// CHECK-LABEL: @xvssran_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssran_w_d(_1, _2); } -+// CHECK-LABEL: @xvssran_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssran_bu_h(_1, _2); } -+// CHECK-LABEL: @xvssran_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssran_hu_w(_1, _2); } -+// CHECK-LABEL: @xvssran_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// 
-+v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssran_wu_d(_1, _2); } -+// CHECK-LABEL: @xvsrarn_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrarn_b_h(_1, _2); } -+// CHECK-LABEL: @xvsrarn_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrarn_h_w(_1, _2); } -+// CHECK-LABEL: @xvsrarn_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrarn_w_d(_1, _2); } -+// CHECK-LABEL: @xvssrarn_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrarn_b_h(_1, _2); } -+// CHECK-LABEL: @xvssrarn_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrarn_h_w(_1, _2); } -+// CHECK-LABEL: @xvssrarn_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrarn_w_d(_1, _2); } -+// CHECK-LABEL: @xvssrarn_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrarn_bu_h(_1, _2); } -+// CHECK-LABEL: @xvssrarn_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrarn_hu_w(_1, _2); } -+// CHECK-LABEL: @xvssrarn_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrarn_wu_d(_1, _2); } -+// CHECK-LABEL: @xvsrln_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrln_b_h(_1, _2); } -+// CHECK-LABEL: @xvsrln_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrln_h_w(_1, _2); } -+// CHECK-LABEL: 
@xvsrln_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrln_w_d(_1, _2); } -+// CHECK-LABEL: @xvssrln_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrln_bu_h(_1, _2); } -+// CHECK-LABEL: @xvssrln_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrln_hu_w(_1, _2); } -+// CHECK-LABEL: @xvssrln_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrln_wu_d(_1, _2); } -+// CHECK-LABEL: @xvsrlrn_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrn_b_h(_1, _2); } -+// CHECK-LABEL: @xvsrlrn_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrn_h_w(_1, _2); } -+// CHECK-LABEL: @xvsrlrn_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrn_w_d(_1, _2); } -+// CHECK-LABEL: @xvssrlrn_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrlrn_bu_h(_1, _2); } -+// CHECK-LABEL: @xvssrlrn_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrlrn_hu_w(_1, _2); } -+// CHECK-LABEL: @xvssrlrn_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrlrn_wu_d(_1, _2); } -+// CHECK-LABEL: @xvfrstpi_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __lasx_xvfrstpi_b(_1, _2, 1); } -+// CHECK-LABEL: @xvfrstpi_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __lasx_xvfrstpi_h(_1, _2, 1); } -+// CHECK-LABEL: @xvfrstp_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvfrstp_b(_1, _2, _3); } -+// CHECK-LABEL: @xvfrstp_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvfrstp_h(_1, _2, _3); } -+// CHECK-LABEL: @xvshuf4i_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __lasx_xvshuf4i_d(_1, _2, 1); } -+// CHECK-LABEL: @xvbsrl_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvbsrl_v(v32i8 _1) { return __lasx_xvbsrl_v(_1, 1); } -+// CHECK-LABEL: @xvbsll_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvbsll_v(v32i8 _1) { return __lasx_xvbsll_v(_1, 1); } -+// CHECK-LABEL: @xvextrins_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __lasx_xvextrins_b(_1, _2, 1); } -+// CHECK-LABEL: @xvextrins_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __lasx_xvextrins_h(_1, _2, 1); } -+// CHECK-LABEL: @xvextrins_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __lasx_xvextrins_w(_1, _2, 1); } -+// CHECK-LABEL: @xvextrins_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __lasx_xvextrins_d(_1, _2, 1); } -+// CHECK-LABEL: @xvmskltz_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvmskltz_b(v32i8 _1) { return __lasx_xvmskltz_b(_1); } -+// CHECK-LABEL: @xvmskltz_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmskltz_h(v16i16 _1) { return 
__lasx_xvmskltz_h(_1); } -+// CHECK-LABEL: @xvmskltz_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmskltz_w(v8i32 _1) { return __lasx_xvmskltz_w(_1); } -+// CHECK-LABEL: @xvmskltz_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmskltz_d(v4i64 _1) { return __lasx_xvmskltz_d(_1); } -+// CHECK-LABEL: @xvsigncov_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __lasx_xvsigncov_b(_1, _2); } -+// CHECK-LABEL: @xvsigncov_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __lasx_xvsigncov_h(_1, _2); } -+// CHECK-LABEL: @xvsigncov_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __lasx_xvsigncov_w(_1, _2); } -+// CHECK-LABEL: @xvsigncov_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __lasx_xvsigncov_d(_1, _2); } -+// CHECK-LABEL: @xvfmadd_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmadd_s(_1, _2, _3); } -+// CHECK-LABEL: @xvfmadd_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmadd_d(_1, _2, _3); } -+// CHECK-LABEL: @xvfmsub_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmsub_s(_1, _2, _3); } -+// CHECK-LABEL: @xvfmsub_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmsub_d(_1, _2, _3); } -+// CHECK-LABEL: @xvfnmadd_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmadd_s(_1, _2, _3); } 
-+// CHECK-LABEL: @xvfnmadd_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmadd_d(_1, _2, _3); } -+// CHECK-LABEL: @xvfnmsub_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmsub_s(_1, _2, _3); } -+// CHECK-LABEL: @xvfnmsub_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmsub_d(_1, _2, _3); } -+// CHECK-LABEL: @xvftintrne_w_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvftintrne_w_s(v8f32 _1) { return __lasx_xvftintrne_w_s(_1); } -+// CHECK-LABEL: @xvftintrne_l_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrne_l_d(v4f64 _1) { return __lasx_xvftintrne_l_d(_1); } -+// CHECK-LABEL: @xvftintrp_w_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvftintrp_w_s(v8f32 _1) { return __lasx_xvftintrp_w_s(_1); } -+// CHECK-LABEL: @xvftintrp_l_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrp_l_d(v4f64 _1) { return __lasx_xvftintrp_l_d(_1); } -+// CHECK-LABEL: @xvftintrm_w_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvftintrm_w_s(v8f32 _1) { return __lasx_xvftintrm_w_s(_1); } -+// CHECK-LABEL: @xvftintrm_l_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrm_l_d(v4f64 _1) { return __lasx_xvftintrm_l_d(_1); } -+// CHECK-LABEL: @xvftint_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftint_w_d(_1, _2); } -+// CHECK-LABEL: @xvffint_s_l( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __lasx_xvffint_s_l(_1, _2); } -+// CHECK-LABEL: @xvftintrz_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x 
double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrz_w_d(_1, _2); } -+// CHECK-LABEL: @xvftintrp_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrp_w_d(_1, _2); } -+// CHECK-LABEL: @xvftintrm_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrm_w_d(_1, _2); } -+// CHECK-LABEL: @xvftintrne_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrne_w_d(_1, _2); } -+// CHECK-LABEL: @xvftinth_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftinth_l_s(v8f32 _1) { return __lasx_xvftinth_l_s(_1); } -+// CHECK-LABEL: @xvftintl_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintl_l_s(v8f32 _1) { return __lasx_xvftintl_l_s(_1); } -+// CHECK-LABEL: @xvffinth_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvffinth_d_w(v8i32 _1) { return __lasx_xvffinth_d_w(_1); } -+// CHECK-LABEL: @xvffintl_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvffintl_d_w(v8i32 _1) { return __lasx_xvffintl_d_w(_1); } -+// CHECK-LABEL: @xvftintrzh_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrzh_l_s(v8f32 _1) { return __lasx_xvftintrzh_l_s(_1); } -+// CHECK-LABEL: @xvftintrzl_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrzl_l_s(v8f32 _1) { return __lasx_xvftintrzl_l_s(_1); } -+// CHECK-LABEL: @xvftintrph_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrph_l_s(v8f32 _1) { return __lasx_xvftintrph_l_s(_1); } -+// CHECK-LABEL: @xvftintrpl_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrpl_l_s(v8f32 _1) { return __lasx_xvftintrpl_l_s(_1); } -+// CHECK-LABEL: @xvftintrmh_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrmh_l_s(v8f32 _1) { return __lasx_xvftintrmh_l_s(_1); } -+// CHECK-LABEL: @xvftintrml_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrml_l_s(v8f32 _1) { return __lasx_xvftintrml_l_s(_1); } -+// CHECK-LABEL: @xvftintrneh_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrneh_l_s(v8f32 _1) { return __lasx_xvftintrneh_l_s(_1); } -+// CHECK-LABEL: @xvftintrnel_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrnel_l_s(v8f32 _1) { return __lasx_xvftintrnel_l_s(_1); } -+// CHECK-LABEL: @xvfrintrne_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> -+// CHECK-NEXT: ret <8 x i32> [[TMP1]] -+// -+v8i32 xvfrintrne_s(v8f32 _1) { return __lasx_xvfrintrne_s(_1); } -+// CHECK-LABEL: @xvfrintrne_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> -+// CHECK-NEXT: ret <4 x i64> [[TMP1]] -+// -+v4i64 xvfrintrne_d(v4f64 _1) { return __lasx_xvfrintrne_d(_1); } -+// CHECK-LABEL: @xvfrintrz_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> -+// CHECK-NEXT: ret <8 x i32> [[TMP1]] -+// -+v8i32 xvfrintrz_s(v8f32 _1) { return __lasx_xvfrintrz_s(_1); } -+// CHECK-LABEL: @xvfrintrz_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> -+// CHECK-NEXT: ret <4 x i64> [[TMP1]] -+// -+v4i64 xvfrintrz_d(v4f64 _1) { return __lasx_xvfrintrz_d(_1); } -+// CHECK-LABEL: @xvfrintrp_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> -+// CHECK-NEXT: ret <8 x i32> [[TMP1]] -+// -+v8i32 xvfrintrp_s(v8f32 _1) { return __lasx_xvfrintrp_s(_1); } -+// CHECK-LABEL: @xvfrintrp_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> -+// CHECK-NEXT: ret <4 x i64> [[TMP1]] -+// -+v4i64 xvfrintrp_d(v4f64 _1) { return __lasx_xvfrintrp_d(_1); } -+// CHECK-LABEL: @xvfrintrm_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> -+// CHECK-NEXT: ret <8 x i32> [[TMP1]] -+// -+v8i32 xvfrintrm_s(v8f32 _1) { return __lasx_xvfrintrm_s(_1); } -+// CHECK-LABEL: @xvfrintrm_d( 
-+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> -+// CHECK-NEXT: ret <4 x i64> [[TMP1]] -+// -+v4i64 xvfrintrm_d(v4f64 _1) { return __lasx_xvfrintrm_d(_1); } -+// CHECK-LABEL: @xvld( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvld(void * _1) { return __lasx_xvld(_1, 1); } -+// CHECK-LABEL: @xvst( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret void -+// -+void xvst(v32i8 _1, void * _2) { return __lasx_xvst(_1, _2, 1); } -+// CHECK-LABEL: @xvstelm_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) -+// CHECK-NEXT: ret void -+// -+void xvstelm_b(v32i8 _1, void * _2) { return __lasx_xvstelm_b(_1, _2, 1, 1); } -+// CHECK-LABEL: @xvstelm_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) -+// CHECK-NEXT: ret void -+// -+void xvstelm_h(v16i16 _1, void * _2) { return __lasx_xvstelm_h(_1, _2, 2, 1); } -+// CHECK-LABEL: @xvstelm_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) -+// CHECK-NEXT: ret void -+// -+void xvstelm_w(v8i32 _1, void * _2) { return __lasx_xvstelm_w(_1, _2, 4, 1); } -+// CHECK-LABEL: @xvstelm_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) -+// CHECK-NEXT: ret void -+// -+void xvstelm_d(v4i64 _1, void * _2) { return __lasx_xvstelm_d(_1, _2, 8, 1); } -+// CHECK-LABEL: @xvinsve0_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __lasx_xvinsve0_w(_1, _2, 1); } -+// CHECK-LABEL: @xvinsve0_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __lasx_xvinsve0_d(_1, _2, 1); } -+// CHECK-LABEL: @xvpickve_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvpickve_w(v8i32 _1) { return __lasx_xvpickve_w(_1, 1); } -+// CHECK-LABEL: @xvpickve_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvpickve_d(v4i64 _1) { return __lasx_xvpickve_d(_1, 1); } -+// CHECK-LABEL: @xvssrlrn_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrn_b_h(_1, _2); } -+// CHECK-LABEL: @xvssrlrn_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call 
<16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrn_h_w(_1, _2); } -+// CHECK-LABEL: @xvssrlrn_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrn_w_d(_1, _2); } -+// CHECK-LABEL: @xvssrln_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrln_b_h(_1, _2); } -+// CHECK-LABEL: @xvssrln_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrln_h_w(_1, _2); } -+// CHECK-LABEL: @xvssrln_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrln_w_d(_1, _2); } -+// CHECK-LABEL: @xvorn_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __lasx_xvorn_v(_1, _2); } -+// CHECK-LABEL: @xvldi( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvldi() { return __lasx_xvldi(1); } -+// CHECK-LABEL: @xvldx( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvldx(void * _1) { return __lasx_xvldx(_1, 1); } -+// CHECK-LABEL: @xvstx( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) -+// CHECK-NEXT: ret void -+// -+void xvstx(v32i8 _1, void * _2) { return __lasx_xvstx(_1, _2, 1); } -+// CHECK-LABEL: @xvextl_qu_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvextl_qu_du(v4u64 _1) { return __lasx_xvextl_qu_du(_1); } -+// CHECK-LABEL: @xvinsgr2vr_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1:%.*]], i32 1, i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvinsgr2vr_w(v8i32 _1) { return __lasx_xvinsgr2vr_w(_1, 1, 1); } -+// CHECK-LABEL: @xvinsgr2vr_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1:%.*]], i64 1, i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvinsgr2vr_d(v4i64 _1) { return __lasx_xvinsgr2vr_d(_1, 1, 1); } -+// CHECK-LABEL: @xvreplve0_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <32 x 
i8> [[TMP0]] -+// -+v32i8 xvreplve0_b(v32i8 _1) { return __lasx_xvreplve0_b(_1); } -+// CHECK-LABEL: @xvreplve0_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvreplve0_h(v16i16 _1) { return __lasx_xvreplve0_h(_1); } -+// CHECK-LABEL: @xvreplve0_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvreplve0_w(v8i32 _1) { return __lasx_xvreplve0_w(_1); } -+// CHECK-LABEL: @xvreplve0_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvreplve0_d(v4i64 _1) { return __lasx_xvreplve0_d(_1); } -+// CHECK-LABEL: @xvreplve0_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvreplve0_q(v32i8 _1) { return __lasx_xvreplve0_q(_1); } -+// CHECK-LABEL: @vext2xv_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 vext2xv_h_b(v32i8 _1) { return __lasx_vext2xv_h_b(_1); } -+// CHECK-LABEL: @vext2xv_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 vext2xv_w_h(v16i16 _1) { return __lasx_vext2xv_w_h(_1); } -+// CHECK-LABEL: @vext2xv_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 vext2xv_d_w(v8i32 _1) { return __lasx_vext2xv_d_w(_1); } -+// CHECK-LABEL: @vext2xv_w_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 vext2xv_w_b(v32i8 _1) { return __lasx_vext2xv_w_b(_1); } -+// CHECK-LABEL: @vext2xv_d_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 vext2xv_d_h(v16i16 _1) { return __lasx_vext2xv_d_h(_1); } -+// CHECK-LABEL: @vext2xv_d_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 vext2xv_d_b(v32i8 _1) { return __lasx_vext2xv_d_b(_1); } -+// CHECK-LABEL: @vext2xv_hu_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 vext2xv_hu_bu(v32i8 _1) { return __lasx_vext2xv_hu_bu(_1); } -+// CHECK-LABEL: @vext2xv_wu_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 vext2xv_wu_hu(v16i16 _1) { return __lasx_vext2xv_wu_hu(_1); } -+// CHECK-LABEL: @vext2xv_du_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> 
[[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 vext2xv_du_wu(v8i32 _1) { return __lasx_vext2xv_du_wu(_1); } -+// CHECK-LABEL: @vext2xv_wu_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 vext2xv_wu_bu(v32i8 _1) { return __lasx_vext2xv_wu_bu(_1); } -+// CHECK-LABEL: @vext2xv_du_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 vext2xv_du_hu(v16i16 _1) { return __lasx_vext2xv_du_hu(_1); } -+// CHECK-LABEL: @vext2xv_du_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 vext2xv_du_bu(v32i8 _1) { return __lasx_vext2xv_du_bu(_1); } -+// CHECK-LABEL: @xvpermi_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __lasx_xvpermi_q(_1, _2, 1); } -+// CHECK-LABEL: @xvpermi_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvpermi_d(v4i64 _1) { return __lasx_xvpermi_d(_1, 1); } -+// CHECK-LABEL: @xvperm_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __lasx_xvperm_w(_1, _2); } -+// CHECK-LABEL: @xvldrepl_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvldrepl_b(void * _1) { return __lasx_xvldrepl_b(_1, 1); } -+// CHECK-LABEL: @xvldrepl_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvldrepl_h(void * _1) { return __lasx_xvldrepl_h(_1, 2); } -+// CHECK-LABEL: @xvldrepl_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvldrepl_w(void * _1) { return __lasx_xvldrepl_w(_1, 4); } -+// CHECK-LABEL: @xvldrepl_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvldrepl_d(void * _1) { return __lasx_xvldrepl_d(_1, 8); } -+// CHECK-LABEL: @xvpickve2gr_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xvpickve2gr_w(v8i32 _1) { return __lasx_xvpickve2gr_w(_1, 1); } -+// CHECK-LABEL: @xvpickve2gr_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+unsigned int xvpickve2gr_wu(v8i32 _1) { return __lasx_xvpickve2gr_wu(_1, 1); } -+// CHECK-LABEL: @xvpickve2gr_d( 
-+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i64 [[TMP0]] -+// -+long xvpickve2gr_d(v4i64 _1) { return __lasx_xvpickve2gr_d(_1, 1); } -+// CHECK-LABEL: @xvpickve2gr_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i64 [[TMP0]] -+// -+unsigned long int xvpickve2gr_du(v4i64 _1) { return __lasx_xvpickve2gr_du(_1, 1); } -+// CHECK-LABEL: @xvaddwev_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwev_q_d(_1, _2); } -+// CHECK-LABEL: @xvaddwev_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwev_d_w(_1, _2); } -+// CHECK-LABEL: @xvaddwev_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwev_w_h(_1, _2); } -+// CHECK-LABEL: @xvaddwev_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwev_h_b(_1, _2); } -+// CHECK-LABEL: @xvaddwev_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwev_q_du(_1, _2); } -+// CHECK-LABEL: @xvaddwev_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwev_d_wu(_1, _2); } -+// CHECK-LABEL: @xvaddwev_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwev_w_hu(_1, _2); } -+// CHECK-LABEL: @xvaddwev_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwev_h_bu(_1, _2); } -+// CHECK-LABEL: @xvsubwev_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwev_q_d(_1, _2); } -+// CHECK-LABEL: @xvsubwev_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1:%.*]], <8 
x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwev_d_w(_1, _2); } -+// CHECK-LABEL: @xvsubwev_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwev_w_h(_1, _2); } -+// CHECK-LABEL: @xvsubwev_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwev_h_b(_1, _2); } -+// CHECK-LABEL: @xvsubwev_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwev_q_du(_1, _2); } -+// CHECK-LABEL: @xvsubwev_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwev_d_wu(_1, _2); } -+// CHECK-LABEL: @xvsubwev_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwev_w_hu(_1, _2); } -+// CHECK-LABEL: @xvsubwev_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwev_h_bu(_1, _2); } -+// CHECK-LABEL: @xvmulwev_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwev_q_d(_1, _2); } -+// CHECK-LABEL: @xvmulwev_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwev_d_w(_1, _2); } -+// CHECK-LABEL: @xvmulwev_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwev_w_h(_1, _2); } -+// CHECK-LABEL: @xvmulwev_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwev_h_b(_1, _2); } -+// CHECK-LABEL: @xvmulwev_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 
xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwev_q_du(_1, _2); } -+// CHECK-LABEL: @xvmulwev_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwev_d_wu(_1, _2); } -+// CHECK-LABEL: @xvmulwev_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwev_w_hu(_1, _2); } -+// CHECK-LABEL: @xvmulwev_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwev_h_bu(_1, _2); } -+// CHECK-LABEL: @xvaddwod_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwod_q_d(_1, _2); } -+// CHECK-LABEL: @xvaddwod_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwod_d_w(_1, _2); } -+// CHECK-LABEL: @xvaddwod_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwod_w_h(_1, _2); } -+// CHECK-LABEL: @xvaddwod_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwod_h_b(_1, _2); } -+// CHECK-LABEL: @xvaddwod_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwod_q_du(_1, _2); } -+// CHECK-LABEL: @xvaddwod_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwod_d_wu(_1, _2); } -+// CHECK-LABEL: @xvaddwod_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwod_w_hu(_1, _2); } -+// CHECK-LABEL: @xvaddwod_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return 
__lasx_xvaddwod_h_bu(_1, _2); } -+// CHECK-LABEL: @xvsubwod_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwod_q_d(_1, _2); } -+// CHECK-LABEL: @xvsubwod_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwod_d_w(_1, _2); } -+// CHECK-LABEL: @xvsubwod_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwod_w_h(_1, _2); } -+// CHECK-LABEL: @xvsubwod_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwod_h_b(_1, _2); } -+// CHECK-LABEL: @xvsubwod_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwod_q_du(_1, _2); } -+// CHECK-LABEL: @xvsubwod_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwod_d_wu(_1, _2); } -+// CHECK-LABEL: @xvsubwod_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwod_w_hu(_1, _2); } -+// CHECK-LABEL: @xvsubwod_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwod_h_bu(_1, _2); } -+// CHECK-LABEL: @xvmulwod_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwod_q_d(_1, _2); } -+// CHECK-LABEL: @xvmulwod_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwod_d_w(_1, _2); } -+// CHECK-LABEL: @xvmulwod_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwod_w_h(_1, _2); } -+// CHECK-LABEL: @xvmulwod_h_b( -+// 
CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwod_h_b(_1, _2); } -+// CHECK-LABEL: @xvmulwod_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwod_q_du(_1, _2); } -+// CHECK-LABEL: @xvmulwod_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwod_d_wu(_1, _2); } -+// CHECK-LABEL: @xvmulwod_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwod_w_hu(_1, _2); } -+// CHECK-LABEL: @xvmulwod_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwod_h_bu(_1, _2); } -+// CHECK-LABEL: @xvaddwev_d_wu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwev_d_wu_w(_1, _2); } -+// CHECK-LABEL: @xvaddwev_w_hu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwev_w_hu_h(_1, _2); } -+// CHECK-LABEL: @xvaddwev_h_bu_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwev_h_bu_b(_1, _2); } -+// CHECK-LABEL: @xvmulwev_d_wu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwev_d_wu_w(_1, _2); } -+// CHECK-LABEL: @xvmulwev_w_hu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwev_w_hu_h(_1, _2); } -+// CHECK-LABEL: @xvmulwev_h_bu_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwev_h_bu_b(_1, _2); } -+// CHECK-LABEL: @xvaddwod_d_wu_w( 
-+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwod_d_wu_w(_1, _2); } -+// CHECK-LABEL: @xvaddwod_w_hu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwod_w_hu_h(_1, _2); } -+// CHECK-LABEL: @xvaddwod_h_bu_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwod_h_bu_b(_1, _2); } -+// CHECK-LABEL: @xvmulwod_d_wu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwod_d_wu_w(_1, _2); } -+// CHECK-LABEL: @xvmulwod_w_hu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwod_w_hu_h(_1, _2); } -+// CHECK-LABEL: @xvmulwod_h_bu_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwod_h_bu_b(_1, _2); } -+// CHECK-LABEL: @xvhaddw_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhaddw_q_d(_1, _2); } -+// CHECK-LABEL: @xvhaddw_qu_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhaddw_qu_du(_1, _2); } -+// CHECK-LABEL: @xvhsubw_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhsubw_q_d(_1, _2); } -+// CHECK-LABEL: @xvhsubw_qu_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhsubw_qu_du(_1, _2); } -+// CHECK-LABEL: @xvmaddwev_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_d(_1, _2, _3); } -+// CHECK-LABEL: 
@xvmaddwev_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_w(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_h(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_b(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwev_q_du(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwev_d_wu(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwev_w_hu(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwev_h_bu(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_d(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_w(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_h(_1, _2, _3); } -+// 
CHECK-LABEL: @xvmaddwod_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_b(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwod_q_du(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwod_d_wu(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwod_w_hu(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwod_h_bu(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_q_du_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_du_d(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_d_wu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_wu_w(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_w_hu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_hu_h(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_h_bu_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_bu_b(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_q_du_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmaddwod_q_du_d(v4i64 
_1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_du_d(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_d_wu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_wu_w(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_w_hu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_hu_h(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_h_bu_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_bu_b(_1, _2, _3); } -+// CHECK-LABEL: @xvrotr_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __lasx_xvrotr_b(_1, _2); } -+// CHECK-LABEL: @xvrotr_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __lasx_xvrotr_h(_1, _2); } -+// CHECK-LABEL: @xvrotr_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __lasx_xvrotr_w(_1, _2); } -+// CHECK-LABEL: @xvrotr_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __lasx_xvrotr_d(_1, _2); } -+// CHECK-LABEL: @xvadd_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __lasx_xvadd_q(_1, _2); } -+// CHECK-LABEL: @xvsub_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __lasx_xvsub_q(_1, _2); } -+// CHECK-LABEL: @xvaddwev_q_du_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwev_q_du_d(_1, _2); } -+// CHECK-LABEL: @xvaddwod_q_du_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwod_q_du_d(v4u64 _1, 
v4i64 _2) { return __lasx_xvaddwod_q_du_d(_1, _2); } -+// CHECK-LABEL: @xvmulwev_q_du_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwev_q_du_d(_1, _2); } -+// CHECK-LABEL: @xvmulwod_q_du_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwod_q_du_d(_1, _2); } -+// CHECK-LABEL: @xvmskgez_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvmskgez_b(v32i8 _1) { return __lasx_xvmskgez_b(_1); } -+// CHECK-LABEL: @xvmsknz_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvmsknz_b(v32i8 _1) { return __lasx_xvmsknz_b(_1); } -+// CHECK-LABEL: @xvexth_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvexth_h_b(v32i8 _1) { return __lasx_xvexth_h_b(_1); } -+// CHECK-LABEL: @xvexth_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvexth_w_h(v16i16 _1) { return __lasx_xvexth_w_h(_1); } -+// CHECK-LABEL: @xvexth_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvexth_d_w(v8i32 _1) { return __lasx_xvexth_d_w(_1); } -+// CHECK-LABEL: @xvexth_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvexth_q_d(v4i64 _1) { return __lasx_xvexth_q_d(_1); } -+// CHECK-LABEL: @xvexth_hu_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvexth_hu_bu(v32u8 _1) { return __lasx_xvexth_hu_bu(_1); } -+// CHECK-LABEL: @xvexth_wu_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvexth_wu_hu(v16u16 _1) { return __lasx_xvexth_wu_hu(_1); } -+// CHECK-LABEL: @xvexth_du_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvexth_du_wu(v8u32 _1) { return __lasx_xvexth_du_wu(_1); } -+// CHECK-LABEL: @xvexth_qu_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvexth_qu_du(v4u64 _1) { return __lasx_xvexth_qu_du(_1); } -+// CHECK-LABEL: @xvrotri_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvrotri_b(v32i8 _1) { return __lasx_xvrotri_b(_1, 1); } -+// CHECK-LABEL: @xvrotri_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvrotri_h(v16i16 _1) { return __lasx_xvrotri_h(_1, 1); } -+// CHECK-LABEL: @xvrotri_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvrotri_w(v8i32 _1) { return __lasx_xvrotri_w(_1, 1); } -+// CHECK-LABEL: @xvrotri_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvrotri_d(v4i64 _1) { return __lasx_xvrotri_d(_1, 1); } -+// CHECK-LABEL: @xvextl_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvextl_q_d(v4i64 _1) { return __lasx_xvextl_q_d(_1); } -+// CHECK-LABEL: @xvsrlni_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlni_b_h(_1, _2, 1); } -+// CHECK-LABEL: @xvsrlni_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlni_h_w(_1, _2, 1); } -+// CHECK-LABEL: @xvsrlni_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlni_w_d(_1, _2, 1); } -+// CHECK-LABEL: @xvsrlni_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlni_d_q(_1, _2, 1); } -+// CHECK-LABEL: @xvsrlrni_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlrni_b_h(_1, _2, 1); } -+// CHECK-LABEL: @xvsrlrni_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrni_h_w(_1, _2, 1); } -+// CHECK-LABEL: @xvsrlrni_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrni_w_d(_1, _2, 1); } -+// CHECK-LABEL: 
@xvsrlrni_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrni_d_q(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlni_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlni_b_h(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlni_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlni_h_w(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlni_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlni_w_d(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlni_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlni_d_q(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlni_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlni_bu_h(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlni_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlni_hu_w(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlni_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlni_wu_d(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlni_du_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlni_du_q(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlrni_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlrni_b_h(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlrni_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return 
__lasx_xvssrlrni_h_w(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlrni_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrni_w_d(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlrni_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrni_d_q(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlrni_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlrni_bu_h(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlrni_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlrni_hu_w(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlrni_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlrni_wu_d(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlrni_du_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlrni_du_q(_1, _2, 1); } -+// CHECK-LABEL: @xvsrani_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrani_b_h(_1, _2, 1); } -+// CHECK-LABEL: @xvsrani_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrani_h_w(_1, _2, 1); } -+// CHECK-LABEL: @xvsrani_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrani_w_d(_1, _2, 1); } -+// CHECK-LABEL: @xvsrani_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrani_d_q(_1, _2, 1); } -+// CHECK-LABEL: @xvsrarni_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 
xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrarni_b_h(_1, _2, 1); } -+// CHECK-LABEL: @xvsrarni_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrarni_h_w(_1, _2, 1); } -+// CHECK-LABEL: @xvsrarni_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrarni_w_d(_1, _2, 1); } -+// CHECK-LABEL: @xvsrarni_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrarni_d_q(_1, _2, 1); } -+// CHECK-LABEL: @xvssrani_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrani_b_h(_1, _2, 1); } -+// CHECK-LABEL: @xvssrani_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrani_h_w(_1, _2, 1); } -+// CHECK-LABEL: @xvssrani_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrani_w_d(_1, _2, 1); } -+// CHECK-LABEL: @xvssrani_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrani_d_q(_1, _2, 1); } -+// CHECK-LABEL: @xvssrani_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrani_bu_h(_1, _2, 1); } -+// CHECK-LABEL: @xvssrani_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrani_hu_w(_1, _2, 1); } -+// CHECK-LABEL: @xvssrani_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrani_wu_d(_1, _2, 1); } -+// CHECK-LABEL: @xvssrani_du_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: 
ret <4 x i64> [[TMP0]] -+// -+v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrani_du_q(_1, _2, 1); } -+// CHECK-LABEL: @xvssrarni_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrarni_b_h(_1, _2, 1); } -+// CHECK-LABEL: @xvssrarni_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrarni_h_w(_1, _2, 1); } -+// CHECK-LABEL: @xvssrarni_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrarni_w_d(_1, _2, 1); } -+// CHECK-LABEL: @xvssrarni_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrarni_d_q(_1, _2, 1); } -+// CHECK-LABEL: @xvssrarni_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrarni_bu_h(_1, _2, 1); } -+// CHECK-LABEL: @xvssrarni_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrarni_hu_w(_1, _2, 1); } -+// CHECK-LABEL: @xvssrarni_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrarni_wu_d(_1, _2, 1); } -+// CHECK-LABEL: @xvssrarni_du_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrarni_du_q(_1, _2, 1); } -+// CHECK-LABEL: @xbnz_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbnz_b(v32u8 _1) { return __lasx_xbnz_b(_1); } -+// CHECK-LABEL: @xbnz_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbnz_d(v4u64 _1) { return __lasx_xbnz_d(_1); } -+// CHECK-LABEL: @xbnz_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbnz_h(v16u16 _1) { return __lasx_xbnz_h(_1); } -+// CHECK-LABEL: @xbnz_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: 
[[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbnz_v(v32u8 _1) { return __lasx_xbnz_v(_1); } -+// CHECK-LABEL: @xbnz_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbnz_w(v8u32 _1) { return __lasx_xbnz_w(_1); } -+// CHECK-LABEL: @xbz_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbz_b(v32u8 _1) { return __lasx_xbz_b(_1); } -+// CHECK-LABEL: @xbz_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbz_d(v4u64 _1) { return __lasx_xbz_d(_1); } -+// CHECK-LABEL: @xbz_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbz_h(v16u16 _1) { return __lasx_xbz_h(_1); } -+// CHECK-LABEL: @xbz_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbz_v(v32u8 _1) { return __lasx_xbz_v(_1); } -+// CHECK-LABEL: @xbz_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbz_w(v8u32 _1) { return __lasx_xbz_w(_1); } -+// CHECK-LABEL: @xvfcmp_caf_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_caf_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_caf_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_caf_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_ceq_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_ceq_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_ceq_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_ceq_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cle_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cle_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cle_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cle_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_clt_d( -+// CHECK-NEXT: entry: 
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_clt_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_clt_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_clt_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cne_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cne_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cne_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cne_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cor_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cor_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cor_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cor_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cueq_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cueq_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cueq_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cueq_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cule_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cule_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cule_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cule_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cult_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cult_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cult_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cult_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cun_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cun_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cune_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cune_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cune_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cune_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cun_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cun_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_saf_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_saf_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_saf_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_saf_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_seq_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_seq_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_seq_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_seq_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sle_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sle_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sle_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sle_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_slt_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_slt_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_slt_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_slt_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sne_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sne_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sne_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sne_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sor_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sor_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sor_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sor_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sueq_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sueq_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sueq_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sueq_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sule_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sule_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sule_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sule_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sult_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sult_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sult_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sult_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sun_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sun_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sune_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sune_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sune_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sune_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sun_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sun_s(_1, _2); } -+// CHECK-LABEL: @xvpickve_d_f( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvpickve_d_f(v4f64 _1) { return __lasx_xvpickve_d_f(_1, 1); } -+// CHECK-LABEL: @xvpickve_w_f( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvpickve_w_f(v8f32 _1) { return __lasx_xvpickve_w_f(_1, 1); } -+// CHECK-LABEL: @xvrepli_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvrepli_b() { return __lasx_xvrepli_b(1); } -+// CHECK-LABEL: @xvrepli_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvrepli_d() { return __lasx_xvrepli_d(1); } -+// CHECK-LABEL: @xvrepli_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); } -+// CHECK-LABEL: @xvrepli_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); } -diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-error.c b/clang/test/CodeGen/LoongArch/lasx/builtin-error.c -new file mode 100644 -index 000000000000..724484465769 ---- /dev/null -+++ b/clang/test/CodeGen/LoongArch/lasx/builtin-error.c -@@ -0,0 +1,1392 @@ -+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -verify %s -+ -+typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); -+typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); -+typedef 
unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); -+typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); -+typedef short v16i16 __attribute__((vector_size(32), aligned(32))); -+typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); -+typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); -+typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); -+typedef int v8i32 __attribute__((vector_size(32), aligned(32))); -+typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); -+typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); -+typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); -+typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); -+typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); -+typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); -+typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); -+typedef float v8f32 __attribute__((vector_size(32), aligned(32))); -+typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); -+typedef double v4f64 __attribute__((vector_size(32), aligned(32))); -+typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); -+ -+v32i8 xvslli_b(v32i8 _1, int var) { -+ v32i8 res = __builtin_lasx_xvslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lasx_xvslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lasx_xvslli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_b' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvslli_h(v16i16 _1, int var) { -+ v16i16 res = __builtin_lasx_xvslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lasx_xvslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lasx_xvslli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_h' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvslli_w(v8i32 _1, int var) { -+ v8i32 res = __builtin_lasx_xvslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvslli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_w' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvslli_d(v4i64 _1, int var) { -+ v4i64 res = __builtin_lasx_xvslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __builtin_lasx_xvslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __builtin_lasx_xvslli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_d' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvsrai_b(v32i8 _1, int var) { -+ v32i8 res = __builtin_lasx_xvsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lasx_xvsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lasx_xvsrai_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_b' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvsrai_h(v16i16 _1, 
int var) { -+ v16i16 res = __builtin_lasx_xvsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lasx_xvsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lasx_xvsrai_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_h' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvsrai_w(v8i32 _1, int var) { -+ v8i32 res = __builtin_lasx_xvsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvsrai_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_w' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvsrai_d(v4i64 _1, int var) { -+ v4i64 res = __builtin_lasx_xvsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __builtin_lasx_xvsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __builtin_lasx_xvsrai_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_d' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvsrari_b(v32i8 _1, int var) { -+ v32i8 res = __builtin_lasx_xvsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lasx_xvsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lasx_xvsrari_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_b' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvsrari_h(v16i16 _1, int var) { -+ v16i16 res = __builtin_lasx_xvsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lasx_xvsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lasx_xvsrari_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_h' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvsrari_w(v8i32 _1, int var) { -+ v8i32 res = __builtin_lasx_xvsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvsrari_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_w' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvsrari_d(v4i64 _1, int var) { -+ v4i64 res = __builtin_lasx_xvsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __builtin_lasx_xvsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __builtin_lasx_xvsrari_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_d' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvsrli_b(v32i8 _1, int var) { -+ v32i8 res = __builtin_lasx_xvsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lasx_xvsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lasx_xvsrli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_b' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvsrli_h(v16i16 _1, int var) { -+ 
v16i16 res = __builtin_lasx_xvsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lasx_xvsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lasx_xvsrli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_h' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvsrli_w(v8i32 _1, int var) { -+ v8i32 res = __builtin_lasx_xvsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvsrli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_w' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvsrli_d(v4i64 _1, int var) { -+ v4i64 res = __builtin_lasx_xvsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __builtin_lasx_xvsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __builtin_lasx_xvsrli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_d' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvsrlri_b(v32i8 _1, int var) { -+ v32i8 res = __builtin_lasx_xvsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lasx_xvsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lasx_xvsrlri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_b' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvsrlri_h(v16i16 _1, int var) { -+ v16i16 res = __builtin_lasx_xvsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lasx_xvsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lasx_xvsrlri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_h' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvsrlri_w(v8i32 _1, int var) { -+ v8i32 res = __builtin_lasx_xvsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvsrlri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_w' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvsrlri_d(v4i64 _1, int var) { -+ v4i64 res = __builtin_lasx_xvsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __builtin_lasx_xvsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __builtin_lasx_xvsrlri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_d' must be a constant integer}} -+ return res; -+} -+ -+v32u8 xvbitclri_b(v32u8 _1, int var) { -+ v32u8 res = __builtin_lasx_xvbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lasx_xvbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lasx_xvbitclri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_b' must be a constant integer}} -+ return res; -+} -+ -+v16u16 xvbitclri_h(v16u16 _1, int var) { 
-+ v16u16 res = __builtin_lasx_xvbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lasx_xvbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lasx_xvbitclri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_h' must be a constant integer}} -+ return res; -+} -+ -+v8u32 xvbitclri_w(v8u32 _1, int var) { -+ v8u32 res = __builtin_lasx_xvbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvbitclri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_w' must be a constant integer}} -+ return res; -+} -+ -+v4u64 xvbitclri_d(v4u64 _1, int var) { -+ v4u64 res = __builtin_lasx_xvbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __builtin_lasx_xvbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __builtin_lasx_xvbitclri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_d' must be a constant integer}} -+ return res; -+} -+ -+v32u8 xvbitseti_b(v32u8 _1, int var) { -+ v32u8 res = __builtin_lasx_xvbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lasx_xvbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lasx_xvbitseti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_b' must be a constant integer}} -+ return res; -+} -+ -+v16u16 xvbitseti_h(v16u16 _1, int var) { -+ v16u16 res = __builtin_lasx_xvbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lasx_xvbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lasx_xvbitseti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_h' must be a constant integer}} -+ return res; -+} -+ -+v8u32 xvbitseti_w(v8u32 _1, int var) { -+ v8u32 res = __builtin_lasx_xvbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvbitseti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_w' must be a constant integer}} -+ return res; -+} -+ -+v4u64 xvbitseti_d(v4u64 _1, int var) { -+ v4u64 res = __builtin_lasx_xvbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __builtin_lasx_xvbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __builtin_lasx_xvbitseti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_d' must be a constant integer}} -+ return res; -+} -+ -+v32u8 xvbitrevi_b(v32u8 _1, int var) { -+ v32u8 res = __builtin_lasx_xvbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lasx_xvbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lasx_xvbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_b' must be a 
constant integer}} -+ return res; -+} -+ -+v16u16 xvbitrevi_h(v16u16 _1, int var) { -+ v16u16 res = __builtin_lasx_xvbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} -+ res |= __builtin_lasx_xvbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} -+ res |= __builtin_lasx_xvbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_h' must be a constant integer}} -+ return res; -+} -+ -+v8u32 xvbitrevi_w(v8u32 _1, int var) { -+ v8u32 res = __builtin_lasx_xvbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_w' must be a constant integer}} -+ return res; -+} -+ -+v4u64 xvbitrevi_d(v4u64 _1, int var) { -+ v4u64 res = __builtin_lasx_xvbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} -+ res |= __builtin_lasx_xvbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} -+ res |= __builtin_lasx_xvbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_d' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvaddi_bu(v32i8 _1, int var) { -+ v32i8 res = __builtin_lasx_xvaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvaddi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_bu' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvaddi_hu(v16i16 _1, int var) { -+ v16i16 res = __builtin_lasx_xvaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvaddi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_hu' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvaddi_wu(v8i32 _1, int var) { -+ v8i32 res = __builtin_lasx_xvaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvaddi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_wu' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvaddi_du(v4i64 _1, int var) { -+ v4i64 res = __builtin_lasx_xvaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvaddi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_du' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvsubi_bu(v32i8 _1, int var) { -+ v32i8 res = __builtin_lasx_xvsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvsubi_bu(_1, var); // expected-error 
{{argument to '__builtin_lasx_xvsubi_bu' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvsubi_hu(v16i16 _1, int var) { -+ v16i16 res = __builtin_lasx_xvsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvsubi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_hu' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvsubi_wu(v8i32 _1, int var) { -+ v8i32 res = __builtin_lasx_xvsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvsubi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_wu' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvsubi_du(v4i64 _1, int var) { -+ v4i64 res = __builtin_lasx_xvsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvsubi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_du' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvmaxi_b(v32i8 _1, int var) { -+ v32i8 res = __builtin_lasx_xvmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvmaxi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_b' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvmaxi_h(v16i16 _1, int var) { -+ v16i16 res = __builtin_lasx_xvmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvmaxi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_h' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvmaxi_w(v8i32 _1, int var) { -+ v8i32 res = __builtin_lasx_xvmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvmaxi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_w' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvmaxi_d(v4i64 _1, int var) { -+ v4i64 res = __builtin_lasx_xvmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvmaxi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_d' must be a constant integer}} -+ return res; -+} -+ -+v32u8 xvmaxi_bu(v32u8 _1, int var) { -+ v32u8 res = __builtin_lasx_xvmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvmaxi_bu(_1, var); // expected-error {{argument to 
'__builtin_lasx_xvmaxi_bu' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u16 xvmaxi_hu(v16u16 _1, int var) {
-+ v16u16 res = __builtin_lasx_xvmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_hu' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u32 xvmaxi_wu(v8u32 _1, int var) {
-+ v8u32 res = __builtin_lasx_xvmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_wu' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u64 xvmaxi_du(v4u64 _1, int var) {
-+ v4u64 res = __builtin_lasx_xvmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvmaxi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_du' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvmini_b(v32i8 _1, int var) {
-+ v32i8 res = __builtin_lasx_xvmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __builtin_lasx_xvmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __builtin_lasx_xvmini_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvmini_h(v16i16 _1, int var) {
-+ v16i16 res = __builtin_lasx_xvmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __builtin_lasx_xvmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __builtin_lasx_xvmini_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvmini_w(v8i32 _1, int var) {
-+ v8i32 res = __builtin_lasx_xvmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __builtin_lasx_xvmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __builtin_lasx_xvmini_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvmini_d(v4i64 _1, int var) {
-+ v4i64 res = __builtin_lasx_xvmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
-+ res |= __builtin_lasx_xvmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
-+ res |= __builtin_lasx_xvmini_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvmini_bu(v32u8 _1, int var) {
-+ v32u8 res = __builtin_lasx_xvmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvmini_bu(_1, var); // expected-error {{argument to
'__builtin_lasx_xvmini_bu' must be a constant integer}} -+ return res; -+} -+ -+v16u16 xvmini_hu(v16u16 _1, int var) { -+ v16u16 res = __builtin_lasx_xvmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvmini_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_hu' must be a constant integer}} -+ return res; -+} -+ -+v8u32 xvmini_wu(v8u32 _1, int var) { -+ v8u32 res = __builtin_lasx_xvmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvmini_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_wu' must be a constant integer}} -+ return res; -+} -+ -+v4u64 xvmini_du(v4u64 _1, int var) { -+ v4u64 res = __builtin_lasx_xvmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvmini_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_du' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvseqi_b(v32i8 _1, int var) { -+ v32i8 res = __builtin_lasx_xvseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvseqi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_b' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvseqi_h(v16i16 _1, int var) { -+ v16i16 res = __builtin_lasx_xvseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvseqi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_h' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvseqi_w(v8i32 _1, int var) { -+ v8i32 res = __builtin_lasx_xvseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvseqi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_w' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvseqi_d(v4i64 _1, int var) { -+ v4i64 res = __builtin_lasx_xvseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvseqi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_d' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvslti_b(v32i8 _1, int var) { -+ v32i8 res = __builtin_lasx_xvslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvslti_b(_1, var); // expected-error {{argument to 
'__builtin_lasx_xvslti_b' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvslti_h(v16i16 _1, int var) { -+ v16i16 res = __builtin_lasx_xvslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvslti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_h' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvslti_w(v8i32 _1, int var) { -+ v8i32 res = __builtin_lasx_xvslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvslti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_w' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvslti_d(v4i64 _1, int var) { -+ v4i64 res = __builtin_lasx_xvslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvslti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_d' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvslti_bu(v32u8 _1, int var) { -+ v32i8 res = __builtin_lasx_xvslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvslti_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_bu' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvslti_hu(v16u16 _1, int var) { -+ v16i16 res = __builtin_lasx_xvslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvslti_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_hu' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvslti_wu(v8u32 _1, int var) { -+ v8i32 res = __builtin_lasx_xvslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvslti_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_wu' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvslti_du(v4u64 _1, int var) { -+ v4i64 res = __builtin_lasx_xvslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvslti_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_du' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvslei_b(v32i8 _1, int var) { -+ v32i8 res = __builtin_lasx_xvslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvslei_b(_1, var); // expected-error {{argument to 
'__builtin_lasx_xvslei_b' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvslei_h(v16i16 _1, int var) { -+ v16i16 res = __builtin_lasx_xvslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvslei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_h' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvslei_w(v8i32 _1, int var) { -+ v8i32 res = __builtin_lasx_xvslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvslei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_w' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvslei_d(v4i64 _1, int var) { -+ v4i64 res = __builtin_lasx_xvslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} -+ res |= __builtin_lasx_xvslei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_d' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvslei_bu(v32u8 _1, int var) { -+ v32i8 res = __builtin_lasx_xvslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvslei_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_bu' must be a constant integer}} -+ return res; -+} -+ -+v16i16 xvslei_hu(v16u16 _1, int var) { -+ v16i16 res = __builtin_lasx_xvslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvslei_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_hu' must be a constant integer}} -+ return res; -+} -+ -+v8i32 xvslei_wu(v8u32 _1, int var) { -+ v8i32 res = __builtin_lasx_xvslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvslei_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_wu' must be a constant integer}} -+ return res; -+} -+ -+v4i64 xvslei_du(v4u64 _1, int var) { -+ v4i64 res = __builtin_lasx_xvslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} -+ res |= __builtin_lasx_xvslei_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_du' must be a constant integer}} -+ return res; -+} -+ -+v32i8 xvsat_b(v32i8 _1, int var) { -+ v32i8 res = __builtin_lasx_xvsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} -+ res |= __builtin_lasx_xvsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} -+ res |= __builtin_lasx_xvsat_b(_1, var); // expected-error {{argument to 
'__builtin_lasx_xvsat_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvsat_h(v16i16 _1, int var) {
-+ v16i16 res = __builtin_lasx_xvsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvsat_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvsat_w(v8i32 _1, int var) {
-+ v8i32 res = __builtin_lasx_xvsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvsat_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvsat_d(v4i64 _1, int var) {
-+ v4i64 res = __builtin_lasx_xvsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvsat_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvsat_bu(v32u8 _1, int var) {
-+ v32u8 res = __builtin_lasx_xvsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvsat_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_bu' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u16 xvsat_hu(v16u16 _1, int var) {
-+ v16u16 res = __builtin_lasx_xvsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvsat_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_hu' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u32 xvsat_wu(v8u32 _1, int var) {
-+ v8u32 res = __builtin_lasx_xvsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvsat_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_wu' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u64 xvsat_du(v4u64 _1, int var) {
-+ v4u64 res = __builtin_lasx_xvsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvsat_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_du' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvrepl128vei_b(v32i8 _1, int var) {
-+ v32i8 res = __builtin_lasx_xvrepl128vei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvrepl128vei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvrepl128vei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvrepl128vei_h(v16i16 _1, int var) {
-+ v16i16 res = __builtin_lasx_xvrepl128vei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvrepl128vei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvrepl128vei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvrepl128vei_w(v8i32 _1, int var) {
-+ v8i32 res = __builtin_lasx_xvrepl128vei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
-+ res |= __builtin_lasx_xvrepl128vei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-+ res |= __builtin_lasx_xvrepl128vei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvrepl128vei_d(v4i64 _1, int var) {
-+ v4i64 res = __builtin_lasx_xvrepl128vei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}}
-+ res |= __builtin_lasx_xvrepl128vei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-+ res |= __builtin_lasx_xvrepl128vei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvandi_b(v32u8 _1, int var) {
-+ v32u8 res = __builtin_lasx_xvandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvandi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvandi_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvori_b(v32u8 _1, int var) {
-+ v32u8 res = __builtin_lasx_xvori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvori_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvnori_b(v32u8 _1, int var) {
-+ v32u8 res = __builtin_lasx_xvnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvnori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvnori_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvxori_b(v32u8 _1, int var) {
-+ v32u8 res = __builtin_lasx_xvxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvxori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvxori_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvbitseli_b(v32u8 _1, v32u8 _2, int var) {
-+ v32u8 res = __builtin_lasx_xvbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvbitseli_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvshuf4i_b(v32i8 _1, int var) {
-+ v32i8 res = __builtin_lasx_xvshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvshuf4i_h(v16i16 _1, int var) {
-+ v16i16 res = __builtin_lasx_xvshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvshuf4i_w(v8i32 _1, int var) {
-+ v8i32 res = __builtin_lasx_xvshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2, int var) {
-+ v4i64 res = __builtin_lasx_xvshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvpermi_w(v8i32 _1, v8i32 _2, int var) {
-+ v8i32 res = __builtin_lasx_xvpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvpermi_d(v4i64 _1, int var) {
-+ v4i64 res = __builtin_lasx_xvpermi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvpermi_d(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvpermi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpermi_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvpermi_q(v32i8 _1, v32i8 _2, int var) {
-+ v32i8 res = __builtin_lasx_xvpermi_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvpermi_q(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvpermi_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvsllwil_h_b(v32i8 _1, int var) {
-+ v16i16 res = __builtin_lasx_xvsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_h_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvsllwil_w_h(v16i16 _1, int var) {
-+ v8i32 res = __builtin_lasx_xvsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_w_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvsllwil_d_w(v8i32 _1, int var) {
-+ v4i64 res = __builtin_lasx_xvsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_d_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u16 xvsllwil_hu_bu(v32u8 _1, int var) {
-+ v16u16 res = __builtin_lasx_xvsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_hu_bu' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u32 xvsllwil_wu_hu(v16u16 _1, int var) {
-+ v8u32 res = __builtin_lasx_xvsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_wu_hu' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u64 xvsllwil_du_wu(v8u32 _1, int var) {
-+ v4u64 res = __builtin_lasx_xvsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_du_wu' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2, int var) {
-+ v32i8 res = __builtin_lasx_xvfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2, int var) {
-+ v16i16 res = __builtin_lasx_xvfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvbsrl_v(v32i8 _1, int var) {
-+ v32i8 res = __builtin_lasx_xvbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvbsrl_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsrl_v' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvbsll_v(v32i8 _1, int var) {
-+ v32i8 res = __builtin_lasx_xvbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvbsll_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsll_v' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvextrins_b(v32i8 _1, v32i8 _2, int var) {
-+ v32i8 res = __builtin_lasx_xvextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvextrins_h(v16i16 _1, v16i16 _2, int var) {
-+ v16i16 res = __builtin_lasx_xvextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvextrins_w(v8i32 _1, v8i32 _2, int var) {
-+ v8i32 res = __builtin_lasx_xvextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvextrins_d(v4i64 _1, v4i64 _2, int var) {
-+ v4i64 res = __builtin_lasx_xvextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
-+ res |= __builtin_lasx_xvextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvld(void *_1, int var) {
-+ v32i8 res = __builtin_lasx_xvld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
-+ res |= __builtin_lasx_xvld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}}
-+ res |= __builtin_lasx_xvld(_1, var); // expected-error {{argument to '__builtin_lasx_xvld' must be a constant integer}}
-+ return res;
-+}
-+
-+void xvst(v32i8 _1, void *_2, int var) {
-+ __builtin_lasx_xvst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
-+ __builtin_lasx_xvst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}}
-+ __builtin_lasx_xvst(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvst' must be a constant integer}}
-+}
-+
-+void xvstelm_b(v32i8 _1, void * _2, int var) {
-+ __builtin_lasx_xvstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}}
-+ __builtin_lasx_xvstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}}
-+ __builtin_lasx_xvstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}}
-+}
-+
-+void xvstelm_h(v16i16 _1, void * _2, int var) {
-+ __builtin_lasx_xvstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}}
-+ __builtin_lasx_xvstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}}
-+ __builtin_lasx_xvstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}}
-+}
-+
-+void xvstelm_w(v8i32 _1, void * _2, int var) {
-+ __builtin_lasx_xvstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}}
-+ __builtin_lasx_xvstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}}
-+ __builtin_lasx_xvstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}}
-+}
-+
-+void xvstelm_d(v4i64 _1, void * _2, int var) {
-+ __builtin_lasx_xvstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}}
-+ __builtin_lasx_xvstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}}
-+ __builtin_lasx_xvstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}}
-+}
-+
-+void xvstelm_b_idx(v32i8 _1, void * _2, int var) {
-+ __builtin_lasx_xvstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ __builtin_lasx_xvstelm_b(_1, _2, 1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ __builtin_lasx_xvstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}}
-+}
-+
-+void xvstelm_h_idx(v16i16 _1, void * _2, int var) {
-+ __builtin_lasx_xvstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ __builtin_lasx_xvstelm_h(_1, _2, 2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ __builtin_lasx_xvstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}}
-+}
-+
-+void xvstelm_w_idx(v8i32 _1, void * _2, int var) {
-+ __builtin_lasx_xvstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ __builtin_lasx_xvstelm_w(_1, _2, 4, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ __builtin_lasx_xvstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}}
-+}
-+
-+void xvstelm_d_idx(v4i64 _1, void * _2, int var) {
-+ __builtin_lasx_xvstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
-+ __builtin_lasx_xvstelm_d(_1, _2, 8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-+ __builtin_lasx_xvstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}}
-+}
-+
-+v8i32 xvinsve0_w(v8i32 _1, v8i32 _2, int var) {
-+ v8i32 res = __builtin_lasx_xvinsve0_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvinsve0_w(_1, _2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvinsve0_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvinsve0_d(v4i64 _1, v4i64 _2, int var) {
-+ v4i64 res = __builtin_lasx_xvinsve0_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
-+ res |= __builtin_lasx_xvinsve0_d(_1, _2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-+ res |= __builtin_lasx_xvinsve0_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvpickve_w(v8i32 _1, int var) {
-+ v8i32 res = __builtin_lasx_xvpickve_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvpickve_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvpickve_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvpickve_d(v4i64 _1, int var) {
-+ v4i64 res = __builtin_lasx_xvpickve_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
-+ res |= __builtin_lasx_xvpickve_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-+ res |= __builtin_lasx_xvpickve_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvldi(int var) {
-+ v4i64 res = __builtin_lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}}
-+ res |= __builtin_lasx_xvldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}}
-+ res |= __builtin_lasx_xvldi(var); // expected-error {{argument to '__builtin_lasx_xvldi' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvinsgr2vr_w(v8i32 _1, int var) {
-+ v8i32 res = __builtin_lasx_xvinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvinsgr2vr_w(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvinsgr2vr_d(v4i64 _1, int var) {
-+ v4i64 res = __builtin_lasx_xvinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
-+ res |= __builtin_lasx_xvinsgr2vr_d(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-+ res |= __builtin_lasx_xvinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvldrepl_b(void *_1, int var) {
-+ v32i8 res = __builtin_lasx_xvldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
-+ res |= __builtin_lasx_xvldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}}
-+ res |= __builtin_lasx_xvldrepl_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvldrepl_h(void *_1, int var) {
-+ v16i16 res = __builtin_lasx_xvldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}}
-+ res |= __builtin_lasx_xvldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}}
-+ res |= __builtin_lasx_xvldrepl_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvldrepl_w(void *_1, int var) {
-+ v8i32 res = __builtin_lasx_xvldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}}
-+ res |= __builtin_lasx_xvldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}}
-+ res |= __builtin_lasx_xvldrepl_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvldrepl_d(void *_1, int var) {
-+ v4i64 res = __builtin_lasx_xvldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}}
-+ res |= __builtin_lasx_xvldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}}
-+ res |= __builtin_lasx_xvldrepl_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_d' must be a constant integer}}
-+ return res;
-+}
-+
-+int xvpickve2gr_w(v8i32 _1, int var) {
-+ int res = __builtin_lasx_xvpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvpickve2gr_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_w' must be a constant integer}}
-+ return res;
-+}
-+
-+unsigned int xvpickve2gr_wu(v8i32 _1, int var) {
-+ unsigned int res = __builtin_lasx_xvpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvpickve2gr_wu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_wu' must be a constant integer}}
-+ return res;
-+}
-+
-+long xvpickve2gr_d(v4i64 _1, int var) {
-+ long res = __builtin_lasx_xvpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
-+ res |= __builtin_lasx_xvpickve2gr_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-+ res |= __builtin_lasx_xvpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_d' must be a constant integer}}
-+ return res;
-+}
-+
-+unsigned long int xvpickve2gr_du(v4i64 _1, int var) {
-+ unsigned long int res = __builtin_lasx_xvpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
-+ res |= __builtin_lasx_xvpickve2gr_du(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-+ res |= __builtin_lasx_xvpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_du' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvrotri_b(v32i8 _1, int var) {
-+ v32i8 res = __builtin_lasx_xvrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res |= __builtin_lasx_xvrotri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvrotri_h(v16i16 _1, int var) {
-+ v16i16 res = __builtin_lasx_xvrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvrotri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvrotri_w(v8i32 _1, int var) {
-+ v8i32 res = __builtin_lasx_xvrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvrotri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvrotri_d(v4i64 _1, int var) {
-+ v4i64 res = __builtin_lasx_xvrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvrotri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2, int var) {
-+ v32i8 res = __builtin_lasx_xvsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_b_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2, int var) {
-+ v16i16 res = __builtin_lasx_xvsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_h_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2, int var) {
-+ v8i32 res = __builtin_lasx_xvsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_w_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2, int var) {
-+ v4i64 res = __builtin_lasx_xvsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_d_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2, int var) {
-+ v32i8 res = __builtin_lasx_xvsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_b_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2, int var) {
-+ v16i16 res = __builtin_lasx_xvsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_h_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2, int var) {
-+ v8i32 res = __builtin_lasx_xvsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_w_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2, int var) {
-+ v4i64 res = __builtin_lasx_xvsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_d_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2, int var) {
-+ v32i8 res = __builtin_lasx_xvssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_b_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2, int var) {
-+ v16i16 res = __builtin_lasx_xvssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_h_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2, int var) {
-+ v8i32 res = __builtin_lasx_xvssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_w_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2, int var) {
-+ v4i64 res = __builtin_lasx_xvssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_d_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2, int var) {
-+ v32u8 res = __builtin_lasx_xvssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_bu_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2, int var) {
-+ v16u16 res = __builtin_lasx_xvssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_hu_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2, int var) {
-+ v8u32 res = __builtin_lasx_xvssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_wu_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2, int var) {
-+ v4u64 res = __builtin_lasx_xvssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_du_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2, int var) {
-+ v32i8 res = __builtin_lasx_xvssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_b_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2, int var) {
-+ v16i16 res = __builtin_lasx_xvssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_h_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2, int var) {
-+ v8i32 res = __builtin_lasx_xvssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_w_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2, int var) {
-+ v4i64 res = __builtin_lasx_xvssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_d_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2, int var) {
-+ v32u8 res = __builtin_lasx_xvssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_bu_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2, int var) {
-+ v16u16 res = __builtin_lasx_xvssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_hu_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2, int var) {
-+ v8u32 res = __builtin_lasx_xvssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_wu_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2, int var) {
-+ v4u64 res = __builtin_lasx_xvssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_du_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2, int var) {
-+ v32i8 res = __builtin_lasx_xvsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_b_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2, int var) {
-+ v16i16 res = __builtin_lasx_xvsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_h_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2, int var) {
-+ v8i32 res = __builtin_lasx_xvsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_w_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2, int var) {
-+ v4i64 res = __builtin_lasx_xvsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_d_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2, int var) {
-+ v32i8 res = __builtin_lasx_xvsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_b_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2, int var) {
-+ v16i16 res = __builtin_lasx_xvsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_h_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2, int var) {
-+ v8i32 res = __builtin_lasx_xvsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_w_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2, int var) {
-+ v4i64 res = __builtin_lasx_xvsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_d_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2, int var) {
-+ v32i8 res = __builtin_lasx_xvssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_b_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2, int var) {
-+ v16i16 res = __builtin_lasx_xvssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_h_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2, int var) {
-+ v8i32 res = __builtin_lasx_xvssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_w_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2, int var) {
-+ v4i64 res = __builtin_lasx_xvssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_d_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2, int var) {
-+ v32u8 res = __builtin_lasx_xvssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_bu_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2, int var) {
-+ v16u16 res = __builtin_lasx_xvssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_hu_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2, int var) {
-+ v8u32 res = __builtin_lasx_xvssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_wu_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2, int var) {
-+ v4u64 res = __builtin_lasx_xvssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_du_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2, int var) {
-+ v32i8 res = __builtin_lasx_xvssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_b_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2, int var) {
-+ v16i16 res = __builtin_lasx_xvssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_h_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2, int var) {
-+ v8i32 res = __builtin_lasx_xvssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_w_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2, int var) {
-+ v4i64 res = __builtin_lasx_xvssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_d_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2, int var) {
-+ v32u8 res = __builtin_lasx_xvssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
-+ res |= __builtin_lasx_xvssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_bu_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2, int var) {
-+ v16u16 res = __builtin_lasx_xvssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
-+ res |= __builtin_lasx_xvssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_hu_w' must be a constant integer}}
-+ return res;
-+}
-+
-+v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2, int var) {
-+ v8u32 res = __builtin_lasx_xvssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
-+ res |= __builtin_lasx_xvssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_wu_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2, int var) {
-+ v4u64 res = __builtin_lasx_xvssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
-+ res |= __builtin_lasx_xvssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_du_q' must be a constant integer}}
-+ return res;
-+}
-+
-+v4f64 xvpickve_d_f(v4f64 _1, int var) {
-+ v4f64 res = __builtin_lasx_xvpickve_d_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
-+ res += __builtin_lasx_xvpickve_d_f(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-+ res += __builtin_lasx_xvpickve_d_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d_f' must be a constant integer}}
-+ return res;
-+}
-+
-+v8f32 xvpickve_w_f(v8f32 _1, int var) {
-+ v8f32 res = __builtin_lasx_xvpickve_w_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
-+ res += __builtin_lasx_xvpickve_w_f(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-+ res += __builtin_lasx_xvpickve_w_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w_f' must be a constant integer}}
-+ return res;
-+}
-+
-+v32i8 xvrepli_b(int var) {
-+ v32i8 res = __builtin_lasx_xvrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
-+ res |= __builtin_lasx_xvrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
-+ res |= __builtin_lasx_xvrepli_b(var); // expected-error {{argument to '__builtin_lasx_xvrepli_b' must be a constant integer}}
-+ return res;
-+}
-+
-+v4i64 xvrepli_d(int var) {
-+ v4i64 res = __builtin_lasx_xvrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
-+ res |= __builtin_lasx_xvrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
-+ res |= __builtin_lasx_xvrepli_d(var); // expected-error {{argument to '__builtin_lasx_xvrepli_d' must be a constant integer}}
-+ return res;
-+}
-+
-+v16i16 xvrepli_h(int var) {
-+ v16i16 res = __builtin_lasx_xvrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
-+ res |= __builtin_lasx_xvrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
-+ res |= __builtin_lasx_xvrepli_h(var); // expected-error {{argument to '__builtin_lasx_xvrepli_h' must be a constant integer}}
-+ return res;
-+}
-+
-+v8i32 xvrepli_w(int var) {
-+ v8i32 res = __builtin_lasx_xvrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
-+ res |= __builtin_lasx_xvrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
-+ res |= __builtin_lasx_xvrepli_w(var); // expected-error {{argument to '__builtin_lasx_xvrepli_w' must be a constant integer}}
-+ return res;
-+}
-diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin.c b/clang/test/CodeGen/LoongArch/lasx/builtin.c
-new file mode 100644
-index 000000000000..0185f2004d52
---- /dev/null
-+++ b/clang/test/CodeGen/LoongArch/lasx/builtin.c
-@@ -0,0 +1,4452 @@
-+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s
-+
-+typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
-+typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1)));
-+typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32)));
-+typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1)));
-+typedef short v16i16 __attribute__((vector_size(32), aligned(32)));
-+typedef short v16i16_h __attribute__((vector_size(32), aligned(2)));
-+typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32)));
-+typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2)));
-+typedef int v8i32 __attribute__((vector_size(32), aligned(32)));
-+typedef int v8i32_w __attribute__((vector_size(32), aligned(4)));
-+typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32)));
-+typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4)));
-+typedef long long v4i64 __attribute__((vector_size(32), aligned(32)));
-+typedef long long v4i64_d __attribute__((vector_size(32), aligned(8)));
-+typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32)));
-+typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8)));
-+typedef float v8f32 __attribute__((vector_size(32), aligned(32)));
-+typedef float v8f32_w __attribute__((vector_size(32), aligned(4)));
-+typedef double v4f64 __attribute__((vector_size(32), aligned(32)));
-+typedef double v4f64_d __attribute__((vector_size(32), aligned(8)));
-+
-+typedef double v4f64 __attribute__((vector_size(32), aligned(32)));
-+typedef double v4f64_d __attribute__((vector_size(32), aligned(8)));
-+
-+// CHECK-LABEL: @xvsll_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsll_b(_1, _2); }
-+// CHECK-LABEL: @xvsll_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsll_h(_1, _2); }
-+// CHECK-LABEL: @xvsll_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsll_w(_1, _2); }
-+// CHECK-LABEL: @xvsll_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsll_d(_1, _2); }
-+// CHECK-LABEL: @xvslli_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvslli_b(v32i8 _1) { return __builtin_lasx_xvslli_b(_1, 1); }
-+// CHECK-LABEL: @xvslli_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvslli_h(v16i16 _1) { return __builtin_lasx_xvslli_h(_1, 1); }
-+// CHECK-LABEL: @xvslli_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvslli_w(v8i32 _1) { return __builtin_lasx_xvslli_w(_1, 1); }
-+// CHECK-LABEL: @xvslli_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvslli_d(v4i64 _1) { return __builtin_lasx_xvslli_d(_1, 1); }
-+// CHECK-LABEL: @xvsra_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsra_b(_1, _2); }
-+// CHECK-LABEL: @xvsra_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsra_h(_1, _2); }
-+// CHECK-LABEL: @xvsra_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsra_w(_1, _2); }
-+// CHECK-LABEL: @xvsra_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsra_d(_1, _2); }
-+// CHECK-LABEL: @xvsrai_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsrai_b(v32i8 _1) { return __builtin_lasx_xvsrai_b(_1, 1); }
-+// CHECK-LABEL: @xvsrai_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsrai_h(v16i16 _1) { return __builtin_lasx_xvsrai_h(_1, 1); }
-+// CHECK-LABEL: @xvsrai_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsrai_w(v8i32 _1) { return __builtin_lasx_xvsrai_w(_1, 1); }
-+// CHECK-LABEL: @xvsrai_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvsrai_d(v4i64 _1) { return __builtin_lasx_xvsrai_d(_1, 1); }
-+// CHECK-LABEL: @xvsrar_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrar_b(_1, _2); }
-+// CHECK-LABEL: @xvsrar_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrar_h(_1, _2); }
-+// CHECK-LABEL: @xvsrar_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrar_w(_1, _2); }
-+// CHECK-LABEL: @xvsrar_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrar_d(_1, _2); }
-+// CHECK-LABEL: @xvsrari_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsrari_b(v32i8 _1) { return __builtin_lasx_xvsrari_b(_1, 1); }
-+// CHECK-LABEL: @xvsrari_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsrari_h(v16i16 _1) { return __builtin_lasx_xvsrari_h(_1, 1); }
-+// CHECK-LABEL: @xvsrari_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsrari_w(v8i32 _1) { return __builtin_lasx_xvsrari_w(_1, 1); }
-+// CHECK-LABEL: @xvsrari_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvsrari_d(v4i64 _1) { return __builtin_lasx_xvsrari_d(_1, 1); }
-+// CHECK-LABEL: @xvsrl_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrl_b(_1, _2); }
-+// CHECK-LABEL: @xvsrl_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrl_h(_1, _2); }
-+// CHECK-LABEL: @xvsrl_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrl_w(_1, _2); }
-+// CHECK-LABEL: @xvsrl_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrl_d(_1, _2); }
-+// CHECK-LABEL: @xvsrli_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsrli_b(v32i8 _1) { return __builtin_lasx_xvsrli_b(_1, 1); }
-+// CHECK-LABEL: @xvsrli_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsrli_h(v16i16 _1) { return __builtin_lasx_xvsrli_h(_1, 1); }
-+// CHECK-LABEL: @xvsrli_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsrli_w(v8i32 _1) { return __builtin_lasx_xvsrli_w(_1, 1); }
-+// CHECK-LABEL: @xvsrli_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvsrli_d(v4i64 _1) { return __builtin_lasx_xvsrli_d(_1, 1); }
-+// CHECK-LABEL: @xvsrlr_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlr_b(_1, _2); }
-+// CHECK-LABEL: @xvsrlr_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlr_h(_1, _2); }
-+// CHECK-LABEL: @xvsrlr_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlr_w(_1, _2); }
-+// CHECK-LABEL: @xvsrlr_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlr_d(_1, _2); }
-+// CHECK-LABEL: @xvsrlri_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsrlri_b(v32i8 _1) { return __builtin_lasx_xvsrlri_b(_1, 1); }
-+// CHECK-LABEL: @xvsrlri_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsrlri_h(v16i16 _1) { return __builtin_lasx_xvsrlri_h(_1, 1); }
-+// CHECK-LABEL: @xvsrlri_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsrlri_w(v8i32 _1) { return __builtin_lasx_xvsrlri_w(_1, 1); }
-+// CHECK-LABEL: @xvsrlri_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvsrlri_d(v4i64 _1) { return __builtin_lasx_xvsrlri_d(_1, 1); }
-+// CHECK-LABEL: @xvbitclr_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitclr_b(_1, _2); }
-+// CHECK-LABEL: @xvbitclr_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitclr_h(_1, _2); }
-+// CHECK-LABEL: @xvbitclr_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitclr_w(_1, _2); }
-+// CHECK-LABEL: @xvbitclr_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitclr_d(_1, _2); }
-+// CHECK-LABEL: @xvbitclri_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvbitclri_b(v32u8 _1) { return __builtin_lasx_xvbitclri_b(_1, 1); }
-+// CHECK-LABEL: @xvbitclri_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvbitclri_h(v16u16 _1) { return __builtin_lasx_xvbitclri_h(_1, 1); }
-+// CHECK-LABEL: @xvbitclri_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvbitclri_w(v8u32 _1) { return __builtin_lasx_xvbitclri_w(_1, 1); }
-+// CHECK-LABEL: @xvbitclri_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvbitclri_d(v4u64 _1) { return __builtin_lasx_xvbitclri_d(_1, 1); }
-+// CHECK-LABEL: @xvbitset_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitset_b(_1, _2); }
-+// CHECK-LABEL: @xvbitset_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitset_h(_1, _2); }
-+// CHECK-LABEL: @xvbitset_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitset_w(_1, _2); }
-+// CHECK-LABEL: @xvbitset_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitset_d(_1, _2); }
-+// CHECK-LABEL: @xvbitseti_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvbitseti_b(v32u8 _1) { return __builtin_lasx_xvbitseti_b(_1, 1); }
-+// CHECK-LABEL: @xvbitseti_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvbitseti_h(v16u16 _1) { return __builtin_lasx_xvbitseti_h(_1, 1); }
-+// CHECK-LABEL: @xvbitseti_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvbitseti_w(v8u32 _1) { return __builtin_lasx_xvbitseti_w(_1, 1); }
-+// CHECK-LABEL: @xvbitseti_d(
-+//
CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvbitseti_d(v4u64 _1) { return __builtin_lasx_xvbitseti_d(_1, 1); } -+// CHECK-LABEL: @xvbitrev_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitrev_b(_1, _2); } -+// CHECK-LABEL: @xvbitrev_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitrev_h(_1, _2); } -+// CHECK-LABEL: @xvbitrev_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitrev_w(_1, _2); } -+// CHECK-LABEL: @xvbitrev_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitrev_d(_1, _2); } -+// CHECK-LABEL: @xvbitrevi_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvbitrevi_b(v32u8 _1) { return __builtin_lasx_xvbitrevi_b(_1, 1); } -+// CHECK-LABEL: @xvbitrevi_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvbitrevi_h(v16u16 _1) { return __builtin_lasx_xvbitrevi_h(_1, 1); } -+// CHECK-LABEL: @xvbitrevi_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvbitrevi_w(v8u32 _1) { return __builtin_lasx_xvbitrevi_w(_1, 1); } -+// CHECK-LABEL: @xvbitrevi_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvbitrevi_d(v4u64 _1) { return __builtin_lasx_xvbitrevi_d(_1, 1); } -+// CHECK-LABEL: @xvadd_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadd_b(_1, _2); } -+// CHECK-LABEL: @xvadd_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadd_h(_1, _2); } -+// CHECK-LABEL: @xvadd_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvadd_w(v8i32 _1, v8i32 _2) { 
return __builtin_lasx_xvadd_w(_1, _2); } -+// CHECK-LABEL: @xvadd_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_d(_1, _2); } -+// CHECK-LABEL: @xvaddi_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvaddi_bu(v32i8 _1) { return __builtin_lasx_xvaddi_bu(_1, 1); } -+// CHECK-LABEL: @xvaddi_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvaddi_hu(v16i16 _1) { return __builtin_lasx_xvaddi_hu(_1, 1); } -+// CHECK-LABEL: @xvaddi_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvaddi_wu(v8i32 _1) { return __builtin_lasx_xvaddi_wu(_1, 1); } -+// CHECK-LABEL: @xvaddi_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddi_du(v4i64 _1) { return __builtin_lasx_xvaddi_du(_1, 1); } -+// CHECK-LABEL: @xvsub_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsub_b(_1, _2); } -+// CHECK-LABEL: @xvsub_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsub_h(_1, _2); } -+// CHECK-LABEL: @xvsub_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsub_w(_1, _2); } -+// CHECK-LABEL: @xvsub_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_d(_1, _2); } -+// CHECK-LABEL: @xvsubi_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsubi_bu(v32i8 _1) { return __builtin_lasx_xvsubi_bu(_1, 1); } -+// CHECK-LABEL: @xvsubi_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsubi_hu(v16i16 _1) { return __builtin_lasx_xvsubi_hu(_1, 1); } -+// CHECK-LABEL: @xvsubi_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsubi_wu(v8i32 _1) { return __builtin_lasx_xvsubi_wu(_1, 1); } -+// 
CHECK-LABEL: @xvsubi_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsubi_du(v4i64 _1) { return __builtin_lasx_xvsubi_du(_1, 1); } -+// CHECK-LABEL: @xvmax_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmax_b(_1, _2); } -+// CHECK-LABEL: @xvmax_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmax_h(_1, _2); } -+// CHECK-LABEL: @xvmax_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmax_w(_1, _2); } -+// CHECK-LABEL: @xvmax_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmax_d(_1, _2); } -+// CHECK-LABEL: @xvmaxi_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvmaxi_b(v32i8 _1) { return __builtin_lasx_xvmaxi_b(_1, 1); } -+// CHECK-LABEL: @xvmaxi_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmaxi_h(v16i16 _1) { return __builtin_lasx_xvmaxi_h(_1, 1); } -+// CHECK-LABEL: @xvmaxi_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmaxi_w(v8i32 _1) { return __builtin_lasx_xvmaxi_w(_1, 1); } -+// CHECK-LABEL: @xvmaxi_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmaxi_d(v4i64 _1) { return __builtin_lasx_xvmaxi_d(_1, 1); } -+// CHECK-LABEL: @xvmax_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmax_bu(_1, _2); } -+// CHECK-LABEL: @xvmax_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmax_hu(_1, _2); } -+// CHECK-LABEL: @xvmax_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmax_wu(_1, _2); } -+// CHECK-LABEL: 
@xvmax_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmax_du(_1, _2); } -+// CHECK-LABEL: @xvmaxi_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvmaxi_bu(v32u8 _1) { return __builtin_lasx_xvmaxi_bu(_1, 1); } -+// CHECK-LABEL: @xvmaxi_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvmaxi_hu(v16u16 _1) { return __builtin_lasx_xvmaxi_hu(_1, 1); } -+// CHECK-LABEL: @xvmaxi_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvmaxi_wu(v8u32 _1) { return __builtin_lasx_xvmaxi_wu(_1, 1); } -+// CHECK-LABEL: @xvmaxi_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvmaxi_du(v4u64 _1) { return __builtin_lasx_xvmaxi_du(_1, 1); } -+// CHECK-LABEL: @xvmin_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmin_b(_1, _2); } -+// CHECK-LABEL: @xvmin_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmin_h(_1, _2); } -+// CHECK-LABEL: @xvmin_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmin_w(_1, _2); } -+// CHECK-LABEL: @xvmin_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmin_d(_1, _2); } -+// CHECK-LABEL: @xvmini_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvmini_b(v32i8 _1) { return __builtin_lasx_xvmini_b(_1, 1); } -+// CHECK-LABEL: @xvmini_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmini_h(v16i16 _1) { return __builtin_lasx_xvmini_h(_1, 1); } -+// CHECK-LABEL: @xvmini_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmini_w(v8i32 _1) { return __builtin_lasx_xvmini_w(_1, 1); } -+// CHECK-LABEL: @xvmini_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmini_d(v4i64 _1) { return __builtin_lasx_xvmini_d(_1, 1); } -+// CHECK-LABEL: @xvmin_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmin_bu(_1, _2); } -+// CHECK-LABEL: @xvmin_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmin_hu(_1, _2); } -+// CHECK-LABEL: @xvmin_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmin_wu(_1, _2); } -+// CHECK-LABEL: @xvmin_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmin_du(_1, _2); } -+// CHECK-LABEL: @xvmini_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvmini_bu(v32u8 _1) { return __builtin_lasx_xvmini_bu(_1, 1); } -+// CHECK-LABEL: @xvmini_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvmini_hu(v16u16 _1) { return __builtin_lasx_xvmini_hu(_1, 1); } -+// CHECK-LABEL: @xvmini_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvmini_wu(v8u32 _1) { return __builtin_lasx_xvmini_wu(_1, 1); } -+// CHECK-LABEL: @xvmini_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvmini_du(v4u64 _1) { return __builtin_lasx_xvmini_du(_1, 1); } -+// CHECK-LABEL: @xvseq_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvseq_b(_1, _2); } -+// CHECK-LABEL: @xvseq_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvseq_h(_1, _2); } -+// CHECK-LABEL: @xvseq_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvseq_w(_1, _2); } -+// CHECK-LABEL: @xvseq_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvseq_d(_1, _2); } -+// CHECK-LABEL: @xvseqi_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvseqi_b(v32i8 _1) { return __builtin_lasx_xvseqi_b(_1, 1); } -+// CHECK-LABEL: @xvseqi_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvseqi_h(v16i16 _1) { return __builtin_lasx_xvseqi_h(_1, 1); } -+// CHECK-LABEL: @xvseqi_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvseqi_w(v8i32 _1) { return __builtin_lasx_xvseqi_w(_1, 1); } -+// CHECK-LABEL: @xvseqi_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvseqi_d(v4i64 _1) { return __builtin_lasx_xvseqi_d(_1, 1); } -+// CHECK-LABEL: @xvslt_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvslt_b(_1, _2); } -+// CHECK-LABEL: @xvslt_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvslt_h(_1, _2); } -+// CHECK-LABEL: @xvslt_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvslt_w(_1, _2); } -+// CHECK-LABEL: @xvslt_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvslt_d(_1, _2); } -+// CHECK-LABEL: @xvslti_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvslti_b(v32i8 _1) { return __builtin_lasx_xvslti_b(_1, 1); } -+// CHECK-LABEL: @xvslti_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvslti_h(v16i16 _1) { return __builtin_lasx_xvslti_h(_1, 1); } -+// CHECK-LABEL: @xvslti_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvslti_w(v8i32 _1) { return __builtin_lasx_xvslti_w(_1, 1); } -+// CHECK-LABEL: @xvslti_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> 
[[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvslti_d(v4i64 _1) { return __builtin_lasx_xvslti_d(_1, 1); } -+// CHECK-LABEL: @xvslt_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvslt_bu(_1, _2); } -+// CHECK-LABEL: @xvslt_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvslt_hu(_1, _2); } -+// CHECK-LABEL: @xvslt_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvslt_wu(_1, _2); } -+// CHECK-LABEL: @xvslt_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvslt_du(_1, _2); } -+// CHECK-LABEL: @xvslti_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvslti_bu(v32u8 _1) { return __builtin_lasx_xvslti_bu(_1, 1); } -+// CHECK-LABEL: @xvslti_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvslti_hu(v16u16 _1) { return __builtin_lasx_xvslti_hu(_1, 1); } -+// CHECK-LABEL: @xvslti_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvslti_wu(v8u32 _1) { return __builtin_lasx_xvslti_wu(_1, 1); } -+// CHECK-LABEL: @xvslti_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvslti_du(v4u64 _1) { return __builtin_lasx_xvslti_du(_1, 1); } -+// CHECK-LABEL: @xvsle_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsle_b(_1, _2); } -+// CHECK-LABEL: @xvsle_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsle_h(_1, _2); } -+// CHECK-LABEL: @xvsle_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsle_w(_1, _2); } -+// CHECK-LABEL: @xvsle_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> 
[[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsle_d(_1, _2); } -+// CHECK-LABEL: @xvslei_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvslei_b(v32i8 _1) { return __builtin_lasx_xvslei_b(_1, 1); } -+// CHECK-LABEL: @xvslei_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvslei_h(v16i16 _1) { return __builtin_lasx_xvslei_h(_1, 1); } -+// CHECK-LABEL: @xvslei_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvslei_w(v8i32 _1) { return __builtin_lasx_xvslei_w(_1, 1); } -+// CHECK-LABEL: @xvslei_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvslei_d(v4i64 _1) { return __builtin_lasx_xvslei_d(_1, 1); } -+// CHECK-LABEL: @xvsle_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsle_bu(_1, _2); } -+// CHECK-LABEL: @xvsle_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsle_hu(_1, _2); } -+// CHECK-LABEL: @xvsle_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsle_wu(_1, _2); } -+// CHECK-LABEL: @xvsle_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsle_du(_1, _2); } -+// CHECK-LABEL: @xvslei_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvslei_bu(v32u8 _1) { return __builtin_lasx_xvslei_bu(_1, 1); } -+// CHECK-LABEL: @xvslei_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvslei_hu(v16u16 _1) { return __builtin_lasx_xvslei_hu(_1, 1); } -+// CHECK-LABEL: @xvslei_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvslei_wu(v8u32 _1) { return __builtin_lasx_xvslei_wu(_1, 1); } -+// CHECK-LABEL: @xvslei_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x 
i64> [[TMP0]] -+// -+v4i64 xvslei_du(v4u64 _1) { return __builtin_lasx_xvslei_du(_1, 1); } -+// CHECK-LABEL: @xvsat_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsat_b(v32i8 _1) { return __builtin_lasx_xvsat_b(_1, 1); } -+// CHECK-LABEL: @xvsat_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsat_h(v16i16 _1) { return __builtin_lasx_xvsat_h(_1, 1); } -+// CHECK-LABEL: @xvsat_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsat_w(v8i32 _1) { return __builtin_lasx_xvsat_w(_1, 1); } -+// CHECK-LABEL: @xvsat_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsat_d(v4i64 _1) { return __builtin_lasx_xvsat_d(_1, 1); } -+// CHECK-LABEL: @xvsat_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvsat_bu(v32u8 _1) { return __builtin_lasx_xvsat_bu(_1, 1); } -+// CHECK-LABEL: @xvsat_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvsat_hu(v16u16 _1) { return __builtin_lasx_xvsat_hu(_1, 1); } -+// CHECK-LABEL: @xvsat_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvsat_wu(v8u32 _1) { return __builtin_lasx_xvsat_wu(_1, 1); } -+// CHECK-LABEL: @xvsat_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvsat_du(v4u64 _1) { return __builtin_lasx_xvsat_du(_1, 1); } -+// CHECK-LABEL: @xvadda_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadda_b(_1, _2); } -+// CHECK-LABEL: @xvadda_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadda_h(_1, _2); } -+// CHECK-LABEL: @xvadda_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadda_w(_1, _2); } -+// CHECK-LABEL: @xvadda_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadda_d(_1, _2); } -+// 
CHECK-LABEL: @xvsadd_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsadd_b(_1, _2); } -+// CHECK-LABEL: @xvsadd_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsadd_h(_1, _2); } -+// CHECK-LABEL: @xvsadd_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsadd_w(_1, _2); } -+// CHECK-LABEL: @xvsadd_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsadd_d(_1, _2); } -+// CHECK-LABEL: @xvsadd_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsadd_bu(_1, _2); } -+// CHECK-LABEL: @xvsadd_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsadd_hu(_1, _2); } -+// CHECK-LABEL: @xvsadd_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsadd_wu(_1, _2); } -+// CHECK-LABEL: @xvsadd_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsadd_du(_1, _2); } -+// CHECK-LABEL: @xvavg_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavg_b(_1, _2); } -+// CHECK-LABEL: @xvavg_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavg_h(_1, _2); } -+// CHECK-LABEL: @xvavg_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavg_w(_1, _2); } -+// CHECK-LABEL: @xvavg_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) 
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavg_d(_1, _2); } -+// CHECK-LABEL: @xvavg_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavg_bu(_1, _2); } -+// CHECK-LABEL: @xvavg_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavg_hu(_1, _2); } -+// CHECK-LABEL: @xvavg_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavg_wu(_1, _2); } -+// CHECK-LABEL: @xvavg_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavg_du(_1, _2); } -+// CHECK-LABEL: @xvavgr_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavgr_b(_1, _2); } -+// CHECK-LABEL: @xvavgr_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavgr_h(_1, _2); } -+// CHECK-LABEL: @xvavgr_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavgr_w(_1, _2); } -+// CHECK-LABEL: @xvavgr_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavgr_d(_1, _2); } -+// CHECK-LABEL: @xvavgr_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavgr_bu(_1, _2); } -+// CHECK-LABEL: @xvavgr_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavgr_hu(_1, _2); } -+// CHECK-LABEL: @xvavgr_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavgr_wu(_1, _2); } -+// CHECK-LABEL: @xvavgr_du( -+// 
CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavgr_du(_1, _2); } -+// CHECK-LABEL: @xvssub_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssub_b(_1, _2); } -+// CHECK-LABEL: @xvssub_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssub_h(_1, _2); } -+// CHECK-LABEL: @xvssub_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssub_w(_1, _2); } -+// CHECK-LABEL: @xvssub_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssub_d(_1, _2); } -+// CHECK-LABEL: @xvssub_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvssub_bu(_1, _2); } -+// CHECK-LABEL: @xvssub_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssub_hu(_1, _2); } -+// CHECK-LABEL: @xvssub_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssub_wu(_1, _2); } -+// CHECK-LABEL: @xvssub_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssub_du(_1, _2); } -+// CHECK-LABEL: @xvabsd_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvabsd_b(_1, _2); } -+// CHECK-LABEL: @xvabsd_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvabsd_h(_1, _2); } -+// CHECK-LABEL: @xvabsd_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// 
CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvabsd_w(_1, _2); } -+// CHECK-LABEL: @xvabsd_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvabsd_d(_1, _2); } -+// CHECK-LABEL: @xvabsd_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvabsd_bu(_1, _2); } -+// CHECK-LABEL: @xvabsd_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvabsd_hu(_1, _2); } -+// CHECK-LABEL: @xvabsd_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvabsd_wu(_1, _2); } -+// CHECK-LABEL: @xvabsd_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvabsd_du(_1, _2); } -+// CHECK-LABEL: @xvmul_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmul_b(_1, _2); } -+// CHECK-LABEL: @xvmul_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmul_h(_1, _2); } -+// CHECK-LABEL: @xvmul_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmul_w(_1, _2); } -+// CHECK-LABEL: @xvmul_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmul_d(_1, _2); } -+// CHECK-LABEL: @xvmadd_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmadd_b(_1, _2, _3); } -+// CHECK-LABEL: @xvmadd_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return 
__builtin_lasx_xvmadd_h(_1, _2, _3); } -+// CHECK-LABEL: @xvmadd_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmadd_w(_1, _2, _3); } -+// CHECK-LABEL: @xvmadd_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmadd_d(_1, _2, _3); } -+// CHECK-LABEL: @xvmsub_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmsub_b(_1, _2, _3); } -+// CHECK-LABEL: @xvmsub_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmsub_h(_1, _2, _3); } -+// CHECK-LABEL: @xvmsub_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmsub_w(_1, _2, _3); } -+// CHECK-LABEL: @xvmsub_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmsub_d(_1, _2, _3); } -+// CHECK-LABEL: @xvdiv_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvdiv_b(_1, _2); } -+// CHECK-LABEL: @xvdiv_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvdiv_h(_1, _2); } -+// CHECK-LABEL: @xvdiv_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvdiv_w(_1, _2); } -+// CHECK-LABEL: @xvdiv_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvdiv_d(_1, _2); } -+// CHECK-LABEL: @xvdiv_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvdiv_bu(v32u8 
_1, v32u8 _2) { return __builtin_lasx_xvdiv_bu(_1, _2); }
-+// CHECK-LABEL: @xvdiv_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvdiv_hu(_1, _2); }
-+// CHECK-LABEL: @xvdiv_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvdiv_wu(_1, _2); }
-+// CHECK-LABEL: @xvdiv_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvdiv_du(_1, _2); }
-+// CHECK-LABEL: @xvhaddw_h_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhaddw_h_b(_1, _2); }
-+// CHECK-LABEL: @xvhaddw_w_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhaddw_w_h(_1, _2); }
-+// CHECK-LABEL: @xvhaddw_d_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhaddw_d_w(_1, _2); }
-+// CHECK-LABEL: @xvhaddw_hu_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhaddw_hu_bu(_1, _2); }
-+// CHECK-LABEL: @xvhaddw_wu_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhaddw_wu_hu(_1, _2); }
-+// CHECK-LABEL: @xvhaddw_du_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhaddw_du_wu(_1, _2); }
-+// CHECK-LABEL: @xvhsubw_h_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhsubw_h_b(_1, _2); }
-+// CHECK-LABEL: @xvhsubw_w_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhsubw_w_h(_1, _2); }
-+// CHECK-LABEL: @xvhsubw_d_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhsubw_d_w(_1, _2); }
-+// CHECK-LABEL: @xvhsubw_hu_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhsubw_hu_bu(_1, _2); }
-+// CHECK-LABEL: @xvhsubw_wu_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhsubw_wu_hu(_1, _2); }
-+// CHECK-LABEL: @xvhsubw_du_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhsubw_du_wu(_1, _2); }
-+// CHECK-LABEL: @xvmod_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmod_b(_1, _2); }
-+// CHECK-LABEL: @xvmod_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmod_h(_1, _2); }
-+// CHECK-LABEL: @xvmod_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmod_w(_1, _2); }
-+// CHECK-LABEL: @xvmod_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmod_d(_1, _2); }
-+// CHECK-LABEL: @xvmod_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmod_bu(_1, _2); }
-+// CHECK-LABEL: @xvmod_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmod_hu(_1, _2); }
-+// CHECK-LABEL: @xvmod_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmod_wu(_1, _2); }
-+// CHECK-LABEL: @xvmod_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmod_du(_1, _2); }
-+// CHECK-LABEL: @xvrepl128vei_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvrepl128vei_b(v32i8 _1) { return __builtin_lasx_xvrepl128vei_b(_1, 1); }
-+// CHECK-LABEL: @xvrepl128vei_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvrepl128vei_h(v16i16 _1) { return __builtin_lasx_xvrepl128vei_h(_1, 1); }
-+// CHECK-LABEL: @xvrepl128vei_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvrepl128vei_w(v8i32 _1) { return __builtin_lasx_xvrepl128vei_w(_1, 1); }
-+// CHECK-LABEL: @xvrepl128vei_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvrepl128vei_d(v4i64 _1) { return __builtin_lasx_xvrepl128vei_d(_1, 1); }
-+// CHECK-LABEL: @xvpickev_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickev_b(_1, _2); }
-+// CHECK-LABEL: @xvpickev_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickev_h(_1, _2); }
-+// CHECK-LABEL: @xvpickev_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickev_w(_1, _2); }
-+// CHECK-LABEL: @xvpickev_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickev_d(_1, _2); }
-+// CHECK-LABEL: @xvpickod_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickod_b(_1, _2); }
-+// CHECK-LABEL: @xvpickod_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickod_h(_1, _2); }
-+// CHECK-LABEL: @xvpickod_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickod_w(_1, _2); }
-+// CHECK-LABEL: @xvpickod_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickod_d(_1, _2); }
-+// CHECK-LABEL: @xvilvh_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvh_b(_1, _2); }
-+// CHECK-LABEL: @xvilvh_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvh_h(_1, _2); }
-+// CHECK-LABEL: @xvilvh_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvh_w(_1, _2); }
-+// CHECK-LABEL: @xvilvh_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvh_d(_1, _2); }
-+// CHECK-LABEL: @xvilvl_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvl_b(_1, _2); }
-+// CHECK-LABEL: @xvilvl_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvl_h(_1, _2); }
-+// CHECK-LABEL: @xvilvl_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvl_w(_1, _2); }
-+// CHECK-LABEL: @xvilvl_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvl_d(_1, _2); }
-+// CHECK-LABEL: @xvpackev_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackev_b(_1, _2); }
-+// CHECK-LABEL: @xvpackev_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackev_h(_1, _2); }
-+// CHECK-LABEL: @xvpackev_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackev_w(_1, _2); }
-+// CHECK-LABEL: @xvpackev_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackev_d(_1, _2); }
-+// CHECK-LABEL: @xvpackod_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackod_b(_1, _2); }
-+// CHECK-LABEL: @xvpackod_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackod_h(_1, _2); }
-+// CHECK-LABEL: @xvpackod_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackod_w(_1, _2); }
-+// CHECK-LABEL: @xvpackod_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackod_d(_1, _2); }
-+// CHECK-LABEL: @xvshuf_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvshuf_b(_1, _2, _3); }
-+// CHECK-LABEL: @xvshuf_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvshuf_h(_1, _2, _3); }
-+// CHECK-LABEL: @xvshuf_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvshuf_w(_1, _2, _3); }
-+// CHECK-LABEL: @xvshuf_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvshuf_d(_1, _2, _3); }
-+// CHECK-LABEL: @xvand_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvand_v(_1, _2); }
-+// CHECK-LABEL: @xvandi_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvandi_b(v32u8 _1) { return __builtin_lasx_xvandi_b(_1, 1); }
-+// CHECK-LABEL: @xvor_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvor_v(_1, _2); }
-+// CHECK-LABEL: @xvori_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvori_b(v32u8 _1) { return __builtin_lasx_xvori_b(_1, 1); }
-+// CHECK-LABEL: @xvnor_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvnor_v(_1, _2); }
-+// CHECK-LABEL: @xvnori_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvnori_b(v32u8 _1) { return __builtin_lasx_xvnori_b(_1, 1); }
-+// CHECK-LABEL: @xvxor_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvxor_v(_1, _2); }
-+// CHECK-LABEL: @xvxori_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvxori_b(v32u8 _1) { return __builtin_lasx_xvxori_b(_1, 1); }
-+// CHECK-LABEL: @xvbitsel_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvbitsel_v(_1, _2, _3); }
-+// CHECK-LABEL: @xvbitseli_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitseli_b(_1, _2, 1); }
-+// CHECK-LABEL: @xvshuf4i_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvshuf4i_b(v32i8 _1) { return __builtin_lasx_xvshuf4i_b(_1, 1); }
-+// CHECK-LABEL: @xvshuf4i_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvshuf4i_h(v16i16 _1) { return __builtin_lasx_xvshuf4i_h(_1, 1); }
-+// CHECK-LABEL: @xvshuf4i_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvshuf4i_w(v8i32 _1) { return __builtin_lasx_xvshuf4i_w(_1, 1); }
-+// CHECK-LABEL: @xvreplgr2vr_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvreplgr2vr_b(int _1) { return __builtin_lasx_xvreplgr2vr_b(_1); }
-+// CHECK-LABEL: @xvreplgr2vr_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvreplgr2vr_h(int _1) { return __builtin_lasx_xvreplgr2vr_h(_1); }
-+// CHECK-LABEL: @xvreplgr2vr_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvreplgr2vr_w(int _1) { return __builtin_lasx_xvreplgr2vr_w(_1); }
-+// CHECK-LABEL: @xvreplgr2vr_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvreplgr2vr_d(int _1) { return __builtin_lasx_xvreplgr2vr_d(_1); }
-+// CHECK-LABEL: @xvpcnt_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvpcnt_b(v32i8 _1) { return __builtin_lasx_xvpcnt_b(_1); }
-+// CHECK-LABEL: @xvpcnt_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvpcnt_h(v16i16 _1) { return __builtin_lasx_xvpcnt_h(_1); }
-+// CHECK-LABEL: @xvpcnt_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvpcnt_w(v8i32 _1) { return __builtin_lasx_xvpcnt_w(_1); }
-+// CHECK-LABEL: @xvpcnt_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvpcnt_d(v4i64 _1) { return __builtin_lasx_xvpcnt_d(_1); }
-+// CHECK-LABEL: @xvclo_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvclo_b(v32i8 _1) { return __builtin_lasx_xvclo_b(_1); }
-+// CHECK-LABEL: @xvclo_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvclo_h(v16i16 _1) { return __builtin_lasx_xvclo_h(_1); }
-+// CHECK-LABEL: @xvclo_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvclo_w(v8i32 _1) { return __builtin_lasx_xvclo_w(_1); }
-+// CHECK-LABEL: @xvclo_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvclo_d(v4i64 _1) { return __builtin_lasx_xvclo_d(_1); }
-+// CHECK-LABEL: @xvclz_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvclz_b(v32i8 _1) { return __builtin_lasx_xvclz_b(_1); }
-+// CHECK-LABEL: @xvclz_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvclz_h(v16i16 _1) { return __builtin_lasx_xvclz_h(_1); }
-+// CHECK-LABEL: @xvclz_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvclz_w(v8i32 _1) { return __builtin_lasx_xvclz_w(_1); }
-+// CHECK-LABEL: @xvclz_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvclz_d(v4i64 _1) { return __builtin_lasx_xvclz_d(_1); }
-+// CHECK-LABEL: @xvfadd_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfadd_s(_1, _2); }
-+// CHECK-LABEL: @xvfadd_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfadd_d(_1, _2); }
-+// CHECK-LABEL: @xvfsub_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfsub_s(_1, _2); }
-+// CHECK-LABEL: @xvfsub_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfsub_d(_1, _2); }
-+// CHECK-LABEL: @xvfmul_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmul_s(_1, _2); }
-+// CHECK-LABEL: @xvfmul_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmul_d(_1, _2); }
-+// CHECK-LABEL: @xvfdiv_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfdiv_s(_1, _2); }
-+// CHECK-LABEL: @xvfdiv_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfdiv_d(_1, _2); }
-+// CHECK-LABEL: @xvfcvt_h_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcvt_h_s(_1, _2); }
-+// CHECK-LABEL: @xvfcvt_s_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcvt_s_d(_1, _2); }
-+// CHECK-LABEL: @xvfmin_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmin_s(_1, _2); }
-+// CHECK-LABEL: @xvfmin_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmin_d(_1, _2); }
-+// CHECK-LABEL: @xvfmina_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmina_s(_1, _2); }
-+// CHECK-LABEL: @xvfmina_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmina_d(_1, _2); }
-+// CHECK-LABEL: @xvfmax_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmax_s(_1, _2); }
-+// CHECK-LABEL: @xvfmax_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmax_d(_1, _2); }
-+// CHECK-LABEL: @xvfmaxa_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmaxa_s(_1, _2); }
-+// CHECK-LABEL: @xvfmaxa_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmaxa_d(_1, _2); }
-+// CHECK-LABEL: @xvfclass_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvfclass_s(v8f32 _1) { return __builtin_lasx_xvfclass_s(_1); }
-+// CHECK-LABEL: @xvfclass_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvfclass_d(v4f64 _1) { return __builtin_lasx_xvfclass_d(_1); }
-+// CHECK-LABEL: @xvfsqrt_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfsqrt_s(v8f32 _1) { return __builtin_lasx_xvfsqrt_s(_1); }
-+// CHECK-LABEL: @xvfsqrt_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfsqrt_d(v4f64 _1) { return __builtin_lasx_xvfsqrt_d(_1); }
-+// CHECK-LABEL: @xvfrecip_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfrecip_s(v8f32 _1) { return __builtin_lasx_xvfrecip_s(_1); }
-+// CHECK-LABEL: @xvfrecip_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfrecip_d(v4f64 _1) { return __builtin_lasx_xvfrecip_d(_1); }
-+// CHECK-LABEL: @xvfrint_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfrint_s(v8f32 _1) { return __builtin_lasx_xvfrint_s(_1); }
-+// CHECK-LABEL: @xvfrint_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfrint_d(v4f64 _1) { return __builtin_lasx_xvfrint_d(_1); }
-+// CHECK-LABEL: @xvfrsqrt_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfrsqrt_s(v8f32 _1) { return __builtin_lasx_xvfrsqrt_s(_1); }
-+// CHECK-LABEL: @xvfrsqrt_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfrsqrt_d(v4f64 _1) { return __builtin_lasx_xvfrsqrt_d(_1); }
-+// CHECK-LABEL: @xvflogb_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvflogb_s(v8f32 _1) { return __builtin_lasx_xvflogb_s(_1); }
-+// CHECK-LABEL: @xvflogb_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvflogb_d(v4f64 _1) { return __builtin_lasx_xvflogb_d(_1); }
-+// CHECK-LABEL: @xvfcvth_s_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfcvth_s_h(v16i16 _1) { return __builtin_lasx_xvfcvth_s_h(_1); }
-+// CHECK-LABEL: @xvfcvth_d_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfcvth_d_s(v8f32 _1) { return __builtin_lasx_xvfcvth_d_s(_1); }
-+// CHECK-LABEL: @xvfcvtl_s_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfcvtl_s_h(v16i16 _1) { return __builtin_lasx_xvfcvtl_s_h(_1); }
-+// CHECK-LABEL: @xvfcvtl_d_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfcvtl_d_s(v8f32 _1) { return __builtin_lasx_xvfcvtl_d_s(_1); }
-+// CHECK-LABEL: @xvftint_w_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvftint_w_s(v8f32 _1) { return __builtin_lasx_xvftint_w_s(_1); }
-+// CHECK-LABEL: @xvftint_l_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvftint_l_d(v4f64 _1) { return __builtin_lasx_xvftint_l_d(_1); }
-+// CHECK-LABEL: @xvftint_wu_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvftint_wu_s(v8f32 _1) { return __builtin_lasx_xvftint_wu_s(_1); }
-+// CHECK-LABEL: @xvftint_lu_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvftint_lu_d(v4f64 _1) { return __builtin_lasx_xvftint_lu_d(_1); }
-+// CHECK-LABEL: @xvftintrz_w_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvftintrz_w_s(v8f32 _1) { return __builtin_lasx_xvftintrz_w_s(_1); }
-+// CHECK-LABEL: @xvftintrz_l_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvftintrz_l_d(v4f64 _1) { return __builtin_lasx_xvftintrz_l_d(_1); }
-+// CHECK-LABEL: @xvftintrz_wu_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvftintrz_wu_s(v8f32 _1) { return __builtin_lasx_xvftintrz_wu_s(_1); }
-+// CHECK-LABEL: @xvftintrz_lu_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvftintrz_lu_d(v4f64 _1) { return __builtin_lasx_xvftintrz_lu_d(_1); }
-+// CHECK-LABEL: @xvffint_s_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvffint_s_w(v8i32 _1) { return __builtin_lasx_xvffint_s_w(_1); }
-+// CHECK-LABEL: @xvffint_d_l(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvffint_d_l(v4i64 _1) { return __builtin_lasx_xvffint_d_l(_1); }
-+// CHECK-LABEL: @xvffint_s_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvffint_s_wu(v8u32 _1) { return __builtin_lasx_xvffint_s_wu(_1); }
-+// CHECK-LABEL: @xvffint_d_lu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvffint_d_lu(v4u64 _1) { return __builtin_lasx_xvffint_d_lu(_1); }
-+// CHECK-LABEL: @xvreplve_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1:%.*]], i32 [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvreplve_b(v32i8 _1, int _2) { return __builtin_lasx_xvreplve_b(_1, _2); }
-+// CHECK-LABEL: @xvreplve_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1:%.*]], i32 [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvreplve_h(v16i16 _1, int _2) { return __builtin_lasx_xvreplve_h(_1, _2); }
-+// CHECK-LABEL: @xvreplve_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1:%.*]], i32 [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvreplve_w(v8i32 _1, int _2) { return __builtin_lasx_xvreplve_w(_1, _2); }
-+// CHECK-LABEL: @xvreplve_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1:%.*]], i32 [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvreplve_d(v4i64 _1, int _2) { return __builtin_lasx_xvreplve_d(_1, _2); }
-+// CHECK-LABEL: @xvpermi_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpermi_w(_1, _2, 1); }
-+// CHECK-LABEL: @xvandn_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvandn_v(_1, _2); }
-+// CHECK-LABEL: @xvneg_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvneg_b(v32i8 _1) { return __builtin_lasx_xvneg_b(_1); }
-+// CHECK-LABEL: @xvneg_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvneg_h(v16i16 _1) { return __builtin_lasx_xvneg_h(_1); }
-+// CHECK-LABEL: @xvneg_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvneg_w(v8i32 _1) { return __builtin_lasx_xvneg_w(_1); }
-+// CHECK-LABEL: @xvneg_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvneg_d(v4i64 _1) { return __builtin_lasx_xvneg_d(_1); }
-+// CHECK-LABEL: @xvmuh_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmuh_b(_1, _2); }
-+// CHECK-LABEL: @xvmuh_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmuh_h(_1, _2); }
-+// CHECK-LABEL: @xvmuh_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmuh_w(_1, _2); }
-+// CHECK-LABEL: @xvmuh_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmuh_d(_1, _2); }
-+// CHECK-LABEL: @xvmuh_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmuh_bu(_1, _2); }
-+// CHECK-LABEL: @xvmuh_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmuh_hu(_1, _2); }
-+// CHECK-LABEL: @xvmuh_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmuh_wu(_1, _2); }
-+// CHECK-LABEL: @xvmuh_du(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmuh_du(_1, _2); }
-+// CHECK-LABEL: @xvsllwil_h_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsllwil_h_b(v32i8 _1) { return __builtin_lasx_xvsllwil_h_b(_1, 1); }
-+// CHECK-LABEL: @xvsllwil_w_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsllwil_w_h(v16i16 _1) { return __builtin_lasx_xvsllwil_w_h(_1, 1); }
-+// CHECK-LABEL: @xvsllwil_d_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvsllwil_d_w(v8i32 _1) { return __builtin_lasx_xvsllwil_d_w(_1, 1); }
-+// CHECK-LABEL: @xvsllwil_hu_bu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvsllwil_hu_bu(v32u8 _1) { return __builtin_lasx_xvsllwil_hu_bu(_1, 1); }
-+// CHECK-LABEL: @xvsllwil_wu_hu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvsllwil_wu_hu(v16u16 _1) { return __builtin_lasx_xvsllwil_wu_hu(_1, 1); }
-+// CHECK-LABEL: @xvsllwil_du_wu(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4u64 xvsllwil_du_wu(v8u32 _1) { return __builtin_lasx_xvsllwil_du_wu(_1, 1); }
-+// CHECK-LABEL: @xvsran_b_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsran_b_h(_1, _2); }
-+// CHECK-LABEL: @xvsran_h_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsran_h_w(_1, _2); }
-+// CHECK-LABEL: @xvsran_w_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsran_w_d(_1, _2); }
-+// CHECK-LABEL: @xvssran_b_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssran_b_h(_1, _2); }
-+// CHECK-LABEL: @xvssran_h_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssran_h_w(_1, _2); }
-+// CHECK-LABEL: @xvssran_w_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssran_w_d(_1, _2); }
-+// CHECK-LABEL: @xvssran_bu_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssran_bu_h(_1, _2); }
-+// CHECK-LABEL: @xvssran_hu_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssran_hu_w(_1, _2); }
-+// CHECK-LABEL: @xvssran_wu_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssran_wu_d(_1, _2); }
-+// CHECK-LABEL: @xvsrarn_b_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarn_b_h(_1, _2); }
-+// CHECK-LABEL: @xvsrarn_h_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarn_h_w(_1, _2); }
-+// CHECK-LABEL: @xvsrarn_w_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarn_w_d(_1, _2); }
-+// CHECK-LABEL: @xvssrarn_b_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarn_b_h(_1, _2); }
-+// CHECK-LABEL: @xvssrarn_h_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarn_h_w(_1, _2); }
-+// CHECK-LABEL: @xvssrarn_w_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarn_w_d(_1, _2); }
-+// CHECK-LABEL: @xvssrarn_bu_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrarn_bu_h(_1, _2); }
-+// CHECK-LABEL: @xvssrarn_hu_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrarn_hu_w(_1, _2); }
-+// CHECK-LABEL: @xvssrarn_wu_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrarn_wu_d(_1, _2); }
-+// CHECK-LABEL: @xvsrln_b_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrln_b_h(_1, _2); }
-+// CHECK-LABEL: @xvsrln_h_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrln_h_w(_1, _2); }
-+// CHECK-LABEL: @xvsrln_w_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrln_w_d(_1, _2); }
-+// CHECK-LABEL: @xvssrln_bu_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrln_bu_h(_1, _2); }
-+// CHECK-LABEL: @xvssrln_hu_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrln_hu_w(_1, _2); }
-+// CHECK-LABEL: @xvssrln_wu_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrln_wu_d(_1, _2); }
-+// CHECK-LABEL: @xvsrlrn_b_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrn_b_h(_1, _2); }
-+// CHECK-LABEL: @xvsrlrn_h_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrn_h_w(_1, _2); }
-+// CHECK-LABEL: @xvsrlrn_w_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrn_w_d(_1, _2); }
-+// CHECK-LABEL: @xvssrlrn_bu_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrlrn_bu_h(_1, _2); }
-+// CHECK-LABEL: @xvssrlrn_hu_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrlrn_hu_w(_1, _2); }
-+// CHECK-LABEL: @xvssrlrn_wu_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrlrn_wu_d(_1, _2); }
-+// CHECK-LABEL: @xvfrstpi_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvfrstpi_b(_1, _2, 1); }
-+// CHECK-LABEL: @xvfrstpi_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvfrstpi_h(_1, _2, 1); }
-+// CHECK-LABEL: @xvfrstp_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvfrstp_b(_1, _2, _3); }
-+// CHECK-LABEL: @xvfrstp_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvfrstp_h(_1, _2, _3); }
-+// CHECK-LABEL: @xvshuf4i_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvshuf4i_d(_1, _2, 1); }
-+// CHECK-LABEL: @xvbsrl_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvbsrl_v(v32i8 _1) { return __builtin_lasx_xvbsrl_v(_1, 1); }
-+// CHECK-LABEL: @xvbsll_v(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvbsll_v(v32i8 _1) { return __builtin_lasx_xvbsll_v(_1, 1); }
-+// CHECK-LABEL: @xvextrins_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvextrins_b(_1, _2, 1); }
-+// CHECK-LABEL: @xvextrins_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvextrins_h(_1, _2, 1); }
-+// CHECK-LABEL: @xvextrins_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvextrins_w(_1, _2, 1); }
-+// CHECK-LABEL: @xvextrins_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvextrins_d(_1, _2, 1); }
-+// CHECK-LABEL: @xvmskltz_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvmskltz_b(v32i8 _1) { return __builtin_lasx_xvmskltz_b(_1); }
-+// CHECK-LABEL: @xvmskltz_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvmskltz_h(v16i16 _1) { return __builtin_lasx_xvmskltz_h(_1); }
-+// CHECK-LABEL: @xvmskltz_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvmskltz_w(v8i32 _1) { return __builtin_lasx_xvmskltz_w(_1); }
-+// CHECK-LABEL: @xvmskltz_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvmskltz_d(v4i64 _1) { return __builtin_lasx_xvmskltz_d(_1); }
-+// CHECK-LABEL: @xvsigncov_b(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+//
-+v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsigncov_b(_1, _2); }
-+// CHECK-LABEL: @xvsigncov_h(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+//
-+v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsigncov_h(_1, _2); }
-+// CHECK-LABEL: @xvsigncov_w(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsigncov_w(_1, _2); }
-+// CHECK-LABEL: @xvsigncov_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsigncov_d(_1, _2); }
-+// CHECK-LABEL: @xvfmadd_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmadd_s(_1, _2, _3); }
-+// CHECK-LABEL: @xvfmadd_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmadd_d(_1, _2, _3); }
-+// CHECK-LABEL: @xvfmsub_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmsub_s(_1, _2, _3); }
-+// CHECK-LABEL: @xvfmsub_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmsub_d(_1, _2, _3); }
-+// CHECK-LABEL: @xvfnmadd_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmadd_s(_1, _2, _3); }
-+// CHECK-LABEL: @xvfnmadd_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmadd_d(_1, _2, _3); }
-+// CHECK-LABEL: @xvfnmsub_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmsub_s(_1, _2, _3); }
-+// CHECK-LABEL: @xvfnmsub_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]])
-+// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+//
-+v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmsub_d(_1, _2, _3); }
-+// CHECK-LABEL: @xvftintrne_w_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvftintrne_w_s(v8f32 _1) { return __builtin_lasx_xvftintrne_w_s(_1); }
-+// CHECK-LABEL: @xvftintrne_l_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvftintrne_l_d(v4f64 _1) { return __builtin_lasx_xvftintrne_l_d(_1); }
-+// CHECK-LABEL: @xvftintrp_w_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvftintrp_w_s(v8f32 _1) { return __builtin_lasx_xvftintrp_w_s(_1); }
-+// CHECK-LABEL: @xvftintrp_l_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvftintrp_l_d(v4f64 _1) { return __builtin_lasx_xvftintrp_l_d(_1); }
-+// CHECK-LABEL: @xvftintrm_w_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvftintrm_w_s(v8f32 _1) { return __builtin_lasx_xvftintrm_w_s(_1); }
-+// CHECK-LABEL: @xvftintrm_l_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1:%.*]])
-+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+//
-+v4i64 xvftintrm_l_d(v4f64 _1) { return __builtin_lasx_xvftintrm_l_d(_1); }
-+// CHECK-LABEL: @xvftint_w_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+//
-+v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftint_w_d(_1, _2); }
-+// CHECK-LABEL: @xvffint_s_l(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-+// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+//
-+v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvffint_s_l(_1, _2); }
-+// CHECK-LABEL: @xvftintrz_w_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8
x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrz_w_d(_1, _2); } -+// CHECK-LABEL: @xvftintrp_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrp_w_d(_1, _2); } -+// CHECK-LABEL: @xvftintrm_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrm_w_d(_1, _2); } -+// CHECK-LABEL: @xvftintrne_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrne_w_d(_1, _2); } -+// CHECK-LABEL: @xvftinth_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftinth_l_s(v8f32 _1) { return __builtin_lasx_xvftinth_l_s(_1); } -+// CHECK-LABEL: @xvftintl_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintl_l_s(v8f32 _1) { return __builtin_lasx_xvftintl_l_s(_1); } -+// CHECK-LABEL: @xvffinth_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvffinth_d_w(v8i32 _1) { return __builtin_lasx_xvffinth_d_w(_1); } -+// CHECK-LABEL: @xvffintl_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvffintl_d_w(v8i32 _1) { return __builtin_lasx_xvffintl_d_w(_1); } -+// CHECK-LABEL: @xvftintrzh_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrzh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzh_l_s(_1); } -+// CHECK-LABEL: @xvftintrzl_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrzl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzl_l_s(_1); } -+// CHECK-LABEL: @xvftintrph_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrph_l_s(v8f32 _1) { return __builtin_lasx_xvftintrph_l_s(_1); } -+// CHECK-LABEL: @xvftintrpl_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrpl_l_s(v8f32 _1) { return 
__builtin_lasx_xvftintrpl_l_s(_1); } -+// CHECK-LABEL: @xvftintrmh_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrmh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrmh_l_s(_1); } -+// CHECK-LABEL: @xvftintrml_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrml_l_s(v8f32 _1) { return __builtin_lasx_xvftintrml_l_s(_1); } -+// CHECK-LABEL: @xvftintrneh_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrneh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrneh_l_s(_1); } -+// CHECK-LABEL: @xvftintrnel_l_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvftintrnel_l_s(v8f32 _1) { return __builtin_lasx_xvftintrnel_l_s(_1); } -+// CHECK-LABEL: @xvfrintrne_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> -+// CHECK-NEXT: ret <8 x i32> [[TMP1]] -+// -+v8i32 xvfrintrne_s(v8f32 _1) { return __builtin_lasx_xvfrintrne_s(_1); } -+// CHECK-LABEL: @xvfrintrne_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> -+// CHECK-NEXT: ret <4 x i64> [[TMP1]] -+// -+v4i64 xvfrintrne_d(v4f64 _1) { return __builtin_lasx_xvfrintrne_d(_1); } -+// CHECK-LABEL: @xvfrintrz_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> -+// CHECK-NEXT: ret <8 x i32> [[TMP1]] -+// -+v8i32 xvfrintrz_s(v8f32 _1) { return __builtin_lasx_xvfrintrz_s(_1); } -+// CHECK-LABEL: @xvfrintrz_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> -+// CHECK-NEXT: ret <4 x i64> [[TMP1]] -+// -+v4i64 xvfrintrz_d(v4f64 _1) { return __builtin_lasx_xvfrintrz_d(_1); } -+// CHECK-LABEL: @xvfrintrp_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> -+// CHECK-NEXT: ret <8 x i32> [[TMP1]] -+// -+v8i32 xvfrintrp_s(v8f32 _1) { return __builtin_lasx_xvfrintrp_s(_1); } -+// CHECK-LABEL: @xvfrintrp_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> -+// CHECK-NEXT: ret <4 x i64> [[TMP1]] -+// -+v4i64 xvfrintrp_d(v4f64 _1) { return __builtin_lasx_xvfrintrp_d(_1); } -+// CHECK-LABEL: @xvfrintrm_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> 
[[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> -+// CHECK-NEXT: ret <8 x i32> [[TMP1]] -+// -+v8i32 xvfrintrm_s(v8f32 _1) { return __builtin_lasx_xvfrintrm_s(_1); } -+// CHECK-LABEL: @xvfrintrm_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> -+// CHECK-NEXT: ret <4 x i64> [[TMP1]] -+// -+v4i64 xvfrintrm_d(v4f64 _1) { return __builtin_lasx_xvfrintrm_d(_1); } -+// CHECK-LABEL: @xvld( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvld(void *_1) { return __builtin_lasx_xvld(_1, 1); } -+// CHECK-LABEL: @xvst( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret void -+// -+void xvst(v32i8 _1, void *_2) { return __builtin_lasx_xvst(_1, _2, 1); } -+// CHECK-LABEL: @xvstelm_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) -+// CHECK-NEXT: ret void -+// -+void xvstelm_b(v32i8 _1, void * _2) { return __builtin_lasx_xvstelm_b(_1, _2, 1, 1); } -+// CHECK-LABEL: @xvstelm_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) -+// CHECK-NEXT: ret void -+// -+void xvstelm_h(v16i16 _1, void * _2) { return __builtin_lasx_xvstelm_h(_1, _2, 2, 1); } -+// CHECK-LABEL: @xvstelm_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) -+// CHECK-NEXT: ret void -+// -+void xvstelm_w(v8i32 _1, void * _2) { return __builtin_lasx_xvstelm_w(_1, _2, 4, 1); } -+// CHECK-LABEL: @xvstelm_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) -+// CHECK-NEXT: ret void -+// -+void xvstelm_d(v4i64 _1, void * _2) { return __builtin_lasx_xvstelm_d(_1, _2, 8, 1); } -+// CHECK-LABEL: @xvinsve0_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvinsve0_w(_1, _2, 1); } -+// CHECK-LABEL: @xvinsve0_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvinsve0_d(_1, _2, 1); } -+// CHECK-LABEL: @xvpickve_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvpickve_w(v8i32 _1) { return __builtin_lasx_xvpickve_w(_1, 1); } -+// CHECK-LABEL: @xvpickve_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvpickve_d(v4i64 _1) { return __builtin_lasx_xvpickve_d(_1, 1); } -+// CHECK-LABEL: @xvssrlrn_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrn_b_h(_1, _2); } -+// CHECK-LABEL: @xvssrlrn_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrn_h_w(_1, _2); } -+// CHECK-LABEL: @xvssrlrn_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrn_w_d(_1, _2); } -+// CHECK-LABEL: @xvssrln_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrln_b_h(_1, _2); } -+// CHECK-LABEL: @xvssrln_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrln_h_w(_1, _2); } -+// CHECK-LABEL: @xvssrln_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrln_w_d(_1, _2); } -+// CHECK-LABEL: @xvorn_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvorn_v(_1, _2); } -+// CHECK-LABEL: @xvldi( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvldi() { return __builtin_lasx_xvldi(1); } -+// CHECK-LABEL: @xvldx( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvldx(void *_1) { return __builtin_lasx_xvldx(_1, 1); } -+// CHECK-LABEL: @xvstx( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) -+// CHECK-NEXT: ret void -+// -+void xvstx(v32i8 _1, void *_2) { return __builtin_lasx_xvstx(_1, _2, 1); } -+// CHECK-LABEL: @xvextl_qu_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvextl_qu_du(v4u64 _1) { return __builtin_lasx_xvextl_qu_du(_1); } -+// CHECK-LABEL: @xvinsgr2vr_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1:%.*]], i32 1, i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvinsgr2vr_w(v8i32 _1) { return __builtin_lasx_xvinsgr2vr_w(_1, 1, 1); } -+// CHECK-LABEL: @xvinsgr2vr_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1:%.*]], i64 1, i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvinsgr2vr_d(v4i64 _1) { return __builtin_lasx_xvinsgr2vr_d(_1, 1, 1); } -+// CHECK-LABEL: @xvreplve0_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvreplve0_b(v32i8 _1) { return __builtin_lasx_xvreplve0_b(_1); } -+// CHECK-LABEL: @xvreplve0_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvreplve0_h(v16i16 _1) { return __builtin_lasx_xvreplve0_h(_1); } -+// CHECK-LABEL: @xvreplve0_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvreplve0_w(v8i32 _1) { return __builtin_lasx_xvreplve0_w(_1); } -+// CHECK-LABEL: @xvreplve0_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvreplve0_d(v4i64 _1) { return __builtin_lasx_xvreplve0_d(_1); } -+// CHECK-LABEL: @xvreplve0_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvreplve0_q(v32i8 _1) { return __builtin_lasx_xvreplve0_q(_1); } -+// CHECK-LABEL: @vext2xv_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 vext2xv_h_b(v32i8 _1) { return __builtin_lasx_vext2xv_h_b(_1); } -+// CHECK-LABEL: @vext2xv_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 vext2xv_w_h(v16i16 _1) { return __builtin_lasx_vext2xv_w_h(_1); } -+// CHECK-LABEL: @vext2xv_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 vext2xv_d_w(v8i32 _1) { return __builtin_lasx_vext2xv_d_w(_1); } -+// CHECK-LABEL: @vext2xv_w_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 vext2xv_w_b(v32i8 _1) { return __builtin_lasx_vext2xv_w_b(_1); } -+// CHECK-LABEL: @vext2xv_d_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 vext2xv_d_h(v16i16 _1) { return __builtin_lasx_vext2xv_d_h(_1); } -+// CHECK-LABEL: @vext2xv_d_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 vext2xv_d_b(v32i8 _1) { return __builtin_lasx_vext2xv_d_b(_1); } -+// CHECK-LABEL: @vext2xv_hu_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// 
-+v16i16 vext2xv_hu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_hu_bu(_1); } -+// CHECK-LABEL: @vext2xv_wu_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 vext2xv_wu_hu(v16i16 _1) { return __builtin_lasx_vext2xv_wu_hu(_1); } -+// CHECK-LABEL: @vext2xv_du_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 vext2xv_du_wu(v8i32 _1) { return __builtin_lasx_vext2xv_du_wu(_1); } -+// CHECK-LABEL: @vext2xv_wu_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 vext2xv_wu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_wu_bu(_1); } -+// CHECK-LABEL: @vext2xv_du_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 vext2xv_du_hu(v16i16 _1) { return __builtin_lasx_vext2xv_du_hu(_1); } -+// CHECK-LABEL: @vext2xv_du_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 vext2xv_du_bu(v32i8 _1) { return __builtin_lasx_vext2xv_du_bu(_1); } -+// CHECK-LABEL: @xvpermi_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpermi_q(_1, _2, 1); } -+// CHECK-LABEL: @xvpermi_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvpermi_d(v4i64 _1) { return __builtin_lasx_xvpermi_d(_1, 1); } -+// CHECK-LABEL: @xvperm_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvperm_w(_1, _2); } -+// CHECK-LABEL: @xvldrepl_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvldrepl_b(void *_1) { return __builtin_lasx_xvldrepl_b(_1, 1); } -+// CHECK-LABEL: @xvldrepl_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvldrepl_h(void *_1) { return __builtin_lasx_xvldrepl_h(_1, 2); } -+// CHECK-LABEL: @xvldrepl_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvldrepl_w(void *_1) { return __builtin_lasx_xvldrepl_w(_1, 4); } -+// CHECK-LABEL: @xvldrepl_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvldrepl_d(void *_1) { return __builtin_lasx_xvldrepl_d(_1, 
8); } -+// CHECK-LABEL: @xvpickve2gr_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xvpickve2gr_w(v8i32 _1) { return __builtin_lasx_xvpickve2gr_w(_1, 1); } -+// CHECK-LABEL: @xvpickve2gr_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+unsigned int xvpickve2gr_wu(v8i32 _1) { return __builtin_lasx_xvpickve2gr_wu(_1, 1); } -+// CHECK-LABEL: @xvpickve2gr_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i64 [[TMP0]] -+// -+long xvpickve2gr_d(v4i64 _1) { return __builtin_lasx_xvpickve2gr_d(_1, 1); } -+// CHECK-LABEL: @xvpickve2gr_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret i64 [[TMP0]] -+// -+unsigned long int xvpickve2gr_du(v4i64 _1) { return __builtin_lasx_xvpickve2gr_du(_1, 1); } -+// CHECK-LABEL: @xvaddwev_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_d(_1, _2); } -+// CHECK-LABEL: @xvaddwev_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_w(_1, _2); } -+// CHECK-LABEL: @xvaddwev_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_h(_1, _2); } -+// CHECK-LABEL: @xvaddwev_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_b(_1, _2); } -+// CHECK-LABEL: @xvaddwev_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwev_q_du(_1, _2); } -+// CHECK-LABEL: @xvaddwev_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwev_d_wu(_1, _2); } -+// CHECK-LABEL: @xvaddwev_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwev_w_hu(_1, _2); } -+// CHECK-LABEL: @xvaddwev_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x 
i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwev_h_bu(_1, _2); } -+// CHECK-LABEL: @xvsubwev_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwev_q_d(_1, _2); } -+// CHECK-LABEL: @xvsubwev_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwev_d_w(_1, _2); } -+// CHECK-LABEL: @xvsubwev_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwev_w_h(_1, _2); } -+// CHECK-LABEL: @xvsubwev_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwev_h_b(_1, _2); } -+// CHECK-LABEL: @xvsubwev_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwev_q_du(_1, _2); } -+// CHECK-LABEL: @xvsubwev_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwev_d_wu(_1, _2); } -+// CHECK-LABEL: @xvsubwev_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwev_w_hu(_1, _2); } -+// CHECK-LABEL: @xvsubwev_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwev_h_bu(_1, _2); } -+// CHECK-LABEL: @xvmulwev_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_d(_1, _2); } -+// CHECK-LABEL: @xvmulwev_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_w(_1, _2); } -+// CHECK-LABEL: @xvmulwev_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_h(_1, _2); } -+// CHECK-LABEL: @xvmulwev_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_b(_1, _2); } -+// CHECK-LABEL: @xvmulwev_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwev_q_du(_1, _2); } -+// CHECK-LABEL: @xvmulwev_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwev_d_wu(_1, _2); } -+// CHECK-LABEL: @xvmulwev_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwev_w_hu(_1, _2); } -+// CHECK-LABEL: @xvmulwev_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwev_h_bu(_1, _2); } -+// CHECK-LABEL: @xvaddwod_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_d(_1, _2); } -+// CHECK-LABEL: @xvaddwod_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_w(_1, _2); } -+// CHECK-LABEL: @xvaddwod_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_h(_1, _2); } -+// CHECK-LABEL: @xvaddwod_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_b(_1, _2); } -+// CHECK-LABEL: @xvaddwod_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwod_q_du(_1, _2); } -+// CHECK-LABEL: @xvaddwod_d_wu( -+// CHECK-NEXT: entry: -+// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwod_d_wu(_1, _2); } -+// CHECK-LABEL: @xvaddwod_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwod_w_hu(_1, _2); } -+// CHECK-LABEL: @xvaddwod_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwod_h_bu(_1, _2); } -+// CHECK-LABEL: @xvsubwod_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwod_q_d(_1, _2); } -+// CHECK-LABEL: @xvsubwod_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwod_d_w(_1, _2); } -+// CHECK-LABEL: @xvsubwod_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwod_w_h(_1, _2); } -+// CHECK-LABEL: @xvsubwod_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwod_h_b(_1, _2); } -+// CHECK-LABEL: @xvsubwod_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwod_q_du(_1, _2); } -+// CHECK-LABEL: @xvsubwod_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwod_d_wu(_1, _2); } -+// CHECK-LABEL: @xvsubwod_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwod_w_hu(_1, _2); } -+// CHECK-LABEL: @xvsubwod_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwod_h_bu(_1, _2); } -+// CHECK-LABEL: @xvmulwod_q_d( -+// 
CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_d(_1, _2); } -+// CHECK-LABEL: @xvmulwod_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_w(_1, _2); } -+// CHECK-LABEL: @xvmulwod_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_h(_1, _2); } -+// CHECK-LABEL: @xvmulwod_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_b(_1, _2); } -+// CHECK-LABEL: @xvmulwod_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwod_q_du(_1, _2); } -+// CHECK-LABEL: @xvmulwod_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwod_d_wu(_1, _2); } -+// CHECK-LABEL: @xvmulwod_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwod_w_hu(_1, _2); } -+// CHECK-LABEL: @xvmulwod_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwod_h_bu(_1, _2); } -+// CHECK-LABEL: @xvaddwev_d_wu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_wu_w(_1, _2); } -+// CHECK-LABEL: @xvaddwev_w_hu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_hu_h(_1, _2); } -+// CHECK-LABEL: @xvaddwev_h_bu_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return 
__builtin_lasx_xvaddwev_h_bu_b(_1, _2); } -+// CHECK-LABEL: @xvmulwev_d_wu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_wu_w(_1, _2); } -+// CHECK-LABEL: @xvmulwev_w_hu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_hu_h(_1, _2); } -+// CHECK-LABEL: @xvmulwev_h_bu_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_bu_b(_1, _2); } -+// CHECK-LABEL: @xvaddwod_d_wu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_wu_w(_1, _2); } -+// CHECK-LABEL: @xvaddwod_w_hu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_hu_h(_1, _2); } -+// CHECK-LABEL: @xvaddwod_h_bu_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_bu_b(_1, _2); } -+// CHECK-LABEL: @xvmulwod_d_wu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_wu_w(_1, _2); } -+// CHECK-LABEL: @xvmulwod_w_hu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_hu_h(_1, _2); } -+// CHECK-LABEL: @xvmulwod_h_bu_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_bu_b(_1, _2); } -+// CHECK-LABEL: @xvhaddw_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhaddw_q_d(_1, _2); } -+// CHECK-LABEL: @xvhaddw_qu_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> 
[[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhaddw_qu_du(_1, _2); } -+// CHECK-LABEL: @xvhsubw_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhsubw_q_d(_1, _2); } -+// CHECK-LABEL: @xvhsubw_qu_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhsubw_qu_du(_1, _2); } -+// CHECK-LABEL: @xvmaddwev_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_d(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_w(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_h(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_b(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwev_q_du(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwev_d_wu(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwev_w_hu(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> 
[[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwev_h_bu(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_d(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_w(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_h(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_b(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_q_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwod_q_du(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_d_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwod_d_wu(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_w_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwod_w_hu(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_h_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwod_h_bu(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_q_du_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_du_d(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_d_wu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_wu_w(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_w_hu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_hu_h(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwev_h_bu_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_bu_b(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_q_du_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_du_d(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_d_wu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_wu_w(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_w_hu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_hu_h(_1, _2, _3); } -+// CHECK-LABEL: @xvmaddwod_h_bu_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_bu_b(_1, _2, _3); } -+// CHECK-LABEL: @xvrotr_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvrotr_b(_1, _2); } -+// CHECK-LABEL: @xvrotr_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvrotr_h(_1, _2); } -+// CHECK-LABEL: @xvrotr_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvrotr_w(_1, _2); } -+// CHECK-LABEL: @xvrotr_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call 
<4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvrotr_d(_1, _2); } -+// CHECK-LABEL: @xvadd_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_q(_1, _2); } -+// CHECK-LABEL: @xvsub_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_q(_1, _2); } -+// CHECK-LABEL: @xvaddwev_q_du_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_du_d(_1, _2); } -+// CHECK-LABEL: @xvaddwod_q_du_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_du_d(_1, _2); } -+// CHECK-LABEL: @xvmulwev_q_du_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_du_d(_1, _2); } -+// CHECK-LABEL: @xvmulwod_q_du_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_du_d(_1, _2); } -+// CHECK-LABEL: @xvmskgez_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvmskgez_b(v32i8 _1) { return __builtin_lasx_xvmskgez_b(_1); } -+// CHECK-LABEL: @xvmsknz_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvmsknz_b(v32i8 _1) { return __builtin_lasx_xvmsknz_b(_1); } -+// CHECK-LABEL: @xvexth_h_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvexth_h_b(v32i8 _1) { return __builtin_lasx_xvexth_h_b(_1); } -+// CHECK-LABEL: @xvexth_w_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvexth_w_h(v16i16 _1) { return __builtin_lasx_xvexth_w_h(_1); } -+// CHECK-LABEL: @xvexth_d_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvexth_d_w(v8i32 _1) { return __builtin_lasx_xvexth_d_w(_1); } -+// CHECK-LABEL: 
@xvexth_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvexth_q_d(v4i64 _1) { return __builtin_lasx_xvexth_q_d(_1); } -+// CHECK-LABEL: @xvexth_hu_bu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvexth_hu_bu(v32u8 _1) { return __builtin_lasx_xvexth_hu_bu(_1); } -+// CHECK-LABEL: @xvexth_wu_hu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvexth_wu_hu(v16u16 _1) { return __builtin_lasx_xvexth_wu_hu(_1); } -+// CHECK-LABEL: @xvexth_du_wu( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvexth_du_wu(v8u32 _1) { return __builtin_lasx_xvexth_du_wu(_1); } -+// CHECK-LABEL: @xvexth_qu_du( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvexth_qu_du(v4u64 _1) { return __builtin_lasx_xvexth_qu_du(_1); } -+// CHECK-LABEL: @xvrotri_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvrotri_b(v32i8 _1) { return __builtin_lasx_xvrotri_b(_1, 1); } -+// CHECK-LABEL: @xvrotri_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvrotri_h(v16i16 _1) { return __builtin_lasx_xvrotri_h(_1, 1); } -+// CHECK-LABEL: @xvrotri_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvrotri_w(v8i32 _1) { return __builtin_lasx_xvrotri_w(_1, 1); } -+// CHECK-LABEL: @xvrotri_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvrotri_d(v4i64 _1) { return __builtin_lasx_xvrotri_d(_1, 1); } -+// CHECK-LABEL: @xvextl_q_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvextl_q_d(v4i64 _1) { return __builtin_lasx_xvextl_q_d(_1); } -+// CHECK-LABEL: @xvsrlni_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlni_b_h(_1, _2, 1); } -+// CHECK-LABEL: @xvsrlni_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlni_h_w(_1, _2, 1); } -+// CHECK-LABEL: @xvsrlni_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlni_w_d(_1, _2, 1); } -+// CHECK-LABEL: @xvsrlni_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlni_d_q(_1, _2, 1); } -+// CHECK-LABEL: @xvsrlrni_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlrni_b_h(_1, _2, 1); } -+// CHECK-LABEL: @xvsrlrni_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrni_h_w(_1, _2, 1); } -+// CHECK-LABEL: @xvsrlrni_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrni_w_d(_1, _2, 1); } -+// CHECK-LABEL: @xvsrlrni_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrni_d_q(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlni_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_b_h(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlni_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_h_w(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlni_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_w_d(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlni_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_d_q(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlni_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return 
__builtin_lasx_xvssrlni_bu_h(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlni_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_hu_w(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlni_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_wu_d(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlni_du_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_du_q(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlrni_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_b_h(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlrni_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_h_w(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlrni_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_w_d(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlrni_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_d_q(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlrni_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_bu_h(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlrni_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_hu_w(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlrni_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_wu_d(_1, _2, 1); } -+// CHECK-LABEL: @xvssrlrni_du_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_du_q(_1, _2, 1); } -+// CHECK-LABEL: @xvsrani_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrani_b_h(_1, _2, 1); } -+// CHECK-LABEL: @xvsrani_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrani_h_w(_1, _2, 1); } -+// CHECK-LABEL: @xvsrani_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrani_w_d(_1, _2, 1); } -+// CHECK-LABEL: @xvsrani_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrani_d_q(_1, _2, 1); } -+// CHECK-LABEL: @xvsrarni_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrarni_b_h(_1, _2, 1); } -+// CHECK-LABEL: @xvsrarni_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarni_h_w(_1, _2, 1); } -+// CHECK-LABEL: @xvsrarni_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarni_w_d(_1, _2, 1); } -+// CHECK-LABEL: @xvsrarni_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarni_d_q(_1, _2, 1); } -+// CHECK-LABEL: @xvssrani_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_b_h(_1, _2, 1); } -+// CHECK-LABEL: @xvssrani_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_h_w(_1, _2, 1); } -+// 
CHECK-LABEL: @xvssrani_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_w_d(_1, _2, 1); } -+// CHECK-LABEL: @xvssrani_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_d_q(_1, _2, 1); } -+// CHECK-LABEL: @xvssrani_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_bu_h(_1, _2, 1); } -+// CHECK-LABEL: @xvssrani_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_hu_w(_1, _2, 1); } -+// CHECK-LABEL: @xvssrani_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_wu_d(_1, _2, 1); } -+// CHECK-LABEL: @xvssrani_du_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_du_q(_1, _2, 1); } -+// CHECK-LABEL: @xvssrarni_b_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_b_h(_1, _2, 1); } -+// CHECK-LABEL: @xvssrarni_h_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_h_w(_1, _2, 1); } -+// CHECK-LABEL: @xvssrarni_w_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_w_d(_1, _2, 1); } -+// CHECK-LABEL: @xvssrarni_d_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_d_q(_1, _2, 1); } -+// CHECK-LABEL: @xvssrarni_bu_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) -+// 
CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_bu_h(_1, _2, 1); } -+// CHECK-LABEL: @xvssrarni_hu_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_hu_w(_1, _2, 1); } -+// CHECK-LABEL: @xvssrarni_wu_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_wu_d(_1, _2, 1); } -+// CHECK-LABEL: @xvssrarni_du_q( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_du_q(_1, _2, 1); } -+// CHECK-LABEL: @xbnz_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbnz_b(v32u8 _1) { return __builtin_lasx_xbnz_b(_1); } -+// CHECK-LABEL: @xbnz_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbnz_d(v4u64 _1) { return __builtin_lasx_xbnz_d(_1); } -+// CHECK-LABEL: @xbnz_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbnz_h(v16u16 _1) { return __builtin_lasx_xbnz_h(_1); } -+// CHECK-LABEL: @xbnz_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbnz_v(v32u8 _1) { return __builtin_lasx_xbnz_v(_1); } -+// CHECK-LABEL: @xbnz_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbnz_w(v8u32 _1) { return __builtin_lasx_xbnz_w(_1); } -+// CHECK-LABEL: @xbz_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbz_b(v32u8 _1) { return __builtin_lasx_xbz_b(_1); } -+// CHECK-LABEL: @xbz_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbz_d(v4u64 _1) { return __builtin_lasx_xbz_d(_1); } -+// CHECK-LABEL: @xbz_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbz_h(v16u16 _1) { return __builtin_lasx_xbz_h(_1); } -+// CHECK-LABEL: @xbz_v( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// -+int xbz_v(v32u8 _1) { return __builtin_lasx_xbz_v(_1); } -+// CHECK-LABEL: @xbz_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1:%.*]]) -+// CHECK-NEXT: ret i32 [[TMP0]] -+// 
-+int xbz_w(v8u32 _1) { return __builtin_lasx_xbz_w(_1); } -+// CHECK-LABEL: @xvfcmp_caf_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_caf_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_caf_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_caf_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_ceq_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_ceq_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_ceq_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_ceq_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cle_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cle_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cle_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cle_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_clt_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_clt_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_clt_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_clt_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cne_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cne_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cne_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cne_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cor_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] 
-+// -+v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cor_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cor_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cor_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cueq_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cueq_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cueq_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cueq_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cule_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cule_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cule_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cule_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cult_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cult_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cult_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cult_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cun_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cun_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cune_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cune_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cune_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cune_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_cun_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1:%.*]], 
<8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cun_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_saf_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_saf_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_saf_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_saf_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_seq_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_seq_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_seq_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_seq_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sle_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sle_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sle_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sle_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_slt_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_slt_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_slt_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_slt_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sne_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sne_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sne_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sne_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sor_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sor_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sor_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sor_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sueq_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sueq_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sueq_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sueq_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sule_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sule_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sule_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sule_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sult_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sult_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sult_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sult_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sun_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sun_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sune_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sune_d(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sune_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sune_s(_1, _2); } -+// CHECK-LABEL: @xvfcmp_sun_s( -+// 
CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sun_s(_1, _2); } -+// CHECK-LABEL: @xvpickve_d_f( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// -+v4f64 xvpickve_d_f(v4f64 _1) { return __builtin_lasx_xvpickve_d_f(_1, 1); } -+// CHECK-LABEL: @xvpickve_w_f( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1:%.*]], i32 1) -+// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// -+v8f32 xvpickve_w_f(v8f32 _1) { return __builtin_lasx_xvpickve_w_f(_1, 1); } -+// CHECK-LABEL: @xvrepli_b( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) -+// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// -+v32i8 xvrepli_b() { return __builtin_lasx_xvrepli_b(1); } -+// CHECK-LABEL: @xvrepli_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) -+// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// -+v4i64 xvrepli_d() { return __builtin_lasx_xvrepli_d(1); } -+// CHECK-LABEL: @xvrepli_h( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) -+// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// -+v16i16 xvrepli_h() { return __builtin_lasx_xvrepli_h(1); } -+// CHECK-LABEL: @xvrepli_w( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) -+// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// -+v8i32 xvrepli_w() { return __builtin_lasx_xvrepli_w(1); } --- -2.20.1 - - -From 5e7ffbc865541b46b1274418a7b2900295ebb5af Mon Sep 17 00:00:00 2001 -From: yjijd -Date: Tue, 2 Jan 2024 11:46:00 +0800 -Subject: [PATCH 8/8] [Clang][LoongArch] Do not pass vector arguments via - vector registers (#74990) - -psABI v2.30 clarifies that vector arguments are passed according to the -base ABI by default. - -(cherry picked from commit 0e01c72c5645259d9a08a1a7ed39cb5cc41ce311) ---- - clang/lib/CodeGen/Targets/LoongArch.cpp | 7 - - .../CodeGen/LoongArch/lasx/builtin-alias.c | 4876 ++++++++++++----- - clang/test/CodeGen/LoongArch/lasx/builtin.c | 4874 +++++++++++----- - .../CodeGen/LoongArch/lsx/builtin-alias.c | 4746 +++++++++++----- - clang/test/CodeGen/LoongArch/lsx/builtin.c | 4746 +++++++++++----- - 5 files changed, 13485 insertions(+), 5764 deletions(-) - -diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp -index 26c68c3583b2..7483bf6d6d1e 100644 ---- a/clang/lib/CodeGen/Targets/LoongArch.cpp -+++ b/clang/lib/CodeGen/Targets/LoongArch.cpp -@@ -321,13 +321,6 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, - return ABIArgInfo::getDirect(); - } - -- // Pass 128-bit/256-bit vector values via vector registers directly. -- if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) && -- (getTarget().hasFeature("lsx"))) || -- ((getContext().getTypeSize(Ty) == 256) && -- getTarget().hasFeature("lasx")))) -- return ABIArgInfo::getDirect(); -- - // Complex types for the *f or *d ABI must be passed directly rather than - // using CoerceAndExpand. 
- if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) { -diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c -index 09b2d5fcacf5..9a8ce224bcfd 100644 ---- a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c -+++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c -@@ -5,4426 +5,6382 @@ - - // CHECK-LABEL: @xvsll_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __lasx_xvsll_b(_1, _2); } - // CHECK-LABEL: @xvsll_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __lasx_xvsll_h(_1, _2); } - // CHECK-LABEL: @xvsll_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __lasx_xvsll_w(_1, _2); } - // CHECK-LABEL: @xvsll_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __lasx_xvsll_d(_1, _2); } - // CHECK-LABEL: @xvslli_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: 
[[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvslli_b(v32i8 _1) { return __lasx_xvslli_b(_1, 1); } - // CHECK-LABEL: @xvslli_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvslli_h(v16i16 _1) { return __lasx_xvslli_h(_1, 1); } - // CHECK-LABEL: @xvslli_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvslli_w(v8i32 _1) { return __lasx_xvslli_w(_1, 1); } - // CHECK-LABEL: @xvslli_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvslli_d(v4i64 _1) { return __lasx_xvslli_d(_1, 1); } - // CHECK-LABEL: @xvsra_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __lasx_xvsra_b(_1, _2); } - // CHECK-LABEL: @xvsra_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __lasx_xvsra_h(_1, _2); } - // CHECK-LABEL: @xvsra_w( - // CHECK-NEXT: entry: --// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __lasx_xvsra_w(_1, _2); } - // CHECK-LABEL: @xvsra_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __lasx_xvsra_d(_1, _2); } - // CHECK-LABEL: @xvsrai_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrai_b(v32i8 _1) { return __lasx_xvsrai_b(_1, 1); } - // CHECK-LABEL: @xvsrai_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrai_h(v16i16 _1) { return __lasx_xvsrai_h(_1, 1); } - // CHECK-LABEL: @xvsrai_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrai_w(v8i32 _1) { return __lasx_xvsrai_w(_1, 1); } - // CHECK-LABEL: @xvsrai_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1) -+// 
CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsrai_d(v4i64 _1) { return __lasx_xvsrai_d(_1, 1); } - // CHECK-LABEL: @xvsrar_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrar_b(_1, _2); } - // CHECK-LABEL: @xvsrar_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrar_h(_1, _2); } - // CHECK-LABEL: @xvsrar_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrar_w(_1, _2); } - // CHECK-LABEL: @xvsrar_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrar_d(_1, _2); } - // CHECK-LABEL: @xvsrari_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, 
!tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrari_b(v32i8 _1) { return __lasx_xvsrari_b(_1, 1); } - // CHECK-LABEL: @xvsrari_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrari_h(v16i16 _1) { return __lasx_xvsrari_h(_1, 1); } - // CHECK-LABEL: @xvsrari_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrari_w(v8i32 _1) { return __lasx_xvsrari_w(_1, 1); } - // CHECK-LABEL: @xvsrari_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsrari_d(v4i64 _1) { return __lasx_xvsrari_d(_1, 1); } - // CHECK-LABEL: @xvsrl_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrl_b(_1, _2); } - // CHECK-LABEL: @xvsrl_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrl_h(_1, _2); } - // CHECK-LABEL: @xvsrl_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> 
[[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrl_w(_1, _2); } - // CHECK-LABEL: @xvsrl_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrl_d(_1, _2); } - // CHECK-LABEL: @xvsrli_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrli_b(v32i8 _1) { return __lasx_xvsrli_b(_1, 1); } - // CHECK-LABEL: @xvsrli_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrli_h(v16i16 _1) { return __lasx_xvsrli_h(_1, 1); } - // CHECK-LABEL: @xvsrli_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrli_w(v8i32 _1) { return __lasx_xvsrli_w(_1, 1); } - // CHECK-LABEL: @xvsrli_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsrli_d(v4i64 _1) { return 
__lasx_xvsrli_d(_1, 1); } - // CHECK-LABEL: @xvsrlr_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrlr_b(_1, _2); } - // CHECK-LABEL: @xvsrlr_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlr_h(_1, _2); } - // CHECK-LABEL: @xvsrlr_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlr_w(_1, _2); } - // CHECK-LABEL: @xvsrlr_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlr_d(_1, _2); } - // CHECK-LABEL: @xvsrlri_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrlri_b(v32i8 _1) { return __lasx_xvsrlri_b(_1, 1); } - // CHECK-LABEL: @xvsrlri_h( - // 
CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrlri_h(v16i16 _1) { return __lasx_xvsrlri_h(_1, 1); } - // CHECK-LABEL: @xvsrlri_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrlri_w(v8i32 _1) { return __lasx_xvsrlri_w(_1, 1); } - // CHECK-LABEL: @xvsrlri_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsrlri_d(v4i64 _1) { return __lasx_xvsrlri_d(_1, 1); } - // CHECK-LABEL: @xvbitclr_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitclr_b(_1, _2); } - // CHECK-LABEL: @xvbitclr_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitclr_h(_1, _2); } - // CHECK-LABEL: @xvbitclr_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// 
CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitclr_w(_1, _2); } - // CHECK-LABEL: @xvbitclr_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitclr_d(_1, _2); } - // CHECK-LABEL: @xvbitclri_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvbitclri_b(v32u8 _1) { return __lasx_xvbitclri_b(_1, 1); } - // CHECK-LABEL: @xvbitclri_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvbitclri_h(v16u16 _1) { return __lasx_xvbitclri_h(_1, 1); } - // CHECK-LABEL: @xvbitclri_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvbitclri_w(v8u32 _1) { return __lasx_xvbitclri_w(_1, 1); } - // CHECK-LABEL: @xvbitclri_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvbitclri_d(v4u64 _1) { return __lasx_xvbitclri_d(_1, 1); } - 
// CHECK-LABEL: @xvbitset_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitset_b(_1, _2); } - // CHECK-LABEL: @xvbitset_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitset_h(_1, _2); } - // CHECK-LABEL: @xvbitset_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitset_w(_1, _2); } - // CHECK-LABEL: @xvbitset_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitset_d(_1, _2); } - // CHECK-LABEL: @xvbitseti_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvbitseti_b(v32u8 _1) { return __lasx_xvbitseti_b(_1, 1); } - // CHECK-LABEL: 
@xvbitseti_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvbitseti_h(v16u16 _1) { return __lasx_xvbitseti_h(_1, 1); } - // CHECK-LABEL: @xvbitseti_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvbitseti_w(v8u32 _1) { return __lasx_xvbitseti_w(_1, 1); } - // CHECK-LABEL: @xvbitseti_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvbitseti_d(v4u64 _1) { return __lasx_xvbitseti_d(_1, 1); } - // CHECK-LABEL: @xvbitrev_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitrev_b(_1, _2); } - // CHECK-LABEL: @xvbitrev_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitrev_h(_1, _2); } - // CHECK-LABEL: @xvbitrev_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitrev_w(_1, _2); } - // CHECK-LABEL: @xvbitrev_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitrev_d(_1, _2); } - // CHECK-LABEL: @xvbitrevi_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvbitrevi_b(v32u8 _1) { return __lasx_xvbitrevi_b(_1, 1); } - // CHECK-LABEL: @xvbitrevi_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvbitrevi_h(v16u16 _1) { return __lasx_xvbitrevi_h(_1, 1); } - // CHECK-LABEL: @xvbitrevi_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvbitrevi_w(v8u32 _1) { return __lasx_xvbitrevi_w(_1, 1); } - // CHECK-LABEL: @xvbitrevi_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 
xvbitrevi_d(v4u64 _1) { return __lasx_xvbitrevi_d(_1, 1); } - // CHECK-LABEL: @xvadd_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvadd_b(_1, _2); } - // CHECK-LABEL: @xvadd_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvadd_h(_1, _2); } - // CHECK-LABEL: @xvadd_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvadd_w(_1, _2); } - // CHECK-LABEL: @xvadd_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvadd_d(_1, _2); } - // CHECK-LABEL: @xvaddi_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvaddi_bu(v32i8 _1) { return __lasx_xvaddi_bu(_1, 1); } - // CHECK-LABEL: @xvaddi_hu( 
- // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvaddi_hu(v16i16 _1) { return __lasx_xvaddi_hu(_1, 1); } - // CHECK-LABEL: @xvaddi_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvaddi_wu(v8i32 _1) { return __lasx_xvaddi_wu(_1, 1); } - // CHECK-LABEL: @xvaddi_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvaddi_du(v4i64 _1) { return __lasx_xvaddi_du(_1, 1); } - // CHECK-LABEL: @xvsub_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __lasx_xvsub_b(_1, _2); } - // CHECK-LABEL: @xvsub_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __lasx_xvsub_h(_1, _2); } - // CHECK-LABEL: @xvsub_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x 
i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __lasx_xvsub_w(_1, _2); } - // CHECK-LABEL: @xvsub_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __lasx_xvsub_d(_1, _2); } - // CHECK-LABEL: @xvsubi_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsubi_bu(v32i8 _1) { return __lasx_xvsubi_bu(_1, 1); } - // CHECK-LABEL: @xvsubi_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsubi_hu(v16i16 _1) { return __lasx_xvsubi_hu(_1, 1); } - // CHECK-LABEL: @xvsubi_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsubi_wu(v8i32 _1) { return __lasx_xvsubi_wu(_1, 1); } - // CHECK-LABEL: @xvsubi_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsubi_du(v4i64 _1) { return __lasx_xvsubi_du(_1, 1); } - // CHECK-LABEL: @xvmax_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x 
i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __lasx_xvmax_b(_1, _2); } - // CHECK-LABEL: @xvmax_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __lasx_xvmax_h(_1, _2); } - // CHECK-LABEL: @xvmax_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __lasx_xvmax_w(_1, _2); } - // CHECK-LABEL: @xvmax_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __lasx_xvmax_d(_1, _2); } - // CHECK-LABEL: @xvmaxi_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvmaxi_b(v32i8 _1) { return __lasx_xvmaxi_b(_1, 1); } - // CHECK-LABEL: @xvmaxi_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x 
i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmaxi_h(v16i16 _1) { return __lasx_xvmaxi_h(_1, 1); } - // CHECK-LABEL: @xvmaxi_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmaxi_w(v8i32 _1) { return __lasx_xvmaxi_w(_1, 1); } - // CHECK-LABEL: @xvmaxi_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmaxi_d(v4i64 _1) { return __lasx_xvmaxi_d(_1, 1); } - // CHECK-LABEL: @xvmax_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmax_bu(_1, _2); } - // CHECK-LABEL: @xvmax_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmax_hu(_1, _2); } - // CHECK-LABEL: @xvmax_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_124]], <8 x i32> 
[[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmax_wu(_1, _2); } - // CHECK-LABEL: @xvmax_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __lasx_xvmax_du(_1, _2); } - // CHECK-LABEL: @xvmaxi_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvmaxi_bu(v32u8 _1) { return __lasx_xvmaxi_bu(_1, 1); } - // CHECK-LABEL: @xvmaxi_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvmaxi_hu(v16u16 _1) { return __lasx_xvmaxi_hu(_1, 1); } - // CHECK-LABEL: @xvmaxi_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvmaxi_wu(v8u32 _1) { return __lasx_xvmaxi_wu(_1, 1); } - // CHECK-LABEL: @xvmaxi_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmaxi_du(v4u64 _1) { return __lasx_xvmaxi_du(_1, 1); } - // CHECK-LABEL: @xvmin_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load 
<32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __lasx_xvmin_b(_1, _2); } - // CHECK-LABEL: @xvmin_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __lasx_xvmin_h(_1, _2); } - // CHECK-LABEL: @xvmin_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __lasx_xvmin_w(_1, _2); } - // CHECK-LABEL: @xvmin_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __lasx_xvmin_d(_1, _2); } - // CHECK-LABEL: @xvmini_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvmini_b(v32i8 _1) { return __lasx_xvmini_b(_1, 1); } - // CHECK-LABEL: @xvmini_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x 
i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmini_h(v16i16 _1) { return __lasx_xvmini_h(_1, 1); } - // CHECK-LABEL: @xvmini_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmini_w(v8i32 _1) { return __lasx_xvmini_w(_1, 1); } - // CHECK-LABEL: @xvmini_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmini_d(v4i64 _1) { return __lasx_xvmini_d(_1, 1); } - // CHECK-LABEL: @xvmin_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmin_bu(_1, _2); } - // CHECK-LABEL: @xvmin_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmin_hu(_1, _2); } - // CHECK-LABEL: @xvmin_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvmin_wu(v8u32 
_1, v8u32 _2) { return __lasx_xvmin_wu(_1, _2); } - // CHECK-LABEL: @xvmin_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __lasx_xvmin_du(_1, _2); } - // CHECK-LABEL: @xvmini_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvmini_bu(v32u8 _1) { return __lasx_xvmini_bu(_1, 1); } - // CHECK-LABEL: @xvmini_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvmini_hu(v16u16 _1) { return __lasx_xvmini_hu(_1, 1); } - // CHECK-LABEL: @xvmini_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvmini_wu(v8u32 _1) { return __lasx_xvmini_wu(_1, 1); } - // CHECK-LABEL: @xvmini_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmini_du(v4u64 _1) { return __lasx_xvmini_du(_1, 1); } - // CHECK-LABEL: @xvseq_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __lasx_xvseq_b(_1, _2); }
- // CHECK-LABEL: @xvseq_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __lasx_xvseq_h(_1, _2); }
- // CHECK-LABEL: @xvseq_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __lasx_xvseq_w(_1, _2); }
- // CHECK-LABEL: @xvseq_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __lasx_xvseq_d(_1, _2); }
- // CHECK-LABEL: @xvseqi_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvseqi_b(v32i8 _1) { return __lasx_xvseqi_b(_1, 1); }
- // CHECK-LABEL: @xvseqi_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvseqi_h(v16i16 _1) { return __lasx_xvseqi_h(_1, 1); }
- // CHECK-LABEL: @xvseqi_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvseqi_w(v8i32 _1) { return __lasx_xvseqi_w(_1, 1); }
- // CHECK-LABEL: @xvseqi_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvseqi_d(v4i64 _1) { return __lasx_xvseqi_d(_1, 1); }
- // CHECK-LABEL: @xvslt_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __lasx_xvslt_b(_1, _2); }
- // CHECK-LABEL: @xvslt_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __lasx_xvslt_h(_1, _2); }
- // CHECK-LABEL: @xvslt_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __lasx_xvslt_w(_1, _2); }
- // CHECK-LABEL: @xvslt_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __lasx_xvslt_d(_1, _2); }
- // CHECK-LABEL: @xvslti_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvslti_b(v32i8 _1) { return __lasx_xvslti_b(_1, 1); }
- // CHECK-LABEL: @xvslti_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvslti_h(v16i16 _1) { return __lasx_xvslti_h(_1, 1); }
- // CHECK-LABEL: @xvslti_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvslti_w(v8i32 _1) { return __lasx_xvslti_w(_1, 1); }
- // CHECK-LABEL: @xvslti_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvslti_d(v4i64 _1) { return __lasx_xvslti_d(_1, 1); }
- // CHECK-LABEL: @xvslt_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __lasx_xvslt_bu(_1, _2); }
- // CHECK-LABEL: @xvslt_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __lasx_xvslt_hu(_1, _2); }
- // CHECK-LABEL: @xvslt_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __lasx_xvslt_wu(_1, _2); }
- // CHECK-LABEL: @xvslt_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __lasx_xvslt_du(_1, _2); }
- // CHECK-LABEL: @xvslti_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvslti_bu(v32u8 _1) { return __lasx_xvslti_bu(_1, 1); }
- // CHECK-LABEL: @xvslti_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvslti_hu(v16u16 _1) { return __lasx_xvslti_hu(_1, 1); }
- // CHECK-LABEL: @xvslti_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvslti_wu(v8u32 _1) { return __lasx_xvslti_wu(_1, 1); }
- // CHECK-LABEL: @xvslti_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvslti_du(v4u64 _1) { return __lasx_xvslti_du(_1, 1); }
- // CHECK-LABEL: @xvsle_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __lasx_xvsle_b(_1, _2); }
- // CHECK-LABEL: @xvsle_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __lasx_xvsle_h(_1, _2); }
- // CHECK-LABEL: @xvsle_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __lasx_xvsle_w(_1, _2); }
- // CHECK-LABEL: @xvsle_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __lasx_xvsle_d(_1, _2); }
- // CHECK-LABEL: @xvslei_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvslei_b(v32i8 _1) { return __lasx_xvslei_b(_1, 1); }
- // CHECK-LABEL: @xvslei_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvslei_h(v16i16 _1) { return __lasx_xvslei_h(_1, 1); }
- // CHECK-LABEL: @xvslei_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvslei_w(v8i32 _1) { return __lasx_xvslei_w(_1, 1); }
- // CHECK-LABEL: @xvslei_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvslei_d(v4i64 _1) { return __lasx_xvslei_d(_1, 1); }
- // CHECK-LABEL: @xvsle_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsle_bu(_1, _2); }
- // CHECK-LABEL: @xvsle_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsle_hu(_1, _2); }
- // CHECK-LABEL: @xvsle_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsle_wu(_1, _2); }
- // CHECK-LABEL: @xvsle_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __lasx_xvsle_du(_1, _2); }
- // CHECK-LABEL: @xvslei_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvslei_bu(v32u8 _1) { return __lasx_xvslei_bu(_1, 1); }
- // CHECK-LABEL: @xvslei_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvslei_hu(v16u16 _1) { return __lasx_xvslei_hu(_1, 1); }
- // CHECK-LABEL: @xvslei_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvslei_wu(v8u32 _1) { return __lasx_xvslei_wu(_1, 1); }
- // CHECK-LABEL: @xvslei_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvslei_du(v4u64 _1) { return __lasx_xvslei_du(_1, 1); }
- // CHECK-LABEL: @xvsat_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvsat_b(v32i8 _1) { return __lasx_xvsat_b(_1, 1); }
- // CHECK-LABEL: @xvsat_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvsat_h(v16i16 _1) { return __lasx_xvsat_h(_1, 1); }
- // CHECK-LABEL: @xvsat_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvsat_w(v8i32 _1) { return __lasx_xvsat_w(_1, 1); }
- // CHECK-LABEL: @xvsat_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsat_d(v4i64 _1) { return __lasx_xvsat_d(_1, 1); }
- // CHECK-LABEL: @xvsat_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32u8 xvsat_bu(v32u8 _1) { return __lasx_xvsat_bu(_1, 1); }
- // CHECK-LABEL: @xvsat_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvsat_hu(v16u16 _1) { return __lasx_xvsat_hu(_1, 1); }
- // CHECK-LABEL: @xvsat_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvsat_wu(v8u32 _1) { return __lasx_xvsat_wu(_1, 1); }
- // CHECK-LABEL: @xvsat_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvsat_du(v4u64 _1) { return __lasx_xvsat_du(_1, 1); }
- // CHECK-LABEL: @xvadda_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __lasx_xvadda_b(_1, _2); }
- // CHECK-LABEL: @xvadda_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __lasx_xvadda_h(_1, _2); }
- // CHECK-LABEL: @xvadda_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __lasx_xvadda_w(_1, _2); }
- // CHECK-LABEL: @xvadda_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __lasx_xvadda_d(_1, _2); }
- // CHECK-LABEL: @xvsadd_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvsadd_b(_1, _2); }
- // CHECK-LABEL: @xvsadd_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvsadd_h(_1, _2); }
- // CHECK-LABEL: @xvsadd_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvsadd_w(_1, _2); }
- // CHECK-LABEL: @xvsadd_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvsadd_d(_1, _2); }
- // CHECK-LABEL: @xvsadd_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsadd_bu(_1, _2); }
- // CHECK-LABEL: @xvsadd_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsadd_hu(_1, _2); }
- // CHECK-LABEL: @xvsadd_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsadd_wu(_1, _2); }
- // CHECK-LABEL: @xvsadd_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __lasx_xvsadd_du(_1, _2); }
- // CHECK-LABEL: @xvavg_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __lasx_xvavg_b(_1, _2); }
- // CHECK-LABEL: @xvavg_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __lasx_xvavg_h(_1, _2); }
- // CHECK-LABEL: @xvavg_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __lasx_xvavg_w(_1, _2); }
- // CHECK-LABEL: @xvavg_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __lasx_xvavg_d(_1, _2); }
- // CHECK-LABEL: @xvavg_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavg_bu(_1, _2); }
- // CHECK-LABEL: @xvavg_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavg_hu(_1, _2); }
- // CHECK-LABEL: @xvavg_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavg_wu(_1, _2); }
- // CHECK-LABEL: @xvavg_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __lasx_xvavg_du(_1, _2); }
- // CHECK-LABEL: @xvavgr_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __lasx_xvavgr_b(_1, _2); }
- // CHECK-LABEL: @xvavgr_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __lasx_xvavgr_h(_1, _2); }
- // CHECK-LABEL: @xvavgr_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __lasx_xvavgr_w(_1, _2); }
- // CHECK-LABEL: @xvavgr_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __lasx_xvavgr_d(_1, _2); }
- // CHECK-LABEL: @xvavgr_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavgr_bu(_1, _2); }
- // CHECK-LABEL: @xvavgr_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavgr_hu(_1, _2); }
- // CHECK-LABEL: @xvavgr_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavgr_wu(_1, _2); }
- // CHECK-LABEL: @xvavgr_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __lasx_xvavgr_du(_1, _2); }
- // CHECK-LABEL: @xvssub_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __lasx_xvssub_b(_1, _2); }
- // CHECK-LABEL: @xvssub_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __lasx_xvssub_h(_1, _2); }
- // CHECK-LABEL: @xvssub_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __lasx_xvssub_w(_1, _2); }
- // CHECK-LABEL: @xvssub_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __lasx_xvssub_d(_1, _2); }
- // CHECK-LABEL: @xvssub_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __lasx_xvssub_bu(_1, _2); }
- // CHECK-LABEL: @xvssub_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __lasx_xvssub_hu(_1, _2); }
- // CHECK-LABEL: @xvssub_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __lasx_xvssub_wu(_1, _2); }
- // CHECK-LABEL: @xvssub_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __lasx_xvssub_du(_1, _2); }
- // CHECK-LABEL: @xvabsd_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __lasx_xvabsd_b(_1, _2); }
- // CHECK-LABEL: @xvabsd_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __lasx_xvabsd_h(_1, _2); }
- // CHECK-LABEL: @xvabsd_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __lasx_xvabsd_w(_1, _2); }
- // CHECK-LABEL: @xvabsd_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __lasx_xvabsd_d(_1, _2); }
- // CHECK-LABEL: @xvabsd_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvabsd_bu(_1, _2); }
- // CHECK-LABEL: @xvabsd_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvabsd_hu(_1, _2); }
- // CHECK-LABEL: @xvabsd_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvabsd_wu(_1, _2); }
- // CHECK-LABEL: @xvabsd_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __lasx_xvabsd_du(_1, _2); }
- // CHECK-LABEL: @xvmul_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __lasx_xvmul_b(_1, _2); }
- // CHECK-LABEL: @xvmul_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __lasx_xvmul_h(_1, _2); }
- // CHECK-LABEL: @xvmul_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __lasx_xvmul_w(_1, _2); }
- // CHECK-LABEL: @xvmul_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __lasx_xvmul_d(_1, _2); }
- // CHECK-LABEL: @xvmadd_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
-+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmadd_b(_1, _2, _3); }
- // CHECK-LABEL: @xvmadd_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
-+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmadd_h(_1, _2, _3); }
- // CHECK-LABEL: @xvmadd_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]])
-+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmadd_w(_1, _2, _3); }
- // CHECK-LABEL: @xvmadd_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmadd_d(_1, _2, _3); }
- // CHECK-LABEL: @xvmsub_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
-+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmsub_b(_1, _2, _3); }
- // CHECK-LABEL: @xvmsub_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
-+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmsub_h(_1, _2, _3); }
- // CHECK-LABEL: @xvmsub_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]])
-+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmsub_w(_1, _2, _3); }
- // CHECK-LABEL: @xvmsub_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmsub_d(_1, _2, _3); }
- // CHECK-LABEL: @xvdiv_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __lasx_xvdiv_b(_1, _2); }
- // CHECK-LABEL: @xvdiv_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __lasx_xvdiv_h(_1, _2); }
- // CHECK-LABEL: @xvdiv_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __lasx_xvdiv_w(_1, _2); }
- // CHECK-LABEL: @xvdiv_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1:%.*]], <4 x
i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __lasx_xvdiv_d(_1, _2); } - // CHECK-LABEL: @xvdiv_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __lasx_xvdiv_bu(_1, _2); } - // CHECK-LABEL: @xvdiv_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __lasx_xvdiv_hu(_1, _2); } - // CHECK-LABEL: @xvdiv_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __lasx_xvdiv_wu(_1, _2); } - // CHECK-LABEL: @xvdiv_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __lasx_xvdiv_du(_1, _2); } - // CHECK-LABEL: @xvhaddw_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhaddw_h_b(_1, _2); } - // CHECK-LABEL: @xvhaddw_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhaddw_w_h(_1, _2); } - // CHECK-LABEL: @xvhaddw_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhaddw_d_w(_1, _2); } - // CHECK-LABEL: @xvhaddw_hu_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhaddw_hu_bu(_1, _2); } - // CHECK-LABEL: @xvhaddw_wu_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 
xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhaddw_wu_hu(_1, _2); } - // CHECK-LABEL: @xvhaddw_du_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhaddw_du_wu(_1, _2); } - // CHECK-LABEL: @xvhsubw_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhsubw_h_b(_1, _2); } - // CHECK-LABEL: @xvhsubw_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhsubw_w_h(_1, _2); } - // CHECK-LABEL: @xvhsubw_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhsubw_d_w(_1, _2); } - // CHECK-LABEL: @xvhsubw_hu_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 
x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhsubw_hu_bu(_1, _2); } - // CHECK-LABEL: @xvhsubw_wu_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhsubw_wu_hu(_1, _2); } - // CHECK-LABEL: @xvhsubw_du_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhsubw_du_wu(_1, _2); } - // CHECK-LABEL: @xvmod_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __lasx_xvmod_b(_1, _2); } - // CHECK-LABEL: @xvmod_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __lasx_xvmod_h(_1, _2); } - // CHECK-LABEL: @xvmod_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __lasx_xvmod_w(_1, _2); } - // CHECK-LABEL: @xvmod_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __lasx_xvmod_d(_1, _2); } - // CHECK-LABEL: @xvmod_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmod_bu(_1, _2); } - // CHECK-LABEL: @xvmod_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmod_hu(_1, _2); } - // CHECK-LABEL: @xvmod_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmod_wu(_1, _2); } - // CHECK-LABEL: @xvmod_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] 
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __lasx_xvmod_du(_1, _2); } - // CHECK-LABEL: @xvrepl128vei_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvrepl128vei_b(v32i8 _1) { return __lasx_xvrepl128vei_b(_1, 1); } - // CHECK-LABEL: @xvrepl128vei_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvrepl128vei_h(v16i16 _1) { return __lasx_xvrepl128vei_h(_1, 1); } - // CHECK-LABEL: @xvrepl128vei_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvrepl128vei_w(v8i32 _1) { return __lasx_xvrepl128vei_w(_1, 1); } - // CHECK-LABEL: @xvrepl128vei_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvrepl128vei_d(v4i64 _1) { return __lasx_xvrepl128vei_d(_1, 1); } - // CHECK-LABEL: @xvpickev_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 
xvpickev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickev_b(_1, _2); } - // CHECK-LABEL: @xvpickev_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickev_h(_1, _2); } - // CHECK-LABEL: @xvpickev_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickev_w(_1, _2); } - // CHECK-LABEL: @xvpickev_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickev_d(_1, _2); } - // CHECK-LABEL: @xvpickod_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickod_b(_1, _2); } - // CHECK-LABEL: @xvpickod_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// 
CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickod_h(_1, _2); } - // CHECK-LABEL: @xvpickod_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickod_w(_1, _2); } - // CHECK-LABEL: @xvpickod_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickod_d(_1, _2); } - // CHECK-LABEL: @xvilvh_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvh_b(_1, _2); } - // CHECK-LABEL: @xvilvh_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvh_h(_1, _2); } - // CHECK-LABEL: @xvilvh_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail 
call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvh_w(_1, _2); } - // CHECK-LABEL: @xvilvh_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvh_d(_1, _2); } - // CHECK-LABEL: @xvilvl_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvl_b(_1, _2); } - // CHECK-LABEL: @xvilvl_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvl_h(_1, _2); } - // CHECK-LABEL: @xvilvl_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvl_w(_1, _2); } - // CHECK-LABEL: @xvilvl_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 
32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvl_d(_1, _2); } - // CHECK-LABEL: @xvpackev_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackev_b(_1, _2); } - // CHECK-LABEL: @xvpackev_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackev_h(_1, _2); } - // CHECK-LABEL: @xvpackev_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackev_w(_1, _2); } - // CHECK-LABEL: @xvpackev_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackev_d(_1, _2); } - // CHECK-LABEL: @xvpackod_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackod_b(_1, _2); } - // CHECK-LABEL: @xvpackod_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackod_h(_1, _2); } - // CHECK-LABEL: @xvpackod_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackod_w(_1, _2); } - // CHECK-LABEL: @xvpackod_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackod_d(_1, _2); } - // CHECK-LABEL: @xvshuf_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvshuf_b(_1, _2, _3); } - // CHECK-LABEL: 
@xvshuf_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvshuf_h(_1, _2, _3); } - // CHECK-LABEL: @xvshuf_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]]) -+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvshuf_w(_1, _2, _3); } - // CHECK-LABEL: @xvshuf_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvshuf_d(_1, _2, _3); } - // CHECK-LABEL: @xvand_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __lasx_xvand_v(_1, _2); } - // CHECK-LABEL: @xvandi_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load 
<32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvandi_b(v32u8 _1) { return __lasx_xvandi_b(_1, 1); } - // CHECK-LABEL: @xvor_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __lasx_xvor_v(_1, _2); } - // CHECK-LABEL: @xvori_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvori_b(v32u8 _1) { return __lasx_xvori_b(_1, 1); } - // CHECK-LABEL: @xvnor_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __lasx_xvnor_v(_1, _2); } - // CHECK-LABEL: @xvnori_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvnori_b(v32u8 _1) { return __lasx_xvnori_b(_1, 1); } - // CHECK-LABEL: @xvxor_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __lasx_xvxor_v(_1, _2); } - // CHECK-LABEL: @xvxori_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvxori_b(v32u8 _1) { return __lasx_xvxori_b(_1, 1); } - // CHECK-LABEL: @xvbitsel_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __lasx_xvbitsel_v(_1, _2, _3); } - // CHECK-LABEL: @xvbitseli_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitseli_b(_1, _2, 1); } - // CHECK-LABEL: @xvshuf4i_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvshuf4i_b(v32i8 _1) { return __lasx_xvshuf4i_b(_1, 1); } - // CHECK-LABEL: @xvshuf4i_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvshuf4i_h(v16i16 _1) { return __lasx_xvshuf4i_h(_1, 1); } - // CHECK-LABEL: 
@xvshuf4i_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvshuf4i_w(v8i32 _1) { return __lasx_xvshuf4i_w(_1, 1); } - // CHECK-LABEL: @xvreplgr2vr_b( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvreplgr2vr_b(int _1) { return __lasx_xvreplgr2vr_b(_1); } - // CHECK-LABEL: @xvreplgr2vr_h( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvreplgr2vr_h(int _1) { return __lasx_xvreplgr2vr_h(_1); } - // CHECK-LABEL: @xvreplgr2vr_w( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvreplgr2vr_w(int _1) { return __lasx_xvreplgr2vr_w(_1); } - // CHECK-LABEL: @xvreplgr2vr_d( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64 - // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvreplgr2vr_d(int _1) { return __lasx_xvreplgr2vr_d(_1); } - // CHECK-LABEL: @xvpcnt_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_112]]) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvpcnt_b(v32i8 _1) { return __lasx_xvpcnt_b(_1); } - // CHECK-LABEL: @xvpcnt_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_112]]) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvpcnt_h(v16i16 _1) { return __lasx_xvpcnt_h(_1); } - // CHECK-LABEL: @xvpcnt_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> 
[[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_112]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvpcnt_w(v8i32 _1) { return __lasx_xvpcnt_w(_1); } - // CHECK-LABEL: @xvpcnt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvpcnt_d(v4i64 _1) { return __lasx_xvpcnt_d(_1); } - // CHECK-LABEL: @xvclo_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_112]]) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvclo_b(v32i8 _1) { return __lasx_xvclo_b(_1); } - // CHECK-LABEL: @xvclo_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_112]]) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvclo_h(v16i16 _1) { return __lasx_xvclo_h(_1); } - // CHECK-LABEL: @xvclo_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_112]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvclo_w(v8i32 _1) { return __lasx_xvclo_w(_1); } - // CHECK-LABEL: @xvclo_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvclo_d(v4i64 _1) { return __lasx_xvclo_d(_1); } - // CHECK-LABEL: @xvclz_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] 
= tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_112]]) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvclz_b(v32i8 _1) { return __lasx_xvclz_b(_1); } - // CHECK-LABEL: @xvclz_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_112]]) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvclz_h(v16i16 _1) { return __lasx_xvclz_h(_1); } - // CHECK-LABEL: @xvclz_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_112]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvclz_w(v8i32 _1) { return __lasx_xvclz_w(_1); } - // CHECK-LABEL: @xvclz_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvclz_d(v4i64 _1) { return __lasx_xvclz_d(_1); } - // CHECK-LABEL: @xvfadd_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __lasx_xvfadd_s(_1, _2); } - // CHECK-LABEL: @xvfadd_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __lasx_xvfadd_d(_1, _2); } - // CHECK-LABEL: @xvfsub_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> 
@llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __lasx_xvfsub_s(_1, _2); } - // CHECK-LABEL: @xvfsub_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __lasx_xvfsub_d(_1, _2); } - // CHECK-LABEL: @xvfmul_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmul_s(_1, _2); } - // CHECK-LABEL: @xvfmul_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmul_d(_1, _2); } - // CHECK-LABEL: @xvfdiv_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { 
return __lasx_xvfdiv_s(_1, _2); } - // CHECK-LABEL: @xvfdiv_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __lasx_xvfdiv_d(_1, _2); } - // CHECK-LABEL: @xvfcvt_h_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcvt_h_s(_1, _2); } - // CHECK-LABEL: @xvfcvt_s_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcvt_s_d(_1, _2); } - // CHECK-LABEL: @xvfmin_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmin_s(_1, _2); } - // CHECK-LABEL: @xvfmin_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// 
CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmin_d(_1, _2); } - // CHECK-LABEL: @xvfmina_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmina_s(_1, _2); } - // CHECK-LABEL: @xvfmina_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmina_d(_1, _2); } - // CHECK-LABEL: @xvfmax_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmax_s(_1, _2); } - // CHECK-LABEL: @xvfmax_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmax_d(_1, _2); } - // CHECK-LABEL: @xvfmaxa_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 
32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmaxa_s(_1, _2); } - // CHECK-LABEL: @xvfmaxa_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmaxa_d(_1, _2); } - // CHECK-LABEL: @xvfclass_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfclass_s(v8f32 _1) { return __lasx_xvfclass_s(_1); } - // CHECK-LABEL: @xvfclass_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfclass_d(v4f64 _1) { return __lasx_xvfclass_d(_1); } - // CHECK-LABEL: @xvfsqrt_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfsqrt_s(v8f32 _1) { return __lasx_xvfsqrt_s(_1); } - // CHECK-LABEL: @xvfsqrt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfsqrt_d(v4f64 _1) { return __lasx_xvfsqrt_d(_1); } - // CHECK-LABEL: @xvfrecip_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 
x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfrecip_s(v8f32 _1) { return __lasx_xvfrecip_s(_1); } - // CHECK-LABEL: @xvfrecip_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfrecip_d(v4f64 _1) { return __lasx_xvfrecip_d(_1); } - // CHECK-LABEL: @xvfrint_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfrint_s(v8f32 _1) { return __lasx_xvfrint_s(_1); } - // CHECK-LABEL: @xvfrint_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfrint_d(v4f64 _1) { return __lasx_xvfrint_d(_1); } - // CHECK-LABEL: @xvfrsqrt_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfrsqrt_s(v8f32 _1) { return __lasx_xvfrsqrt_s(_1); } - // CHECK-LABEL: @xvfrsqrt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfrsqrt_d(v4f64 _1) { return __lasx_xvfrsqrt_d(_1); } - // CHECK-LABEL: @xvflogb_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvflogb_s(v8f32 _1) { return __lasx_xvflogb_s(_1); } - // CHECK-LABEL: @xvflogb_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvflogb_d(v4f64 _1) { return __lasx_xvflogb_d(_1); } - // CHECK-LABEL: @xvfcvth_s_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_112]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfcvth_s_h(v16i16 _1) { return __lasx_xvfcvth_s_h(_1); } - // CHECK-LABEL: @xvfcvth_d_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfcvth_d_s(v8f32 _1) { return __lasx_xvfcvth_d_s(_1); } - // CHECK-LABEL: @xvfcvtl_s_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_112]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfcvtl_s_h(v16i16 _1) { return __lasx_xvfcvtl_s_h(_1); } - // CHECK-LABEL: @xvfcvtl_d_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfcvtl_d_s(v8f32 _1) { return __lasx_xvfcvtl_d_s(_1); } - // CHECK-LABEL: @xvftint_w_s( 
- // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvftint_w_s(v8f32 _1) { return __lasx_xvftint_w_s(_1); } - // CHECK-LABEL: @xvftint_l_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvftint_l_d(v4f64 _1) { return __lasx_xvftint_l_d(_1); } - // CHECK-LABEL: @xvftint_wu_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvftint_wu_s(v8f32 _1) { return __lasx_xvftint_wu_s(_1); } - // CHECK-LABEL: @xvftint_lu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvftint_lu_d(v4f64 _1) { return __lasx_xvftint_lu_d(_1); } - // CHECK-LABEL: @xvftintrz_w_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvftintrz_w_s(v8f32 _1) { return __lasx_xvftintrz_w_s(_1); } - // CHECK-LABEL: @xvftintrz_l_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvftintrz_l_d(v4f64 _1) { return __lasx_xvftintrz_l_d(_1); } - 
// CHECK-LABEL: @xvftintrz_wu_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvftintrz_wu_s(v8f32 _1) { return __lasx_xvftintrz_wu_s(_1); } - // CHECK-LABEL: @xvftintrz_lu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvftintrz_lu_d(v4f64 _1) { return __lasx_xvftintrz_lu_d(_1); } - // CHECK-LABEL: @xvffint_s_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_112]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvffint_s_w(v8i32 _1) { return __lasx_xvffint_s_w(_1); } - // CHECK-LABEL: @xvffint_d_l( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvffint_d_l(v4i64 _1) { return __lasx_xvffint_d_l(_1); } - // CHECK-LABEL: @xvffint_s_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_112]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvffint_s_wu(v8u32 _1) { return __lasx_xvffint_s_wu(_1); } - // CHECK-LABEL: @xvffint_d_lu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 
xvffint_d_lu(v4u64 _1) { return __lasx_xvffint_d_lu(_1); } - // CHECK-LABEL: @xvreplve_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1:%.*]], i32 [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_112]], i32 [[_2:%.*]]) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvreplve_b(v32i8 _1, int _2) { return __lasx_xvreplve_b(_1, _2); } - // CHECK-LABEL: @xvreplve_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1:%.*]], i32 [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_112]], i32 [[_2:%.*]]) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvreplve_h(v16i16 _1, int _2) { return __lasx_xvreplve_h(_1, _2); } - // CHECK-LABEL: @xvreplve_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1:%.*]], i32 [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_112]], i32 [[_2:%.*]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvreplve_w(v8i32 _1, int _2) { return __lasx_xvreplve_w(_1, _2); } - // CHECK-LABEL: @xvreplve_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1:%.*]], i32 [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2:%.*]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvreplve_d(v4i64 _1, int _2) { return __lasx_xvreplve_d(_1, _2); } - // CHECK-LABEL: @xvpermi_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __lasx_xvpermi_w(_1, _2, 1); } - // CHECK-LABEL: @xvandn_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: 
[[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __lasx_xvandn_v(_1, _2); } - // CHECK-LABEL: @xvneg_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_112]]) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvneg_b(v32i8 _1) { return __lasx_xvneg_b(_1); } - // CHECK-LABEL: @xvneg_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_112]]) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvneg_h(v16i16 _1) { return __lasx_xvneg_h(_1); } - // CHECK-LABEL: @xvneg_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_112]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvneg_w(v8i32 _1) { return __lasx_xvneg_w(_1); } - // CHECK-LABEL: @xvneg_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvneg_d(v4i64 _1) { return __lasx_xvneg_d(_1); } - // CHECK-LABEL: @xvmuh_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __lasx_xvmuh_b(_1, _2); } - // CHECK-LABEL: @xvmuh_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __lasx_xvmuh_h(_1, _2); } - // CHECK-LABEL: @xvmuh_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __lasx_xvmuh_w(_1, _2); } - // CHECK-LABEL: @xvmuh_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __lasx_xvmuh_d(_1, _2); } - // CHECK-LABEL: @xvmuh_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmuh_bu(_1, _2); } - // CHECK-LABEL: @xvmuh_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmuh_hu(_1, _2); } - // 
CHECK-LABEL: @xvmuh_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmuh_wu(_1, _2); } - // CHECK-LABEL: @xvmuh_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __lasx_xvmuh_du(_1, _2); } - // CHECK-LABEL: @xvsllwil_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsllwil_h_b(v32i8 _1) { return __lasx_xvsllwil_h_b(_1, 1); } - // CHECK-LABEL: @xvsllwil_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsllwil_w_h(v16i16 _1) { return __lasx_xvsllwil_w_h(_1, 1); } - // CHECK-LABEL: @xvsllwil_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsllwil_d_w(v8i32 _1) { return __lasx_xvsllwil_d_w(_1, 1); } - // CHECK-LABEL: @xvsllwil_hu_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvsllwil_hu_bu(v32u8 _1) { return __lasx_xvsllwil_hu_bu(_1, 1); } - // CHECK-LABEL: @xvsllwil_wu_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvsllwil_wu_hu(v16u16 _1) { return __lasx_xvsllwil_wu_hu(_1, 1); } - // CHECK-LABEL: @xvsllwil_du_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvsllwil_du_wu(v8u32 _1) { return __lasx_xvsllwil_du_wu(_1, 1); } - // CHECK-LABEL: @xvsran_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsran_b_h(_1, _2); } - // CHECK-LABEL: @xvsran_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsran_h_w(_1, _2); } - // CHECK-LABEL: @xvsran_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x 
i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsran_w_d(_1, _2); } - // CHECK-LABEL: @xvssran_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssran_b_h(_1, _2); } - // CHECK-LABEL: @xvssran_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssran_h_w(_1, _2); } - // CHECK-LABEL: @xvssran_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssran_w_d(_1, _2); } - // CHECK-LABEL: @xvssran_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssran_bu_h(_1, _2); } - // CHECK-LABEL: @xvssran_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssran_hu_w(_1, _2); } - // CHECK-LABEL: @xvssran_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssran_wu_d(_1, _2); } - // CHECK-LABEL: @xvsrarn_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrarn_b_h(_1, _2); } - // CHECK-LABEL: @xvsrarn_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrarn_h_w(_1, _2); } - // CHECK-LABEL: @xvsrarn_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrarn_w_d(_1, _2); } - // CHECK-LABEL: @xvssrarn_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] 
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrarn_b_h(_1, _2); } - // CHECK-LABEL: @xvssrarn_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrarn_h_w(_1, _2); } - // CHECK-LABEL: @xvssrarn_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrarn_w_d(_1, _2); } - // CHECK-LABEL: @xvssrarn_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrarn_bu_h(_1, _2); } - // CHECK-LABEL: @xvssrarn_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrarn_hu_w(_1, _2); } - // CHECK-LABEL: @xvssrarn_wu_d( - // 
CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrarn_wu_d(_1, _2); } - // CHECK-LABEL: @xvsrln_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrln_b_h(_1, _2); } - // CHECK-LABEL: @xvsrln_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrln_h_w(_1, _2); } - // CHECK-LABEL: @xvsrln_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrln_w_d(_1, _2); } - // CHECK-LABEL: @xvssrln_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: 
ret void - // - v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrln_bu_h(_1, _2); } - // CHECK-LABEL: @xvssrln_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrln_hu_w(_1, _2); } - // CHECK-LABEL: @xvssrln_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrln_wu_d(_1, _2); } - // CHECK-LABEL: @xvsrlrn_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrn_b_h(_1, _2); } - // CHECK-LABEL: @xvsrlrn_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrn_h_w(_1, _2); } - // CHECK-LABEL: @xvsrlrn_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrn_w_d(_1, _2); } - // CHECK-LABEL: @xvssrlrn_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrlrn_bu_h(_1, _2); } - // CHECK-LABEL: @xvssrlrn_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrlrn_hu_w(_1, _2); } - // CHECK-LABEL: @xvssrlrn_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrlrn_wu_d(_1, _2); } - // CHECK-LABEL: @xvfrstpi_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __lasx_xvfrstpi_b(_1, _2, 1); } - // CHECK-LABEL: @xvfrstpi_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __lasx_xvfrstpi_h(_1, _2, 1); } - // CHECK-LABEL: @xvfrstp_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvfrstp_b(_1, _2, _3); } - // CHECK-LABEL: @xvfrstp_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvfrstp_h(_1, _2, _3); } - // CHECK-LABEL: @xvshuf4i_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __lasx_xvshuf4i_d(_1, _2, 1); } - // CHECK-LABEL: @xvbsrl_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvbsrl_v(v32i8 _1) { return 
__lasx_xvbsrl_v(_1, 1); } - // CHECK-LABEL: @xvbsll_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvbsll_v(v32i8 _1) { return __lasx_xvbsll_v(_1, 1); } - // CHECK-LABEL: @xvextrins_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __lasx_xvextrins_b(_1, _2, 1); } - // CHECK-LABEL: @xvextrins_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __lasx_xvextrins_h(_1, _2, 1); } - // CHECK-LABEL: @xvextrins_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __lasx_xvextrins_w(_1, _2, 1); } - // CHECK-LABEL: @xvextrins_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvextrins_d(v4i64 
_1, v4i64 _2) { return __lasx_xvextrins_d(_1, _2, 1); } - // CHECK-LABEL: @xvmskltz_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_112]]) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvmskltz_b(v32i8 _1) { return __lasx_xvmskltz_b(_1); } - // CHECK-LABEL: @xvmskltz_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_112]]) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmskltz_h(v16i16 _1) { return __lasx_xvmskltz_h(_1); } - // CHECK-LABEL: @xvmskltz_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_112]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmskltz_w(v8i32 _1) { return __lasx_xvmskltz_w(_1); } - // CHECK-LABEL: @xvmskltz_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmskltz_d(v4i64 _1) { return __lasx_xvmskltz_d(_1); } - // CHECK-LABEL: @xvsigncov_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __lasx_xvsigncov_b(_1, _2); } - // CHECK-LABEL: @xvsigncov_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __lasx_xvsigncov_h(_1, _2); } - // CHECK-LABEL: @xvsigncov_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __lasx_xvsigncov_w(_1, _2); } - // CHECK-LABEL: @xvsigncov_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __lasx_xvsigncov_d(_1, _2); } - // CHECK-LABEL: @xvfmadd_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmadd_s(_1, _2, _3); } - // CHECK-LABEL: @xvfmadd_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return 
__lasx_xvfmadd_d(_1, _2, _3); } - // CHECK-LABEL: @xvfmsub_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmsub_s(_1, _2, _3); } - // CHECK-LABEL: @xvfmsub_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmsub_d(_1, _2, _3); } - // CHECK-LABEL: @xvfnmadd_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmadd_s(_1, _2, _3); } - // CHECK-LABEL: @xvfnmadd_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 
xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmadd_d(_1, _2, _3); } - // CHECK-LABEL: @xvfnmsub_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmsub_s(_1, _2, _3); } - // CHECK-LABEL: @xvfnmsub_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmsub_d(_1, _2, _3); } - // CHECK-LABEL: @xvftintrne_w_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvftintrne_w_s(v8f32 _1) { return __lasx_xvftintrne_w_s(_1); } - // CHECK-LABEL: @xvftintrne_l_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvftintrne_l_d(v4f64 _1) { return __lasx_xvftintrne_l_d(_1); } - // CHECK-LABEL: @xvftintrp_w_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvftintrp_w_s(v8f32 _1) { return __lasx_xvftintrp_w_s(_1); } - // CHECK-LABEL: @xvftintrp_l_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvftintrp_l_d(v4f64 _1) { return __lasx_xvftintrp_l_d(_1); } - // CHECK-LABEL: @xvftintrm_w_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvftintrm_w_s(v8f32 _1) { return __lasx_xvftintrm_w_s(_1); } - // CHECK-LABEL: @xvftintrm_l_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvftintrm_l_d(v4f64 _1) { return __lasx_xvftintrm_l_d(_1); } - // CHECK-LABEL: @xvftint_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftint_w_d(_1, _2); } - // CHECK-LABEL: @xvffint_s_l( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __lasx_xvffint_s_l(_1, _2); } - // CHECK-LABEL: @xvftintrz_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrz_w_d(_1, _2); } - // CHECK-LABEL: @xvftintrp_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrp_w_d(_1, _2); } - // CHECK-LABEL: @xvftintrm_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrm_w_d(_1, _2); } - // CHECK-LABEL: @xvftintrne_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrne_w_d(_1, _2); } - // CHECK-LABEL: @xvftinth_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvftinth_l_s(v8f32 _1) { return __lasx_xvftinth_l_s(_1); } - // CHECK-LABEL: @xvftintl_l_s( - 
// CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvftintl_l_s(v8f32 _1) { return __lasx_xvftintl_l_s(_1); } - // CHECK-LABEL: @xvffinth_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_112]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvffinth_d_w(v8i32 _1) { return __lasx_xvffinth_d_w(_1); } - // CHECK-LABEL: @xvffintl_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_112]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvffintl_d_w(v8i32 _1) { return __lasx_xvffintl_d_w(_1); } - // CHECK-LABEL: @xvftintrzh_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvftintrzh_l_s(v8f32 _1) { return __lasx_xvftintrzh_l_s(_1); } - // CHECK-LABEL: @xvftintrzl_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvftintrzl_l_s(v8f32 _1) { return __lasx_xvftintrzl_l_s(_1); } - // CHECK-LABEL: @xvftintrph_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvftintrph_l_s(v8f32 _1) { 
return __lasx_xvftintrph_l_s(_1); } - // CHECK-LABEL: @xvftintrpl_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvftintrpl_l_s(v8f32 _1) { return __lasx_xvftintrpl_l_s(_1); } - // CHECK-LABEL: @xvftintrmh_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvftintrmh_l_s(v8f32 _1) { return __lasx_xvftintrmh_l_s(_1); } - // CHECK-LABEL: @xvftintrml_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvftintrml_l_s(v8f32 _1) { return __lasx_xvftintrml_l_s(_1); } - // CHECK-LABEL: @xvftintrneh_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvftintrneh_l_s(v8f32 _1) { return __lasx_xvftintrneh_l_s(_1); } - // CHECK-LABEL: @xvftintrnel_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvftintrnel_l_s(v8f32 _1) { return __lasx_xvftintrnel_l_s(_1); } - // CHECK-LABEL: @xvfrintrne_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> --// CHECK-NEXT: ret <8 x i32> [[TMP1]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> 
[[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfrintrne_s(v8f32 _1) { return __lasx_xvfrintrne_s(_1); } - // CHECK-LABEL: @xvfrintrne_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> --// CHECK-NEXT: ret <4 x i64> [[TMP1]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfrintrne_d(v4f64 _1) { return __lasx_xvfrintrne_d(_1); } - // CHECK-LABEL: @xvfrintrz_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> --// CHECK-NEXT: ret <8 x i32> [[TMP1]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfrintrz_s(v8f32 _1) { return __lasx_xvfrintrz_s(_1); } - // CHECK-LABEL: @xvfrintrz_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> --// CHECK-NEXT: ret <4 x i64> [[TMP1]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfrintrz_d(v4f64 _1) { return __lasx_xvfrintrz_d(_1); } - // CHECK-LABEL: @xvfrintrp_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> --// CHECK-NEXT: ret <8 x i32> [[TMP1]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfrintrp_s(v8f32 _1) { return __lasx_xvfrintrp_s(_1); } - // CHECK-LABEL: @xvfrintrp_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> --// CHECK-NEXT: ret <4 x i64> [[TMP1]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfrintrp_d(v4f64 _1) { 
return __lasx_xvfrintrp_d(_1); }
- // CHECK-LABEL: @xvfrintrm_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1:%.*]])
--// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32>
--// CHECK-NEXT: ret <8 x i32> [[TMP1]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvfrintrm_s(v8f32 _1) { return __lasx_xvfrintrm_s(_1); }
- // CHECK-LABEL: @xvfrintrm_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1:%.*]])
--// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64>
--// CHECK-NEXT: ret <4 x i64> [[TMP1]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]])
-+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvfrintrm_d(v4f64 _1) { return __lasx_xvfrintrm_d(_1); }
- // CHECK-LABEL: @xvld(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvld(void * _1) { return __lasx_xvld(_1, 1); }
- // CHECK-LABEL: @xvst(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1)
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1)
- // CHECK-NEXT: ret void
- //
- void xvst(v32i8 _1, void * _2) { return __lasx_xvst(_1, _2, 1); }
- // CHECK-LABEL: @xvstelm_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1)
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1, i32 1)
- // CHECK-NEXT: ret void
- //
- void xvstelm_b(v32i8 _1, void * _2) { return __lasx_xvstelm_b(_1, _2, 1, 1); }
- // CHECK-LABEL: @xvstelm_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1)
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2:%.*]], i32 2, i32 1)
- // CHECK-NEXT: ret void
- //
- void xvstelm_h(v16i16 _1, void * _2) { return __lasx_xvstelm_h(_1, _2, 2, 1); }
- // CHECK-LABEL: @xvstelm_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1)
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2:%.*]], i32 4, i32 1)
- // CHECK-NEXT: ret void
- //
- void xvstelm_w(v8i32 _1, void * _2) { return __lasx_xvstelm_w(_1, _2, 4, 1); }
- // CHECK-LABEL: @xvstelm_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1)
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2:%.*]], i32 8, i32 1)
- // CHECK-NEXT: ret void
- //
- void xvstelm_d(v4i64 _1, void * _2) { return __lasx_xvstelm_d(_1, _2, 8, 1); }
- // CHECK-LABEL: @xvinsve0_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __lasx_xvinsve0_w(_1, _2, 1); }
- // CHECK-LABEL: @xvinsve0_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __lasx_xvinsve0_d(_1, _2, 1); }
- // CHECK-LABEL: @xvpickve_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvpickve_w(v8i32 _1) { return __lasx_xvpickve_w(_1, 1); }
- // CHECK-LABEL: @xvpickve_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvpickve_d(v4i64 _1) { return __lasx_xvpickve_d(_1, 1); }
- // CHECK-LABEL: @xvssrlrn_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrn_b_h(_1, _2); }
- // CHECK-LABEL: @xvssrlrn_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrn_h_w(_1, _2); }
- // CHECK-LABEL: @xvssrlrn_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrn_w_d(_1, _2); }
- // CHECK-LABEL: @xvssrln_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrln_b_h(_1, _2); }
- // CHECK-LABEL: @xvssrln_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrln_h_w(_1, _2); }
- // CHECK-LABEL: @xvssrln_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrln_w_d(_1, _2); }
- // CHECK-LABEL: @xvorn_v(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __lasx_xvorn_v(_1, _2); }
- // CHECK-LABEL: @xvldi(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvldi() { return __lasx_xvldi(1); }
- // CHECK-LABEL: @xvldx(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1), !noalias [[META5:![0-9]+]]
-+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvldx(void * _1) { return __lasx_xvldx(_1, 1); }
- // CHECK-LABEL: @xvstx(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1)
-+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_112]], ptr [[_2:%.*]], i64 1)
- // CHECK-NEXT: ret void
- //
- void xvstx(v32i8 _1, void * _2) { return __lasx_xvstx(_1, _2, 1); }
- // CHECK-LABEL: @xvextl_qu_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvextl_qu_du(v4u64 _1) { return __lasx_xvextl_qu_du(_1); }
- // CHECK-LABEL: @xvinsgr2vr_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1:%.*]], i32 1, i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvinsgr2vr_w(v8i32 _1) { return __lasx_xvinsgr2vr_w(_1, 1, 1); }
- // CHECK-LABEL: @xvinsgr2vr_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1:%.*]], i64 1, i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvinsgr2vr_d(v4i64 _1) { return __lasx_xvinsgr2vr_d(_1, 1, 1); }
- // CHECK-LABEL: @xvreplve0_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_112]])
-+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvreplve0_b(v32i8 _1) { return __lasx_xvreplve0_b(_1); }
- // CHECK-LABEL: @xvreplve0_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_112]])
-+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvreplve0_h(v16i16 _1) { return __lasx_xvreplve0_h(_1); }
- // CHECK-LABEL: @xvreplve0_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_112]])
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvreplve0_w(v8i32 _1) { return __lasx_xvreplve0_w(_1); }
- // CHECK-LABEL: @xvreplve0_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvreplve0_d(v4i64 _1) { return __lasx_xvreplve0_d(_1); }
- // CHECK-LABEL: @xvreplve0_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_112]])
-+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvreplve0_q(v32i8 _1) { return __lasx_xvreplve0_q(_1); }
- // CHECK-LABEL: @vext2xv_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_112]])
-+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 vext2xv_h_b(v32i8 _1) { return __lasx_vext2xv_h_b(_1); }
- // CHECK-LABEL: @vext2xv_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_112]])
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 vext2xv_w_h(v16i16 _1) { return __lasx_vext2xv_w_h(_1); }
- // CHECK-LABEL: @vext2xv_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_112]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 vext2xv_d_w(v8i32 _1) { return __lasx_vext2xv_d_w(_1); }
- // CHECK-LABEL: @vext2xv_w_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_112]])
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 vext2xv_w_b(v32i8 _1) { return __lasx_vext2xv_w_b(_1); }
- // CHECK-LABEL: @vext2xv_d_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_112]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 vext2xv_d_h(v16i16 _1) { return __lasx_vext2xv_d_h(_1); }
- // CHECK-LABEL: @vext2xv_d_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_112]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 vext2xv_d_b(v32i8 _1) { return __lasx_vext2xv_d_b(_1); }
- // CHECK-LABEL: @vext2xv_hu_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_112]])
-+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 vext2xv_hu_bu(v32i8 _1) { return __lasx_vext2xv_hu_bu(_1); }
- // CHECK-LABEL: @vext2xv_wu_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_112]])
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 vext2xv_wu_hu(v16i16 _1) { return __lasx_vext2xv_wu_hu(_1); }
- // CHECK-LABEL: @vext2xv_du_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_112]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 vext2xv_du_wu(v8i32 _1) { return __lasx_vext2xv_du_wu(_1); }
- // CHECK-LABEL: @vext2xv_wu_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_112]])
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 vext2xv_wu_bu(v32i8 _1) { return __lasx_vext2xv_wu_bu(_1); }
- // CHECK-LABEL: @vext2xv_du_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_112]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 vext2xv_du_hu(v16i16 _1) { return __lasx_vext2xv_du_hu(_1); }
- // CHECK-LABEL: @vext2xv_du_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_112]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 vext2xv_du_bu(v32i8 _1) { return __lasx_vext2xv_du_bu(_1); }
- // CHECK-LABEL: @xvpermi_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __lasx_xvpermi_q(_1, _2, 1); }
- // CHECK-LABEL: @xvpermi_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvpermi_d(v4i64 _1) { return __lasx_xvpermi_d(_1, 1); }
- // CHECK-LABEL: @xvperm_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __lasx_xvperm_w(_1, _2); }
- // CHECK-LABEL: @xvldrepl_b(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvldrepl_b(void * _1) { return __lasx_xvldrepl_b(_1, 1); }
- // CHECK-LABEL: @xvldrepl_h(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvldrepl_h(void * _1) { return __lasx_xvldrepl_h(_1, 2); }
- // CHECK-LABEL: @xvldrepl_w(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvldrepl_w(void * _1) { return __lasx_xvldrepl_w(_1, 4); }
- // CHECK-LABEL: @xvldrepl_d(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvldrepl_d(void * _1) { return __lasx_xvldrepl_d(_1, 8); }
- // CHECK-LABEL: @xvpickve2gr_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1)
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- int xvpickve2gr_w(v8i32 _1) { return __lasx_xvpickve2gr_w(_1, 1); }
- // CHECK-LABEL: @xvpickve2gr_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1)
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- unsigned int xvpickve2gr_wu(v8i32 _1) { return __lasx_xvpickve2gr_wu(_1, 1); }
- // CHECK-LABEL: @xvpickve2gr_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret i64 [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1)
-+// CHECK-NEXT: ret i64 [[TMP1]]
- //
- long xvpickve2gr_d(v4i64 _1) { return __lasx_xvpickve2gr_d(_1, 1); }
- // CHECK-LABEL: @xvpickve2gr_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret i64 [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1)
-+// CHECK-NEXT: ret i64 [[TMP1]]
- //
- unsigned long int xvpickve2gr_du(v4i64 _1) { return __lasx_xvpickve2gr_du(_1, 1); }
- // CHECK-LABEL: @xvaddwev_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwev_q_d(_1, _2); }
- // CHECK-LABEL: @xvaddwev_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwev_d_w(_1, _2); }
- // CHECK-LABEL: @xvaddwev_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwev_w_h(_1, _2); }
- // CHECK-LABEL: @xvaddwev_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwev_h_b(_1, _2); }
- // CHECK-LABEL: @xvaddwev_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwev_q_du(_1, _2); }
- // CHECK-LABEL: @xvaddwev_d_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwev_d_wu(_1, _2); }
- // CHECK-LABEL: @xvaddwev_w_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwev_w_hu(_1, _2); }
- // CHECK-LABEL: @xvaddwev_h_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwev_h_bu(_1, _2); }
- // CHECK-LABEL: @xvsubwev_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwev_q_d(_1, _2); }
- // CHECK-LABEL: @xvsubwev_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwev_d_w(_1, _2); }
- // CHECK-LABEL: @xvsubwev_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwev_w_h(_1, _2); }
- // CHECK-LABEL: @xvsubwev_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwev_h_b(_1, _2); }
- // CHECK-LABEL: @xvsubwev_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwev_q_du(_1, _2); }
- // CHECK-LABEL: @xvsubwev_d_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwev_d_wu(_1, _2); }
- // CHECK-LABEL: @xvsubwev_w_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwev_w_hu(_1, _2); }
- // CHECK-LABEL: @xvsubwev_h_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwev_h_bu(_1, _2); }
- // CHECK-LABEL: @xvmulwev_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwev_q_d(_1, _2); }
- // CHECK-LABEL: @xvmulwev_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwev_d_w(_1, _2); }
- // CHECK-LABEL: @xvmulwev_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwev_w_h(_1, _2); }
- // CHECK-LABEL: @xvmulwev_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwev_h_b(_1, _2); }
- // CHECK-LABEL: @xvmulwev_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwev_q_du(_1, _2); }
- // CHECK-LABEL: @xvmulwev_d_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwev_d_wu(_1, _2); }
- // CHECK-LABEL: @xvmulwev_w_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwev_w_hu(_1, _2); }
- // CHECK-LABEL: @xvmulwev_h_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwev_h_bu(_1, _2); }
- // CHECK-LABEL: @xvaddwod_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwod_q_d(_1, _2); }
- // CHECK-LABEL: @xvaddwod_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwod_d_w(_1, _2); }
- // CHECK-LABEL: @xvaddwod_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwod_w_h(_1, _2); }
- // CHECK-LABEL: @xvaddwod_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwod_h_b(_1, _2); }
- // CHECK-LABEL: @xvaddwod_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwod_q_du(_1, _2); }
- // CHECK-LABEL: @xvaddwod_d_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwod_d_wu(_1, _2); }
- // CHECK-LABEL: @xvaddwod_w_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwod_w_hu(_1, _2); }
- // CHECK-LABEL: @xvaddwod_h_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwod_h_bu(_1, _2); }
- // CHECK-LABEL: @xvsubwod_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwod_q_d(_1, _2); }
- // CHECK-LABEL: @xvsubwod_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwod_d_w(_1, _2); }
- // CHECK-LABEL: @xvsubwod_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwod_w_h(_1, _2); }
- // CHECK-LABEL: @xvsubwod_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwod_h_b(_1, _2); }
- // CHECK-LABEL: @xvsubwod_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwod_q_du(_1, _2); }
- // CHECK-LABEL: @xvsubwod_d_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwod_d_wu(_1, _2); }
- // CHECK-LABEL: @xvsubwod_w_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwod_w_hu(_1, _2); }
- // CHECK-LABEL: @xvsubwod_h_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwod_h_bu(_1, _2); }
- // CHECK-LABEL: @xvmulwod_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwod_q_d(_1, _2); }
- // CHECK-LABEL: @xvmulwod_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwod_d_w(_1, _2); }
- // CHECK-LABEL: @xvmulwod_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwod_w_h(_1, _2); }
- // CHECK-LABEL: @xvmulwod_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwod_h_b(_1, _2); }
- // CHECK-LABEL: @xvmulwod_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwod_q_du(_1, _2); }
- // CHECK-LABEL: @xvmulwod_d_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwod_d_wu(_1, _2); }
- // CHECK-LABEL: @xvmulwod_w_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwod_w_hu(_1, _2); }
- // CHECK-LABEL: @xvmulwod_h_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwod_h_bu(_1, _2); }
- // CHECK-LABEL: @xvaddwev_d_wu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwev_d_wu_w(_1, _2); }
- // CHECK-LABEL: @xvaddwev_w_hu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwev_w_hu_h(_1, _2); }
- // CHECK-LABEL: @xvaddwev_h_bu_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwev_h_bu_b(_1, _2); }
- // CHECK-LABEL: @xvmulwev_d_wu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwev_d_wu_w(_1, _2); }
- // CHECK-LABEL: @xvmulwev_w_hu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwev_w_hu_h(_1, _2); }
- // CHECK-LABEL: @xvmulwev_h_bu_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwev_h_bu_b(_1, _2); }
- // CHECK-LABEL: @xvaddwod_d_wu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwod_d_wu_w(_1, _2); }
- // CHECK-LABEL: @xvaddwod_w_hu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwod_w_hu_h(_1, _2); }
- // CHECK-LABEL: @xvaddwod_h_bu_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwod_h_bu_b(_1, _2); }
- // CHECK-LABEL: @xvmulwod_d_wu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwod_d_wu_w(_1, _2); }
- // CHECK-LABEL: @xvmulwod_w_hu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16>
[[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwod_w_hu_h(_1, _2); } - // CHECK-LABEL: @xvmulwod_h_bu_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwod_h_bu_b(_1, _2); } - // CHECK-LABEL: @xvhaddw_q_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhaddw_q_d(_1, _2); } - // CHECK-LABEL: @xvhaddw_qu_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhaddw_qu_du(_1, _2); } - // CHECK-LABEL: @xvhsubw_q_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return 
__lasx_xvhsubw_q_d(_1, _2); } - // CHECK-LABEL: @xvhsubw_qu_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhsubw_qu_du(_1, _2); } - // CHECK-LABEL: @xvmaddwev_q_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_d(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwev_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_w(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwev_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_h(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwev_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x 
i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_b(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwev_q_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwev_q_du(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwev_d_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwev_d_wu(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwev_w_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwev_w_hu(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwev_h_bu( - // CHECK-NEXT: entry: --// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwev_h_bu(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwod_q_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_d(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwod_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_w(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwod_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_h(_1, _2, _3); } - // CHECK-LABEL: 
@xvmaddwod_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_b(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwod_q_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwod_q_du(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwod_d_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwod_d_wu(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwod_w_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return 
__lasx_xvmaddwod_w_hu(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwod_h_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwod_h_bu(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwev_q_du_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_du_d(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwev_d_wu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_wu_w(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwev_w_hu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret 
void - // - v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_hu_h(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwev_h_bu_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_bu_b(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwod_q_du_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_du_d(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwod_d_wu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_wu_w(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwod_w_hu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) -+// CHECK-NEXT: store <8 x 
i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_hu_h(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwod_h_bu_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) -+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_bu_b(_1, _2, _3); } - // CHECK-LABEL: @xvrotr_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __lasx_xvrotr_b(_1, _2); } - // CHECK-LABEL: @xvrotr_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __lasx_xvrotr_h(_1, _2); } - // CHECK-LABEL: @xvrotr_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __lasx_xvrotr_w(_1, _2); } - // CHECK-LABEL: @xvrotr_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load 
<4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __lasx_xvrotr_d(_1, _2); } - // CHECK-LABEL: @xvadd_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __lasx_xvadd_q(_1, _2); } - // CHECK-LABEL: @xvsub_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __lasx_xvsub_q(_1, _2); } - // CHECK-LABEL: @xvaddwev_q_du_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwev_q_du_d(_1, _2); } - // CHECK-LABEL: @xvaddwod_q_du_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwod_q_du_d(_1, _2); } - // CHECK-LABEL: @xvmulwev_q_du_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x 
i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwev_q_du_d(_1, _2); } - // CHECK-LABEL: @xvmulwod_q_du_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwod_q_du_d(_1, _2); } - // CHECK-LABEL: @xvmskgez_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_112]]) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvmskgez_b(v32i8 _1) { return __lasx_xvmskgez_b(_1); } - // CHECK-LABEL: @xvmsknz_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_112]]) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvmsknz_b(v32i8 _1) { return __lasx_xvmsknz_b(_1); } - // CHECK-LABEL: @xvexth_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_112]]) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvexth_h_b(v32i8 _1) { return __lasx_xvexth_h_b(_1); } - // CHECK-LABEL: @xvexth_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_112]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, 
!tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvexth_w_h(v16i16 _1) { return __lasx_xvexth_w_h(_1); } - // CHECK-LABEL: @xvexth_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_112]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvexth_d_w(v8i32 _1) { return __lasx_xvexth_d_w(_1); } - // CHECK-LABEL: @xvexth_q_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvexth_q_d(v4i64 _1) { return __lasx_xvexth_q_d(_1); } - // CHECK-LABEL: @xvexth_hu_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_112]]) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvexth_hu_bu(v32u8 _1) { return __lasx_xvexth_hu_bu(_1); } - // CHECK-LABEL: @xvexth_wu_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_112]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvexth_wu_hu(v16u16 _1) { return __lasx_xvexth_wu_hu(_1); } - // CHECK-LABEL: @xvexth_du_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_112]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvexth_du_wu(v8u32 _1) { return __lasx_xvexth_du_wu(_1); } - // CHECK-LABEL: @xvexth_qu_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvexth_qu_du(v4u64 _1) { return __lasx_xvexth_qu_du(_1); } - // CHECK-LABEL: @xvrotri_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvrotri_b(v32i8 _1) { return __lasx_xvrotri_b(_1, 1); } - // CHECK-LABEL: @xvrotri_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvrotri_h(v16i16 _1) { return __lasx_xvrotri_h(_1, 1); } - // CHECK-LABEL: @xvrotri_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvrotri_w(v8i32 _1) { return __lasx_xvrotri_w(_1, 1); } - // CHECK-LABEL: @xvrotri_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvrotri_d(v4i64 _1) { return __lasx_xvrotri_d(_1, 1); } - // CHECK-LABEL: @xvextl_q_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvextl_q_d(v4i64 _1) { return __lasx_xvextl_q_d(_1); } - // CHECK-LABEL: @xvsrlni_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlni_b_h(_1, _2, 1); } - // CHECK-LABEL: @xvsrlni_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlni_h_w(_1, _2, 1); } - // CHECK-LABEL: @xvsrlni_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlni_w_d(_1, _2, 1); } - // CHECK-LABEL: @xvsrlni_d_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlni_d_q(_1, _2, 1); } - // CHECK-LABEL: @xvsrlrni_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlrni_b_h(_1, _2, 1); } - // CHECK-LABEL: @xvsrlrni_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = 
load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrni_h_w(_1, _2, 1); } - // CHECK-LABEL: @xvsrlrni_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrni_w_d(_1, _2, 1); } - // CHECK-LABEL: @xvsrlrni_d_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrni_d_q(_1, _2, 1); } - // CHECK-LABEL: @xvssrlni_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlni_b_h(_1, _2, 1); } - // CHECK-LABEL: @xvssrlni_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlni_h_w(_1, _2, 1); } - // CHECK-LABEL: @xvssrlni_w_d( - // 
CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlni_w_d(_1, _2, 1); } - // CHECK-LABEL: @xvssrlni_d_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlni_d_q(_1, _2, 1); } - // CHECK-LABEL: @xvssrlni_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlni_bu_h(_1, _2, 1); } - // CHECK-LABEL: @xvssrlni_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlni_hu_w(_1, _2, 1); } - // CHECK-LABEL: @xvssrlni_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -+// CHECK-NEXT: 
store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlni_wu_d(_1, _2, 1); } - // CHECK-LABEL: @xvssrlni_du_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlni_du_q(_1, _2, 1); } - // CHECK-LABEL: @xvssrlrni_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlrni_b_h(_1, _2, 1); } - // CHECK-LABEL: @xvssrlrni_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrni_h_w(_1, _2, 1); } - // CHECK-LABEL: @xvssrlrni_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrni_w_d(_1, _2, 1); } - // CHECK-LABEL: @xvssrlrni_d_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrni_d_q(_1, _2, 1); } - // CHECK-LABEL: @xvssrlrni_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlrni_bu_h(_1, _2, 1); } - // CHECK-LABEL: @xvssrlrni_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlrni_hu_w(_1, _2, 1); } - // CHECK-LABEL: @xvssrlrni_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlrni_wu_d(_1, _2, 1); } - // CHECK-LABEL: @xvssrlrni_du_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlrni_du_q(_1, _2, 1); } - // CHECK-LABEL: @xvsrani_b_h( - // CHECK-NEXT: entry: 
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrani_b_h(_1, _2, 1); } - // CHECK-LABEL: @xvsrani_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrani_h_w(_1, _2, 1); } - // CHECK-LABEL: @xvsrani_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrani_w_d(_1, _2, 1); } - // CHECK-LABEL: @xvsrani_d_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrani_d_q(_1, _2, 1); } - // CHECK-LABEL: @xvsrarni_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], 
align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrarni_b_h(_1, _2, 1); } - // CHECK-LABEL: @xvsrarni_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrarni_h_w(_1, _2, 1); } - // CHECK-LABEL: @xvsrarni_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrarni_w_d(_1, _2, 1); } - // CHECK-LABEL: @xvsrarni_d_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrarni_d_q(_1, _2, 1); } - // CHECK-LABEL: @xvssrani_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrani_b_h(_1, _2, 1); } - // CHECK-LABEL: @xvssrani_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], 
align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrani_h_w(_1, _2, 1); } - // CHECK-LABEL: @xvssrani_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrani_w_d(_1, _2, 1); } - // CHECK-LABEL: @xvssrani_d_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrani_d_q(_1, _2, 1); } - // CHECK-LABEL: @xvssrani_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrani_bu_h(_1, _2, 1); } - // CHECK-LABEL: @xvssrani_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrani_hu_w(_1, _2, 1); } - // CHECK-LABEL: @xvssrani_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> 
[[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrani_wu_d(_1, _2, 1); } - // CHECK-LABEL: @xvssrani_du_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrani_du_q(_1, _2, 1); } - // CHECK-LABEL: @xvssrarni_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrarni_b_h(_1, _2, 1); } - // CHECK-LABEL: @xvssrarni_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrarni_h_w(_1, _2, 1); } - // CHECK-LABEL: @xvssrarni_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - 
v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrarni_w_d(_1, _2, 1); } - // CHECK-LABEL: @xvssrarni_d_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrarni_d_q(_1, _2, 1); } - // CHECK-LABEL: @xvssrarni_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrarni_bu_h(_1, _2, 1); } - // CHECK-LABEL: @xvssrarni_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrarni_hu_w(_1, _2, 1); } - // CHECK-LABEL: @xvssrarni_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrarni_wu_d(_1, _2, 1); } - // CHECK-LABEL: @xvssrarni_du_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] 
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrarni_du_q(_1, _2, 1); } - // CHECK-LABEL: @xbnz_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int xbnz_b(v32u8 _1) { return __lasx_xbnz_b(_1); } - // CHECK-LABEL: @xbnz_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int xbnz_d(v4u64 _1) { return __lasx_xbnz_d(_1); } - // CHECK-LABEL: @xbnz_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int xbnz_h(v16u16 _1) { return __lasx_xbnz_h(_1); } - // CHECK-LABEL: @xbnz_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int xbnz_v(v32u8 _1) { return __lasx_xbnz_v(_1); } - // CHECK-LABEL: @xbnz_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int xbnz_w(v8u32 _1) { return __lasx_xbnz_w(_1); } - // CHECK-LABEL: @xbz_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int xbz_b(v32u8 _1) { return __lasx_xbz_b(_1); } - // CHECK-LABEL: @xbz_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int xbz_d(v4u64 _1) { return __lasx_xbz_d(_1); } - // CHECK-LABEL: @xbz_h( - // 
CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int xbz_h(v16u16 _1) { return __lasx_xbz_h(_1); } - // CHECK-LABEL: @xbz_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int xbz_v(v32u8 _1) { return __lasx_xbz_v(_1); } - // CHECK-LABEL: @xbz_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int xbz_w(v8u32 _1) { return __lasx_xbz_w(_1); } - // CHECK-LABEL: @xvfcmp_caf_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_caf_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_caf_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_caf_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_ceq_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return 
__lasx_xvfcmp_ceq_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_ceq_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_ceq_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_cle_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cle_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_cle_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cle_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_clt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_clt_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_clt_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x 
float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_clt_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_cne_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cne_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_cne_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cne_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_cor_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cor_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_cor_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cor_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_cueq_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// 
CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cueq_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_cueq_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cueq_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_cule_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cule_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_cule_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cule_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_cult_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cult_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_cult_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cult_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_cun_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cun_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_cune_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cune_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_cune_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cune_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_cun_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: 
ret void - // - v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cun_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_saf_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_saf_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_saf_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_saf_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_seq_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_seq_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_seq_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_seq_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_sle_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 
x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sle_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_sle_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sle_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_slt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_slt_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_slt_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_slt_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_sne_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sne_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_sne_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x 
float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sne_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_sor_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sor_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_sor_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sor_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_sueq_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sueq_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_sueq_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sueq_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_sule_d( - // CHECK-NEXT: entry: --// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sule_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_sule_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sule_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_sult_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sult_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_sult_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sult_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_sun_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sun_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_sune_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sune_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_sune_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sune_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_sun_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sun_s(_1, _2); } - // CHECK-LABEL: @xvpickve_d_f( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvpickve_d_f(v4f64 _1) { return __lasx_xvpickve_d_f(_1, 1); } - // CHECK-LABEL: @xvpickve_w_f( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvpickve_w_f(v8f32 _1) { return __lasx_xvpickve_w_f(_1, 1); } - // CHECK-LABEL: @xvrepli_b( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvrepli_b() { return __lasx_xvrepli_b(1); } - // CHECK-LABEL: @xvrepli_d( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvrepli_d() { return __lasx_xvrepli_d(1); } - // CHECK-LABEL: @xvrepli_h( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); } - // CHECK-LABEL: @xvrepli_w( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); } -diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin.c b/clang/test/CodeGen/LoongArch/lasx/builtin.c -index 0185f2004d52..f52a23a5faea 100644 ---- a/clang/test/CodeGen/LoongArch/lasx/builtin.c -+++ b/clang/test/CodeGen/LoongArch/lasx/builtin.c -@@ -27,4426 +27,6382 @@ typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); - - // CHECK-LABEL: @xvsll_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsll_b(_1, _2); } - // CHECK-LABEL: @xvsll_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsll_h(_1, _2); } - // CHECK-LABEL: @xvsll_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 
x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsll_w(_1, _2); } - // CHECK-LABEL: @xvsll_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsll_d(_1, _2); } - // CHECK-LABEL: @xvslli_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvslli_b(v32i8 _1) { return __builtin_lasx_xvslli_b(_1, 1); } - // CHECK-LABEL: @xvslli_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvslli_h(v16i16 _1) { return __builtin_lasx_xvslli_h(_1, 1); } - // CHECK-LABEL: @xvslli_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvslli_w(v8i32 _1) { return __builtin_lasx_xvslli_w(_1, 1); } - // CHECK-LABEL: @xvslli_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - 
v4i64 xvslli_d(v4i64 _1) { return __builtin_lasx_xvslli_d(_1, 1); } - // CHECK-LABEL: @xvsra_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsra_b(_1, _2); } - // CHECK-LABEL: @xvsra_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsra_h(_1, _2); } - // CHECK-LABEL: @xvsra_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsra_w(_1, _2); } - // CHECK-LABEL: @xvsra_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsra_d(_1, _2); } - // CHECK-LABEL: @xvsrai_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrai_b(v32i8 _1) { return __builtin_lasx_xvsrai_b(_1, 1); } - // 
CHECK-LABEL: @xvsrai_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrai_h(v16i16 _1) { return __builtin_lasx_xvsrai_h(_1, 1); } - // CHECK-LABEL: @xvsrai_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrai_w(v8i32 _1) { return __builtin_lasx_xvsrai_w(_1, 1); } - // CHECK-LABEL: @xvsrai_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsrai_d(v4i64 _1) { return __builtin_lasx_xvsrai_d(_1, 1); } - // CHECK-LABEL: @xvsrar_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrar_b(_1, _2); } - // CHECK-LABEL: @xvsrar_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrar_h(_1, _2); } - // CHECK-LABEL: @xvsrar_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] 
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrar_w(_1, _2); } - // CHECK-LABEL: @xvsrar_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrar_d(_1, _2); } - // CHECK-LABEL: @xvsrari_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrari_b(v32i8 _1) { return __builtin_lasx_xvsrari_b(_1, 1); } - // CHECK-LABEL: @xvsrari_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrari_h(v16i16 _1) { return __builtin_lasx_xvsrari_h(_1, 1); } - // CHECK-LABEL: @xvsrari_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrari_w(v8i32 _1) { return __builtin_lasx_xvsrari_w(_1, 1); } - // CHECK-LABEL: @xvsrari_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsrari_d(v4i64 _1) { return __builtin_lasx_xvsrari_d(_1, 1); } - // 
CHECK-LABEL: @xvsrl_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrl_b(_1, _2); } - // CHECK-LABEL: @xvsrl_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrl_h(_1, _2); } - // CHECK-LABEL: @xvsrl_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrl_w(_1, _2); } - // CHECK-LABEL: @xvsrl_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrl_d(_1, _2); } - // CHECK-LABEL: @xvsrli_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrli_b(v32i8 _1) { return __builtin_lasx_xvsrli_b(_1, 1); } - // CHECK-LABEL: @xvsrli_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] 
= tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrli_h(v16i16 _1) { return __builtin_lasx_xvsrli_h(_1, 1); } - // CHECK-LABEL: @xvsrli_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrli_w(v8i32 _1) { return __builtin_lasx_xvsrli_w(_1, 1); } - // CHECK-LABEL: @xvsrli_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsrli_d(v4i64 _1) { return __builtin_lasx_xvsrli_d(_1, 1); } - // CHECK-LABEL: @xvsrlr_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlr_b(_1, _2); } - // CHECK-LABEL: @xvsrlr_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlr_h(_1, _2); } - // CHECK-LABEL: @xvsrlr_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, 
!tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlr_w(_1, _2); } - // CHECK-LABEL: @xvsrlr_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlr_d(_1, _2); } - // CHECK-LABEL: @xvsrlri_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrlri_b(v32i8 _1) { return __builtin_lasx_xvsrlri_b(_1, 1); } - // CHECK-LABEL: @xvsrlri_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrlri_h(v16i16 _1) { return __builtin_lasx_xvsrlri_h(_1, 1); } - // CHECK-LABEL: @xvsrlri_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrlri_w(v8i32 _1) { return __builtin_lasx_xvsrlri_w(_1, 1); } - // CHECK-LABEL: @xvsrlri_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsrlri_d(v4i64 _1) { return __builtin_lasx_xvsrlri_d(_1, 1); } - // CHECK-LABEL: @xvbitclr_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] 
= tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitclr_b(_1, _2); } - // CHECK-LABEL: @xvbitclr_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitclr_h(_1, _2); } - // CHECK-LABEL: @xvbitclr_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitclr_w(_1, _2); } - // CHECK-LABEL: @xvbitclr_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitclr_d(_1, _2); } - // CHECK-LABEL: @xvbitclri_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvbitclri_b(v32u8 _1) { return __builtin_lasx_xvbitclri_b(_1, 1); } - // CHECK-LABEL: @xvbitclri_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvbitclri_h(v16u16 _1) { return __builtin_lasx_xvbitclri_h(_1, 1); } - // CHECK-LABEL: @xvbitclri_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvbitclri_w(v8u32 _1) { return __builtin_lasx_xvbitclri_w(_1, 1); } - // CHECK-LABEL: @xvbitclri_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvbitclri_d(v4u64 _1) { return __builtin_lasx_xvbitclri_d(_1, 1); } - // CHECK-LABEL: @xvbitset_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitset_b(_1, _2); } - // CHECK-LABEL: @xvbitset_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitset_h(_1, _2); } - // CHECK-LABEL: @xvbitset_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// 
CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitset_w(_1, _2); } - // CHECK-LABEL: @xvbitset_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitset_d(_1, _2); } - // CHECK-LABEL: @xvbitseti_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvbitseti_b(v32u8 _1) { return __builtin_lasx_xvbitseti_b(_1, 1); } - // CHECK-LABEL: @xvbitseti_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvbitseti_h(v16u16 _1) { return __builtin_lasx_xvbitseti_h(_1, 1); } - // CHECK-LABEL: @xvbitseti_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvbitseti_w(v8u32 _1) { return __builtin_lasx_xvbitseti_w(_1, 1); } - // CHECK-LABEL: @xvbitseti_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvbitseti_d(v4u64 _1) { 
return __builtin_lasx_xvbitseti_d(_1, 1); } - // CHECK-LABEL: @xvbitrev_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitrev_b(_1, _2); } - // CHECK-LABEL: @xvbitrev_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitrev_h(_1, _2); } - // CHECK-LABEL: @xvbitrev_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitrev_w(_1, _2); } - // CHECK-LABEL: @xvbitrev_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitrev_d(_1, _2); } - // CHECK-LABEL: @xvbitrevi_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvbitrevi_b(v32u8 _1) { 
return __builtin_lasx_xvbitrevi_b(_1, 1); } - // CHECK-LABEL: @xvbitrevi_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvbitrevi_h(v16u16 _1) { return __builtin_lasx_xvbitrevi_h(_1, 1); } - // CHECK-LABEL: @xvbitrevi_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvbitrevi_w(v8u32 _1) { return __builtin_lasx_xvbitrevi_w(_1, 1); } - // CHECK-LABEL: @xvbitrevi_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvbitrevi_d(v4u64 _1) { return __builtin_lasx_xvbitrevi_d(_1, 1); } - // CHECK-LABEL: @xvadd_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadd_b(_1, _2); } - // CHECK-LABEL: @xvadd_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadd_h(_1, _2); } - // CHECK-LABEL: @xvadd_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// 
CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadd_w(_1, _2); } - // CHECK-LABEL: @xvadd_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_d(_1, _2); } - // CHECK-LABEL: @xvaddi_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvaddi_bu(v32i8 _1) { return __builtin_lasx_xvaddi_bu(_1, 1); } - // CHECK-LABEL: @xvaddi_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvaddi_hu(v16i16 _1) { return __builtin_lasx_xvaddi_hu(_1, 1); } - // CHECK-LABEL: @xvaddi_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvaddi_wu(v8i32 _1) { return __builtin_lasx_xvaddi_wu(_1, 1); } - // CHECK-LABEL: @xvaddi_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - 
v4i64 xvaddi_du(v4i64 _1) { return __builtin_lasx_xvaddi_du(_1, 1); } - // CHECK-LABEL: @xvsub_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsub_b(_1, _2); } - // CHECK-LABEL: @xvsub_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsub_h(_1, _2); } - // CHECK-LABEL: @xvsub_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsub_w(_1, _2); } - // CHECK-LABEL: @xvsub_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_d(_1, _2); } - // CHECK-LABEL: @xvsubi_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsubi_bu(v32i8 _1) { return __builtin_lasx_xvsubi_bu(_1, 1); } - 
// CHECK-LABEL: @xvsubi_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsubi_hu(v16i16 _1) { return __builtin_lasx_xvsubi_hu(_1, 1); } - // CHECK-LABEL: @xvsubi_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsubi_wu(v8i32 _1) { return __builtin_lasx_xvsubi_wu(_1, 1); } - // CHECK-LABEL: @xvsubi_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsubi_du(v4i64 _1) { return __builtin_lasx_xvsubi_du(_1, 1); } - // CHECK-LABEL: @xvmax_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmax_b(_1, _2); } - // CHECK-LABEL: @xvmax_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmax_h(_1, _2); } - // CHECK-LABEL: @xvmax_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmax_w(_1, _2); } - // CHECK-LABEL: @xvmax_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmax_d(_1, _2); } - // CHECK-LABEL: @xvmaxi_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvmaxi_b(v32i8 _1) { return __builtin_lasx_xvmaxi_b(_1, 1); } - // CHECK-LABEL: @xvmaxi_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmaxi_h(v16i16 _1) { return __builtin_lasx_xvmaxi_h(_1, 1); } - // CHECK-LABEL: @xvmaxi_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmaxi_w(v8i32 _1) { return __builtin_lasx_xvmaxi_w(_1, 1); } - // CHECK-LABEL: @xvmaxi_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmaxi_d(v4i64 _1) { return __builtin_lasx_xvmaxi_d(_1, 1); } - // CHECK-LABEL: @xvmax_bu( 
- // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmax_bu(_1, _2); } - // CHECK-LABEL: @xvmax_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmax_hu(_1, _2); } - // CHECK-LABEL: @xvmax_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmax_wu(_1, _2); } - // CHECK-LABEL: @xvmax_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmax_du(_1, _2); } - // CHECK-LABEL: @xvmaxi_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvmaxi_bu(v32u8 _1) { return __builtin_lasx_xvmaxi_bu(_1, 1); } - // CHECK-LABEL: @xvmaxi_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvmaxi_hu(v16u16 _1) { return __builtin_lasx_xvmaxi_hu(_1, 1); } - // CHECK-LABEL: @xvmaxi_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvmaxi_wu(v8u32 _1) { return __builtin_lasx_xvmaxi_wu(_1, 1); } - // CHECK-LABEL: @xvmaxi_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmaxi_du(v4u64 _1) { return __builtin_lasx_xvmaxi_du(_1, 1); } - // CHECK-LABEL: @xvmin_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmin_b(_1, _2); } - // CHECK-LABEL: @xvmin_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmin_h(_1, _2); } - // CHECK-LABEL: @xvmin_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmin_w(_1, _2); } - // CHECK-LABEL: @xvmin_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmin_d(_1, _2); } - // CHECK-LABEL: @xvmini_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvmini_b(v32i8 _1) { return __builtin_lasx_xvmini_b(_1, 1); } - // CHECK-LABEL: @xvmini_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmini_h(v16i16 _1) { return __builtin_lasx_xvmini_h(_1, 1); } - // CHECK-LABEL: @xvmini_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmini_w(v8i32 _1) { return __builtin_lasx_xvmini_w(_1, 1); } - // CHECK-LABEL: @xvmini_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmini_d(v4i64 _1) { return __builtin_lasx_xvmini_d(_1, 1); } - // CHECK-LABEL: @xvmin_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmin_bu(_1, _2); } - // CHECK-LABEL: @xvmin_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmin_hu(_1, _2); } - // CHECK-LABEL: @xvmin_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmin_wu(_1, _2); } - // CHECK-LABEL: @xvmin_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmin_du(_1, _2); } - // CHECK-LABEL: @xvmini_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvmini_bu(v32u8 _1) { return __builtin_lasx_xvmini_bu(_1, 1); } - // CHECK-LABEL: @xvmini_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvmini_hu(v16u16 _1) { return __builtin_lasx_xvmini_hu(_1, 1); } - // CHECK-LABEL: @xvmini_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvmini_wu(v8u32 _1) { return __builtin_lasx_xvmini_wu(_1, 1); } - // CHECK-LABEL: @xvmini_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmini_du(v4u64 _1) { return __builtin_lasx_xvmini_du(_1, 1); } - // CHECK-LABEL: @xvseq_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvseq_b(_1, _2); } - // CHECK-LABEL: @xvseq_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvseq_h(_1, _2); } - // CHECK-LABEL: @xvseq_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvseq_w(_1, _2); } - // CHECK-LABEL: @xvseq_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvseq_d(_1, _2); } - // CHECK-LABEL: @xvseqi_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvseqi_b(v32i8 _1) { return __builtin_lasx_xvseqi_b(_1, 1); } - // CHECK-LABEL: @xvseqi_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvseqi_h(v16i16 _1) { return __builtin_lasx_xvseqi_h(_1, 1); } - // CHECK-LABEL: @xvseqi_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvseqi_w(v8i32 _1) { return __builtin_lasx_xvseqi_w(_1, 1); } - // CHECK-LABEL: @xvseqi_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvseqi_d(v4i64 _1) { return __builtin_lasx_xvseqi_d(_1, 1); } - // CHECK-LABEL: @xvslt_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvslt_b(_1, _2); } - // CHECK-LABEL: @xvslt_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvslt_h(_1, _2); } - // CHECK-LABEL: @xvslt_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvslt_w(_1, _2); } - // CHECK-LABEL: @xvslt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvslt_d(_1, _2); } - // CHECK-LABEL: @xvslti_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvslti_b(v32i8 _1) { return __builtin_lasx_xvslti_b(_1, 1); } - // CHECK-LABEL: @xvslti_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: 
ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvslti_h(v16i16 _1) { return __builtin_lasx_xvslti_h(_1, 1); } - // CHECK-LABEL: @xvslti_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvslti_w(v8i32 _1) { return __builtin_lasx_xvslti_w(_1, 1); } - // CHECK-LABEL: @xvslti_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvslti_d(v4i64 _1) { return __builtin_lasx_xvslti_d(_1, 1); } - // CHECK-LABEL: @xvslt_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvslt_bu(_1, _2); } - // CHECK-LABEL: @xvslt_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvslt_hu(_1, _2); } - // CHECK-LABEL: @xvslt_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x 
i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvslt_wu(_1, _2); } - // CHECK-LABEL: @xvslt_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvslt_du(_1, _2); } - // CHECK-LABEL: @xvslti_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvslti_bu(v32u8 _1) { return __builtin_lasx_xvslti_bu(_1, 1); } - // CHECK-LABEL: @xvslti_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvslti_hu(v16u16 _1) { return __builtin_lasx_xvslti_hu(_1, 1); } - // CHECK-LABEL: @xvslti_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvslti_wu(v8u32 _1) { return __builtin_lasx_xvslti_wu(_1, 1); } - // CHECK-LABEL: @xvslti_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvslti_du(v4u64 _1) { return __builtin_lasx_xvslti_du(_1, 1); } - // CHECK-LABEL: @xvsle_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// 
CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsle_b(_1, _2); } - // CHECK-LABEL: @xvsle_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsle_h(_1, _2); } - // CHECK-LABEL: @xvsle_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsle_w(_1, _2); } - // CHECK-LABEL: @xvsle_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsle_d(_1, _2); } - // CHECK-LABEL: @xvslei_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvslei_b(v32i8 _1) { return __builtin_lasx_xvslei_b(_1, 1); } - // CHECK-LABEL: @xvslei_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvslei_h(v16i16 _1) { return __builtin_lasx_xvslei_h(_1, 1); } - // CHECK-LABEL: @xvslei_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvslei_w(v8i32 _1) { return __builtin_lasx_xvslei_w(_1, 1); } - // CHECK-LABEL: @xvslei_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvslei_d(v4i64 _1) { return __builtin_lasx_xvslei_d(_1, 1); } - // CHECK-LABEL: @xvsle_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsle_bu(_1, _2); } - // CHECK-LABEL: @xvsle_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsle_hu(_1, _2); } - // CHECK-LABEL: @xvsle_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], 
ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsle_wu(_1, _2); } - // CHECK-LABEL: @xvsle_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsle_du(_1, _2); } - // CHECK-LABEL: @xvslei_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvslei_bu(v32u8 _1) { return __builtin_lasx_xvslei_bu(_1, 1); } - // CHECK-LABEL: @xvslei_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvslei_hu(v16u16 _1) { return __builtin_lasx_xvslei_hu(_1, 1); } - // CHECK-LABEL: @xvslei_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvslei_wu(v8u32 _1) { return __builtin_lasx_xvslei_wu(_1, 1); } - // CHECK-LABEL: @xvslei_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvslei_du(v4u64 _1) { return __builtin_lasx_xvslei_du(_1, 1); } - // CHECK-LABEL: @xvsat_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsat_b(v32i8 _1) { return __builtin_lasx_xvsat_b(_1, 1); } - // CHECK-LABEL: @xvsat_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsat_h(v16i16 _1) { return __builtin_lasx_xvsat_h(_1, 1); } - // CHECK-LABEL: @xvsat_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsat_w(v8i32 _1) { return __builtin_lasx_xvsat_w(_1, 1); } - // CHECK-LABEL: @xvsat_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsat_d(v4i64 _1) { return __builtin_lasx_xvsat_d(_1, 1); } - // CHECK-LABEL: @xvsat_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvsat_bu(v32u8 _1) { return __builtin_lasx_xvsat_bu(_1, 1); } - // CHECK-LABEL: @xvsat_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvsat_hu(v16u16 _1) { return __builtin_lasx_xvsat_hu(_1, 1); } - // CHECK-LABEL: @xvsat_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load 
<8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvsat_wu(v8u32 _1) { return __builtin_lasx_xvsat_wu(_1, 1); } - // CHECK-LABEL: @xvsat_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvsat_du(v4u64 _1) { return __builtin_lasx_xvsat_du(_1, 1); } - // CHECK-LABEL: @xvadda_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadda_b(_1, _2); } - // CHECK-LABEL: @xvadda_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadda_h(_1, _2); } - // CHECK-LABEL: @xvadda_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadda_w(_1, _2); } - // CHECK-LABEL: @xvadda_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadda_d(_1, _2); } - // CHECK-LABEL: @xvsadd_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsadd_b(_1, _2); } - // CHECK-LABEL: @xvsadd_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsadd_h(_1, _2); } - // CHECK-LABEL: @xvsadd_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsadd_w(_1, _2); } - // CHECK-LABEL: @xvsadd_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsadd_d(_1, _2); } - // CHECK-LABEL: @xvsadd_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load 
<32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsadd_bu(_1, _2); }
- // CHECK-LABEL: @xvsadd_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsadd_hu(_1, _2); }
- // CHECK-LABEL: @xvsadd_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsadd_wu(_1, _2); }
- // CHECK-LABEL: @xvsadd_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsadd_du(_1, _2); }
- // CHECK-LABEL: @xvavg_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavg_b(_1, _2); }
- // CHECK-LABEL: @xvavg_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavg_h(_1, _2); }
- // CHECK-LABEL: @xvavg_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavg_w(_1, _2); }
- // CHECK-LABEL: @xvavg_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavg_d(_1, _2); }
- // CHECK-LABEL: @xvavg_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavg_bu(_1, _2); }
- // CHECK-LABEL: @xvavg_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavg_hu(_1, _2); }
- // CHECK-LABEL: @xvavg_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavg_wu(_1, _2); }
- // CHECK-LABEL: @xvavg_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavg_du(_1, _2); }
- // CHECK-LABEL: @xvavgr_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavgr_b(_1, _2); }
- // CHECK-LABEL: @xvavgr_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavgr_h(_1, _2); }
- // CHECK-LABEL: @xvavgr_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavgr_w(_1, _2); }
- // CHECK-LABEL: @xvavgr_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavgr_d(_1, _2); }
- // CHECK-LABEL: @xvavgr_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavgr_bu(_1, _2); }
- // CHECK-LABEL: @xvavgr_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavgr_hu(_1, _2); }
- // CHECK-LABEL: @xvavgr_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavgr_wu(_1, _2); }
- // CHECK-LABEL: @xvavgr_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavgr_du(_1, _2); }
- // CHECK-LABEL: @xvssub_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssub_b(_1, _2); }
- // CHECK-LABEL: @xvssub_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssub_h(_1, _2); }
- // CHECK-LABEL: @xvssub_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssub_w(_1, _2); }
- // CHECK-LABEL: @xvssub_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssub_d(_1, _2); }
- // CHECK-LABEL: @xvssub_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvssub_bu(_1, _2); }
- // CHECK-LABEL: @xvssub_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssub_hu(_1, _2); }
- // CHECK-LABEL: @xvssub_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssub_wu(_1, _2); }
- // CHECK-LABEL: @xvssub_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssub_du(_1, _2); }
- // CHECK-LABEL: @xvabsd_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvabsd_b(_1, _2); }
- // CHECK-LABEL: @xvabsd_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvabsd_h(_1, _2); }
- // CHECK-LABEL: @xvabsd_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvabsd_w(_1, _2); }
- // CHECK-LABEL: @xvabsd_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvabsd_d(_1, _2); }
- // CHECK-LABEL: @xvabsd_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvabsd_bu(_1, _2); }
- // CHECK-LABEL: @xvabsd_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvabsd_hu(_1, _2); }
- // CHECK-LABEL: @xvabsd_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvabsd_wu(_1, _2); }
- // CHECK-LABEL: @xvabsd_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvabsd_du(_1, _2); }
- // CHECK-LABEL: @xvmul_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmul_b(_1, _2); }
- // CHECK-LABEL: @xvmul_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmul_h(_1, _2); }
- // CHECK-LABEL: @xvmul_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmul_w(_1, _2); }
- // CHECK-LABEL: @xvmul_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmul_d(_1, _2); }
- // CHECK-LABEL: @xvmadd_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
-+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmadd_b(_1, _2, _3); }
- // CHECK-LABEL: @xvmadd_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
-+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmadd_h(_1, _2, _3); }
- // CHECK-LABEL: @xvmadd_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
-+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmadd_w(_1, _2, _3); }
- // CHECK-LABEL: @xvmadd_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmadd_d(_1, _2, _3); }
- // CHECK-LABEL: @xvmsub_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
-+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmsub_b(_1, _2, _3); }
- // CHECK-LABEL: @xvmsub_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
-+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmsub_h(_1, _2, _3); }
- // CHECK-LABEL: @xvmsub_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
-+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmsub_w(_1, _2, _3); }
- // CHECK-LABEL: @xvmsub_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmsub_d(_1, _2, _3); }
- // CHECK-LABEL: @xvdiv_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvdiv_b(_1, _2); }
- // CHECK-LABEL: @xvdiv_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvdiv_h(_1, _2); }
- // CHECK-LABEL: @xvdiv_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvdiv_w(_1, _2); }
- // CHECK-LABEL: @xvdiv_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvdiv_d(_1, _2); }
- // CHECK-LABEL: @xvdiv_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvdiv_bu(_1, _2); }
- // CHECK-LABEL: @xvdiv_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvdiv_hu(_1, _2); }
- // CHECK-LABEL: @xvdiv_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvdiv_wu(_1, _2); }
- // CHECK-LABEL: @xvdiv_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvdiv_du(_1, _2); }
- // CHECK-LABEL: @xvhaddw_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhaddw_h_b(_1, _2); }
- // CHECK-LABEL: @xvhaddw_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhaddw_w_h(_1, _2); }
- // CHECK-LABEL: @xvhaddw_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhaddw_d_w(_1, _2); }
- // CHECK-LABEL: @xvhaddw_hu_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhaddw_hu_bu(_1, _2); }
- // CHECK-LABEL: @xvhaddw_wu_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhaddw_wu_hu(_1, _2); }
- // CHECK-LABEL: @xvhaddw_du_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhaddw_du_wu(_1, _2); }
- // CHECK-LABEL: @xvhsubw_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhsubw_h_b(_1, _2); }
- // CHECK-LABEL: @xvhsubw_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhsubw_w_h(_1, _2); }
- // CHECK-LABEL: @xvhsubw_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhsubw_d_w(_1, _2); }
- // CHECK-LABEL: @xvhsubw_hu_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhsubw_hu_bu(_1, _2); }
- // CHECK-LABEL: @xvhsubw_wu_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhsubw_wu_hu(_1, _2); }
- // CHECK-LABEL: @xvhsubw_du_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhsubw_du_wu(_1, _2); }
- // CHECK-LABEL: @xvmod_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmod_b(_1, _2); }
- // CHECK-LABEL: @xvmod_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmod_h(_1, _2); }
- // CHECK-LABEL: @xvmod_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmod_w(_1, _2); }
- // CHECK-LABEL: @xvmod_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmod_d(_1, _2); }
- // CHECK-LABEL: @xvmod_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmod_bu(_1, _2); }
- // CHECK-LABEL: @xvmod_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmod_hu(_1, _2); }
- // CHECK-LABEL: @xvmod_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmod_wu(_1, _2); }
- // CHECK-LABEL: @xvmod_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmod_du(_1, _2); }
- // CHECK-LABEL: @xvrepl128vei_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvrepl128vei_b(v32i8 _1) { return __builtin_lasx_xvrepl128vei_b(_1, 1); }
- // CHECK-LABEL: @xvrepl128vei_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvrepl128vei_h(v16i16 _1) { return __builtin_lasx_xvrepl128vei_h(_1, 1); }
- // CHECK-LABEL: @xvrepl128vei_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvrepl128vei_w(v8i32 _1) { return __builtin_lasx_xvrepl128vei_w(_1, 1); }
- // CHECK-LABEL: @xvrepl128vei_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvrepl128vei_d(v4i64 _1) { return __builtin_lasx_xvrepl128vei_d(_1, 1); }
- // CHECK-LABEL: @xvpickev_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickev_b(_1, _2); }
- // CHECK-LABEL: @xvpickev_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickev_h(_1, _2); }
- // CHECK-LABEL: @xvpickev_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickev_w(_1, _2); }
- // CHECK-LABEL: @xvpickev_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickev_d(_1, _2); }
- // CHECK-LABEL: @xvpickod_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickod_b(_1, _2); }
- // CHECK-LABEL: @xvpickod_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickod_h(_1, _2); }
- // CHECK-LABEL: @xvpickod_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickod_w(_1, _2); }
- // CHECK-LABEL: @xvpickod_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickod_d(_1, _2); }
- // CHECK-LABEL: @xvilvh_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvh_b(_1, _2); }
- // CHECK-LABEL: @xvilvh_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvh_h(_1, _2); }
- // CHECK-LABEL: @xvilvh_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvh_w(_1, _2); }
- // CHECK-LABEL: @xvilvh_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvh_d(_1, _2); }
- // CHECK-LABEL: @xvilvl_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvl_b(_1, _2); }
- // CHECK-LABEL: @xvilvl_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvl_h(_1, _2); }
- // CHECK-LABEL: @xvilvl_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvl_w(_1, _2); }
- // CHECK-LABEL: @xvilvl_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvl_d(_1, _2); }
- // CHECK-LABEL: @xvpackev_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackev_b(_1, _2); } - // CHECK-LABEL: @xvpackev_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackev_h(_1, _2); } - // CHECK-LABEL: @xvpackev_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackev_w(_1, _2); } - // CHECK-LABEL: @xvpackev_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackev_d(_1, _2); } - // CHECK-LABEL: @xvpackod_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackod_b(_1, _2); } - // CHECK-LABEL: @xvpackod_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call 
<16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackod_h(_1, _2); } - // CHECK-LABEL: @xvpackod_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackod_w(_1, _2); } - // CHECK-LABEL: @xvpackod_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackod_d(_1, _2); } - // CHECK-LABEL: @xvshuf_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvshuf_b(_1, _2, _3); } - // CHECK-LABEL: @xvshuf_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvshuf_h(_1, _2, _3); } - // CHECK-LABEL: @xvshuf_w( - // CHECK-NEXT: 
entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvshuf_w(_1, _2, _3); } - // CHECK-LABEL: @xvshuf_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvshuf_d(_1, _2, _3); } - // CHECK-LABEL: @xvand_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvand_v(_1, _2); } - // CHECK-LABEL: @xvandi_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvandi_b(v32u8 _1) { return __builtin_lasx_xvandi_b(_1, 1); } - // CHECK-LABEL: @xvor_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], 
ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvor_v(_1, _2); } - // CHECK-LABEL: @xvori_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvori_b(v32u8 _1) { return __builtin_lasx_xvori_b(_1, 1); } - // CHECK-LABEL: @xvnor_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvnor_v(_1, _2); } - // CHECK-LABEL: @xvnori_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvnori_b(v32u8 _1) { return __builtin_lasx_xvnori_b(_1, 1); } - // CHECK-LABEL: @xvxor_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvxor_v(_1, _2); } - // CHECK-LABEL: @xvxori_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvxori_b(v32u8 _1) { return __builtin_lasx_xvxori_b(_1, 1); } - // CHECK-LABEL: @xvbitsel_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], 
<32 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvbitsel_v(_1, _2, _3); } - // CHECK-LABEL: @xvbitseli_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitseli_b(_1, _2, 1); } - // CHECK-LABEL: @xvshuf4i_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvshuf4i_b(v32i8 _1) { return __builtin_lasx_xvshuf4i_b(_1, 1); } - // CHECK-LABEL: @xvshuf4i_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvshuf4i_h(v16i16 _1) { return __builtin_lasx_xvshuf4i_h(_1, 1); } - // CHECK-LABEL: @xvshuf4i_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvshuf4i_w(v8i32 _1) { return __builtin_lasx_xvshuf4i_w(_1, 1); } - // CHECK-LABEL: @xvreplgr2vr_b( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvreplgr2vr_b(int _1) { return __builtin_lasx_xvreplgr2vr_b(_1); } - // CHECK-LABEL: @xvreplgr2vr_h( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvreplgr2vr_h(int _1) { return __builtin_lasx_xvreplgr2vr_h(_1); } - // CHECK-LABEL: @xvreplgr2vr_w( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvreplgr2vr_w(int _1) { return __builtin_lasx_xvreplgr2vr_w(_1); } - // CHECK-LABEL: @xvreplgr2vr_d( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64 - // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvreplgr2vr_d(int _1) { return __builtin_lasx_xvreplgr2vr_d(_1); } - // CHECK-LABEL: @xvpcnt_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1]]) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvpcnt_b(v32i8 _1) { return __builtin_lasx_xvpcnt_b(_1); } - // CHECK-LABEL: @xvpcnt_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1]]) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvpcnt_h(v16i16 _1) { return __builtin_lasx_xvpcnt_h(_1); } - // CHECK-LABEL: @xvpcnt_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvpcnt_w(v8i32 _1) { return __builtin_lasx_xvpcnt_w(_1); } - // CHECK-LABEL: @xvpcnt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvpcnt_d(v4i64 _1) { return __builtin_lasx_xvpcnt_d(_1); } - // CHECK-LABEL: @xvclo_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1]]) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvclo_b(v32i8 _1) { return __builtin_lasx_xvclo_b(_1); } - // CHECK-LABEL: @xvclo_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1]]) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvclo_h(v16i16 _1) { return __builtin_lasx_xvclo_h(_1); } - // CHECK-LABEL: @xvclo_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvclo_w(v8i32 _1) { return __builtin_lasx_xvclo_w(_1); } - // CHECK-LABEL: @xvclo_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvclo_d(v4i64 _1) { return __builtin_lasx_xvclo_d(_1); } - // CHECK-LABEL: @xvclz_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1]]) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvclz_b(v32i8 _1) { return __builtin_lasx_xvclz_b(_1); } - // CHECK-LABEL: @xvclz_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1]]) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 
xvclz_h(v16i16 _1) { return __builtin_lasx_xvclz_h(_1); } - // CHECK-LABEL: @xvclz_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvclz_w(v8i32 _1) { return __builtin_lasx_xvclz_w(_1); } - // CHECK-LABEL: @xvclz_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvclz_d(v4i64 _1) { return __builtin_lasx_xvclz_d(_1); } - // CHECK-LABEL: @xvfadd_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfadd_s(_1, _2); } - // CHECK-LABEL: @xvfadd_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfadd_d(_1, _2); } - // CHECK-LABEL: @xvfsub_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfsub_s(_1, _2); } - // CHECK-LABEL: @xvfsub_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> 
@llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfsub_d(_1, _2); } - // CHECK-LABEL: @xvfmul_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmul_s(_1, _2); } - // CHECK-LABEL: @xvfmul_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmul_d(_1, _2); } - // CHECK-LABEL: @xvfdiv_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfdiv_s(_1, _2); } - // CHECK-LABEL: @xvfdiv_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - 
v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfdiv_d(_1, _2); } - // CHECK-LABEL: @xvfcvt_h_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcvt_h_s(_1, _2); } - // CHECK-LABEL: @xvfcvt_s_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcvt_s_d(_1, _2); } - // CHECK-LABEL: @xvfmin_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmin_s(_1, _2); } - // CHECK-LABEL: @xvfmin_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmin_d(_1, _2); } - // CHECK-LABEL: @xvfmina_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> 
@llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmina_s(_1, _2); } - // CHECK-LABEL: @xvfmina_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmina_d(_1, _2); } - // CHECK-LABEL: @xvfmax_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmax_s(_1, _2); } - // CHECK-LABEL: @xvfmax_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmax_d(_1, _2); } - // CHECK-LABEL: @xvfmaxa_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmaxa_s(_1, _2); } - // CHECK-LABEL: @xvfmaxa_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x 
double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmaxa_d(_1, _2); } - // CHECK-LABEL: @xvfclass_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfclass_s(v8f32 _1) { return __builtin_lasx_xvfclass_s(_1); } - // CHECK-LABEL: @xvfclass_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfclass_d(v4f64 _1) { return __builtin_lasx_xvfclass_d(_1); } - // CHECK-LABEL: @xvfsqrt_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfsqrt_s(v8f32 _1) { return __builtin_lasx_xvfsqrt_s(_1); } - // CHECK-LABEL: @xvfsqrt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfsqrt_d(v4f64 _1) { return __builtin_lasx_xvfsqrt_d(_1); } - // CHECK-LABEL: @xvfrecip_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfrecip_s(v8f32 _1) { return __builtin_lasx_xvfrecip_s(_1); } - // CHECK-LABEL: @xvfrecip_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfrecip_d(v4f64 _1) { return __builtin_lasx_xvfrecip_d(_1); } - // CHECK-LABEL: @xvfrint_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfrint_s(v8f32 _1) { return __builtin_lasx_xvfrint_s(_1); } - // CHECK-LABEL: @xvfrint_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfrint_d(v4f64 _1) { return __builtin_lasx_xvfrint_d(_1); } - // CHECK-LABEL: @xvfrsqrt_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfrsqrt_s(v8f32 _1) { return __builtin_lasx_xvfrsqrt_s(_1); } - // CHECK-LABEL: @xvfrsqrt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfrsqrt_d(v4f64 _1) { return __builtin_lasx_xvfrsqrt_d(_1); } - // CHECK-LABEL: @xvflogb_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvflogb_s(v8f32 _1) { return __builtin_lasx_xvflogb_s(_1); } - // 
CHECK-LABEL: @xvflogb_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvflogb_d(v4f64 _1) { return __builtin_lasx_xvflogb_d(_1); } - // CHECK-LABEL: @xvfcvth_s_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfcvth_s_h(v16i16 _1) { return __builtin_lasx_xvfcvth_s_h(_1); } - // CHECK-LABEL: @xvfcvth_d_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfcvth_d_s(v8f32 _1) { return __builtin_lasx_xvfcvth_d_s(_1); } - // CHECK-LABEL: @xvfcvtl_s_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfcvtl_s_h(v16i16 _1) { return __builtin_lasx_xvfcvtl_s_h(_1); } - // CHECK-LABEL: @xvfcvtl_d_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfcvtl_d_s(v8f32 _1) { return __builtin_lasx_xvfcvtl_d_s(_1); } - // CHECK-LABEL: @xvftint_w_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - 
// - v8i32 xvftint_w_s(v8f32 _1) { return __builtin_lasx_xvftint_w_s(_1); } - // CHECK-LABEL: @xvftint_l_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvftint_l_d(v4f64 _1) { return __builtin_lasx_xvftint_l_d(_1); } - // CHECK-LABEL: @xvftint_wu_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvftint_wu_s(v8f32 _1) { return __builtin_lasx_xvftint_wu_s(_1); } - // CHECK-LABEL: @xvftint_lu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvftint_lu_d(v4f64 _1) { return __builtin_lasx_xvftint_lu_d(_1); } - // CHECK-LABEL: @xvftintrz_w_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvftintrz_w_s(v8f32 _1) { return __builtin_lasx_xvftintrz_w_s(_1); } - // CHECK-LABEL: @xvftintrz_l_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvftintrz_l_d(v4f64 _1) { return __builtin_lasx_xvftintrz_l_d(_1); } - // CHECK-LABEL: @xvftintrz_wu_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x i32> 
[[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvftintrz_wu_s(v8f32 _1) { return __builtin_lasx_xvftintrz_wu_s(_1); } - // CHECK-LABEL: @xvftintrz_lu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvftintrz_lu_d(v4f64 _1) { return __builtin_lasx_xvftintrz_lu_d(_1); } - // CHECK-LABEL: @xvffint_s_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvffint_s_w(v8i32 _1) { return __builtin_lasx_xvffint_s_w(_1); } - // CHECK-LABEL: @xvffint_d_l( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvffint_d_l(v4i64 _1) { return __builtin_lasx_xvffint_d_l(_1); } - // CHECK-LABEL: @xvffint_s_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvffint_s_wu(v8u32 _1) { return __builtin_lasx_xvffint_s_wu(_1); } - // CHECK-LABEL: @xvffint_d_lu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvffint_d_lu(v4u64 _1) { return __builtin_lasx_xvffint_d_lu(_1); } - // CHECK-LABEL: @xvreplve_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1:%.*]], i32 [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail 
call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1]], i32 [[_2:%.*]]) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvreplve_b(v32i8 _1, int _2) { return __builtin_lasx_xvreplve_b(_1, _2); } - // CHECK-LABEL: @xvreplve_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1:%.*]], i32 [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1]], i32 [[_2:%.*]]) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvreplve_h(v16i16 _1, int _2) { return __builtin_lasx_xvreplve_h(_1, _2); } - // CHECK-LABEL: @xvreplve_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1:%.*]], i32 [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1]], i32 [[_2:%.*]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvreplve_w(v8i32 _1, int _2) { return __builtin_lasx_xvreplve_w(_1, _2); } - // CHECK-LABEL: @xvreplve_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1:%.*]], i32 [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2:%.*]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvreplve_d(v4i64 _1, int _2) { return __builtin_lasx_xvreplve_d(_1, _2); } - // CHECK-LABEL: @xvpermi_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpermi_w(_1, _2, 1); } - // CHECK-LABEL: @xvandn_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// 
CHECK-NEXT: ret void - // - v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvandn_v(_1, _2); } - // CHECK-LABEL: @xvneg_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1]]) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvneg_b(v32i8 _1) { return __builtin_lasx_xvneg_b(_1); } - // CHECK-LABEL: @xvneg_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1]]) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvneg_h(v16i16 _1) { return __builtin_lasx_xvneg_h(_1); } - // CHECK-LABEL: @xvneg_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvneg_w(v8i32 _1) { return __builtin_lasx_xvneg_w(_1); } - // CHECK-LABEL: @xvneg_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvneg_d(v4i64 _1) { return __builtin_lasx_xvneg_d(_1); } - // CHECK-LABEL: @xvmuh_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmuh_b(_1, _2); } - // CHECK-LABEL: @xvmuh_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmuh_h(_1, _2); } - // CHECK-LABEL: @xvmuh_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmuh_w(_1, _2); } - // CHECK-LABEL: @xvmuh_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmuh_d(_1, _2); } - // CHECK-LABEL: @xvmuh_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmuh_bu(_1, _2); } - // CHECK-LABEL: @xvmuh_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmuh_hu(_1, _2); } - // CHECK-LABEL: @xvmuh_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x 
i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmuh_wu(_1, _2); } - // CHECK-LABEL: @xvmuh_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmuh_du(_1, _2); } - // CHECK-LABEL: @xvsllwil_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsllwil_h_b(v32i8 _1) { return __builtin_lasx_xvsllwil_h_b(_1, 1); } - // CHECK-LABEL: @xvsllwil_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsllwil_w_h(v16i16 _1) { return __builtin_lasx_xvsllwil_w_h(_1, 1); } - // CHECK-LABEL: @xvsllwil_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsllwil_d_w(v8i32 _1) { return __builtin_lasx_xvsllwil_d_w(_1, 1); } - // CHECK-LABEL: @xvsllwil_hu_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvsllwil_hu_bu(v32u8 _1) { return 
__builtin_lasx_xvsllwil_hu_bu(_1, 1); } - // CHECK-LABEL: @xvsllwil_wu_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvsllwil_wu_hu(v16u16 _1) { return __builtin_lasx_xvsllwil_wu_hu(_1, 1); } - // CHECK-LABEL: @xvsllwil_du_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvsllwil_du_wu(v8u32 _1) { return __builtin_lasx_xvsllwil_du_wu(_1, 1); } - // CHECK-LABEL: @xvsran_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsran_b_h(_1, _2); } - // CHECK-LABEL: @xvsran_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsran_h_w(_1, _2); } - // CHECK-LABEL: @xvsran_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsran_w_d(_1, _2); } - // CHECK-LABEL: @xvssran_b_h( - // CHECK-NEXT: 
entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssran_b_h(_1, _2); } - // CHECK-LABEL: @xvssran_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssran_h_w(_1, _2); } - // CHECK-LABEL: @xvssran_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssran_w_d(_1, _2); } - // CHECK-LABEL: @xvssran_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssran_bu_h(_1, _2); } - // CHECK-LABEL: @xvssran_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] 
-+// CHECK-NEXT: ret void - // - v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssran_hu_w(_1, _2); } - // CHECK-LABEL: @xvssran_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssran_wu_d(_1, _2); } - // CHECK-LABEL: @xvsrarn_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarn_b_h(_1, _2); } - // CHECK-LABEL: @xvsrarn_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarn_h_w(_1, _2); } - // CHECK-LABEL: @xvsrarn_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarn_w_d(_1, _2); } - // CHECK-LABEL: @xvssrarn_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarn_b_h(_1, _2); } - // CHECK-LABEL: @xvssrarn_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarn_h_w(_1, _2); } - // CHECK-LABEL: @xvssrarn_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarn_w_d(_1, _2); } - // CHECK-LABEL: @xvssrarn_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrarn_bu_h(_1, _2); } - // CHECK-LABEL: @xvssrarn_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrarn_hu_w(_1, _2); } - // CHECK-LABEL: @xvssrarn_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x 
i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrarn_wu_d(_1, _2); } - // CHECK-LABEL: @xvsrln_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrln_b_h(_1, _2); } - // CHECK-LABEL: @xvsrln_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrln_h_w(_1, _2); } - // CHECK-LABEL: @xvsrln_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrln_w_d(_1, _2); } - // CHECK-LABEL: @xvssrln_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrln_bu_h(_1, _2); } - // CHECK-LABEL: @xvssrln_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrln_hu_w(_1, _2); } - // CHECK-LABEL: @xvssrln_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrln_wu_d(_1, _2); } - // CHECK-LABEL: @xvsrlrn_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrn_b_h(_1, _2); } - // CHECK-LABEL: @xvsrlrn_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrn_h_w(_1, _2); } - // CHECK-LABEL: @xvsrlrn_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 
_2) { return __builtin_lasx_xvsrlrn_w_d(_1, _2); } - // CHECK-LABEL: @xvssrlrn_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrlrn_bu_h(_1, _2); } - // CHECK-LABEL: @xvssrlrn_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrlrn_hu_w(_1, _2); } - // CHECK-LABEL: @xvssrlrn_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrlrn_wu_d(_1, _2); } - // CHECK-LABEL: @xvfrstpi_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvfrstpi_b(_1, _2, 1); } - // CHECK-LABEL: @xvfrstpi_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvfrstpi_h(_1, _2, 1); } - // CHECK-LABEL: @xvfrstp_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvfrstp_b(_1, _2, _3); } - // CHECK-LABEL: @xvfrstp_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvfrstp_h(_1, _2, _3); } - // CHECK-LABEL: @xvshuf4i_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvshuf4i_d(_1, _2, 1); } - // CHECK-LABEL: @xvbsrl_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvbsrl_v(v32i8 _1) { return __builtin_lasx_xvbsrl_v(_1, 1); } - // CHECK-LABEL: @xvbsll_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1:%.*]], 
i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvbsll_v(v32i8 _1) { return __builtin_lasx_xvbsll_v(_1, 1); } - // CHECK-LABEL: @xvextrins_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvextrins_b(_1, _2, 1); } - // CHECK-LABEL: @xvextrins_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvextrins_h(_1, _2, 1); } - // CHECK-LABEL: @xvextrins_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvextrins_w(_1, _2, 1); } - // CHECK-LABEL: @xvextrins_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvextrins_d(_1, _2, 1); } - // CHECK-LABEL: @xvmskltz_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1]]) -+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvmskltz_b(v32i8 _1) { return __builtin_lasx_xvmskltz_b(_1); } - // CHECK-LABEL: @xvmskltz_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1]]) -+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmskltz_h(v16i16 _1) { return __builtin_lasx_xvmskltz_h(_1); } - // CHECK-LABEL: @xvmskltz_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1]]) -+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmskltz_w(v8i32 _1) { return __builtin_lasx_xvmskltz_w(_1); } - // CHECK-LABEL: @xvmskltz_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]]) -+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmskltz_d(v4i64 _1) { return __builtin_lasx_xvmskltz_d(_1); } - // CHECK-LABEL: @xvsigncov_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsigncov_b(_1, _2); } - // CHECK-LABEL: @xvsigncov_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// 
CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsigncov_h(_1, _2); } - // CHECK-LABEL: @xvsigncov_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsigncov_w(_1, _2); } - // CHECK-LABEL: @xvsigncov_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsigncov_d(_1, _2); } - // CHECK-LABEL: @xvfmadd_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) -+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmadd_s(_1, _2, _3); } - // CHECK-LABEL: @xvfmadd_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) -+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmadd_d(_1, _2, _3); } - // CHECK-LABEL: @xvfmsub_s( - // CHECK-NEXT: entry: --// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
-+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmsub_s(_1, _2, _3); }
- // CHECK-LABEL: @xvfmsub_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
-+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmsub_d(_1, _2, _3); }
- // CHECK-LABEL: @xvfnmadd_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
-+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmadd_s(_1, _2, _3); }
- // CHECK-LABEL: @xvfnmadd_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
-+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmadd_d(_1, _2, _3); }
- // CHECK-LABEL: @xvfnmsub_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
-+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmsub_s(_1, _2, _3); }
- // CHECK-LABEL: @xvfnmsub_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
-+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmsub_d(_1, _2, _3); }
- // CHECK-LABEL: @xvftintrne_w_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvftintrne_w_s(v8f32 _1) { return __builtin_lasx_xvftintrne_w_s(_1); }
- // CHECK-LABEL: @xvftintrne_l_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvftintrne_l_d(v4f64 _1) { return __builtin_lasx_xvftintrne_l_d(_1); }
- // CHECK-LABEL: @xvftintrp_w_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvftintrp_w_s(v8f32 _1) { return __builtin_lasx_xvftintrp_w_s(_1); }
- // CHECK-LABEL: @xvftintrp_l_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvftintrp_l_d(v4f64 _1) { return __builtin_lasx_xvftintrp_l_d(_1); }
- // CHECK-LABEL: @xvftintrm_w_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvftintrm_w_s(v8f32 _1) { return __builtin_lasx_xvftintrm_w_s(_1); }
- // CHECK-LABEL: @xvftintrm_l_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvftintrm_l_d(v4f64 _1) { return __builtin_lasx_xvftintrm_l_d(_1); }
- // CHECK-LABEL: @xvftint_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftint_w_d(_1, _2); }
- // CHECK-LABEL: @xvffint_s_l(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x float> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvffint_s_l(_1, _2); }
- // CHECK-LABEL: @xvftintrz_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrz_w_d(_1, _2); }
- // CHECK-LABEL: @xvftintrp_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrp_w_d(_1, _2); }
- // CHECK-LABEL: @xvftintrm_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrm_w_d(_1, _2); }
- // CHECK-LABEL: @xvftintrne_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrne_w_d(_1, _2); }
- // CHECK-LABEL: @xvftinth_l_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvftinth_l_s(v8f32 _1) { return __builtin_lasx_xvftinth_l_s(_1); }
- // CHECK-LABEL: @xvftintl_l_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvftintl_l_s(v8f32 _1) { return __builtin_lasx_xvftintl_l_s(_1); }
- // CHECK-LABEL: @xvffinth_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1]])
-+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4f64 xvffinth_d_w(v8i32 _1) { return __builtin_lasx_xvffinth_d_w(_1); }
- // CHECK-LABEL: @xvffintl_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x double> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1]])
-+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4f64 xvffintl_d_w(v8i32 _1) { return __builtin_lasx_xvffintl_d_w(_1); }
- // CHECK-LABEL: @xvftintrzh_l_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvftintrzh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzh_l_s(_1); }
- // CHECK-LABEL: @xvftintrzl_l_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvftintrzl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzl_l_s(_1); }
- // CHECK-LABEL: @xvftintrph_l_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvftintrph_l_s(v8f32 _1) { return __builtin_lasx_xvftintrph_l_s(_1); }
- // CHECK-LABEL: @xvftintrpl_l_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvftintrpl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrpl_l_s(_1); }
- // CHECK-LABEL: @xvftintrmh_l_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvftintrmh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrmh_l_s(_1); }
- // CHECK-LABEL: @xvftintrml_l_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvftintrml_l_s(v8f32 _1) { return __builtin_lasx_xvftintrml_l_s(_1); }
- // CHECK-LABEL: @xvftintrneh_l_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvftintrneh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrneh_l_s(_1); }
- // CHECK-LABEL: @xvftintrnel_l_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvftintrnel_l_s(v8f32 _1) { return __builtin_lasx_xvftintrnel_l_s(_1); }
- // CHECK-LABEL: @xvfrintrne_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1:%.*]])
--// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32>
--// CHECK-NEXT: ret <8 x i32> [[TMP1]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvfrintrne_s(v8f32 _1) { return __builtin_lasx_xvfrintrne_s(_1); }
- // CHECK-LABEL: @xvfrintrne_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1:%.*]])
--// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64>
--// CHECK-NEXT: ret <4 x i64> [[TMP1]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]])
-+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvfrintrne_d(v4f64 _1) { return __builtin_lasx_xvfrintrne_d(_1); }
- // CHECK-LABEL: @xvfrintrz_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1:%.*]])
--// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32>
--// CHECK-NEXT: ret <8 x i32> [[TMP1]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvfrintrz_s(v8f32 _1) { return __builtin_lasx_xvfrintrz_s(_1); }
- // CHECK-LABEL: @xvfrintrz_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1:%.*]])
--// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64>
--// CHECK-NEXT: ret <4 x i64> [[TMP1]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]])
-+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvfrintrz_d(v4f64 _1) { return __builtin_lasx_xvfrintrz_d(_1); }
- // CHECK-LABEL: @xvfrintrp_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1:%.*]])
--// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32>
--// CHECK-NEXT: ret <8 x i32> [[TMP1]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvfrintrp_s(v8f32 _1) { return __builtin_lasx_xvfrintrp_s(_1); }
- // CHECK-LABEL: @xvfrintrp_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1:%.*]])
--// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64>
--// CHECK-NEXT: ret <4 x i64> [[TMP1]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]])
-+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvfrintrp_d(v4f64 _1) { return __builtin_lasx_xvfrintrp_d(_1); }
- // CHECK-LABEL: @xvfrintrm_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1:%.*]])
--// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32>
--// CHECK-NEXT: ret <8 x i32> [[TMP1]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvfrintrm_s(v8f32 _1) { return __builtin_lasx_xvfrintrm_s(_1); }
- // CHECK-LABEL: @xvfrintrm_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1:%.*]])
--// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64>
--// CHECK-NEXT: ret <4 x i64> [[TMP1]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]])
-+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvfrintrm_d(v4f64 _1) { return __builtin_lasx_xvfrintrm_d(_1); }
- // CHECK-LABEL: @xvld(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvld(void *_1) { return __builtin_lasx_xvld(_1, 1); }
- // CHECK-LABEL: @xvst(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1)
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1)
- // CHECK-NEXT: ret void
- //
- void xvst(v32i8 _1, void *_2) { return __builtin_lasx_xvst(_1, _2, 1); }
- // CHECK-LABEL: @xvstelm_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1)
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1, i32 1)
- // CHECK-NEXT: ret void
- //
- void xvstelm_b(v32i8 _1, void * _2) { return __builtin_lasx_xvstelm_b(_1, _2, 1, 1); }
- // CHECK-LABEL: @xvstelm_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1)
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2:%.*]], i32 2, i32 1)
- // CHECK-NEXT: ret void
- //
- void xvstelm_h(v16i16 _1, void * _2) { return __builtin_lasx_xvstelm_h(_1, _2, 2, 1); }
- // CHECK-LABEL: @xvstelm_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1)
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2:%.*]], i32 4, i32 1)
- // CHECK-NEXT: ret void
- //
- void xvstelm_w(v8i32 _1, void * _2) { return __builtin_lasx_xvstelm_w(_1, _2, 4, 1); }
- // CHECK-LABEL: @xvstelm_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1)
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2:%.*]], i32 8, i32 1)
- // CHECK-NEXT: ret void
- //
- void xvstelm_d(v4i64 _1, void * _2) { return __builtin_lasx_xvstelm_d(_1, _2, 8, 1); }
- // CHECK-LABEL: @xvinsve0_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvinsve0_w(_1, _2, 1); }
- // CHECK-LABEL: @xvinsve0_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvinsve0_d(_1, _2, 1); }
- // CHECK-LABEL: @xvpickve_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvpickve_w(v8i32 _1) { return __builtin_lasx_xvpickve_w(_1, 1); }
- // CHECK-LABEL: @xvpickve_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvpickve_d(v4i64 _1) { return __builtin_lasx_xvpickve_d(_1, 1); }
- // CHECK-LABEL: @xvssrlrn_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrn_b_h(_1, _2); }
- // CHECK-LABEL: @xvssrlrn_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrn_h_w(_1, _2); }
- // CHECK-LABEL: @xvssrlrn_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrn_w_d(_1, _2); }
- // CHECK-LABEL: @xvssrln_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrln_b_h(_1, _2); }
- // CHECK-LABEL: @xvssrln_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrln_h_w(_1, _2); }
- // CHECK-LABEL: @xvssrln_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrln_w_d(_1, _2); }
- // CHECK-LABEL: @xvorn_v(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvorn_v(_1, _2); }
- // CHECK-LABEL: @xvldi(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvldi() { return __builtin_lasx_xvldi(1); }
- // CHECK-LABEL: @xvldx(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvldx(void *_1) { return __builtin_lasx_xvldx(_1, 1); }
- // CHECK-LABEL: @xvstx(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1)
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1]], ptr [[_2:%.*]], i64 1)
- // CHECK-NEXT: ret void
- //
- void xvstx(v32i8 _1, void *_2) { return __builtin_lasx_xvstx(_1, _2, 1); }
- // CHECK-LABEL: @xvextl_qu_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvextl_qu_du(v4u64 _1) { return __builtin_lasx_xvextl_qu_du(_1); }
- // CHECK-LABEL: @xvinsgr2vr_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1:%.*]], i32 1, i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvinsgr2vr_w(v8i32 _1) { return __builtin_lasx_xvinsgr2vr_w(_1, 1, 1); }
- // CHECK-LABEL: @xvinsgr2vr_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1:%.*]], i64 1, i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvinsgr2vr_d(v4i64 _1) { return __builtin_lasx_xvinsgr2vr_d(_1, 1, 1); }
- // CHECK-LABEL: @xvreplve0_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1]])
-+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvreplve0_b(v32i8 _1) { return __builtin_lasx_xvreplve0_b(_1); }
- // CHECK-LABEL: @xvreplve0_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1]])
-+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvreplve0_h(v16i16 _1) { return __builtin_lasx_xvreplve0_h(_1); }
- // CHECK-LABEL: @xvreplve0_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1]])
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvreplve0_w(v8i32 _1) { return __builtin_lasx_xvreplve0_w(_1); }
- // CHECK-LABEL: @xvreplve0_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvreplve0_d(v4i64 _1) { return __builtin_lasx_xvreplve0_d(_1); }
- // CHECK-LABEL: @xvreplve0_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1]])
-+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvreplve0_q(v32i8 _1) { return __builtin_lasx_xvreplve0_q(_1); }
- // CHECK-LABEL: @vext2xv_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1]])
-+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 vext2xv_h_b(v32i8 _1) { return __builtin_lasx_vext2xv_h_b(_1); }
- // CHECK-LABEL: @vext2xv_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1]])
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 vext2xv_w_h(v16i16 _1) { return __builtin_lasx_vext2xv_w_h(_1); }
- // CHECK-LABEL: @vext2xv_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 vext2xv_d_w(v8i32 _1) { return __builtin_lasx_vext2xv_d_w(_1); }
- // CHECK-LABEL: @vext2xv_w_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1]])
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 vext2xv_w_b(v32i8 _1) { return __builtin_lasx_vext2xv_w_b(_1); }
- // CHECK-LABEL: @vext2xv_d_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 vext2xv_d_h(v16i16 _1) { return __builtin_lasx_vext2xv_d_h(_1); }
- // CHECK-LABEL: @vext2xv_d_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 vext2xv_d_b(v32i8 _1) { return __builtin_lasx_vext2xv_d_b(_1); }
- // CHECK-LABEL: @vext2xv_hu_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1]])
-+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 vext2xv_hu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_hu_bu(_1); }
- // CHECK-LABEL: @vext2xv_wu_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1]])
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 vext2xv_wu_hu(v16i16 _1) { return __builtin_lasx_vext2xv_wu_hu(_1); }
- // CHECK-LABEL: @vext2xv_du_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 vext2xv_du_wu(v8i32 _1) { return __builtin_lasx_vext2xv_du_wu(_1); }
- // CHECK-LABEL: @vext2xv_wu_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1]])
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 vext2xv_wu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_wu_bu(_1); }
- // CHECK-LABEL: @vext2xv_du_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 vext2xv_du_hu(v16i16 _1) { return __builtin_lasx_vext2xv_du_hu(_1); }
- // CHECK-LABEL: @vext2xv_du_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 vext2xv_du_bu(v32i8 _1) { return __builtin_lasx_vext2xv_du_bu(_1); }
- // CHECK-LABEL: @xvpermi_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpermi_q(_1, _2, 1); }
- // CHECK-LABEL: @xvpermi_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvpermi_d(v4i64 _1) { return __builtin_lasx_xvpermi_d(_1, 1); }
- // CHECK-LABEL: @xvperm_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvperm_w(_1, _2); }
- // CHECK-LABEL: @xvldrepl_b(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvldrepl_b(void *_1) { return __builtin_lasx_xvldrepl_b(_1, 1); }
- // CHECK-LABEL: @xvldrepl_h(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvldrepl_h(void *_1) { return __builtin_lasx_xvldrepl_h(_1, 2); }
- // CHECK-LABEL: @xvldrepl_w(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvldrepl_w(void *_1) { return __builtin_lasx_xvldrepl_w(_1, 4); }
- // CHECK-LABEL: @xvldrepl_d(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvldrepl_d(void *_1) { return __builtin_lasx_xvldrepl_d(_1, 8); }
- // CHECK-LABEL: @xvpickve2gr_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1)
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- int xvpickve2gr_w(v8i32 _1) { return __builtin_lasx_xvpickve2gr_w(_1, 1); }
- // CHECK-LABEL: @xvpickve2gr_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1)
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- unsigned int xvpickve2gr_wu(v8i32 _1) { return __builtin_lasx_xvpickve2gr_wu(_1, 1); }
- // CHECK-LABEL: @xvpickve2gr_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret i64 [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1)
-+// CHECK-NEXT: ret i64 [[TMP1]]
- //
- long xvpickve2gr_d(v4i64 _1) { return __builtin_lasx_xvpickve2gr_d(_1, 1); }
- // CHECK-LABEL: @xvpickve2gr_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret i64 [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1)
-+// CHECK-NEXT: ret i64 [[TMP1]]
- //
- unsigned long int xvpickve2gr_du(v4i64 _1) { return __builtin_lasx_xvpickve2gr_du(_1, 1); }
- // CHECK-LABEL: @xvaddwev_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_d(_1, _2); }
- // CHECK-LABEL: @xvaddwev_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_w(_1, _2); }
- // CHECK-LABEL: @xvaddwev_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_h(_1, _2); }
- // CHECK-LABEL: @xvaddwev_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_b(_1, _2); }
- // CHECK-LABEL: @xvaddwev_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwev_q_du(_1, _2); }
- // CHECK-LABEL: @xvaddwev_d_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwev_d_wu(_1, _2); }
- // CHECK-LABEL: @xvaddwev_w_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwev_w_hu(_1, _2); }
- // CHECK-LABEL: @xvaddwev_h_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwev_h_bu(_1, _2); }
- // CHECK-LABEL: @xvsubwev_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwev_q_d(_1, _2); }
- // CHECK-LABEL: @xvsubwev_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwev_d_w(_1, _2); }
- // CHECK-LABEL: @xvsubwev_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwev_w_h(_1, _2); }
- // CHECK-LABEL: @xvsubwev_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwev_h_b(_1, _2); }
- // CHECK-LABEL: @xvsubwev_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwev_q_du(_1, _2); }
- // CHECK-LABEL: @xvsubwev_d_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwev_d_wu(_1, _2); }
- // CHECK-LABEL: @xvsubwev_w_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwev_w_hu(_1, _2); }
- // CHECK-LABEL: @xvsubwev_h_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwev_h_bu(_1, _2); }
- // CHECK-LABEL: @xvmulwev_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_d(_1, _2); }
- // CHECK-LABEL: @xvmulwev_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_w(_1, _2); }
- // CHECK-LABEL: @xvmulwev_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_h(_1, _2); }
- // CHECK-LABEL: @xvmulwev_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT:
ret void - // - v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_b(_1, _2); } - // CHECK-LABEL: @xvmulwev_q_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwev_q_du(_1, _2); } - // CHECK-LABEL: @xvmulwev_d_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwev_d_wu(_1, _2); } - // CHECK-LABEL: @xvmulwev_w_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwev_w_hu(_1, _2); } - // CHECK-LABEL: @xvmulwev_h_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwev_h_bu(_1, _2); } - // CHECK-LABEL: @xvaddwod_q_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call 
<4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_d(_1, _2); } - // CHECK-LABEL: @xvaddwod_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_w(_1, _2); } - // CHECK-LABEL: @xvaddwod_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_h(_1, _2); } - // CHECK-LABEL: @xvaddwod_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_b(_1, _2); } - // CHECK-LABEL: @xvaddwod_q_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwod_q_du(_1, _2); } - // CHECK-LABEL: @xvaddwod_d_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, 
ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwod_d_wu(_1, _2); } - // CHECK-LABEL: @xvaddwod_w_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwod_w_hu(_1, _2); } - // CHECK-LABEL: @xvaddwod_h_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwod_h_bu(_1, _2); } - // CHECK-LABEL: @xvsubwod_q_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwod_q_d(_1, _2); } - // CHECK-LABEL: @xvsubwod_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwod_d_w(_1, _2); } - // CHECK-LABEL: @xvsubwod_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwod_w_h(_1, _2); } - // CHECK-LABEL: @xvsubwod_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwod_h_b(_1, _2); } - // CHECK-LABEL: @xvsubwod_q_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwod_q_du(_1, _2); } - // CHECK-LABEL: @xvsubwod_d_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwod_d_wu(_1, _2); } - // CHECK-LABEL: @xvsubwod_w_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret 
void - // - v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwod_w_hu(_1, _2); } - // CHECK-LABEL: @xvsubwod_h_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwod_h_bu(_1, _2); } - // CHECK-LABEL: @xvmulwod_q_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_d(_1, _2); } - // CHECK-LABEL: @xvmulwod_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_w(_1, _2); } - // CHECK-LABEL: @xvmulwod_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_h(_1, _2); } - // CHECK-LABEL: @xvmulwod_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_b(_1, _2); } - // CHECK-LABEL: @xvmulwod_q_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwod_q_du(_1, _2); } - // CHECK-LABEL: @xvmulwod_d_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwod_d_wu(_1, _2); } - // CHECK-LABEL: @xvmulwod_w_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwod_w_hu(_1, _2); } - // CHECK-LABEL: @xvmulwod_h_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwod_h_bu(_1, _2); } - // CHECK-LABEL: @xvaddwev_d_wu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load 
<8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_wu_w(_1, _2); } - // CHECK-LABEL: @xvaddwev_w_hu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_hu_h(_1, _2); } - // CHECK-LABEL: @xvaddwev_h_bu_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_bu_b(_1, _2); } - // CHECK-LABEL: @xvmulwev_d_wu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_wu_w(_1, _2); } - // CHECK-LABEL: @xvmulwev_w_hu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_hu_h(_1, _2); } - // 
CHECK-LABEL: @xvmulwev_h_bu_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_bu_b(_1, _2); } - // CHECK-LABEL: @xvaddwod_d_wu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_wu_w(_1, _2); } - // CHECK-LABEL: @xvaddwod_w_hu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_hu_h(_1, _2); } - // CHECK-LABEL: @xvaddwod_h_bu_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_bu_b(_1, _2); } - // CHECK-LABEL: @xvmulwod_d_wu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> 
[[_1]], <8 x i32> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_wu_w(_1, _2); } - // CHECK-LABEL: @xvmulwod_w_hu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_hu_h(_1, _2); } - // CHECK-LABEL: @xvmulwod_h_bu_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) -+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_bu_b(_1, _2); } - // CHECK-LABEL: @xvhaddw_q_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhaddw_q_d(_1, _2); } - // CHECK-LABEL: @xvhaddw_qu_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhaddw_qu_du(_1, _2); } - // CHECK-LABEL: @xvhsubw_q_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhsubw_q_d(_1, _2); } - // CHECK-LABEL: @xvhsubw_qu_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhsubw_qu_du(_1, _2); } - // CHECK-LABEL: @xvmaddwev_q_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_d(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwev_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_w(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwev_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_h(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwev_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_b(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwev_q_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwev_q_du(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwev_d_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwev_d_wu(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwev_w_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// 
CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwev_w_hu(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwev_h_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwev_h_bu(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwod_q_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_d(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwod_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_w(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwod_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr 
[[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) -+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_h(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwod_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) -+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_b(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwod_q_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwod_q_du(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwod_d_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) -+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwod_d_wu(_1, _2, _3); } - // CHECK-LABEL: @xvmaddwod_w_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// 
CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
-+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwod_w_hu(_1, _2, _3); }
- // CHECK-LABEL: @xvmaddwod_h_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
-+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwod_h_bu(_1, _2, _3); }
- // CHECK-LABEL: @xvmaddwev_q_du_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_du_d(_1, _2, _3); }
- // CHECK-LABEL: @xvmaddwev_d_wu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
-+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_wu_w(_1, _2, _3); }
- // CHECK-LABEL: @xvmaddwev_w_hu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
-+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_hu_h(_1, _2, _3); }
- // CHECK-LABEL: @xvmaddwev_h_bu_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
-+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_bu_b(_1, _2, _3); }
- // CHECK-LABEL: @xvmaddwod_q_du_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
-+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_du_d(_1, _2, _3); }
- // CHECK-LABEL: @xvmaddwod_d_wu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
-+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_wu_w(_1, _2, _3); }
- // CHECK-LABEL: @xvmaddwod_w_hu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
-+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_hu_h(_1, _2, _3); }
- // CHECK-LABEL: @xvmaddwod_h_bu_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
-+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_bu_b(_1, _2, _3); }
- // CHECK-LABEL: @xvrotr_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvrotr_b(_1, _2); }
- // CHECK-LABEL: @xvrotr_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvrotr_h(_1, _2); }
- // CHECK-LABEL: @xvrotr_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvrotr_w(_1, _2); }
- // CHECK-LABEL: @xvrotr_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvrotr_d(_1, _2); }
- // CHECK-LABEL: @xvadd_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_q(_1, _2); }
- // CHECK-LABEL: @xvsub_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_q(_1, _2); }
- // CHECK-LABEL: @xvaddwev_q_du_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_du_d(_1, _2); }
- // CHECK-LABEL: @xvaddwod_q_du_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_du_d(_1, _2); }
- // CHECK-LABEL: @xvmulwev_q_du_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_du_d(_1, _2); }
- // CHECK-LABEL: @xvmulwod_q_du_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_du_d(_1, _2); }
- // CHECK-LABEL: @xvmskgez_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1]])
-+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvmskgez_b(v32i8 _1) { return __builtin_lasx_xvmskgez_b(_1); }
- // CHECK-LABEL: @xvmsknz_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1]])
-+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvmsknz_b(v32i8 _1) { return __builtin_lasx_xvmsknz_b(_1); }
- // CHECK-LABEL: @xvexth_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1]])
-+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvexth_h_b(v32i8 _1) { return __builtin_lasx_xvexth_h_b(_1); }
- // CHECK-LABEL: @xvexth_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1]])
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvexth_w_h(v16i16 _1) { return __builtin_lasx_xvexth_w_h(_1); }
- // CHECK-LABEL: @xvexth_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvexth_d_w(v8i32 _1) { return __builtin_lasx_xvexth_d_w(_1); }
- // CHECK-LABEL: @xvexth_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvexth_q_d(v4i64 _1) { return __builtin_lasx_xvexth_q_d(_1); }
- // CHECK-LABEL: @xvexth_hu_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1]])
-+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvexth_hu_bu(v32u8 _1) { return __builtin_lasx_xvexth_hu_bu(_1); }
- // CHECK-LABEL: @xvexth_wu_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1]])
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvexth_wu_hu(v16u16 _1) { return __builtin_lasx_xvexth_wu_hu(_1); }
- // CHECK-LABEL: @xvexth_du_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvexth_du_wu(v8u32 _1) { return __builtin_lasx_xvexth_du_wu(_1); }
- // CHECK-LABEL: @xvexth_qu_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvexth_qu_du(v4u64 _1) { return __builtin_lasx_xvexth_qu_du(_1); }
- // CHECK-LABEL: @xvrotri_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvrotri_b(v32i8 _1) { return __builtin_lasx_xvrotri_b(_1, 1); }
- // CHECK-LABEL: @xvrotri_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvrotri_h(v16i16 _1) { return __builtin_lasx_xvrotri_h(_1, 1); }
- // CHECK-LABEL: @xvrotri_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvrotri_w(v8i32 _1) { return __builtin_lasx_xvrotri_w(_1, 1); }
- // CHECK-LABEL: @xvrotri_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvrotri_d(v4i64 _1) { return __builtin_lasx_xvrotri_d(_1, 1); }
- // CHECK-LABEL: @xvextl_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]])
-+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvextl_q_d(v4i64 _1) { return __builtin_lasx_xvextl_q_d(_1); }
- // CHECK-LABEL: @xvsrlni_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlni_b_h(_1, _2, 1); }
- // CHECK-LABEL: @xvsrlni_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlni_h_w(_1, _2, 1); }
- // CHECK-LABEL: @xvsrlni_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlni_w_d(_1, _2, 1); }
- // CHECK-LABEL: @xvsrlni_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlni_d_q(_1, _2, 1); }
- // CHECK-LABEL: @xvsrlrni_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlrni_b_h(_1, _2, 1); }
- // CHECK-LABEL: @xvsrlrni_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrni_h_w(_1, _2, 1); }
- // CHECK-LABEL: @xvsrlrni_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrni_w_d(_1, _2, 1); }
- // CHECK-LABEL: @xvsrlrni_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrni_d_q(_1, _2, 1); }
- // CHECK-LABEL: @xvssrlni_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_b_h(_1, _2, 1); }
- // CHECK-LABEL: @xvssrlni_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_h_w(_1, _2, 1); }
- // CHECK-LABEL: @xvssrlni_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_w_d(_1, _2, 1); }
- // CHECK-LABEL: @xvssrlni_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_d_q(_1, _2, 1); }
- // CHECK-LABEL: @xvssrlni_bu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_bu_h(_1, _2, 1); }
- // CHECK-LABEL: @xvssrlni_hu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_hu_w(_1, _2, 1); }
- // CHECK-LABEL: @xvssrlni_wu_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_wu_d(_1, _2, 1); }
- // CHECK-LABEL: @xvssrlni_du_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_du_q(_1, _2, 1); }
- // CHECK-LABEL: @xvssrlrni_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_b_h(_1, _2, 1); }
- // CHECK-LABEL: @xvssrlrni_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_h_w(_1, _2, 1); }
- // CHECK-LABEL: @xvssrlrni_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_w_d(_1, _2, 1); }
- // CHECK-LABEL: @xvssrlrni_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_d_q(_1, _2, 1); }
- // CHECK-LABEL: @xvssrlrni_bu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_bu_h(_1, _2, 1); }
- // CHECK-LABEL: @xvssrlrni_hu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_hu_w(_1, _2, 1); }
- // CHECK-LABEL: @xvssrlrni_wu_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_wu_d(_1, _2, 1); }
- // CHECK-LABEL: @xvssrlrni_du_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_du_q(_1, _2, 1); }
- // CHECK-LABEL: @xvsrani_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrani_b_h(_1, _2, 1); }
- // CHECK-LABEL: @xvsrani_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrani_h_w(_1, _2, 1); }
- // CHECK-LABEL: @xvsrani_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrani_w_d(_1, _2, 1); }
- // CHECK-LABEL: @xvsrani_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrani_d_q(_1, _2, 1); }
- // CHECK-LABEL: @xvsrarni_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrarni_b_h(_1, _2, 1); }
- // CHECK-LABEL: @xvsrarni_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarni_h_w(_1, _2, 1); }
- // CHECK-LABEL: @xvsrarni_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarni_w_d(_1, _2, 1); }
- // CHECK-LABEL: @xvsrarni_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarni_d_q(_1, _2, 1); }
- // CHECK-LABEL: @xvssrani_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_b_h(_1, _2, 1); }
- // CHECK-LABEL: @xvssrani_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_h_w(_1, _2, 1); }
- // CHECK-LABEL: @xvssrani_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_w_d(_1, _2, 1); }
- // CHECK-LABEL: @xvssrani_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_d_q(_1, _2, 1); }
- // CHECK-LABEL: @xvssrani_bu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_bu_h(_1, _2, 1); }
- // CHECK-LABEL: @xvssrani_hu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_hu_w(_1, _2, 1); }
- // CHECK-LABEL: @xvssrani_wu_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_wu_d(_1, _2, 1); }
- // CHECK-LABEL: @xvssrani_du_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_du_q(_1, _2, 1); }
- // CHECK-LABEL: @xvssrarni_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_b_h(_1, _2, 1); }
- // CHECK-LABEL: @xvssrarni_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_h_w(_1, _2, 1); }
- // CHECK-LABEL: @xvssrarni_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_w_d(_1, _2, 1); }
- // CHECK-LABEL: @xvssrarni_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_d_q(_1, _2, 1); }
- // CHECK-LABEL: @xvssrarni_bu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <32 x i8> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
-+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_bu_h(_1, _2, 1); }
- // CHECK-LABEL: @xvssrarni_hu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i16> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
-+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_hu_w(_1, _2, 1); }
- // CHECK-LABEL: @xvssrarni_wu_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_wu_d(_1, _2, 1); }
- // CHECK-LABEL: @xvssrarni_du_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_du_q(_1, _2, 1); }
- // CHECK-LABEL: @xbnz_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]])
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- int xbnz_b(v32u8 _1) { return __builtin_lasx_xbnz_b(_1); }
- // CHECK-LABEL: @xbnz_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]])
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- int xbnz_d(v4u64 _1) { return __builtin_lasx_xbnz_d(_1); }
- // CHECK-LABEL: @xbnz_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]])
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- int xbnz_h(v16u16 _1) { return __builtin_lasx_xbnz_h(_1); }
- // CHECK-LABEL: @xbnz_v(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]])
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- int xbnz_v(v32u8 _1) { return __builtin_lasx_xbnz_v(_1); }
- // CHECK-LABEL: @xbnz_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]])
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- int xbnz_w(v8u32 _1) { return __builtin_lasx_xbnz_w(_1); }
- // CHECK-LABEL: @xbz_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]])
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- int xbz_b(v32u8 _1) { return __builtin_lasx_xbz_b(_1); }
- // CHECK-LABEL: @xbz_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]])
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- int xbz_d(v4u64 _1) { return __builtin_lasx_xbz_d(_1); }
- // CHECK-LABEL: @xbz_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]])
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- int xbz_h(v16u16 _1) { return __builtin_lasx_xbz_h(_1); }
- // CHECK-LABEL: @xbz_v(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]])
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- int xbz_v(v32u8 _1) { return __builtin_lasx_xbz_v(_1); }
- // CHECK-LABEL: @xbz_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]])
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- int xbz_w(v8u32 _1) { return __builtin_lasx_xbz_w(_1); }
- // CHECK-LABEL: @xvfcmp_caf_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_caf_d(_1, _2); }
- // CHECK-LABEL: @xvfcmp_caf_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_caf_s(_1, _2); }
- // CHECK-LABEL: @xvfcmp_ceq_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_ceq_d(_1, _2); }
- // CHECK-LABEL: @xvfcmp_ceq_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_ceq_s(_1, _2); }
- // CHECK-LABEL: @xvfcmp_cle_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cle_d(_1, _2); }
- // CHECK-LABEL: @xvfcmp_cle_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cle_s(_1, _2); }
- // CHECK-LABEL: @xvfcmp_clt_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_clt_d(_1, _2); }
- // CHECK-LABEL: @xvfcmp_clt_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x float> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_clt_s(_1, _2); }
- // CHECK-LABEL: @xvfcmp_cne_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cne_d(_1, _2); }
- // CHECK-LABEL: @xvfcmp_cne_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i32> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]])
-+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cne_s(_1, _2); }
- // CHECK-LABEL: @xvfcmp_cor_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i64> [[TMP0]]
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]])
-+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
- //
- v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return
__builtin_lasx_xvfcmp_cor_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_cor_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cor_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_cueq_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cueq_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_cueq_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cueq_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_cule_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cule_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_cule_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cule_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_cult_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cult_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_cult_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cult_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_cun_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cun_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_cune_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cune_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_cune_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret 
<8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cune_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_cun_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cun_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_saf_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_saf_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_saf_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_saf_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_seq_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return 
__builtin_lasx_xvfcmp_seq_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_seq_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_seq_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_sle_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sle_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_sle_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sle_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_slt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_slt_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_slt_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_slt_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_sne_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sne_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_sne_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sne_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_sor_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sor_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_sor_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sor_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_sueq_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// 
CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sueq_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_sueq_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sueq_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_sule_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sule_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_sule_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sule_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_sult_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return 
__builtin_lasx_xvfcmp_sult_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_sult_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sult_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_sun_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sun_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_sune_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]]) -+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sune_d(_1, _2); } - // CHECK-LABEL: @xvfcmp_sune_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sune_s(_1, _2); } - // CHECK-LABEL: @xvfcmp_sun_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]]) -+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sun_s(_1, _2); } - // CHECK-LABEL: @xvpickve_d_f( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x double> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4f64 xvpickve_d_f(v4f64 _1) { return __builtin_lasx_xvpickve_d_f(_1, 1); } - // CHECK-LABEL: @xvpickve_w_f( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x float> [[TMP0]] -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8f32 xvpickve_w_f(v8f32 _1) { return __builtin_lasx_xvpickve_w_f(_1, 1); } - // CHECK-LABEL: @xvrepli_b( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) --// CHECK-NEXT: ret <32 x i8> [[TMP0]] -+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v32i8 xvrepli_b() { return __builtin_lasx_xvrepli_b(1); } - // CHECK-LABEL: @xvrepli_d( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) --// CHECK-NEXT: ret <4 x i64> [[TMP0]] -+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v4i64 xvrepli_d() { return __builtin_lasx_xvrepli_d(1); } - // CHECK-LABEL: @xvrepli_h( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) --// CHECK-NEXT: ret <16 x i16> [[TMP0]] -+// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v16i16 xvrepli_h() { return __builtin_lasx_xvrepli_h(1); } - // CHECK-LABEL: @xvrepli_w( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) --// CHECK-NEXT: ret <8 x i32> [[TMP0]] -+// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void - // - v8i32 xvrepli_w() { return __builtin_lasx_xvrepli_w(1); } -diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c -index 331e29fb7d17..7a84e0ae24f9 100644 ---- a/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c -+++ b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c -@@ -5,4080 +5,5838 @@ - - // CHECK-LABEL: @vsll_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __lsx_vsll_b(_1, _2); } - // CHECK-LABEL: @vsll_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __lsx_vsll_h(_1, _2); } - // CHECK-LABEL: @vsll_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __lsx_vsll_w(_1, _2); } - // CHECK-LABEL: @vsll_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __lsx_vsll_d(_1, _2); } - // CHECK-LABEL: @vslli_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vslli_b(v16i8 _1) { return __lsx_vslli_b(_1, 1); } - // CHECK-LABEL: @vslli_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vslli_h(v8i16 _1) { return __lsx_vslli_h(_1, 1); } - // CHECK-LABEL: @vslli_w( - // CHECK-NEXT: 
entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vslli_w(v4i32 _1) { return __lsx_vslli_w(_1, 1); } - // CHECK-LABEL: @vslli_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vslli_d(v2i64 _1) { return __lsx_vslli_d(_1, 1); } - // CHECK-LABEL: @vsra_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __lsx_vsra_b(_1, _2); } - // CHECK-LABEL: @vsra_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __lsx_vsra_h(_1, _2); } - // CHECK-LABEL: @vsra_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __lsx_vsra_w(_1, _2); } - // CHECK-LABEL: @vsra_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = 
bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __lsx_vsra_d(_1, _2); } - // CHECK-LABEL: @vsrai_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vsrai_b(v16i8 _1) { return __lsx_vsrai_b(_1, 1); } - // CHECK-LABEL: @vsrai_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vsrai_h(v8i16 _1) { return __lsx_vsrai_h(_1, 1); } - // CHECK-LABEL: @vsrai_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vsrai_w(v4i32 _1) { return __lsx_vsrai_w(_1, 1); } - // CHECK-LABEL: @vsrai_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vsrai_d(v2i64 _1) { return __lsx_vsrai_d(_1, 1); } - // CHECK-LABEL: @vsrar_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsrar_b(v16i8 _1, v16i8 _2) { return __lsx_vsrar_b(_1, _2); } - // CHECK-LABEL: @vsrar_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] 
to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsrar_h(v8i16 _1, v8i16 _2) { return __lsx_vsrar_h(_1, _2); } - // CHECK-LABEL: @vsrar_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsrar_w(v4i32 _1, v4i32 _2) { return __lsx_vsrar_w(_1, _2); } - // CHECK-LABEL: @vsrar_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsrar_d(v2i64 _1, v2i64 _2) { return __lsx_vsrar_d(_1, _2); } - // CHECK-LABEL: @vsrari_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vsrari_b(v16i8 _1) { return __lsx_vsrari_b(_1, 1); } - // CHECK-LABEL: @vsrari_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vsrari_h(v8i16 _1) { return __lsx_vsrari_h(_1, 1); } - // CHECK-LABEL: @vsrari_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vsrari_w(v4i32 _1) { return __lsx_vsrari_w(_1, 1); } - // CHECK-LABEL: @vsrari_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> 
[[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vsrari_d(v2i64 _1) { return __lsx_vsrari_d(_1, 1); } - // CHECK-LABEL: @vsrl_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __lsx_vsrl_b(_1, _2); } - // CHECK-LABEL: @vsrl_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __lsx_vsrl_h(_1, _2); } - // CHECK-LABEL: @vsrl_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __lsx_vsrl_w(_1, _2); } - // CHECK-LABEL: @vsrl_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __lsx_vsrl_d(_1, _2); } - // CHECK-LABEL: @vsrli_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vsrli_b(v16i8 _1) { return __lsx_vsrli_b(_1, 1); } - // CHECK-LABEL: @vsrli_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vsrli_h(v8i16 _1) { return __lsx_vsrli_h(_1, 1); }
- // CHECK-LABEL: @vsrli_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vsrli_w(v4i32 _1) { return __lsx_vsrli_w(_1, 1); }
- // CHECK-LABEL: @vsrli_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vsrli_d(v2i64 _1) { return __lsx_vsrli_d(_1, 1); }
- // CHECK-LABEL: @vsrlr_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { return __lsx_vsrlr_b(_1, _2); }
- // CHECK-LABEL: @vsrlr_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlr_h(_1, _2); }
- // CHECK-LABEL: @vsrlr_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlr_w(_1, _2); }
- // CHECK-LABEL: @vsrlr_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlr_d(_1, _2); }
- // CHECK-LABEL: @vsrlri_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vsrlri_b(v16i8 _1) { return __lsx_vsrlri_b(_1, 1); }
- // CHECK-LABEL: @vsrlri_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vsrlri_h(v8i16 _1) { return __lsx_vsrlri_h(_1, 1); }
- // CHECK-LABEL: @vsrlri_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vsrlri_w(v4i32 _1) { return __lsx_vsrlri_w(_1, 1); }
- // CHECK-LABEL: @vsrlri_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vsrlri_d(v2i64 _1) { return __lsx_vsrlri_d(_1, 1); }
- // CHECK-LABEL: @vbitclr_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { return __lsx_vbitclr_b(_1, _2); }
- // CHECK-LABEL: @vbitclr_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { return __lsx_vbitclr_h(_1, _2); }
- // CHECK-LABEL: @vbitclr_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { return __lsx_vbitclr_w(_1, _2); }
- // CHECK-LABEL: @vbitclr_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { return __lsx_vbitclr_d(_1, _2); }
- // CHECK-LABEL: @vbitclri_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16u8 vbitclri_b(v16u8 _1) { return __lsx_vbitclri_b(_1, 1); }
- // CHECK-LABEL: @vbitclri_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8u16 vbitclri_h(v8u16 _1) { return __lsx_vbitclri_h(_1, 1); }
- // CHECK-LABEL: @vbitclri_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4u32 vbitclri_w(v4u32 _1) { return __lsx_vbitclri_w(_1, 1); }
- // CHECK-LABEL: @vbitclri_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2u64 vbitclri_d(v2u64 _1) { return __lsx_vbitclri_d(_1, 1); }
- // CHECK-LABEL: @vbitset_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vbitset_b(v16u8 _1, v16u8 _2) { return __lsx_vbitset_b(_1, _2); }
- // CHECK-LABEL: @vbitset_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vbitset_h(v8u16 _1, v8u16 _2) { return __lsx_vbitset_h(_1, _2); }
- // CHECK-LABEL: @vbitset_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vbitset_w(v4u32 _1, v4u32 _2) { return __lsx_vbitset_w(_1, _2); }
- // CHECK-LABEL: @vbitset_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vbitset_d(v2u64 _1, v2u64 _2) { return __lsx_vbitset_d(_1, _2); }
- // CHECK-LABEL: @vbitseti_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16u8 vbitseti_b(v16u8 _1) { return __lsx_vbitseti_b(_1, 1); }
- // CHECK-LABEL: @vbitseti_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8u16 vbitseti_h(v8u16 _1) { return __lsx_vbitseti_h(_1, 1); }
- // CHECK-LABEL: @vbitseti_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4u32 vbitseti_w(v4u32 _1) { return __lsx_vbitseti_w(_1, 1); }
- // CHECK-LABEL: @vbitseti_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2u64 vbitseti_d(v2u64 _1) { return __lsx_vbitseti_d(_1, 1); }
- // CHECK-LABEL: @vbitrev_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { return __lsx_vbitrev_b(_1, _2); }
- // CHECK-LABEL: @vbitrev_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { return __lsx_vbitrev_h(_1, _2); }
- // CHECK-LABEL: @vbitrev_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { return __lsx_vbitrev_w(_1, _2); }
- // CHECK-LABEL: @vbitrev_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { return __lsx_vbitrev_d(_1, _2); }
- // CHECK-LABEL: @vbitrevi_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16u8 vbitrevi_b(v16u8 _1) { return __lsx_vbitrevi_b(_1, 1); }
- // CHECK-LABEL: @vbitrevi_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8u16 vbitrevi_h(v8u16 _1) { return __lsx_vbitrevi_h(_1, 1); }
- // CHECK-LABEL: @vbitrevi_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4u32 vbitrevi_w(v4u32 _1) { return __lsx_vbitrevi_w(_1, 1); }
- // CHECK-LABEL: @vbitrevi_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2u64 vbitrevi_d(v2u64 _1) { return __lsx_vbitrevi_d(_1, 1); }
- // CHECK-LABEL: @vadd_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __lsx_vadd_b(_1, _2); }
- // CHECK-LABEL: @vadd_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __lsx_vadd_h(_1, _2); }
- // CHECK-LABEL: @vadd_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __lsx_vadd_w(_1, _2); }
- // CHECK-LABEL: @vadd_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __lsx_vadd_d(_1, _2); }
- // CHECK-LABEL: @vaddi_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vaddi_bu(v16i8 _1) { return __lsx_vaddi_bu(_1, 1); }
- // CHECK-LABEL: @vaddi_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vaddi_hu(v8i16 _1) { return __lsx_vaddi_hu(_1, 1); }
- // CHECK-LABEL: @vaddi_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vaddi_wu(v4i32 _1) { return __lsx_vaddi_wu(_1, 1); }
- // CHECK-LABEL: @vaddi_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vaddi_du(v2i64 _1) { return __lsx_vaddi_du(_1, 1); }
- // CHECK-LABEL: @vsub_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __lsx_vsub_b(_1, _2); }
- // CHECK-LABEL: @vsub_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __lsx_vsub_h(_1, _2); }
- // CHECK-LABEL: @vsub_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __lsx_vsub_w(_1, _2); }
- // CHECK-LABEL: @vsub_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __lsx_vsub_d(_1, _2); }
- // CHECK-LABEL: @vsubi_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vsubi_bu(v16i8 _1) { return __lsx_vsubi_bu(_1, 1); }
- // CHECK-LABEL: @vsubi_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vsubi_hu(v8i16 _1) { return __lsx_vsubi_hu(_1, 1); }
- // CHECK-LABEL: @vsubi_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vsubi_wu(v4i32 _1) { return __lsx_vsubi_wu(_1, 1); }
- // CHECK-LABEL: @vsubi_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vsubi_du(v2i64 _1) { return __lsx_vsubi_du(_1, 1); }
- // CHECK-LABEL: @vmax_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __lsx_vmax_b(_1, _2); }
- // CHECK-LABEL: @vmax_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __lsx_vmax_h(_1, _2); }
- // CHECK-LABEL: @vmax_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __lsx_vmax_w(_1, _2); }
- // CHECK-LABEL: @vmax_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __lsx_vmax_d(_1, _2); }
- // CHECK-LABEL: @vmaxi_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vmaxi_b(v16i8 _1) { return __lsx_vmaxi_b(_1, 1); }
- // CHECK-LABEL: @vmaxi_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vmaxi_h(v8i16 _1) { return __lsx_vmaxi_h(_1, 1); }
- // CHECK-LABEL: @vmaxi_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vmaxi_w(v4i32 _1) { return __lsx_vmaxi_w(_1, 1); }
- // CHECK-LABEL: @vmaxi_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vmaxi_d(v2i64 _1) { return __lsx_vmaxi_d(_1, 1); }
- // CHECK-LABEL: @vmax_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vmax_bu(v16u8 _1, v16u8 _2) { return __lsx_vmax_bu(_1, _2); }
- // CHECK-LABEL: @vmax_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vmax_hu(v8u16 _1, v8u16 _2) { return __lsx_vmax_hu(_1, _2); }
- // CHECK-LABEL: @vmax_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vmax_wu(v4u32 _1, v4u32 _2) { return __lsx_vmax_wu(_1, _2); }
- // CHECK-LABEL: @vmax_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vmax_du(v2u64 _1, v2u64 _2) { return __lsx_vmax_du(_1, _2); }
- // CHECK-LABEL: @vmaxi_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16u8 vmaxi_bu(v16u8 _1) { return __lsx_vmaxi_bu(_1, 1); }
- // CHECK-LABEL: @vmaxi_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8u16 vmaxi_hu(v8u16 _1) { return __lsx_vmaxi_hu(_1, 1); }
- // CHECK-LABEL: @vmaxi_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4u32 vmaxi_wu(v4u32 _1) { return __lsx_vmaxi_wu(_1, 1); }
- // CHECK-LABEL: @vmaxi_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2u64 vmaxi_du(v2u64 _1) { return __lsx_vmaxi_du(_1, 1); }
- // CHECK-LABEL: @vmin_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __lsx_vmin_b(_1, _2); }
- // CHECK-LABEL: @vmin_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __lsx_vmin_h(_1, _2); }
- // CHECK-LABEL: @vmin_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __lsx_vmin_w(_1, _2); }
- // CHECK-LABEL: @vmin_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __lsx_vmin_d(_1, _2); }
- // CHECK-LABEL: @vmini_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vmini_b(v16i8 _1) { return __lsx_vmini_b(_1, 1); }
- // CHECK-LABEL: @vmini_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vmini_h(v8i16 _1) { return __lsx_vmini_h(_1, 1); }
- // CHECK-LABEL: @vmini_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vmini_w(v4i32 _1) { return __lsx_vmini_w(_1, 1); }
- // CHECK-LABEL: @vmini_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vmini_d(v2i64 _1) { return __lsx_vmini_d(_1, 1); }
- // CHECK-LABEL: @vmin_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vmin_bu(v16u8 _1, v16u8 _2) { return __lsx_vmin_bu(_1, _2); }
- // CHECK-LABEL: @vmin_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vmin_hu(v8u16 _1, v8u16 _2) { return __lsx_vmin_hu(_1, _2); }
- // CHECK-LABEL: @vmin_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vmin_wu(v4u32 _1, v4u32 _2) { return __lsx_vmin_wu(_1, _2); }
- // CHECK-LABEL: @vmin_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vmin_du(v2u64 _1, v2u64 _2) { return __lsx_vmin_du(_1, _2); }
- // CHECK-LABEL: @vmini_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16u8 vmini_bu(v16u8 _1) { return __lsx_vmini_bu(_1, 1); }
- // CHECK-LABEL: @vmini_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8u16 vmini_hu(v8u16 _1) { return __lsx_vmini_hu(_1, 1); }
- // CHECK-LABEL: @vmini_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4u32 vmini_wu(v4u32 _1) { return __lsx_vmini_wu(_1, 1); }
- // CHECK-LABEL: @vmini_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2u64 vmini_du(v2u64 _1) { return __lsx_vmini_du(_1, 1); }
- // CHECK-LABEL: @vseq_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __lsx_vseq_b(_1, _2); }
- // CHECK-LABEL: @vseq_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __lsx_vseq_h(_1, _2); }
- // CHECK-LABEL: @vseq_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __lsx_vseq_w(_1, _2); }
- // CHECK-LABEL: @vseq_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __lsx_vseq_d(_1, _2); }
- // CHECK-LABEL: @vseqi_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vseqi_b(v16i8 _1) { return __lsx_vseqi_b(_1, 1); }
- // CHECK-LABEL: @vseqi_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vseqi_h(v8i16 _1) { return __lsx_vseqi_h(_1, 1); }
- // CHECK-LABEL: @vseqi_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vseqi_w(v4i32 _1) { return __lsx_vseqi_w(_1, 1); }
- // CHECK-LABEL: @vseqi_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vseqi_d(v2i64 _1) { return __lsx_vseqi_d(_1, 1); }
- // CHECK-LABEL: @vslti_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vslti_b(v16i8 _1) { return __lsx_vslti_b(_1, 1); }
- // CHECK-LABEL: @vslt_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __lsx_vslt_b(_1, _2); }
- // CHECK-LABEL: @vslt_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __lsx_vslt_h(_1, _2); }
- // CHECK-LABEL: @vslt_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __lsx_vslt_w(_1, _2); }
- // CHECK-LABEL: @vslt_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __lsx_vslt_d(_1, _2); }
- // CHECK-LABEL: @vslti_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vslti_h(v8i16 _1) { return __lsx_vslti_h(_1, 1); }
- // CHECK-LABEL: @vslti_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vslti_w(v4i32 _1) { return __lsx_vslti_w(_1, 1); }
- // CHECK-LABEL: @vslti_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vslti_d(v2i64 _1) { return __lsx_vslti_d(_1, 1); }
- // CHECK-LABEL: @vslt_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vslt_bu(v16u8 _1, v16u8 _2) { return __lsx_vslt_bu(_1, _2); }
- // CHECK-LABEL: @vslt_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vslt_hu(v8u16 _1, v8u16 _2) { return __lsx_vslt_hu(_1, _2); }
- // CHECK-LABEL: @vslt_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vslt_wu(v4u32 _1, v4u32 _2) { return __lsx_vslt_wu(_1, _2); }
- // CHECK-LABEL: @vslt_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vslt_du(v2u64 _1, v2u64 _2) { return __lsx_vslt_du(_1, _2); }
- // CHECK-LABEL: @vslti_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vslti_bu(v16u8 _1) { return __lsx_vslti_bu(_1, 1); }
- // CHECK-LABEL: @vslti_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vslti_hu(v8u16 _1) { return __lsx_vslti_hu(_1, 1); }
- // CHECK-LABEL: @vslti_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vslti_wu(v4u32 _1) { return __lsx_vslti_wu(_1, 1); }
- // CHECK-LABEL: @vslti_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vslti_du(v2u64 _1) { return __lsx_vslti_du(_1, 1); }
- // CHECK-LABEL: @vsle_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __lsx_vsle_b(_1, _2); }
- // CHECK-LABEL: @vsle_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __lsx_vsle_h(_1, _2); }
- // CHECK-LABEL: @vsle_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __lsx_vsle_w(_1, _2); }
- // CHECK-LABEL: @vsle_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __lsx_vsle_d(_1, _2); }
- // CHECK-LABEL: @vslei_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vslei_b(v16i8 _1) { return __lsx_vslei_b(_1, 1); }
- // CHECK-LABEL: @vslei_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vslei_h(v8i16 _1) { return __lsx_vslei_h(_1, 1); }
- // CHECK-LABEL: @vslei_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vslei_w(v4i32 _1) { return __lsx_vslei_w(_1, 1); }
- // CHECK-LABEL: @vslei_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vslei_d(v2i64 _1) { return __lsx_vslei_d(_1, 1); }
- // CHECK-LABEL: @vsle_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vsle_bu(v16u8 _1, v16u8 _2) { return __lsx_vsle_bu(_1, _2); }
- // CHECK-LABEL: @vsle_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vsle_hu(v8u16 _1, v8u16 _2) { return __lsx_vsle_hu(_1, _2); }
- // CHECK-LABEL: @vsle_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vsle_wu(v4u32 _1, v4u32 _2) { return __lsx_vsle_wu(_1, _2); }
- // CHECK-LABEL: @vsle_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vsle_du(v2u64 _1, v2u64 _2) { return __lsx_vsle_du(_1, _2); }
- // CHECK-LABEL: @vslei_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vslei_bu(v16u8 _1) { return __lsx_vslei_bu(_1, 1); }
- // CHECK-LABEL: @vslei_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vslei_hu(v8u16 _1) { return __lsx_vslei_hu(_1, 1); }
- // CHECK-LABEL: @vslei_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vslei_wu(v4u32 _1) { return __lsx_vslei_wu(_1, 1); }
- // CHECK-LABEL: @vslei_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
-
// - v2i64 vslei_du(v2u64 _1) { return __lsx_vslei_du(_1, 1); } - // CHECK-LABEL: @vsat_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vsat_b(v16i8 _1) { return __lsx_vsat_b(_1, 1); } - // CHECK-LABEL: @vsat_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vsat_h(v8i16 _1) { return __lsx_vsat_h(_1, 1); } - // CHECK-LABEL: @vsat_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vsat_w(v4i32 _1) { return __lsx_vsat_w(_1, 1); } - // CHECK-LABEL: @vsat_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vsat_d(v2i64 _1) { return __lsx_vsat_d(_1, 1); } - // CHECK-LABEL: @vsat_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16u8 vsat_bu(v16u8 _1) { return __lsx_vsat_bu(_1, 1); } - // CHECK-LABEL: @vsat_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8u16 vsat_hu(v8u16 _1) { return __lsx_vsat_hu(_1, 1); } - // CHECK-LABEL: @vsat_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// 
CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4u32 vsat_wu(v4u32 _1) { return __lsx_vsat_wu(_1, 1); } - // CHECK-LABEL: @vsat_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2u64 vsat_du(v2u64 _1) { return __lsx_vsat_du(_1, 1); } - // CHECK-LABEL: @vadda_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vadda_b(v16i8 _1, v16i8 _2) { return __lsx_vadda_b(_1, _2); } - // CHECK-LABEL: @vadda_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vadda_h(v8i16 _1, v8i16 _2) { return __lsx_vadda_h(_1, _2); } - // CHECK-LABEL: @vadda_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vadda_w(v4i32 _1, v4i32 _2) { return __lsx_vadda_w(_1, _2); } - // CHECK-LABEL: @vadda_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vadda_d(v2i64 _1, v2i64 _2) { return __lsx_vadda_d(_1, _2); } 
- // CHECK-LABEL: @vsadd_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsadd_b(v16i8 _1, v16i8 _2) { return __lsx_vsadd_b(_1, _2); } - // CHECK-LABEL: @vsadd_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsadd_h(v8i16 _1, v8i16 _2) { return __lsx_vsadd_h(_1, _2); } - // CHECK-LABEL: @vsadd_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsadd_w(v4i32 _1, v4i32 _2) { return __lsx_vsadd_w(_1, _2); } - // CHECK-LABEL: @vsadd_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsadd_d(v2i64 _1, v2i64 _2) { return __lsx_vsadd_d(_1, _2); } - // CHECK-LABEL: @vsadd_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { return __lsx_vsadd_bu(_1, _2); } - // CHECK-LABEL: @vsadd_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// 
CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { return __lsx_vsadd_hu(_1, _2); } - // CHECK-LABEL: @vsadd_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { return __lsx_vsadd_wu(_1, _2); } - // CHECK-LABEL: @vsadd_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2u64 vsadd_du(v2u64 _1, v2u64 _2) { return __lsx_vsadd_du(_1, _2); } - // CHECK-LABEL: @vavg_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __lsx_vavg_b(_1, _2); } - // CHECK-LABEL: @vavg_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __lsx_vavg_h(_1, _2); } - // CHECK-LABEL: @vavg_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 
x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __lsx_vavg_w(_1, _2); } - // CHECK-LABEL: @vavg_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __lsx_vavg_d(_1, _2); } - // CHECK-LABEL: @vavg_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vavg_bu(v16u8 _1, v16u8 _2) { return __lsx_vavg_bu(_1, _2); } - // CHECK-LABEL: @vavg_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vavg_hu(v8u16 _1, v8u16 _2) { return __lsx_vavg_hu(_1, _2); } - // CHECK-LABEL: @vavg_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 vavg_wu(v4u32 _1, v4u32 _2) { return __lsx_vavg_wu(_1, _2); } - // CHECK-LABEL: @vavg_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2u64 vavg_du(v2u64 _1, v2u64 _2) { return __lsx_vavg_du(_1, _2); } - // CHECK-LABEL: 
@vavgr_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vavgr_b(v16i8 _1, v16i8 _2) { return __lsx_vavgr_b(_1, _2); } - // CHECK-LABEL: @vavgr_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vavgr_h(v8i16 _1, v8i16 _2) { return __lsx_vavgr_h(_1, _2); } - // CHECK-LABEL: @vavgr_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vavgr_w(v4i32 _1, v4i32 _2) { return __lsx_vavgr_w(_1, _2); } - // CHECK-LABEL: @vavgr_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vavgr_d(v2i64 _1, v2i64 _2) { return __lsx_vavgr_d(_1, _2); } - // CHECK-LABEL: @vavgr_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { return __lsx_vavgr_bu(_1, _2); } - // CHECK-LABEL: @vavgr_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { return __lsx_vavgr_hu(_1, _2); } - // CHECK-LABEL: @vavgr_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { return __lsx_vavgr_wu(_1, _2); } - // CHECK-LABEL: @vavgr_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2u64 vavgr_du(v2u64 _1, v2u64 _2) { return __lsx_vavgr_du(_1, _2); } - // CHECK-LABEL: @vssub_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vssub_b(v16i8 _1, v16i8 _2) { return __lsx_vssub_b(_1, _2); } - // CHECK-LABEL: @vssub_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vssub_h(v8i16 _1, v8i16 _2) { return __lsx_vssub_h(_1, _2); } - // CHECK-LABEL: @vssub_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vssub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vssub_w(v4i32 _1, v4i32 _2) { return __lsx_vssub_w(_1, _2); } - // CHECK-LABEL: @vssub_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vssub_d(v2i64 _1, v2i64 _2) { return __lsx_vssub_d(_1, _2); } - // CHECK-LABEL: @vssub_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vssub_bu(v16u8 _1, v16u8 _2) { return __lsx_vssub_bu(_1, _2); } - // CHECK-LABEL: @vssub_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vssub_hu(v8u16 _1, v8u16 _2) { return __lsx_vssub_hu(_1, _2); } - // CHECK-LABEL: @vssub_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 vssub_wu(v4u32 _1, v4u32 _2) { return __lsx_vssub_wu(_1, _2); } - // CHECK-LABEL: @vssub_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2u64 vssub_du(v2u64 _1, v2u64 _2) 
{ return __lsx_vssub_du(_1, _2); } - // CHECK-LABEL: @vabsd_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vabsd_b(v16i8 _1, v16i8 _2) { return __lsx_vabsd_b(_1, _2); } - // CHECK-LABEL: @vabsd_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vabsd_h(v8i16 _1, v8i16 _2) { return __lsx_vabsd_h(_1, _2); } - // CHECK-LABEL: @vabsd_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vabsd_w(v4i32 _1, v4i32 _2) { return __lsx_vabsd_w(_1, _2); } - // CHECK-LABEL: @vabsd_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vabsd_d(v2i64 _1, v2i64 _2) { return __lsx_vabsd_d(_1, _2); } - // CHECK-LABEL: @vabsd_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { return __lsx_vabsd_bu(_1, _2); } - // CHECK-LABEL: @vabsd_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// 
CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { return __lsx_vabsd_hu(_1, _2); } - // CHECK-LABEL: @vabsd_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { return __lsx_vabsd_wu(_1, _2); } - // CHECK-LABEL: @vabsd_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2u64 vabsd_du(v2u64 _1, v2u64 _2) { return __lsx_vabsd_du(_1, _2); } - // CHECK-LABEL: @vmul_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __lsx_vmul_b(_1, _2); } - // CHECK-LABEL: @vmul_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __lsx_vmul_h(_1, _2); } - // CHECK-LABEL: @vmul_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call 
<4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __lsx_vmul_w(_1, _2); } - // CHECK-LABEL: @vmul_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __lsx_vmul_d(_1, _2); } - // CHECK-LABEL: @vmadd_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { - return __lsx_vmadd_b(_1, _2, _3); - } - // CHECK-LABEL: @vmadd_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { - return __lsx_vmadd_h(_1, _2, _3); - } - // CHECK-LABEL: @vmadd_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { - return __lsx_vmadd_w(_1, _2, _3); - } - // CHECK-LABEL: @vmadd_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] 
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { - return __lsx_vmadd_d(_1, _2, _3); - } - // CHECK-LABEL: @vmsub_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { - return __lsx_vmsub_b(_1, _2, _3); - } - // CHECK-LABEL: @vmsub_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { - return __lsx_vmsub_h(_1, _2, _3); - } - // CHECK-LABEL: @vmsub_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { - return __lsx_vmsub_w(_1, _2, _3); - } - // CHECK-LABEL: @vmsub_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 
x i64> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { - return __lsx_vmsub_d(_1, _2, _3); - } - // CHECK-LABEL: @vdiv_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __lsx_vdiv_b(_1, _2); } - // CHECK-LABEL: @vdiv_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __lsx_vdiv_h(_1, _2); } - // CHECK-LABEL: @vdiv_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __lsx_vdiv_w(_1, _2); } - // CHECK-LABEL: @vdiv_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __lsx_vdiv_d(_1, _2); } - // CHECK-LABEL: @vdiv_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { return __lsx_vdiv_bu(_1, _2); } - // CHECK-LABEL: @vdiv_hu( - // 
CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { return __lsx_vdiv_hu(_1, _2); }
- // CHECK-LABEL: @vdiv_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { return __lsx_vdiv_wu(_1, _2); }
- // CHECK-LABEL: @vdiv_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vdiv_du(v2u64 _1, v2u64 _2) { return __lsx_vdiv_du(_1, _2); }
- // CHECK-LABEL: @vhaddw_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhaddw_h_b(_1, _2); }
- // CHECK-LABEL: @vhaddw_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhaddw_w_h(_1, _2); }
- // CHECK-LABEL: @vhaddw_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhaddw_d_w(_1, _2); }
- // CHECK-LABEL: @vhaddw_hu_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhaddw_hu_bu(_1, _2); }
- // CHECK-LABEL: @vhaddw_wu_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhaddw_wu_hu(_1, _2); }
- // CHECK-LABEL: @vhaddw_du_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhaddw_du_wu(_1, _2); }
- // CHECK-LABEL: @vhsubw_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhsubw_h_b(_1, _2); }
- // CHECK-LABEL: @vhsubw_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhsubw_w_h(_1, _2); }
- // CHECK-LABEL: @vhsubw_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhsubw_d_w(_1, _2); }
- // CHECK-LABEL: @vhsubw_hu_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhsubw_hu_bu(_1, _2); }
- // CHECK-LABEL: @vhsubw_wu_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhsubw_wu_hu(_1, _2); }
- // CHECK-LABEL: @vhsubw_du_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhsubw_du_wu(_1, _2); }
- // CHECK-LABEL: @vmod_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __lsx_vmod_b(_1, _2); }
- // CHECK-LABEL: @vmod_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __lsx_vmod_h(_1, _2); }
- // CHECK-LABEL: @vmod_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __lsx_vmod_w(_1, _2); }
- // CHECK-LABEL: @vmod_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __lsx_vmod_d(_1, _2); }
- // CHECK-LABEL: @vmod_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vmod_bu(v16u8 _1, v16u8 _2) { return __lsx_vmod_bu(_1, _2); }
- // CHECK-LABEL: @vmod_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vmod_hu(v8u16 _1, v8u16 _2) { return __lsx_vmod_hu(_1, _2); }
- // CHECK-LABEL: @vmod_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vmod_wu(v4u32 _1, v4u32 _2) { return __lsx_vmod_wu(_1, _2); }
- // CHECK-LABEL: @vmod_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vmod_du(v2u64 _1, v2u64 _2) { return __lsx_vmod_du(_1, _2); }
- // CHECK-LABEL: @vreplve_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[_1:%.*]], i32 [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[TMP0]], i32 [[_2:%.*]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vreplve_b(v16i8 _1, int _2) { return __lsx_vreplve_b(_1, _2); }
- // CHECK-LABEL: @vreplve_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[_1:%.*]], i32 [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[TMP0]], i32 [[_2:%.*]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vreplve_h(v8i16 _1, int _2) { return __lsx_vreplve_h(_1, _2); }
- // CHECK-LABEL: @vreplve_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[_1:%.*]], i32 [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[TMP0]], i32 [[_2:%.*]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vreplve_w(v4i32 _1, int _2) { return __lsx_vreplve_w(_1, _2); }
- // CHECK-LABEL: @vreplve_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[_1:%.*]], i32 [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[TMP0]], i32 [[_2:%.*]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vreplve_d(v2i64 _1, int _2) { return __lsx_vreplve_d(_1, _2); }
- // CHECK-LABEL: @vreplvei_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vreplvei_b(v16i8 _1) { return __lsx_vreplvei_b(_1, 1); }
- // CHECK-LABEL: @vreplvei_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vreplvei_h(v8i16 _1) { return __lsx_vreplvei_h(_1, 1); }
- // CHECK-LABEL: @vreplvei_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vreplvei_w(v4i32 _1) { return __lsx_vreplvei_w(_1, 1); }
- // CHECK-LABEL: @vreplvei_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vreplvei_d(v2i64 _1) { return __lsx_vreplvei_d(_1, 1); }
- // CHECK-LABEL: @vpickev_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vpickev_b(v16i8 _1, v16i8 _2) { return __lsx_vpickev_b(_1, _2); }
- // CHECK-LABEL: @vpickev_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vpickev_h(v8i16 _1, v8i16 _2) { return __lsx_vpickev_h(_1, _2); }
- // CHECK-LABEL: @vpickev_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vpickev_w(v4i32 _1, v4i32 _2) { return __lsx_vpickev_w(_1, _2); }
- // CHECK-LABEL: @vpickev_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vpickev_d(v2i64 _1, v2i64 _2) { return __lsx_vpickev_d(_1, _2); }
- // CHECK-LABEL: @vpickod_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vpickod_b(v16i8 _1, v16i8 _2) { return __lsx_vpickod_b(_1, _2); }
- // CHECK-LABEL: @vpickod_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vpickod_h(v8i16 _1, v8i16 _2) { return __lsx_vpickod_h(_1, _2); }
- // CHECK-LABEL: @vpickod_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vpickod_w(v4i32 _1, v4i32 _2) { return __lsx_vpickod_w(_1, _2); }
- // CHECK-LABEL: @vpickod_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vpickod_d(v2i64 _1, v2i64 _2) { return __lsx_vpickod_d(_1, _2); }
- // CHECK-LABEL: @vilvh_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vilvh_b(v16i8 _1, v16i8 _2) { return __lsx_vilvh_b(_1, _2); }
- // CHECK-LABEL: @vilvh_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vilvh_h(v8i16 _1, v8i16 _2) { return __lsx_vilvh_h(_1, _2); }
- // CHECK-LABEL: @vilvh_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vilvh_w(v4i32 _1, v4i32 _2) { return __lsx_vilvh_w(_1, _2); }
- // CHECK-LABEL: @vilvh_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vilvh_d(v2i64 _1, v2i64 _2) { return __lsx_vilvh_d(_1, _2); }
- // CHECK-LABEL: @vilvl_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vilvl_b(v16i8 _1, v16i8 _2) { return __lsx_vilvl_b(_1, _2); }
- // CHECK-LABEL: @vilvl_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vilvl_h(v8i16 _1, v8i16 _2) { return __lsx_vilvl_h(_1, _2); }
- // CHECK-LABEL: @vilvl_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vilvl_w(v4i32 _1, v4i32 _2) { return __lsx_vilvl_w(_1, _2); }
- // CHECK-LABEL: @vilvl_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vilvl_d(v2i64 _1, v2i64 _2) { return __lsx_vilvl_d(_1, _2); }
- // CHECK-LABEL: @vpackev_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vpackev_b(v16i8 _1, v16i8 _2) { return __lsx_vpackev_b(_1, _2); }
- // CHECK-LABEL: @vpackev_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vpackev_h(v8i16 _1, v8i16 _2) { return __lsx_vpackev_h(_1, _2); }
- // CHECK-LABEL: @vpackev_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vpackev_w(v4i32 _1, v4i32 _2) { return __lsx_vpackev_w(_1, _2); }
- // CHECK-LABEL: @vpackev_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vpackev_d(v2i64 _1, v2i64 _2) { return __lsx_vpackev_d(_1, _2); }
- // CHECK-LABEL: @vpackod_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vpackod_b(v16i8 _1, v16i8 _2) { return __lsx_vpackod_b(_1, _2); }
- // CHECK-LABEL: @vpackod_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vpackod_h(v8i16 _1, v8i16 _2) { return __lsx_vpackod_h(_1, _2); }
- // CHECK-LABEL: @vpackod_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vpackod_w(v4i32 _1, v4i32 _2) { return __lsx_vpackod_w(_1, _2); }
- // CHECK-LABEL: @vpackod_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vpackod_d(v2i64 _1, v2i64 _2) { return __lsx_vpackod_d(_1, _2); }
- // CHECK-LABEL: @vshuf_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) {
-   return __lsx_vshuf_h(_1, _2, _3);
- }
- // CHECK-LABEL: @vshuf_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) {
-   return __lsx_vshuf_w(_1, _2, _3);
- }
- // CHECK-LABEL: @vshuf_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) {
-   return __lsx_vshuf_d(_1, _2, _3);
- }
- // CHECK-LABEL: @vand_v(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vand_v(v16u8 _1, v16u8 _2) { return __lsx_vand_v(_1, _2); }
- // CHECK-LABEL: @vandi_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16u8 vandi_b(v16u8 _1) { return __lsx_vandi_b(_1, 1); }
- // CHECK-LABEL: @vor_v(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vor_v(v16u8 _1, v16u8 _2) { return __lsx_vor_v(_1, _2); }
- // CHECK-LABEL: @vori_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16u8 vori_b(v16u8 _1) { return __lsx_vori_b(_1, 1); }
- // CHECK-LABEL: @vnor_v(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __lsx_vnor_v(_1, _2); }
- // CHECK-LABEL: @vnori_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16u8 vnori_b(v16u8 _1) { return __lsx_vnori_b(_1, 1); }
- // CHECK-LABEL: @vxor_v(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __lsx_vxor_v(_1, _2); }
- // CHECK-LABEL: @vxori_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16u8 vxori_b(v16u8 _1) { return __lsx_vxori_b(_1, 1); }
- // CHECK-LABEL: @vbitsel_v(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) {
-   return __lsx_vbitsel_v(_1, _2, _3);
- }
- // CHECK-LABEL: @vbitseli_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { return __lsx_vbitseli_b(_1, _2, 1); }
- // CHECK-LABEL: @vshuf4i_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vshuf4i_b(v16i8 _1) { return __lsx_vshuf4i_b(_1, 1); }
- // CHECK-LABEL: @vshuf4i_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vshuf4i_h(v8i16 _1) { return __lsx_vshuf4i_h(_1, 1); }
- // CHECK-LABEL: @vshuf4i_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vshuf4i_w(v4i32 _1) { return __lsx_vshuf4i_w(_1, 1); }
- // CHECK-LABEL: @vreplgr2vr_b(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
-+// CHECK-NEXT: ret i128 [[TMP1]]
- //
- v16i8 vreplgr2vr_b(int _1) { return __lsx_vreplgr2vr_b(_1); }
- // CHECK-LABEL: @vreplgr2vr_h(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
-+// CHECK-NEXT: ret i128 [[TMP1]]
- //
- v8i16 vreplgr2vr_h(int _1) { return __lsx_vreplgr2vr_h(_1); }
- // CHECK-LABEL: @vreplgr2vr_w(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
-+// CHECK-NEXT: ret i128 [[TMP1]]
- //
- v4i32 vreplgr2vr_w(int _1) { return __lsx_vreplgr2vr_w(_1); }
- // CHECK-LABEL: @vreplgr2vr_d(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
-+// CHECK-NEXT: ret i128 [[TMP1]]
- //
- v2i64 vreplgr2vr_d(long _1) { return __lsx_vreplgr2vr_d(_1); }
- // CHECK-LABEL: @vpcnt_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vpcnt_b(v16i8 _1) { return __lsx_vpcnt_b(_1); }
- // CHECK-LABEL: @vpcnt_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vpcnt_h(v8i16 _1) { return __lsx_vpcnt_h(_1); }
- // CHECK-LABEL: @vpcnt_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vpcnt_w(v4i32 _1) { return __lsx_vpcnt_w(_1); }
- // CHECK-LABEL: @vpcnt_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vpcnt_d(v2i64 _1) { return __lsx_vpcnt_d(_1); }
- // CHECK-LABEL: @vclo_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vclo_b(v16i8 _1) { return __lsx_vclo_b(_1); }
- // CHECK-LABEL: @vclo_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vclo_h(v8i16 _1) { return __lsx_vclo_h(_1); }
- // CHECK-LABEL: @vclo_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vclo_w(v4i32 _1) { return __lsx_vclo_w(_1); }
- // CHECK-LABEL: @vclo_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vclo_d(v2i64 _1) { return __lsx_vclo_d(_1); }
- // CHECK-LABEL: @vclz_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vclz_b(v16i8 _1) { return __lsx_vclz_b(_1); }
- // CHECK-LABEL: @vclz_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vclz_h(v8i16 _1) { return __lsx_vclz_h(_1); }
- // CHECK-LABEL: @vclz_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vclz_w(v4i32 _1) { return __lsx_vclz_w(_1); }
- // CHECK-LABEL: @vclz_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vclz_d(v2i64 _1) { return __lsx_vclz_d(_1); }
- // CHECK-LABEL: @vpickve2gr_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- int vpickve2gr_b(v16i8 _1) { return __lsx_vpickve2gr_b(_1, 1); }
- // CHECK-LABEL: @vpickve2gr_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- int vpickve2gr_h(v8i16 _1) { return __lsx_vpickve2gr_h(_1, 1); }
- // CHECK-LABEL: @vpickve2gr_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- int vpickve2gr_w(v4i32 _1) { return __lsx_vpickve2gr_w(_1, 1); }
- // CHECK-LABEL: @vpickve2gr_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret i64 [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: ret i64 [[TMP1]]
- //
- long vpickve2gr_d(v2i64 _1) { return __lsx_vpickve2gr_d(_1, 1); }
- // CHECK-LABEL: @vpickve2gr_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- unsigned int vpickve2gr_bu(v16i8 _1) { return __lsx_vpickve2gr_bu(_1, 1); }
- // CHECK-LABEL: @vpickve2gr_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- unsigned int vpickve2gr_hu(v8i16 _1) { return __lsx_vpickve2gr_hu(_1, 1); }
- // CHECK-LABEL: @vpickve2gr_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret i32 [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: ret i32 [[TMP1]]
- //
- unsigned int vpickve2gr_wu(v4i32 _1) { return __lsx_vpickve2gr_wu(_1, 1); }
- // CHECK-LABEL: @vpickve2gr_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret i64 [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: ret i64 [[TMP1]]
- //
- unsigned long int vpickve2gr_du(v2i64 _1) { return __lsx_vpickve2gr_du(_1, 1); }
- // CHECK-LABEL: @vinsgr2vr_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[_1:%.*]], i32 1, i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[TMP0]], i32 1, i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vinsgr2vr_b(v16i8 _1) { return __lsx_vinsgr2vr_b(_1, 1, 1); }
- // CHECK-LABEL: @vinsgr2vr_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[_1:%.*]], i32 1, i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[TMP0]], i32 1, i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vinsgr2vr_h(v8i16 _1) { return __lsx_vinsgr2vr_h(_1, 1, 1); }
- // CHECK-LABEL: @vinsgr2vr_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[_1:%.*]], i32 1, i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[TMP0]], i32 1, i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vinsgr2vr_w(v4i32 _1) { return __lsx_vinsgr2vr_w(_1, 1, 1); }
- // CHECK-LABEL: @vinsgr2vr_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[_1:%.*]], i64 1, i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[TMP0]], i64 1, i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vinsgr2vr_d(v2i64 _1) { return __lsx_vinsgr2vr_d(_1, 1, 1); }
- // CHECK-LABEL: @vfadd_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x float> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4f32 vfadd_s(v4f32 _1, v4f32 _2) { return __lsx_vfadd_s(_1, _2); }
- // CHECK-LABEL: @vfadd_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x double> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2f64 vfadd_d(v2f64 _1, v2f64 _2) { return __lsx_vfadd_d(_1, _2); }
- // CHECK-LABEL: @vfsub_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x float> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4f32 vfsub_s(v4f32 _1, v4f32 _2) { return __lsx_vfsub_s(_1, _2); }
- // CHECK-LABEL: @vfsub_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x double> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2f64 vfsub_d(v2f64 _1, v2f64 _2) { return __lsx_vfsub_d(_1, _2); }
- // CHECK-LABEL: @vfmul_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x float> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4f32 vfmul_s(v4f32 _1, v4f32 _2) { return __lsx_vfmul_s(_1, _2); }
- // CHECK-LABEL: @vfmul_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x double> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2f64 vfmul_d(v2f64 _1, v2f64 _2) { return __lsx_vfmul_d(_1, _2); }
- // CHECK-LABEL: @vfdiv_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x float> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { return __lsx_vfdiv_s(_1, _2); }
- // CHECK-LABEL: @vfdiv_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x double> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { return __lsx_vfdiv_d(_1, _2); }
- // CHECK-LABEL: @vfcvt_h_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { return __lsx_vfcvt_h_s(_1, _2); }
- // CHECK-LABEL: @vfcvt_s_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x float> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { return __lsx_vfcvt_s_d(_1, _2); }
- // CHECK-LABEL: @vfmin_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x float> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4f32 vfmin_s(v4f32 _1, v4f32 _2) { return __lsx_vfmin_s(_1, _2); }
- // CHECK-LABEL: @vfmin_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x double> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2f64 vfmin_d(v2f64 _1, v2f64 _2) { return __lsx_vfmin_d(_1, _2); }
- // CHECK-LABEL: @vfmina_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x float> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4f32 vfmina_s(v4f32 _1, v4f32 _2) { return __lsx_vfmina_s(_1, _2); }
- // CHECK-LABEL: @vfmina_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x double> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2f64 vfmina_d(v2f64 _1, v2f64 _2) { return __lsx_vfmina_d(_1, _2); }
- // CHECK-LABEL: @vfmax_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x float> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4f32 vfmax_s(v4f32 _1, v4f32 _2) { return __lsx_vfmax_s(_1, _2); }
- // CHECK-LABEL: @vfmax_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x double> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2f64 vfmax_d(v2f64 _1, v2f64 _2) { return __lsx_vfmax_d(_1, _2); }
- // CHECK-LABEL: @vfmaxa_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x float> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { return __lsx_vfmaxa_s(_1, _2); }
- // CHECK-LABEL: @vfmaxa_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x double> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { return __lsx_vfmaxa_d(_1, _2); }
- // CHECK-LABEL: @vfclass_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vfclass_s(v4f32 _1) { return __lsx_vfclass_s(_1); }
- // CHECK-LABEL: @vfclass_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vfclass_d(v2f64 _1) { return __lsx_vfclass_d(_1); }
- // CHECK-LABEL: @vfsqrt_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x float> [[TMP0]]
-+//
CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4f32 vfsqrt_s(v4f32 _1) { return __lsx_vfsqrt_s(_1); } - // CHECK-LABEL: @vfsqrt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vfsqrt_d(v2f64 _1) { return __lsx_vfsqrt_d(_1); } - // CHECK-LABEL: @vfrecip_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4f32 vfrecip_s(v4f32 _1) { return __lsx_vfrecip_s(_1); } - // CHECK-LABEL: @vfrecip_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vfrecip_d(v2f64 _1) { return __lsx_vfrecip_d(_1); } - // CHECK-LABEL: @vfrint_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4f32 vfrint_s(v4f32 _1) { return __lsx_vfrint_s(_1); } - // CHECK-LABEL: @vfrint_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vfrint_d(v2f64 _1) { return __lsx_vfrint_d(_1); } - // CHECK-LABEL: @vfrsqrt_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] 
= bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4f32 vfrsqrt_s(v4f32 _1) { return __lsx_vfrsqrt_s(_1); } - // CHECK-LABEL: @vfrsqrt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vfrsqrt_d(v2f64 _1) { return __lsx_vfrsqrt_d(_1); } - // CHECK-LABEL: @vflogb_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4f32 vflogb_s(v4f32 _1) { return __lsx_vflogb_s(_1); } - // CHECK-LABEL: @vflogb_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vflogb_d(v2f64 _1) { return __lsx_vflogb_d(_1); } - // CHECK-LABEL: @vfcvth_s_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4f32 vfcvth_s_h(v8i16 _1) { return __lsx_vfcvth_s_h(_1); } - // CHECK-LABEL: @vfcvth_d_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vfcvth_d_s(v4f32 _1) { return __lsx_vfcvth_d_s(_1); } - // CHECK-LABEL: @vfcvtl_s_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4f32 vfcvtl_s_h(v8i16 _1) { return __lsx_vfcvtl_s_h(_1); } - // CHECK-LABEL: @vfcvtl_d_s( - // CHECK-NEXT: entry: --// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vfcvtl_d_s(v4f32 _1) { return __lsx_vfcvtl_d_s(_1); } - // CHECK-LABEL: @vftint_w_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vftint_w_s(v4f32 _1) { return __lsx_vftint_w_s(_1); } - // CHECK-LABEL: @vftint_l_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftint_l_d(v2f64 _1) { return __lsx_vftint_l_d(_1); } - // CHECK-LABEL: @vftint_wu_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4u32 vftint_wu_s(v4f32 _1) { return __lsx_vftint_wu_s(_1); } - // CHECK-LABEL: @vftint_lu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2u64 vftint_lu_d(v2f64 _1) { return __lsx_vftint_lu_d(_1); } - // CHECK-LABEL: @vftintrz_w_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vftintrz_w_s(v4f32 _1) { return __lsx_vftintrz_w_s(_1); } - // CHECK-LABEL: @vftintrz_l_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrz_l_d(v2f64 _1) { return __lsx_vftintrz_l_d(_1); } - // CHECK-LABEL: @vftintrz_wu_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4u32 vftintrz_wu_s(v4f32 _1) { return __lsx_vftintrz_wu_s(_1); } - // CHECK-LABEL: @vftintrz_lu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2u64 vftintrz_lu_d(v2f64 _1) { return __lsx_vftintrz_lu_d(_1); } - // CHECK-LABEL: @vffint_s_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4f32 vffint_s_w(v4i32 _1) { return __lsx_vffint_s_w(_1); } - // CHECK-LABEL: @vffint_d_l( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vffint_d_l(v2i64 _1) { return __lsx_vffint_d_l(_1); } - // CHECK-LABEL: @vffint_s_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4f32 vffint_s_wu(v4u32 _1) { return __lsx_vffint_s_wu(_1); } - // CHECK-LABEL: @vffint_d_lu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] 
= bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vffint_d_lu(v2u64 _1) { return __lsx_vffint_d_lu(_1); } - // CHECK-LABEL: @vandn_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vandn_v(v16u8 _1, v16u8 _2) { return __lsx_vandn_v(_1, _2); } - // CHECK-LABEL: @vneg_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vneg_b(v16i8 _1) { return __lsx_vneg_b(_1); } - // CHECK-LABEL: @vneg_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vneg_h(v8i16 _1) { return __lsx_vneg_h(_1); } - // CHECK-LABEL: @vneg_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vneg_w(v4i32 _1) { return __lsx_vneg_w(_1); } - // CHECK-LABEL: @vneg_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vneg_d(v2i64 _1) { return __lsx_vneg_d(_1); } - // CHECK-LABEL: @vmuh_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vmuh_b(v16i8 _1, v16i8 _2) { 
return __lsx_vmuh_b(_1, _2); } - // CHECK-LABEL: @vmuh_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __lsx_vmuh_h(_1, _2); } - // CHECK-LABEL: @vmuh_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __lsx_vmuh_w(_1, _2); } - // CHECK-LABEL: @vmuh_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __lsx_vmuh_d(_1, _2); } - // CHECK-LABEL: @vmuh_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { return __lsx_vmuh_bu(_1, _2); } - // CHECK-LABEL: @vmuh_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { return __lsx_vmuh_hu(_1, _2); } - // CHECK-LABEL: @vmuh_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> 
[[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { return __lsx_vmuh_wu(_1, _2); } - // CHECK-LABEL: @vmuh_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2u64 vmuh_du(v2u64 _1, v2u64 _2) { return __lsx_vmuh_du(_1, _2); } - // CHECK-LABEL: @vsllwil_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vsllwil_h_b(v16i8 _1) { return __lsx_vsllwil_h_b(_1, 1); } - // CHECK-LABEL: @vsllwil_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vsllwil_w_h(v8i16 _1) { return __lsx_vsllwil_w_h(_1, 1); } - // CHECK-LABEL: @vsllwil_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vsllwil_d_w(v4i32 _1) { return __lsx_vsllwil_d_w(_1, 1); } - // CHECK-LABEL: @vsllwil_hu_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8u16 vsllwil_hu_bu(v16u8 _1) { return __lsx_vsllwil_hu_bu(_1, 1); } - // CHECK-LABEL: @vsllwil_wu_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4u32 vsllwil_wu_hu(v8u16 _1) { return __lsx_vsllwil_wu_hu(_1, 1); } - // CHECK-LABEL: @vsllwil_du_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2u64 vsllwil_du_wu(v4u32 _1) { return __lsx_vsllwil_du_wu(_1, 1); } - // CHECK-LABEL: @vsran_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsran_b_h(_1, _2); } - // CHECK-LABEL: @vsran_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsran_h_w(_1, _2); } - // CHECK-LABEL: @vsran_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsran_w_d(_1, _2); } - // CHECK-LABEL: @vssran_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[TMP0]], <8 
x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssran_b_h(_1, _2); } - // CHECK-LABEL: @vssran_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssran_h_w(_1, _2); } - // CHECK-LABEL: @vssran_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssran_w_d(_1, _2); } - // CHECK-LABEL: @vssran_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssran_bu_h(_1, _2); } - // CHECK-LABEL: @vssran_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssran_hu_w(_1, _2); } - // CHECK-LABEL: @vssran_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 
vssran_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssran_wu_d(_1, _2); } - // CHECK-LABEL: @vsrarn_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrarn_b_h(_1, _2); } - // CHECK-LABEL: @vsrarn_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrarn_h_w(_1, _2); } - // CHECK-LABEL: @vsrarn_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrarn_w_d(_1, _2); } - // CHECK-LABEL: @vssrarn_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrarn_b_h(_1, _2); } - // CHECK-LABEL: @vssrarn_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrarn_h_w(_1, _2); } - // CHECK-LABEL: @vssrarn_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrarn_w_d(_1, _2); } - // CHECK-LABEL: @vssrarn_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrarn_bu_h(_1, _2); } - // CHECK-LABEL: @vssrarn_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrarn_hu_w(_1, _2); } - // CHECK-LABEL: @vssrarn_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrarn_wu_d(_1, _2); } - // CHECK-LABEL: @vsrln_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrln_b_h(_1, _2); } - // CHECK-LABEL: @vsrln_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: 
ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrln_h_w(_1, _2); } - // CHECK-LABEL: @vsrln_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrln_w_d(_1, _2); } - // CHECK-LABEL: @vssrln_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrln_bu_h(_1, _2); } - // CHECK-LABEL: @vssrln_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrln_hu_w(_1, _2); } - // CHECK-LABEL: @vssrln_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrln_wu_d(_1, _2); } - // CHECK-LABEL: @vsrlrn_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 
[[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlrn_b_h(_1, _2); } - // CHECK-LABEL: @vsrlrn_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlrn_h_w(_1, _2); } - // CHECK-LABEL: @vsrlrn_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlrn_w_d(_1, _2); } - // CHECK-LABEL: @vssrlrn_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrlrn_bu_h(_1, _2); } - // CHECK-LABEL: @vssrlrn_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrlrn_hu_w(_1, _2); } - // CHECK-LABEL: @vssrlrn_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> 
[[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrlrn_wu_d(_1, _2); } - // CHECK-LABEL: @vfrstpi_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { return __lsx_vfrstpi_b(_1, _2, 1); } - // CHECK-LABEL: @vfrstpi_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { return __lsx_vfrstpi_h(_1, _2, 1); } - // CHECK-LABEL: @vfrstp_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) { - return __lsx_vfrstp_b(_1, _2, _3); - } - // CHECK-LABEL: @vfrstp_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) { - return __lsx_vfrstp_h(_1, _2, _3); - } - // CHECK-LABEL: @vshuf4i_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = 
bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { return __lsx_vshuf4i_d(_1, _2, 1); } - // CHECK-LABEL: @vbsrl_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vbsrl_v(v16i8 _1) { return __lsx_vbsrl_v(_1, 1); } - // CHECK-LABEL: @vbsll_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vbsll_v(v16i8 _1) { return __lsx_vbsll_v(_1, 1); } - // CHECK-LABEL: @vextrins_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vextrins_b(v16i8 _1, v16i8 _2) { return __lsx_vextrins_b(_1, _2, 1); } - // CHECK-LABEL: @vextrins_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vextrins_h(v8i16 _1, v8i16 _2) { return __lsx_vextrins_h(_1, _2, 1); } - // CHECK-LABEL: @vextrins_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vextrins_w(v4i32 _1, v4i32 _2) { return __lsx_vextrins_w(_1, _2, 1); } - // 
CHECK-LABEL: @vextrins_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vextrins_d(v2i64 _1, v2i64 _2) { return __lsx_vextrins_d(_1, _2, 1); } - // CHECK-LABEL: @vmskltz_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vmskltz_b(v16i8 _1) { return __lsx_vmskltz_b(_1); } - // CHECK-LABEL: @vmskltz_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vmskltz_h(v8i16 _1) { return __lsx_vmskltz_h(_1); } - // CHECK-LABEL: @vmskltz_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vmskltz_w(v4i32 _1) { return __lsx_vmskltz_w(_1); } - // CHECK-LABEL: @vmskltz_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vmskltz_d(v2i64 _1) { return __lsx_vmskltz_d(_1); } - // CHECK-LABEL: @vsigncov_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { return __lsx_vsigncov_b(_1, _2); } - // 
CHECK-LABEL: @vsigncov_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { return __lsx_vsigncov_h(_1, _2); } - // CHECK-LABEL: @vsigncov_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { return __lsx_vsigncov_w(_1, _2); } - // CHECK-LABEL: @vsigncov_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { return __lsx_vsigncov_d(_1, _2); } - // CHECK-LABEL: @vfmadd_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { - return __lsx_vfmadd_s(_1, _2, _3); - } - // CHECK-LABEL: @vfmadd_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 
x double> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { - return __lsx_vfmadd_d(_1, _2, _3); - } - // CHECK-LABEL: @vfmsub_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { - return __lsx_vfmsub_s(_1, _2, _3); - } - // CHECK-LABEL: @vfmsub_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { - return __lsx_vfmsub_d(_1, _2, _3); - } - // CHECK-LABEL: @vfnmadd_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { - return __lsx_vfnmadd_s(_1, _2, _3); - } - // CHECK-LABEL: @vfnmadd_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { - return __lsx_vfnmadd_d(_1, _2, _3); - } - // CHECK-LABEL: @vfnmsub_s( - 
// CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { - return __lsx_vfnmsub_s(_1, _2, _3); - } - // CHECK-LABEL: @vfnmsub_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { - return __lsx_vfnmsub_d(_1, _2, _3); - } - // CHECK-LABEL: @vftintrne_w_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vftintrne_w_s(v4f32 _1) { return __lsx_vftintrne_w_s(_1); } - // CHECK-LABEL: @vftintrne_l_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrne_l_d(v2f64 _1) { return __lsx_vftintrne_l_d(_1); } - // CHECK-LABEL: @vftintrp_w_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vftintrp_w_s(v4f32 _1) { return __lsx_vftintrp_w_s(_1); } - // CHECK-LABEL: @vftintrp_l_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x 
i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrp_l_d(v2f64 _1) { return __lsx_vftintrp_l_d(_1); } - // CHECK-LABEL: @vftintrm_w_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vftintrm_w_s(v4f32 _1) { return __lsx_vftintrm_w_s(_1); } - // CHECK-LABEL: @vftintrm_l_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrm_l_d(v2f64 _1) { return __lsx_vftintrm_l_d(_1); } - // CHECK-LABEL: @vftint_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftint_w_d(_1, _2); } - // CHECK-LABEL: @vffint_s_l( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { return __lsx_vffint_s_l(_1, _2); } - // CHECK-LABEL: @vftintrz_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vftintrz_w_d(v2f64 _1, 
v2f64 _2) { return __lsx_vftintrz_w_d(_1, _2); } - // CHECK-LABEL: @vftintrp_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrp_w_d(_1, _2); } - // CHECK-LABEL: @vftintrm_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrm_w_d(_1, _2); } - // CHECK-LABEL: @vftintrne_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrne_w_d(_1, _2); } - // CHECK-LABEL: @vftintl_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintl_l_s(v4f32 _1) { return __lsx_vftintl_l_s(_1); } - // CHECK-LABEL: @vftinth_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftinth_l_s(v4f32 _1) { return __lsx_vftinth_l_s(_1); } - // CHECK-LABEL: @vffinth_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vffinth_d_w(v4i32 _1) { return __lsx_vffinth_d_w(_1); } - // CHECK-LABEL: @vffintl_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vffintl_d_w(v4i32 _1) { return __lsx_vffintl_d_w(_1); } - // CHECK-LABEL: @vftintrzl_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrzl_l_s(v4f32 _1) { return __lsx_vftintrzl_l_s(_1); } - // CHECK-LABEL: @vftintrzh_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrzh_l_s(v4f32 _1) { return __lsx_vftintrzh_l_s(_1); } - // CHECK-LABEL: @vftintrpl_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrpl_l_s(v4f32 _1) { return __lsx_vftintrpl_l_s(_1); } - // CHECK-LABEL: @vftintrph_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrph_l_s(v4f32 _1) { return __lsx_vftintrph_l_s(_1); } - // CHECK-LABEL: @vftintrml_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[TMP0]]) -+// 
CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrml_l_s(v4f32 _1) { return __lsx_vftintrml_l_s(_1); } - // CHECK-LABEL: @vftintrmh_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrmh_l_s(v4f32 _1) { return __lsx_vftintrmh_l_s(_1); } - // CHECK-LABEL: @vftintrnel_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrnel_l_s(v4f32 _1) { return __lsx_vftintrnel_l_s(_1); } - // CHECK-LABEL: @vftintrneh_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrneh_l_s(v4f32 _1) { return __lsx_vftintrneh_l_s(_1); } - // CHECK-LABEL: @vfrintrne_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> --// CHECK-NEXT: ret <4 x i32> [[TMP1]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vfrintrne_s(v4f32 _1) { return __lsx_vfrintrne_s(_1); } - // CHECK-LABEL: @vfrintrne_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> --// CHECK-NEXT: ret <2 x i64> [[TMP1]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vfrintrne_d(v2f64 _1) { return __lsx_vfrintrne_d(_1); } - // CHECK-LABEL: @vfrintrz_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> --// CHECK-NEXT: ret <4 x i32> [[TMP1]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: 
[[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vfrintrz_s(v4f32 _1) { return __lsx_vfrintrz_s(_1); } - // CHECK-LABEL: @vfrintrz_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> --// CHECK-NEXT: ret <2 x i64> [[TMP1]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vfrintrz_d(v2f64 _1) { return __lsx_vfrintrz_d(_1); } - // CHECK-LABEL: @vfrintrp_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> --// CHECK-NEXT: ret <4 x i32> [[TMP1]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vfrintrp_s(v4f32 _1) { return __lsx_vfrintrp_s(_1); } - // CHECK-LABEL: @vfrintrp_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> --// CHECK-NEXT: ret <2 x i64> [[TMP1]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vfrintrp_d(v2f64 _1) { return __lsx_vfrintrp_d(_1); } - // CHECK-LABEL: @vfrintrm_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> --// CHECK-NEXT: ret <4 x i32> [[TMP1]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vfrintrm_s(v4f32 _1) { return __lsx_vfrintrm_s(_1); } - // CHECK-LABEL: @vfrintrm_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> --// CHECK-NEXT: ret <2 x i64> [[TMP1]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vfrintrm_d(v2f64 _1) { return __lsx_vfrintrm_d(_1); } - // CHECK-LABEL: @vstelm_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: tail call void 
@llvm.loongarch.lsx.vstelm.b(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1, i32 1) - // CHECK-NEXT: ret void - // - void vstelm_b(v16i8 _1, void *_2) { return __lsx_vstelm_b(_1, _2, 1, 1); } - // CHECK-LABEL: @vstelm_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[TMP0]], ptr [[_2:%.*]], i32 2, i32 1) - // CHECK-NEXT: ret void - // - void vstelm_h(v8i16 _1, void *_2) { return __lsx_vstelm_h(_1, _2, 2, 1); } - // CHECK-LABEL: @vstelm_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[TMP0]], ptr [[_2:%.*]], i32 4, i32 1) - // CHECK-NEXT: ret void - // - void vstelm_w(v4i32 _1, void *_2) { return __lsx_vstelm_w(_1, _2, 4, 1); } - // CHECK-LABEL: @vstelm_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[TMP0]], ptr [[_2:%.*]], i32 8, i32 1) - // CHECK-NEXT: ret void - // - void vstelm_d(v2i64 _1, void *_2) { return __lsx_vstelm_d(_1, _2, 8, 1); } - // CHECK-LABEL: @vaddwev_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwev_d_w(_1, _2); } - // CHECK-LABEL: @vaddwev_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwev_w_h(_1, _2); } - // CHECK-LABEL: @vaddwev_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x 
i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwev_h_b(_1, _2); } - // CHECK-LABEL: @vaddwod_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwod_d_w(_1, _2); } - // CHECK-LABEL: @vaddwod_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwod_w_h(_1, _2); } - // CHECK-LABEL: @vaddwod_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwod_h_b(_1, _2); } - // CHECK-LABEL: @vaddwev_d_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwev_d_wu(_1, _2); } - // CHECK-LABEL: @vaddwev_w_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x 
i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwev_w_hu(_1, _2); } - // CHECK-LABEL: @vaddwev_h_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwev_h_bu(_1, _2); } - // CHECK-LABEL: @vaddwod_d_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwod_d_wu(_1, _2); } - // CHECK-LABEL: @vaddwod_w_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwod_w_hu(_1, _2); } - // CHECK-LABEL: @vaddwod_h_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwod_h_bu(_1, _2); } - // CHECK-LABEL: @vaddwev_d_wu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vaddwev_d_wu_w(v4u32 _1, 
v4i32 _2) { - return __lsx_vaddwev_d_wu_w(_1, _2); - } - // CHECK-LABEL: @vaddwev_w_hu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { - return __lsx_vaddwev_w_hu_h(_1, _2); - } - // CHECK-LABEL: @vaddwev_h_bu_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) { - return __lsx_vaddwev_h_bu_b(_1, _2); - } - // CHECK-LABEL: @vaddwod_d_wu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) { - return __lsx_vaddwod_d_wu_w(_1, _2); - } - // CHECK-LABEL: @vaddwod_w_hu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) { - return __lsx_vaddwod_w_hu_h(_1, _2); - } - // CHECK-LABEL: @vaddwod_h_bu_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) { - return 
__lsx_vaddwod_h_bu_b(_1, _2); - } - // CHECK-LABEL: @vsubwev_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwev_d_w(_1, _2); } - // CHECK-LABEL: @vsubwev_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwev_w_h(_1, _2); } - // CHECK-LABEL: @vsubwev_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwev_h_b(_1, _2); } - // CHECK-LABEL: @vsubwod_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwod_d_w(_1, _2); } - // CHECK-LABEL: @vsubwod_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwod_w_h(_1, _2); } - // CHECK-LABEL: @vsubwod_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwod_h_b(_1, _2); } - // CHECK-LABEL: @vsubwev_d_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwev_d_wu(_1, _2); } - // CHECK-LABEL: @vsubwev_w_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwev_w_hu(_1, _2); } - // CHECK-LABEL: @vsubwev_h_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwev_h_bu(_1, _2); } - // CHECK-LABEL: @vsubwod_d_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwod_d_wu(_1, _2); } - // CHECK-LABEL: @vsubwod_w_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: 
ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwod_w_hu(_1, _2); }
- // CHECK-LABEL: @vsubwod_h_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwod_h_bu(_1, _2); }
- // CHECK-LABEL: @vaddwev_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwev_q_d(_1, _2); }
- // CHECK-LABEL: @vaddwod_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwod_q_d(_1, _2); }
- // CHECK-LABEL: @vaddwev_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwev_q_du(_1, _2); }
- // CHECK-LABEL: @vaddwod_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwod_q_du(_1, _2); }
- // CHECK-LABEL: @vsubwev_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwev_q_d(_1, _2); }
- // CHECK-LABEL: @vsubwod_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwod_q_d(_1, _2); }
- // CHECK-LABEL: @vsubwev_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwev_q_du(_1, _2); }
- // CHECK-LABEL: @vsubwod_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwod_q_du(_1, _2); }
- // CHECK-LABEL: @vaddwev_q_du_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) {
- return __lsx_vaddwev_q_du_d(_1, _2);
- }
- // CHECK-LABEL: @vaddwod_q_du_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) {
- return __lsx_vaddwod_q_du_d(_1, _2);
- }
- // CHECK-LABEL: @vmulwev_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwev_d_w(_1, _2); }
- // CHECK-LABEL: @vmulwev_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwev_w_h(_1, _2); }
- // CHECK-LABEL: @vmulwev_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwev_h_b(_1, _2); }
- // CHECK-LABEL: @vmulwod_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwod_d_w(_1, _2); }
- // CHECK-LABEL: @vmulwod_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwod_w_h(_1, _2); }
- // CHECK-LABEL: @vmulwod_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwod_h_b(_1, _2); }
- // CHECK-LABEL: @vmulwev_d_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwev_d_wu(_1, _2); }
- // CHECK-LABEL: @vmulwev_w_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwev_w_hu(_1, _2); }
- // CHECK-LABEL: @vmulwev_h_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwev_h_bu(_1, _2); }
- // CHECK-LABEL: @vmulwod_d_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwod_d_wu(_1, _2); }
- // CHECK-LABEL: @vmulwod_w_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwod_w_hu(_1, _2); }
- // CHECK-LABEL: @vmulwod_h_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwod_h_bu(_1, _2); }
- // CHECK-LABEL: @vmulwev_d_wu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) {
- return __lsx_vmulwev_d_wu_w(_1, _2);
- }
- // CHECK-LABEL: @vmulwev_w_hu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) {
- return __lsx_vmulwev_w_hu_h(_1, _2);
- }
- // CHECK-LABEL: @vmulwev_h_bu_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) {
- return __lsx_vmulwev_h_bu_b(_1, _2);
- }
- // CHECK-LABEL: @vmulwod_d_wu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) {
- return __lsx_vmulwod_d_wu_w(_1, _2);
- }
- // CHECK-LABEL: @vmulwod_w_hu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) {
- return __lsx_vmulwod_w_hu_h(_1, _2);
- }
- // CHECK-LABEL: @vmulwod_h_bu_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) {
- return __lsx_vmulwod_h_bu_b(_1, _2);
- }
- // CHECK-LABEL: @vmulwev_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwev_q_d(_1, _2); }
- // CHECK-LABEL: @vmulwod_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwod_q_d(_1, _2); }
- // CHECK-LABEL: @vmulwev_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwev_q_du(_1, _2); }
- // CHECK-LABEL: @vmulwod_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwod_q_du(_1, _2); }
- // CHECK-LABEL: @vmulwev_q_du_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) {
- return __lsx_vmulwev_q_du_d(_1, _2);
- }
- // CHECK-LABEL: @vmulwod_q_du_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) {
- return __lsx_vmulwod_q_du_d(_1, _2);
- }
- // CHECK-LABEL: @vhaddw_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhaddw_q_d(_1, _2); }
- // CHECK-LABEL: @vhaddw_qu_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhaddw_qu_du(_1, _2); }
- // CHECK-LABEL: @vhsubw_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhsubw_q_d(_1, _2); }
- // CHECK-LABEL: @vhsubw_qu_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhsubw_qu_du(_1, _2); }
- // CHECK-LABEL: @vmaddwev_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
- return __lsx_vmaddwev_d_w(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
- return __lsx_vmaddwev_w_h(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
- return __lsx_vmaddwev_h_b(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_d_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
- return __lsx_vmaddwev_d_wu(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_w_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
- return __lsx_vmaddwev_w_hu(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_h_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
- return __lsx_vmaddwev_h_bu(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
- return __lsx_vmaddwod_d_w(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
- return __lsx_vmaddwod_w_h(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
- return __lsx_vmaddwod_h_b(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_d_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
- return __lsx_vmaddwod_d_wu(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_w_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
- return __lsx_vmaddwod_w_hu(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_h_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
- return __lsx_vmaddwod_h_bu(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_d_wu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
- return __lsx_vmaddwev_d_wu_w(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_w_hu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
- return __lsx_vmaddwev_w_hu_h(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_h_bu_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
- return __lsx_vmaddwev_h_bu_b(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_d_wu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
- return __lsx_vmaddwod_d_wu_w(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_w_hu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
- return __lsx_vmaddwod_w_hu_h(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_h_bu_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
- return __lsx_vmaddwod_h_bu_b(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
- return __lsx_vmaddwev_q_d(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
- return __lsx_vmaddwod_q_d(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
- return __lsx_vmaddwev_q_du(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
- return __lsx_vmaddwod_q_du(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_q_du_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
- return __lsx_vmaddwev_q_du_d(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_q_du_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
- return __lsx_vmaddwod_q_du_d(_1, _2, _3);
- }
- // CHECK-LABEL: @vrotr_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vrotr_b(v16i8 _1, v16i8 _2) { return __lsx_vrotr_b(_1, _2); }
- // CHECK-LABEL: @vrotr_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vrotr_h(v8i16 _1, v8i16 _2) { return __lsx_vrotr_h(_1, _2); }
- // CHECK-LABEL: @vrotr_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vrotr_w(v4i32 _1, v4i32 _2) { return __lsx_vrotr_w(_1, _2); }
- // CHECK-LABEL: @vrotr_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vrotr_d(v2i64 _1, v2i64 _2) { return __lsx_vrotr_d(_1, _2); }
- // CHECK-LABEL: @vadd_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __lsx_vadd_q(_1, _2); }
- // CHECK-LABEL: @vsub_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __lsx_vsub_q(_1, _2); }
- // CHECK-LABEL: @vldrepl_b(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
-+// CHECK-NEXT: ret i128 [[TMP1]]
- //
- v16i8 vldrepl_b(void *_1) { return __lsx_vldrepl_b(_1, 1); }
- // CHECK-LABEL: @vldrepl_h(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
-+// CHECK-NEXT: ret i128 [[TMP1]]
- //
- v8i16 vldrepl_h(void *_1) { return __lsx_vldrepl_h(_1, 2); }
- // CHECK-LABEL: @vldrepl_w(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
-+// CHECK-NEXT: ret i128 [[TMP1]]
- //
- v4i32 vldrepl_w(void *_1) { return __lsx_vldrepl_w(_1, 4); }
- // CHECK-LABEL: @vldrepl_d(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
-+// CHECK-NEXT: ret i128 [[TMP1]]
- //
- v2i64 vldrepl_d(void *_1) { return __lsx_vldrepl_d(_1, 8); }
- // CHECK-LABEL: @vmskgez_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vmskgez_b(v16i8 _1) { return __lsx_vmskgez_b(_1); }
- // CHECK-LABEL: @vmsknz_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vmsknz_b(v16i8 _1) { return __lsx_vmsknz_b(_1); }
- // CHECK-LABEL: @vexth_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vexth_h_b(v16i8 _1) { return __lsx_vexth_h_b(_1); }
- // CHECK-LABEL: @vexth_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vexth_w_h(v8i16 _1) { return __lsx_vexth_w_h(_1); }
- // CHECK-LABEL: @vexth_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vexth_d_w(v4i32 _1) { return __lsx_vexth_d_w(_1); }
- // CHECK-LABEL: @vexth_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vexth_q_d(v2i64 _1) { return __lsx_vexth_q_d(_1); }
- // CHECK-LABEL: @vexth_hu_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8u16 vexth_hu_bu(v16u8 _1) { return __lsx_vexth_hu_bu(_1); }
- // CHECK-LABEL: @vexth_wu_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4u32 vexth_wu_hu(v8u16 _1) { return __lsx_vexth_wu_hu(_1); }
- // CHECK-LABEL: @vexth_du_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2u64 vexth_du_wu(v4u32 _1) { return __lsx_vexth_du_wu(_1); }
- // CHECK-LABEL: @vexth_qu_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2u64 vexth_qu_du(v2u64 _1) { return __lsx_vexth_qu_du(_1); }
- // CHECK-LABEL: @vrotri_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vrotri_b(v16i8 _1) { return __lsx_vrotri_b(_1, 1); }
- // CHECK-LABEL: @vrotri_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vrotri_h(v8i16 _1) { return __lsx_vrotri_h(_1, 1); }
- // CHECK-LABEL: @vrotri_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vrotri_w(v4i32 _1) { return __lsx_vrotri_w(_1, 1); }
- // CHECK-LABEL: @vrotri_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vrotri_d(v2i64 _1) { return __lsx_vrotri_d(_1, 1); }
- // CHECK-LABEL: @vextl_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vextl_q_d(v2i64 _1) { return __lsx_vextl_q_d(_1); }
- // CHECK-LABEL: @vsrlni_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlni_b_h(_1, _2, 1); }
- // CHECK-LABEL: @vsrlni_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlni_h_w(_1, _2, 1); }
- // CHECK-LABEL: @vsrlni_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlni_w_d(_1, _2, 1); }
- // CHECK-LABEL: @vsrlni_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlni_d_q(_1, _2, 1); }
- // CHECK-LABEL: @vsrlrni_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlrni_b_h(_1, _2, 1); }
- // CHECK-LABEL: @vsrlrni_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlrni_h_w(_1, _2, 1); }
- // CHECK-LABEL: @vsrlrni_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlrni_w_d(_1, _2, 1); }
- // CHECK-LABEL: @vsrlrni_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlrni_d_q(_1, _2, 1); }
- // CHECK-LABEL: @vssrlni_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlni_b_h(_1, _2, 1); }
- // CHECK-LABEL: @vssrlni_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlni_h_w(_1, _2, 1); }
- // CHECK-LABEL: @vssrlni_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlni_w_d(_1, _2, 1); }
- // CHECK-LABEL: @vssrlni_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlni_d_q(_1, _2, 1); }
- // CHECK-LABEL: @vssrlni_bu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrlni_bu_h(_1, _2, 1); }
- // CHECK-LABEL: @vssrlni_hu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrlni_hu_w(_1, _2, 1); }
- // CHECK-LABEL: @vssrlni_wu_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrlni_wu_d(_1, _2, 1); }
- // CHECK-LABEL: @vssrlni_du_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrlni_du_q(_1, _2, 1); }
- // CHECK-LABEL: @vssrlrni_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlrni_b_h(_1, _2, 1); }
- // CHECK-LABEL: @vssrlrni_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlrni_h_w(_1, _2, 1); }
- // CHECK-LABEL: @vssrlrni_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlrni_w_d(_1, _2, 1); }
- // CHECK-LABEL: @vssrlrni_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlrni_d_q(_1, _2, 1); } - // CHECK-LABEL: @vssrlrni_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { - return __lsx_vssrlrni_bu_h(_1, _2, 1); - } - // CHECK-LABEL: @vssrlrni_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { - return __lsx_vssrlrni_hu_w(_1, _2, 1); - } - // CHECK-LABEL: @vssrlrni_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { - return __lsx_vssrlrni_wu_d(_1, _2, 1); - } - // CHECK-LABEL: @vssrlrni_du_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { - return __lsx_vssrlrni_du_q(_1, _2, 1); - } - // CHECK-LABEL: @vsrani_b_h( - // 
CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrani_b_h(_1, _2, 1); } - // CHECK-LABEL: @vsrani_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrani_h_w(_1, _2, 1); } - // CHECK-LABEL: @vsrani_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrani_w_d(_1, _2, 1); } - // CHECK-LABEL: @vsrani_d_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrani_d_q(_1, _2, 1); } - // CHECK-LABEL: @vsrarni_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrarni_b_h(_1, _2, 1); } - // CHECK-LABEL: @vsrarni_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 
x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrarni_h_w(_1, _2, 1); } - // CHECK-LABEL: @vsrarni_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrarni_w_d(_1, _2, 1); } - // CHECK-LABEL: @vsrarni_d_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrarni_d_q(_1, _2, 1); } - // CHECK-LABEL: @vssrani_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrani_b_h(_1, _2, 1); } - // CHECK-LABEL: @vssrani_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrani_h_w(_1, _2, 1); } - // CHECK-LABEL: @vssrani_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x 
i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrani_w_d(_1, _2, 1); } - // CHECK-LABEL: @vssrani_d_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrani_d_q(_1, _2, 1); } - // CHECK-LABEL: @vssrani_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrani_bu_h(_1, _2, 1); } - // CHECK-LABEL: @vssrani_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrani_hu_w(_1, _2, 1); } - // CHECK-LABEL: @vssrani_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrani_wu_d(_1, _2, 1); } - // CHECK-LABEL: @vssrani_du_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[_1:%.*]], <2 x i64> 
[[_2:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrani_du_q(_1, _2, 1); } - // CHECK-LABEL: @vssrarni_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrarni_b_h(_1, _2, 1); } - // CHECK-LABEL: @vssrarni_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrarni_h_w(_1, _2, 1); } - // CHECK-LABEL: @vssrarni_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrarni_w_d(_1, _2, 1); } - // CHECK-LABEL: @vssrarni_d_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrarni_d_q(_1, _2, 1); } - // CHECK-LABEL: @vssrarni_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 
1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { - return __lsx_vssrarni_bu_h(_1, _2, 1); - } - // CHECK-LABEL: @vssrarni_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { - return __lsx_vssrarni_hu_w(_1, _2, 1); - } - // CHECK-LABEL: @vssrarni_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { - return __lsx_vssrarni_wu_d(_1, _2, 1); - } - // CHECK-LABEL: @vssrarni_du_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { - return __lsx_vssrarni_du_q(_1, _2, 1); - } - // CHECK-LABEL: @vpermi_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vpermi_w(v4i32 _1, v4i32 _2) { return __lsx_vpermi_w(_1, _2, 1); } - // CHECK-LABEL: @vld( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// 
CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 -+// CHECK-NEXT: ret i128 [[TMP1]] - // - v16i8 vld(void *_1) { return __lsx_vld(_1, 1); } - // CHECK-LABEL: @vst( - // CHECK-NEXT: entry: --// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1) - // CHECK-NEXT: ret void - // - void vst(v16i8 _1, void *_2) { return __lsx_vst(_1, _2, 1); } - // CHECK-LABEL: @vssrlrn_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrlrn_b_h(_1, _2); } - // CHECK-LABEL: @vssrlrn_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrlrn_h_w(_1, _2); } - // CHECK-LABEL: @vssrlrn_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrlrn_w_d(_1, _2); } - // CHECK-LABEL: @vssrln_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrln_b_h(_1, _2); } - // CHECK-LABEL: @vssrln_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrln_h_w(_1, _2); } - // CHECK-LABEL: @vssrln_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrln_w_d(_1, _2); } - // CHECK-LABEL: @vorn_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __lsx_vorn_v(_1, _2); } - // CHECK-LABEL: @vldi( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 -+// CHECK-NEXT: ret i128 [[TMP1]] - // - v2i64 vldi() { return __lsx_vldi(1); } - // CHECK-LABEL: @vshuf_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { - return __lsx_vshuf_b(_1, _2, _3); -@@ -4086,366 +5844,516 @@ v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { - // CHECK-LABEL: @vldx( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 -+// CHECK-NEXT: ret i128 [[TMP1]] - // - v16i8 vldx(void *_1) { return __lsx_vldx(_1, 1); } - // CHECK-LABEL: @vstx( - // CHECK-NEXT: entry: --// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// 
CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i64 1) - // CHECK-NEXT: ret void - // - void vstx(v16i8 _1, void *_2) { return __lsx_vstx(_1, _2, 1); } - // CHECK-LABEL: @vextl_qu_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2u64 vextl_qu_du(v2u64 _1) { return __lsx_vextl_qu_du(_1); } - // CHECK-LABEL: @bnz_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bnz_b(v16u8 _1) { return __lsx_bnz_b(_1); } - // CHECK-LABEL: @bnz_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bnz_d(v2u64 _1) { return __lsx_bnz_d(_1); } - // CHECK-LABEL: @bnz_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bnz_h(v8u16 _1) { return __lsx_bnz_h(_1); } - // CHECK-LABEL: @bnz_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bnz_v(v16u8 _1) { return __lsx_bnz_v(_1); } - // CHECK-LABEL: @bnz_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bnz_w(v4u32 _1) { return __lsx_bnz_w(_1); } - // CHECK-LABEL: @bz_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bz_b(v16u8 _1) { return __lsx_bz_b(_1); } - // CHECK-LABEL: @bz_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bz_d(v2u64 _1) { return __lsx_bz_d(_1); } - // CHECK-LABEL: @bz_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bz_h(v8u16 _1) { return __lsx_bz_h(_1); } - // CHECK-LABEL: @bz_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bz_v(v16u8 _1) { return __lsx_bz_v(_1); } - // CHECK-LABEL: @bz_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bz_w(v4u32 _1) { return __lsx_bz_w(_1); } - // CHECK-LABEL: @vfcmp_caf_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_caf_d(_1, _2); } - // CHECK-LABEL: @vfcmp_caf_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_caf_s(_1, _2); } - // CHECK-LABEL: @vfcmp_ceq_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - 
v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_ceq_d(_1, _2); } - // CHECK-LABEL: @vfcmp_ceq_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_ceq_s(_1, _2); } - // CHECK-LABEL: @vfcmp_cle_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cle_d(_1, _2); } - // CHECK-LABEL: @vfcmp_cle_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cle_s(_1, _2); } - // CHECK-LABEL: @vfcmp_clt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_clt_d(_1, _2); } - // CHECK-LABEL: @vfcmp_clt_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { return 
__lsx_vfcmp_clt_s(_1, _2); } - // CHECK-LABEL: @vfcmp_cne_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cne_d(_1, _2); } - // CHECK-LABEL: @vfcmp_cne_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cne_s(_1, _2); } - // CHECK-LABEL: @vfcmp_cor_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cor_d(_1, _2); } - // CHECK-LABEL: @vfcmp_cor_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cor_s(_1, _2); } - // CHECK-LABEL: @vfcmp_cueq_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cueq_d(_1, _2); } - // CHECK-LABEL: 
@vfcmp_cueq_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cueq_s(_1, _2); } - // CHECK-LABEL: @vfcmp_cule_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cule_d(_1, _2); } - // CHECK-LABEL: @vfcmp_cule_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cule_s(_1, _2); } - // CHECK-LABEL: @vfcmp_cult_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cult_d(_1, _2); } - // CHECK-LABEL: @vfcmp_cult_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cult_s(_1, _2); } - // CHECK-LABEL: @vfcmp_cun_d( - // CHECK-NEXT: 
entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cun_d(_1, _2); } - // CHECK-LABEL: @vfcmp_cune_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cune_d(_1, _2); } - // CHECK-LABEL: @vfcmp_cune_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cune_s(_1, _2); } - // CHECK-LABEL: @vfcmp_cun_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cun_s(_1, _2); } - // CHECK-LABEL: @vfcmp_saf_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_saf_d(_1, _2); } - // CHECK-LABEL: @vfcmp_saf_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_saf_s(_1, _2); } - // CHECK-LABEL: @vfcmp_seq_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_seq_d(_1, _2); } - // CHECK-LABEL: @vfcmp_seq_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_seq_s(_1, _2); } - // CHECK-LABEL: @vfcmp_sle_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sle_d(_1, _2); } - // CHECK-LABEL: @vfcmp_sle_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sle_s(_1, _2); } - // CHECK-LABEL: @vfcmp_slt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x 
double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_slt_d(_1, _2); } - // CHECK-LABEL: @vfcmp_slt_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_slt_s(_1, _2); } - // CHECK-LABEL: @vfcmp_sne_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sne_d(_1, _2); } - // CHECK-LABEL: @vfcmp_sne_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sne_s(_1, _2); } - // CHECK-LABEL: @vfcmp_sor_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sor_d(_1, _2); } - // CHECK-LABEL: @vfcmp_sor_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// 
CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sor_s(_1, _2); } - // CHECK-LABEL: @vfcmp_sueq_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sueq_d(_1, _2); } - // CHECK-LABEL: @vfcmp_sueq_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sueq_s(_1, _2); } - // CHECK-LABEL: @vfcmp_sule_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sule_d(_1, _2); } - // CHECK-LABEL: @vfcmp_sule_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sule_s(_1, _2); } - // CHECK-LABEL: @vfcmp_sult_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> 
[[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sult_d(_1, _2); } - // CHECK-LABEL: @vfcmp_sult_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sult_s(_1, _2); } - // CHECK-LABEL: @vfcmp_sun_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sun_d(_1, _2); } - // CHECK-LABEL: @vfcmp_sune_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sune_d(_1, _2); } - // CHECK-LABEL: @vfcmp_sune_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sune_s(_1, _2); } - // CHECK-LABEL: @vfcmp_sun_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sun_s(_1, _2); } - // CHECK-LABEL: @vrepli_b( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 -+// CHECK-NEXT: ret i128 [[TMP1]] - // - v16i8 vrepli_b() { return __lsx_vrepli_b(1); } - // CHECK-LABEL: @vrepli_d( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 -+// CHECK-NEXT: ret i128 [[TMP1]] - // - v2i64 vrepli_d() { return __lsx_vrepli_d(1); } - // CHECK-LABEL: @vrepli_h( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 -+// CHECK-NEXT: ret i128 [[TMP1]] - // - v8i16 vrepli_h() { return __lsx_vrepli_h(1); } - // CHECK-LABEL: @vrepli_w( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 -+// CHECK-NEXT: ret i128 [[TMP1]] - // - v4i32 vrepli_w() { return __lsx_vrepli_w(1); } -diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin.c b/clang/test/CodeGen/LoongArch/lsx/builtin.c -index ef5a390e1838..05a3d13a7fb9 100644 ---- a/clang/test/CodeGen/LoongArch/lsx/builtin.c -+++ b/clang/test/CodeGen/LoongArch/lsx/builtin.c -@@ -29,3319 +29,4547 @@ typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); - - // CHECK-LABEL: @vsll_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsll_b(_1, _2); } - // CHECK-LABEL: @vsll_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsll_h(_1, _2); } - // CHECK-LABEL: @vsll_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsll_w(_1, _2); } - // CHECK-LABEL: @vsll_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsll_d(_1, _2); } - // CHECK-LABEL: @vslli_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vslli_b(v16i8 _1) { return __builtin_lsx_vslli_b(_1, 1); } - // CHECK-LABEL: @vslli_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vslli_h(v8i16 _1) { return __builtin_lsx_vslli_h(_1, 1); } - // CHECK-LABEL: @vslli_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vslli_w(v4i32 _1) { return __builtin_lsx_vslli_w(_1, 1); } - // CHECK-LABEL: @vslli_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vslli_d(v2i64 _1) { return __builtin_lsx_vslli_d(_1, 1); } - // CHECK-LABEL: @vsra_b( - // CHECK-NEXT: entry: 
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsra_b(_1, _2); } - // CHECK-LABEL: @vsra_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsra_h(_1, _2); } - // CHECK-LABEL: @vsra_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsra_w(_1, _2); } - // CHECK-LABEL: @vsra_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsra_d(_1, _2); } - // CHECK-LABEL: @vsrai_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vsrai_b(v16i8 _1) { return __builtin_lsx_vsrai_b(_1, 1); } - // CHECK-LABEL: @vsrai_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> 
[[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vsrai_h(v8i16 _1) { return __builtin_lsx_vsrai_h(_1, 1); } - // CHECK-LABEL: @vsrai_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vsrai_w(v4i32 _1) { return __builtin_lsx_vsrai_w(_1, 1); } - // CHECK-LABEL: @vsrai_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vsrai_d(v2i64 _1) { return __builtin_lsx_vsrai_d(_1, 1); } - // CHECK-LABEL: @vsrar_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsrar_b(v16i8 _1, v16i8 _2) { - return __builtin_lsx_vsrar_b(_1, _2); - } - // CHECK-LABEL: @vsrar_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsrar_h(v8i16 _1, v8i16 _2) { - return __builtin_lsx_vsrar_h(_1, _2); - } - // CHECK-LABEL: @vsrar_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsrar_w(v4i32 _1, v4i32 _2) { - return __builtin_lsx_vsrar_w(_1, _2); - } - // CHECK-LABEL: @vsrar_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] 
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsrar_d(v2i64 _1, v2i64 _2) { - return __builtin_lsx_vsrar_d(_1, _2); - } - // CHECK-LABEL: @vsrari_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vsrari_b(v16i8 _1) { return __builtin_lsx_vsrari_b(_1, 1); } - // CHECK-LABEL: @vsrari_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vsrari_h(v8i16 _1) { return __builtin_lsx_vsrari_h(_1, 1); } - // CHECK-LABEL: @vsrari_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vsrari_w(v4i32 _1) { return __builtin_lsx_vsrari_w(_1, 1); } - // CHECK-LABEL: @vsrari_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vsrari_d(v2i64 _1) { return __builtin_lsx_vsrari_d(_1, 1); } - // CHECK-LABEL: @vsrl_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsrl_b(_1, _2); } - // CHECK-LABEL: @vsrl_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) 
--// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsrl_h(_1, _2); } - // CHECK-LABEL: @vsrl_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsrl_w(_1, _2); } - // CHECK-LABEL: @vsrl_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsrl_d(_1, _2); } - // CHECK-LABEL: @vsrli_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vsrli_b(v16i8 _1) { return __builtin_lsx_vsrli_b(_1, 1); } - // CHECK-LABEL: @vsrli_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vsrli_h(v8i16 _1) { return __builtin_lsx_vsrli_h(_1, 1); } - // CHECK-LABEL: @vsrli_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vsrli_w(v4i32 _1) { return __builtin_lsx_vsrli_w(_1, 1); } - // CHECK-LABEL: @vsrli_d( - // CHECK-NEXT: 
entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vsrli_d(v2i64 _1) { return __builtin_lsx_vsrli_d(_1, 1); } - // CHECK-LABEL: @vsrlr_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { - return __builtin_lsx_vsrlr_b(_1, _2); - } - // CHECK-LABEL: @vsrlr_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { - return __builtin_lsx_vsrlr_h(_1, _2); - } - // CHECK-LABEL: @vsrlr_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { - return __builtin_lsx_vsrlr_w(_1, _2); - } - // CHECK-LABEL: @vsrlr_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { - return __builtin_lsx_vsrlr_d(_1, _2); - } - // CHECK-LABEL: @vsrlri_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vsrlri_b(v16i8 _1) { return __builtin_lsx_vsrlri_b(_1, 1); } - // CHECK-LABEL: @vsrlri_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vsrlri_h(v8i16 _1) { return __builtin_lsx_vsrlri_h(_1, 1); } - // CHECK-LABEL: @vsrlri_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vsrlri_w(v4i32 _1) { return __builtin_lsx_vsrlri_w(_1, 1); } - // CHECK-LABEL: @vsrlri_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vsrlri_d(v2i64 _1) { return __builtin_lsx_vsrlri_d(_1, 1); } - // CHECK-LABEL: @vbitclr_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { - return __builtin_lsx_vbitclr_b(_1, _2); - } - // CHECK-LABEL: @vbitclr_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { - return __builtin_lsx_vbitclr_h(_1, _2); - } - // CHECK-LABEL: @vbitclr_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast 
i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { - return __builtin_lsx_vbitclr_w(_1, _2); - } - // CHECK-LABEL: @vbitclr_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { - return __builtin_lsx_vbitclr_d(_1, _2); - } - // CHECK-LABEL: @vbitclri_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16u8 vbitclri_b(v16u8 _1) { return __builtin_lsx_vbitclri_b(_1, 1); } - // CHECK-LABEL: @vbitclri_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8u16 vbitclri_h(v8u16 _1) { return __builtin_lsx_vbitclri_h(_1, 1); } - // CHECK-LABEL: @vbitclri_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4u32 vbitclri_w(v4u32 _1) { return __builtin_lsx_vbitclri_w(_1, 1); } - // CHECK-LABEL: @vbitclri_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2u64 vbitclri_d(v2u64 _1) { return __builtin_lsx_vbitclri_d(_1, 1); } - // CHECK-LABEL: @vbitset_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> 
[[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vbitset_b(v16u8 _1, v16u8 _2) { - return __builtin_lsx_vbitset_b(_1, _2); - } - // CHECK-LABEL: @vbitset_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vbitset_h(v8u16 _1, v8u16 _2) { - return __builtin_lsx_vbitset_h(_1, _2); - } - // CHECK-LABEL: @vbitset_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 vbitset_w(v4u32 _1, v4u32 _2) { - return __builtin_lsx_vbitset_w(_1, _2); - } - // CHECK-LABEL: @vbitset_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2u64 vbitset_d(v2u64 _1, v2u64 _2) { - return __builtin_lsx_vbitset_d(_1, _2); - } - // CHECK-LABEL: @vbitseti_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16u8 vbitseti_b(v16u8 _1) { return __builtin_lsx_vbitseti_b(_1, 1); } - // CHECK-LABEL: @vbitseti_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> 
[[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8u16 vbitseti_h(v8u16 _1) { return __builtin_lsx_vbitseti_h(_1, 1); } - // CHECK-LABEL: @vbitseti_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4u32 vbitseti_w(v4u32 _1) { return __builtin_lsx_vbitseti_w(_1, 1); } - // CHECK-LABEL: @vbitseti_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2u64 vbitseti_d(v2u64 _1) { return __builtin_lsx_vbitseti_d(_1, 1); } - // CHECK-LABEL: @vbitrev_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { - return __builtin_lsx_vbitrev_b(_1, _2); - } - // CHECK-LABEL: @vbitrev_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { - return __builtin_lsx_vbitrev_h(_1, _2); - } - // CHECK-LABEL: @vbitrev_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { - return __builtin_lsx_vbitrev_w(_1, _2); - } - // CHECK-LABEL: @vbitrev_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> 
[[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { - return __builtin_lsx_vbitrev_d(_1, _2); - } - // CHECK-LABEL: @vbitrevi_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16u8 vbitrevi_b(v16u8 _1) { return __builtin_lsx_vbitrevi_b(_1, 1); } - // CHECK-LABEL: @vbitrevi_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8u16 vbitrevi_h(v8u16 _1) { return __builtin_lsx_vbitrevi_h(_1, 1); } - // CHECK-LABEL: @vbitrevi_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4u32 vbitrevi_w(v4u32 _1) { return __builtin_lsx_vbitrevi_w(_1, 1); } - // CHECK-LABEL: @vbitrevi_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2u64 vbitrevi_d(v2u64 _1) { return __builtin_lsx_vbitrevi_d(_1, 1); } - // CHECK-LABEL: @vadd_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vadd_b(_1, _2); } - // CHECK-LABEL: @vadd_h( - // CHECK-NEXT: entry: 
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vadd_h(_1, _2); } - // CHECK-LABEL: @vadd_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vadd_w(_1, _2); } - // CHECK-LABEL: @vadd_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_d(_1, _2); } - // CHECK-LABEL: @vaddi_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vaddi_bu(v16i8 _1) { return __builtin_lsx_vaddi_bu(_1, 1); } - // CHECK-LABEL: @vaddi_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vaddi_hu(v8i16 _1) { return __builtin_lsx_vaddi_hu(_1, 1); } - // CHECK-LABEL: @vaddi_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret 
i128 [[TMP2]] - // - v4i32 vaddi_wu(v4i32 _1) { return __builtin_lsx_vaddi_wu(_1, 1); } - // CHECK-LABEL: @vaddi_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vaddi_du(v2i64 _1) { return __builtin_lsx_vaddi_du(_1, 1); } - // CHECK-LABEL: @vsub_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsub_b(_1, _2); } - // CHECK-LABEL: @vsub_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsub_h(_1, _2); } - // CHECK-LABEL: @vsub_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsub_w(_1, _2); } - // CHECK-LABEL: @vsub_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_d(_1, _2); } - // CHECK-LABEL: @vsubi_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vsubi_bu(v16i8 _1) { return __builtin_lsx_vsubi_bu(_1, 1); } - // CHECK-LABEL: @vsubi_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vsubi_hu(v8i16 _1) { return __builtin_lsx_vsubi_hu(_1, 1); } - // CHECK-LABEL: @vsubi_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vsubi_wu(v4i32 _1) { return __builtin_lsx_vsubi_wu(_1, 1); } - // CHECK-LABEL: @vsubi_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vsubi_du(v2i64 _1) { return __builtin_lsx_vsubi_du(_1, 1); } - // CHECK-LABEL: @vmax_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmax_b(_1, _2); } - // CHECK-LABEL: @vmax_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmax_h(_1, _2); } - // CHECK-LABEL: @vmax_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// 
CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmax_w(_1, _2); } - // CHECK-LABEL: @vmax_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmax_d(_1, _2); } - // CHECK-LABEL: @vmaxi_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vmaxi_b(v16i8 _1) { return __builtin_lsx_vmaxi_b(_1, 1); } - // CHECK-LABEL: @vmaxi_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vmaxi_h(v8i16 _1) { return __builtin_lsx_vmaxi_h(_1, 1); } - // CHECK-LABEL: @vmaxi_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vmaxi_w(v4i32 _1) { return __builtin_lsx_vmaxi_w(_1, 1); } - // CHECK-LABEL: @vmaxi_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vmaxi_d(v2i64 _1) { return __builtin_lsx_vmaxi_d(_1, 1); } - // CHECK-LABEL: @vmax_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[_1:%.*]], <16 x i8> 
[[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vmax_bu(v16u8 _1, v16u8 _2) { - return __builtin_lsx_vmax_bu(_1, _2); - } - // CHECK-LABEL: @vmax_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vmax_hu(v8u16 _1, v8u16 _2) { - return __builtin_lsx_vmax_hu(_1, _2); - } - // CHECK-LABEL: @vmax_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 vmax_wu(v4u32 _1, v4u32 _2) { - return __builtin_lsx_vmax_wu(_1, _2); - } - // CHECK-LABEL: @vmax_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2u64 vmax_du(v2u64 _1, v2u64 _2) { - return __builtin_lsx_vmax_du(_1, _2); - } - // CHECK-LABEL: @vmaxi_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16u8 vmaxi_bu(v16u8 _1) { return __builtin_lsx_vmaxi_bu(_1, 1); } - // CHECK-LABEL: @vmaxi_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] 
to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8u16 vmaxi_hu(v8u16 _1) { return __builtin_lsx_vmaxi_hu(_1, 1); } - // CHECK-LABEL: @vmaxi_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4u32 vmaxi_wu(v4u32 _1) { return __builtin_lsx_vmaxi_wu(_1, 1); } - // CHECK-LABEL: @vmaxi_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2u64 vmaxi_du(v2u64 _1) { return __builtin_lsx_vmaxi_du(_1, 1); } - // CHECK-LABEL: @vmin_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmin_b(_1, _2); } - // CHECK-LABEL: @vmin_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmin_h(_1, _2); } - // CHECK-LABEL: @vmin_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmin_w(_1, _2); } - // CHECK-LABEL: @vmin_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmin_d(_1, _2); } - // CHECK-LABEL: @vmini_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vmini_b(v16i8 _1) { return __builtin_lsx_vmini_b(_1, 1); } - // CHECK-LABEL: @vmini_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vmini_h(v8i16 _1) { return __builtin_lsx_vmini_h(_1, 1); } - // CHECK-LABEL: @vmini_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vmini_w(v4i32 _1) { return __builtin_lsx_vmini_w(_1, 1); } - // CHECK-LABEL: @vmini_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vmini_d(v2i64 _1) { return __builtin_lsx_vmini_d(_1, 1); } - // CHECK-LABEL: @vmin_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vmin_bu(v16u8 _1, v16u8 _2) { - return __builtin_lsx_vmin_bu(_1, _2); - } - // CHECK-LABEL: @vmin_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 
x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vmin_hu(v8u16 _1, v8u16 _2) { - return __builtin_lsx_vmin_hu(_1, _2); - } - // CHECK-LABEL: @vmin_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 vmin_wu(v4u32 _1, v4u32 _2) { - return __builtin_lsx_vmin_wu(_1, _2); - } - // CHECK-LABEL: @vmin_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2u64 vmin_du(v2u64 _1, v2u64 _2) { - return __builtin_lsx_vmin_du(_1, _2); - } - // CHECK-LABEL: @vmini_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16u8 vmini_bu(v16u8 _1) { return __builtin_lsx_vmini_bu(_1, 1); } - // CHECK-LABEL: @vmini_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8u16 vmini_hu(v8u16 _1) { return __builtin_lsx_vmini_hu(_1, 1); } - // CHECK-LABEL: @vmini_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4u32 vmini_wu(v4u32 _1) { return __builtin_lsx_vmini_wu(_1, 1); } - // CHECK-LABEL: @vmini_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vmini.du(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2u64 vmini_du(v2u64 _1) { return __builtin_lsx_vmini_du(_1, 1); } - // CHECK-LABEL: @vseq_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vseq_b(_1, _2); } - // CHECK-LABEL: @vseq_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vseq_h(_1, _2); } - // CHECK-LABEL: @vseq_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vseq_w(_1, _2); } - // CHECK-LABEL: @vseq_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vseq_d(_1, _2); } - // CHECK-LABEL: @vseqi_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast 
<16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vseqi_b(v16i8 _1) { return __builtin_lsx_vseqi_b(_1, 1); } - // CHECK-LABEL: @vseqi_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vseqi_h(v8i16 _1) { return __builtin_lsx_vseqi_h(_1, 1); } - // CHECK-LABEL: @vseqi_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vseqi_w(v4i32 _1) { return __builtin_lsx_vseqi_w(_1, 1); } - // CHECK-LABEL: @vseqi_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vseqi_d(v2i64 _1) { return __builtin_lsx_vseqi_d(_1, 1); } - // CHECK-LABEL: @vslti_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vslti_b(v16i8 _1) { return __builtin_lsx_vslti_b(_1, 1); } - // CHECK-LABEL: @vslt_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vslt_b(_1, _2); } - // CHECK-LABEL: @vslt_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = 
bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vslt_h(_1, _2); } - // CHECK-LABEL: @vslt_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vslt_w(_1, _2); } - // CHECK-LABEL: @vslt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vslt_d(_1, _2); } - // CHECK-LABEL: @vslti_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vslti_h(v8i16 _1) { return __builtin_lsx_vslti_h(_1, 1); } - // CHECK-LABEL: @vslti_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vslti_w(v4i32 _1) { return __builtin_lsx_vslti_w(_1, 1); } - // CHECK-LABEL: @vslti_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vslti_d(v2i64 _1) { return __builtin_lsx_vslti_d(_1, 1); } - // CHECK-LABEL: @vslt_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vslt_bu(v16u8 _1, v16u8 _2) { - return __builtin_lsx_vslt_bu(_1, _2); - } - // CHECK-LABEL: @vslt_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vslt_hu(v8u16 _1, v8u16 _2) { - return __builtin_lsx_vslt_hu(_1, _2); - } - // CHECK-LABEL: @vslt_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vslt_wu(v4u32 _1, v4u32 _2) { - return __builtin_lsx_vslt_wu(_1, _2); - } - // CHECK-LABEL: @vslt_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vslt_du(v2u64 _1, v2u64 _2) { - return __builtin_lsx_vslt_du(_1, _2); - } - // CHECK-LABEL: @vslti_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vslti_bu(v16u8 _1) { return __builtin_lsx_vslti_bu(_1, 1); } - // CHECK-LABEL: @vslti_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vslti_hu(v8u16 _1) { return __builtin_lsx_vslti_hu(_1, 1); } - // CHECK-LABEL: @vslti_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vslti.wu(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vslti_wu(v4u32 _1) { return __builtin_lsx_vslti_wu(_1, 1); } - // CHECK-LABEL: @vslti_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vslti_du(v2u64 _1) { return __builtin_lsx_vslti_du(_1, 1); } - // CHECK-LABEL: @vsle_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsle_b(_1, _2); } - // CHECK-LABEL: @vsle_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsle_h(_1, _2); } - // CHECK-LABEL: @vsle_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsle_w(_1, _2); } - // CHECK-LABEL: @vsle_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x 
i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsle_d(_1, _2); } - // CHECK-LABEL: @vslei_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vslei_b(v16i8 _1) { return __builtin_lsx_vslei_b(_1, 1); } - // CHECK-LABEL: @vslei_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vslei_h(v8i16 _1) { return __builtin_lsx_vslei_h(_1, 1); } - // CHECK-LABEL: @vslei_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vslei_w(v4i32 _1) { return __builtin_lsx_vslei_w(_1, 1); } - // CHECK-LABEL: @vslei_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vslei_d(v2i64 _1) { return __builtin_lsx_vslei_d(_1, 1); } - // CHECK-LABEL: @vsle_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vsle_bu(v16u8 _1, v16u8 _2) { - return __builtin_lsx_vsle_bu(_1, _2); - } - // CHECK-LABEL: @vsle_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsle_hu(v8u16 _1, v8u16 _2) { - return __builtin_lsx_vsle_hu(_1, _2); - } - // CHECK-LABEL: @vsle_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsle_wu(v4u32 _1, v4u32 _2) { - return __builtin_lsx_vsle_wu(_1, _2); - } - // CHECK-LABEL: @vsle_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsle_du(v2u64 _1, v2u64 _2) { - return __builtin_lsx_vsle_du(_1, _2); - } - // CHECK-LABEL: @vslei_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vslei_bu(v16u8 _1) { return __builtin_lsx_vslei_bu(_1, 1); } - // CHECK-LABEL: @vslei_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vslei_hu(v8u16 _1) { return __builtin_lsx_vslei_hu(_1, 1); } - // CHECK-LABEL: @vslei_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vslei_wu(v4u32 _1) { return __builtin_lsx_vslei_wu(_1, 1); } - // CHECK-LABEL: @vslei_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vslei.du(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vslei_du(v2u64 _1) { return __builtin_lsx_vslei_du(_1, 1); } - // CHECK-LABEL: @vsat_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vsat_b(v16i8 _1) { return __builtin_lsx_vsat_b(_1, 1); } - // CHECK-LABEL: @vsat_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vsat_h(v8i16 _1) { return __builtin_lsx_vsat_h(_1, 1); } - // CHECK-LABEL: @vsat_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vsat_w(v4i32 _1) { return __builtin_lsx_vsat_w(_1, 1); } - // CHECK-LABEL: @vsat_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vsat_d(v2i64 _1) { return __builtin_lsx_vsat_d(_1, 1); } - // CHECK-LABEL: @vsat_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16u8 vsat_bu(v16u8 _1) { return __builtin_lsx_vsat_bu(_1, 1); } - // CHECK-LABEL: @vsat_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8u16 vsat_hu(v8u16 _1) { return __builtin_lsx_vsat_hu(_1, 
1); } - // CHECK-LABEL: @vsat_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4u32 vsat_wu(v4u32 _1) { return __builtin_lsx_vsat_wu(_1, 1); } - // CHECK-LABEL: @vsat_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2u64 vsat_du(v2u64 _1) { return __builtin_lsx_vsat_du(_1, 1); } - // CHECK-LABEL: @vadda_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vadda_b(v16i8 _1, v16i8 _2) { - return __builtin_lsx_vadda_b(_1, _2); - } - // CHECK-LABEL: @vadda_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vadda_h(v8i16 _1, v8i16 _2) { - return __builtin_lsx_vadda_h(_1, _2); - } - // CHECK-LABEL: @vadda_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vadda_w(v4i32 _1, v4i32 _2) { - return __builtin_lsx_vadda_w(_1, _2); - } - // CHECK-LABEL: @vadda_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = 
tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vadda_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vadda_d(_1, _2);
- }
- // CHECK-LABEL: @vsadd_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vsadd_b(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vsadd_b(_1, _2);
- }
- // CHECK-LABEL: @vsadd_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vsadd_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vsadd_h(_1, _2);
- }
- // CHECK-LABEL: @vsadd_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vsadd_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vsadd_w(_1, _2);
- }
- // CHECK-LABEL: @vsadd_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vsadd_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vsadd_d(_1, _2);
- }
- // CHECK-LABEL: @vsadd_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vsadd_bu(v16u8 _1, v16u8 _2) {
- return __builtin_lsx_vsadd_bu(_1, _2);
- }
- // CHECK-LABEL: @vsadd_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vsadd_hu(v8u16 _1, v8u16 _2) {
- return __builtin_lsx_vsadd_hu(_1, _2);
- }
- // CHECK-LABEL: @vsadd_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vsadd_wu(v4u32 _1, v4u32 _2) {
- return __builtin_lsx_vsadd_wu(_1, _2);
- }
- // CHECK-LABEL: @vsadd_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vsadd_du(v2u64 _1, v2u64 _2) {
- return __builtin_lsx_vsadd_du(_1, _2);
- }
- // CHECK-LABEL: @vavg_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vavg_b(_1, _2); }
- // CHECK-LABEL: @vavg_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vavg_h(_1, _2); }
- // CHECK-LABEL: @vavg_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vavg_w(_1, _2); }
- // CHECK-LABEL: @vavg_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vavg_d(_1, _2); }
- // CHECK-LABEL: @vavg_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vavg_bu(v16u8 _1, v16u8 _2) {
- return __builtin_lsx_vavg_bu(_1, _2);
- }
- // CHECK-LABEL: @vavg_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vavg_hu(v8u16 _1, v8u16 _2) {
- return __builtin_lsx_vavg_hu(_1, _2);
- }
- // CHECK-LABEL: @vavg_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vavg_wu(v4u32 _1, v4u32 _2) {
- return __builtin_lsx_vavg_wu(_1, _2);
- }
- // CHECK-LABEL: @vavg_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vavg_du(v2u64 _1, v2u64 _2) {
- return __builtin_lsx_vavg_du(_1, _2);
- }
- // CHECK-LABEL: @vavgr_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vavgr_b(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vavgr_b(_1, _2);
- }
- // CHECK-LABEL: @vavgr_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vavgr_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vavgr_h(_1, _2);
- }
- // CHECK-LABEL: @vavgr_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vavgr_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vavgr_w(_1, _2);
- }
- // CHECK-LABEL: @vavgr_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vavgr_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vavgr_d(_1, _2);
- }
- // CHECK-LABEL: @vavgr_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vavgr_bu(v16u8 _1, v16u8 _2) {
- return __builtin_lsx_vavgr_bu(_1, _2);
- }
- // CHECK-LABEL: @vavgr_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vavgr_hu(v8u16 _1, v8u16 _2) {
- return __builtin_lsx_vavgr_hu(_1, _2);
- }
- // CHECK-LABEL: @vavgr_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vavgr_wu(v4u32 _1, v4u32 _2) {
- return __builtin_lsx_vavgr_wu(_1, _2);
- }
- // CHECK-LABEL: @vavgr_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vavgr_du(v2u64 _1, v2u64 _2) {
- return __builtin_lsx_vavgr_du(_1, _2);
- }
- // CHECK-LABEL: @vssub_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vssub_b(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vssub_b(_1, _2);
- }
- // CHECK-LABEL: @vssub_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vssub_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vssub_h(_1, _2);
- }
- // CHECK-LABEL: @vssub_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vssub_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vssub_w(_1, _2);
- }
- // CHECK-LABEL: @vssub_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vssub_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vssub_d(_1, _2);
- }
- // CHECK-LABEL: @vssub_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vssub_bu(v16u8 _1, v16u8 _2) {
- return __builtin_lsx_vssub_bu(_1, _2);
- }
- // CHECK-LABEL: @vssub_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vssub_hu(v8u16 _1, v8u16 _2) {
- return __builtin_lsx_vssub_hu(_1, _2);
- }
- // CHECK-LABEL: @vssub_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vssub_wu(v4u32 _1, v4u32 _2) {
- return __builtin_lsx_vssub_wu(_1, _2);
- }
- // CHECK-LABEL: @vssub_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vssub_du(v2u64 _1, v2u64 _2) {
- return __builtin_lsx_vssub_du(_1, _2);
- }
- // CHECK-LABEL: @vabsd_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vabsd_b(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vabsd_b(_1, _2);
- }
- // CHECK-LABEL: @vabsd_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vabsd_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vabsd_h(_1, _2);
- }
- // CHECK-LABEL: @vabsd_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vabsd_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vabsd_w(_1, _2);
- }
- // CHECK-LABEL: @vabsd_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vabsd_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vabsd_d(_1, _2);
- }
- // CHECK-LABEL: @vabsd_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vabsd_bu(v16u8 _1, v16u8 _2) {
- return __builtin_lsx_vabsd_bu(_1, _2);
- }
- // CHECK-LABEL: @vabsd_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vabsd_hu(v8u16 _1, v8u16 _2) {
- return __builtin_lsx_vabsd_hu(_1, _2);
- }
- // CHECK-LABEL: @vabsd_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vabsd_wu(v4u32 _1, v4u32 _2) {
- return __builtin_lsx_vabsd_wu(_1, _2);
- }
- // CHECK-LABEL: @vabsd_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vabsd_du(v2u64 _1, v2u64 _2) {
- return __builtin_lsx_vabsd_du(_1, _2);
- }
- // CHECK-LABEL: @vmul_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmul_b(_1, _2); }
- // CHECK-LABEL: @vmul_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmul_h(_1, _2); }
- // CHECK-LABEL: @vmul_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmul_w(_1, _2); }
- // CHECK-LABEL: @vmul_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmul_d(_1, _2); }
- // CHECK-LABEL: @vmadd_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) {
- return __builtin_lsx_vmadd_b(_1, _2, _3);
- }
- // CHECK-LABEL: @vmadd_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) {
- return __builtin_lsx_vmadd_h(_1, _2, _3);
- }
- // CHECK-LABEL: @vmadd_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) {
- return __builtin_lsx_vmadd_w(_1, _2, _3);
- }
- // CHECK-LABEL: @vmadd_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) {
- return __builtin_lsx_vmadd_d(_1, _2, _3);
- }
- // CHECK-LABEL: @vmsub_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) {
- return __builtin_lsx_vmsub_b(_1, _2, _3);
- }
- // CHECK-LABEL: @vmsub_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) {
- return __builtin_lsx_vmsub_h(_1, _2, _3);
- }
- // CHECK-LABEL: @vmsub_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) {
- return __builtin_lsx_vmsub_w(_1, _2, _3);
- }
- // CHECK-LABEL: @vmsub_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) {
- return __builtin_lsx_vmsub_d(_1, _2, _3);
- }
- // CHECK-LABEL: @vdiv_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vdiv_b(_1, _2); }
- // CHECK-LABEL: @vdiv_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vdiv_h(_1, _2); }
- // CHECK-LABEL: @vdiv_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vdiv_w(_1, _2); }
- // CHECK-LABEL: @vdiv_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vdiv_d(_1, _2); }
- // CHECK-LABEL: @vdiv_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vdiv_bu(v16u8 _1, v16u8 _2) {
- return __builtin_lsx_vdiv_bu(_1, _2);
- }
- // CHECK-LABEL: @vdiv_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vdiv_hu(v8u16 _1, v8u16 _2) {
- return __builtin_lsx_vdiv_hu(_1, _2);
- }
- // CHECK-LABEL: @vdiv_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vdiv_wu(v4u32 _1, v4u32 _2) {
- return __builtin_lsx_vdiv_wu(_1, _2);
- }
- // CHECK-LABEL: @vdiv_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vdiv_du(v2u64 _1, v2u64 _2) {
- return __builtin_lsx_vdiv_du(_1, _2);
- }
- // CHECK-LABEL: @vhaddw_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vhaddw_h_b(_1, _2);
- }
- // CHECK-LABEL: @vhaddw_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vhaddw_w_h(_1, _2);
- }
- // CHECK-LABEL: @vhaddw_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vhaddw_d_w(_1, _2);
- }
- // CHECK-LABEL: @vhaddw_hu_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) {
- return __builtin_lsx_vhaddw_hu_bu(_1, _2);
- }
- // CHECK-LABEL: @vhaddw_wu_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) {
- return __builtin_lsx_vhaddw_wu_hu(_1, _2);
- }
- // CHECK-LABEL: @vhaddw_du_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) {
- return __builtin_lsx_vhaddw_du_wu(_1, _2);
- }
- // CHECK-LABEL: @vhsubw_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vhsubw_h_b(_1, _2);
- }
- // CHECK-LABEL: @vhsubw_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vhsubw_w_h(_1, _2);
- }
- // CHECK-LABEL: @vhsubw_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vhsubw_d_w(_1, _2);
- }
- // CHECK-LABEL: @vhsubw_hu_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) {
- return __builtin_lsx_vhsubw_hu_bu(_1, _2);
- }
- // CHECK-LABEL: @vhsubw_wu_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) {
- return __builtin_lsx_vhsubw_wu_hu(_1, _2);
- }
- // CHECK-LABEL: @vhsubw_du_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) {
- return __builtin_lsx_vhsubw_du_wu(_1, _2);
- }
- // CHECK-LABEL: @vmod_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmod_b(_1, _2); }
- // CHECK-LABEL: @vmod_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmod_h(_1, _2); }
- // CHECK-LABEL: @vmod_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmod_w(_1, _2); }
- // CHECK-LABEL: @vmod_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmod_d(_1, _2); }
- // CHECK-LABEL: @vmod_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vmod_bu(v16u8 _1, v16u8 _2) {
- return __builtin_lsx_vmod_bu(_1, _2);
- }
- // CHECK-LABEL: @vmod_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vmod_hu(v8u16 _1, v8u16 _2) {
- return __builtin_lsx_vmod_hu(_1, _2);
- }
- // CHECK-LABEL: @vmod_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vmod_wu(v4u32 _1, v4u32 _2) {
- return __builtin_lsx_vmod_wu(_1, _2);
- }
- // CHECK-LABEL: @vmod_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vmod_du(v2u64 _1, v2u64 _2) {
- return __builtin_lsx_vmod_du(_1, _2);
- }
- // CHECK-LABEL: @vreplve_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[_1:%.*]], i32 [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[TMP0]], i32 [[_2:%.*]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vreplve_b(v16i8 _1, int _2) {
- return __builtin_lsx_vreplve_b(_1, _2);
- }
- // CHECK-LABEL: @vreplve_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[_1:%.*]], i32 [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[TMP0]], i32 [[_2:%.*]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vreplve_h(v8i16 _1, int _2) {
- return __builtin_lsx_vreplve_h(_1, _2);
- }
- // CHECK-LABEL: @vreplve_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[_1:%.*]], i32 [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[TMP0]], i32 [[_2:%.*]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vreplve_w(v4i32 _1, int _2) {
- return __builtin_lsx_vreplve_w(_1, _2);
- }
- // CHECK-LABEL: @vreplve_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[_1:%.*]], i32 [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[TMP0]], i32 [[_2:%.*]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vreplve_d(v2i64 _1, int _2) {
- return __builtin_lsx_vreplve_d(_1, _2);
- }
- // CHECK-LABEL: @vreplvei_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vreplvei_b(v16i8 _1) { return __builtin_lsx_vreplvei_b(_1, 1); }
- // CHECK-LABEL: @vreplvei_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vreplvei_h(v8i16 _1) { return __builtin_lsx_vreplvei_h(_1, 1); }
- // CHECK-LABEL: @vreplvei_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vreplvei_w(v4i32 _1) { return __builtin_lsx_vreplvei_w(_1, 1); }
- // CHECK-LABEL: @vreplvei_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vreplvei_d(v2i64 _1) { return __builtin_lsx_vreplvei_d(_1, 1); }
- // CHECK-LABEL: @vpickev_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vpickev_b(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vpickev_b(_1, _2);
- }
- // CHECK-LABEL: @vpickev_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vpickev_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vpickev_h(_1, _2);
- }
- // CHECK-LABEL: @vpickev_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vpickev_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vpickev_w(_1, _2);
- }
- // CHECK-LABEL: @vpickev_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vpickev_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vpickev_d(_1, _2);
- }
- // CHECK-LABEL: @vpickod_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vpickod_b(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vpickod_b(_1, _2);
- }
- // CHECK-LABEL: @vpickod_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vpickod_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vpickod_h(_1, _2);
- }
- // CHECK-LABEL: @vpickod_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vpickod_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vpickod_w(_1, _2);
- }
- // CHECK-LABEL: @vpickod_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vpickod_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vpickod_d(_1, _2);
- }
- // CHECK-LABEL: @vilvh_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vilvh_b(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vilvh_b(_1, _2);
- }
- // CHECK-LABEL: @vilvh_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vilvh_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vilvh_h(_1, _2);
- }
- // CHECK-LABEL: @vilvh_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vilvh_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vilvh_w(_1, _2);
- }
- // CHECK-LABEL: @vilvh_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vilvh_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vilvh_d(_1, _2);
- }
- // CHECK-LABEL: @vilvl_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vilvl_b(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vilvl_b(_1, _2);
- }
- // CHECK-LABEL: @vilvl_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vilvl_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vilvl_h(_1, _2);
- }
- // CHECK-LABEL: @vilvl_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vilvl_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vilvl_w(_1, _2);
- }
- // CHECK-LABEL: @vilvl_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vilvl_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vilvl_d(_1, _2);
- }
- // CHECK-LABEL: @vpackev_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vpackev_b(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vpackev_b(_1, _2);
- }
- // CHECK-LABEL: @vpackev_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vpackev_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vpackev_h(_1, _2);
- }
- // CHECK-LABEL: @vpackev_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vpackev_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vpackev_w(_1, _2);
- }
- // CHECK-LABEL: @vpackev_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vpackev_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vpackev_d(_1, _2);
- }
- // CHECK-LABEL: @vpackod_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT:
[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vpackod_b(v16i8 _1, v16i8 _2) { - return __builtin_lsx_vpackod_b(_1, _2); - } - // CHECK-LABEL: @vpackod_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vpackod_h(v8i16 _1, v8i16 _2) { - return __builtin_lsx_vpackod_h(_1, _2); - } - // CHECK-LABEL: @vpackod_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vpackod_w(v4i32 _1, v4i32 _2) { - return __builtin_lsx_vpackod_w(_1, _2); - } - // CHECK-LABEL: @vpackod_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vpackod_d(v2i64 _1, v2i64 _2) { - return __builtin_lsx_vpackod_d(_1, _2); - } - // CHECK-LABEL: @vshuf_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { - return __builtin_lsx_vshuf_h(_1, _2, _3); - } - // CHECK-LABEL: @vshuf_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call 
<4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { - return __builtin_lsx_vshuf_w(_1, _2, _3); - } - // CHECK-LABEL: @vshuf_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { - return __builtin_lsx_vshuf_d(_1, _2, _3); - } - // CHECK-LABEL: @vand_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vand_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vand_v(_1, _2); } - // CHECK-LABEL: @vandi_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16u8 vandi_b(v16u8 _1) { return __builtin_lsx_vandi_b(_1, 1); } - // CHECK-LABEL: @vor_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vor_v(_1, _2); } - // CHECK-LABEL: @vori_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16u8 vori_b(v16u8 _1) { return __builtin_lsx_vori_b(_1, 1); } - 
// CHECK-LABEL: @vnor_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vnor_v(_1, _2); } - // CHECK-LABEL: @vnori_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16u8 vnori_b(v16u8 _1) { return __builtin_lsx_vnori_b(_1, 1); } - // CHECK-LABEL: @vxor_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vxor_v(_1, _2); } - // CHECK-LABEL: @vxori_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16u8 vxori_b(v16u8 _1) { return __builtin_lsx_vxori_b(_1, 1); } - // CHECK-LABEL: @vbitsel_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { - return __builtin_lsx_vbitsel_v(_1, _2, _3); - } - // CHECK-LABEL: @vbitseli_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { - return __builtin_lsx_vbitseli_b(_1, _2, 1); - } - // CHECK-LABEL: @vshuf4i_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vshuf4i_b(v16i8 _1) { return __builtin_lsx_vshuf4i_b(_1, 1); } - // CHECK-LABEL: @vshuf4i_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vshuf4i_h(v8i16 _1) { return __builtin_lsx_vshuf4i_h(_1, 1); } - // CHECK-LABEL: @vshuf4i_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vshuf4i_w(v4i32 _1) { return __builtin_lsx_vshuf4i_w(_1, 1); } - // CHECK-LABEL: @vreplgr2vr_b( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 -+// CHECK-NEXT: ret i128 [[TMP1]] - // - v16i8 vreplgr2vr_b(int _1) { return __builtin_lsx_vreplgr2vr_b(_1); } - // CHECK-LABEL: @vreplgr2vr_h( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 -+// CHECK-NEXT: ret i128 [[TMP1]] - // - v8i16 vreplgr2vr_h(int _1) { return __builtin_lsx_vreplgr2vr_h(_1); } - // CHECK-LABEL: @vreplgr2vr_w( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 -+// CHECK-NEXT: ret i128 [[TMP1]] - // - v4i32 vreplgr2vr_w(int _1) { return __builtin_lsx_vreplgr2vr_w(_1); } - // CHECK-LABEL: @vreplgr2vr_d( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 -+// CHECK-NEXT: 
ret i128 [[TMP1]] - // - v2i64 vreplgr2vr_d(long _1) { return __builtin_lsx_vreplgr2vr_d(_1); } - // CHECK-LABEL: @vpcnt_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vpcnt_b(v16i8 _1) { return __builtin_lsx_vpcnt_b(_1); } - // CHECK-LABEL: @vpcnt_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vpcnt_h(v8i16 _1) { return __builtin_lsx_vpcnt_h(_1); } - // CHECK-LABEL: @vpcnt_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vpcnt_w(v4i32 _1) { return __builtin_lsx_vpcnt_w(_1); } - // CHECK-LABEL: @vpcnt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vpcnt_d(v2i64 _1) { return __builtin_lsx_vpcnt_d(_1); } - // CHECK-LABEL: @vclo_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vclo_b(v16i8 _1) { return __builtin_lsx_vclo_b(_1); } - // CHECK-LABEL: @vclo_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vclo_h(v8i16 _1) { return __builtin_lsx_vclo_h(_1); } - // CHECK-LABEL: @vclo_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] 
= bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vclo_w(v4i32 _1) { return __builtin_lsx_vclo_w(_1); } - // CHECK-LABEL: @vclo_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vclo_d(v2i64 _1) { return __builtin_lsx_vclo_d(_1); } - // CHECK-LABEL: @vclz_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vclz_b(v16i8 _1) { return __builtin_lsx_vclz_b(_1); } - // CHECK-LABEL: @vclz_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vclz_h(v8i16 _1) { return __builtin_lsx_vclz_h(_1); } - // CHECK-LABEL: @vclz_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vclz_w(v4i32 _1) { return __builtin_lsx_vclz_w(_1); } - // CHECK-LABEL: @vclz_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vclz_d(v2i64 _1) { return __builtin_lsx_vclz_d(_1); } - // CHECK-LABEL: @vpickve2gr_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int vpickve2gr_b(v16i8 _1) { return __builtin_lsx_vpickve2gr_b(_1, 1); } - // CHECK-LABEL: @vpickve2gr_h( - // CHECK-NEXT: entry: --// 
CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int vpickve2gr_h(v8i16 _1) { return __builtin_lsx_vpickve2gr_h(_1, 1); } - // CHECK-LABEL: @vpickve2gr_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int vpickve2gr_w(v4i32 _1) { return __builtin_lsx_vpickve2gr_w(_1, 1); } - // CHECK-LABEL: @vpickve2gr_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret i64 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: ret i64 [[TMP1]] - // - long vpickve2gr_d(v2i64 _1) { return __builtin_lsx_vpickve2gr_d(_1, 1); } - // CHECK-LABEL: @vpickve2gr_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[TMP0]], i32 1) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - unsigned int vpickve2gr_bu(v16i8 _1) { - return __builtin_lsx_vpickve2gr_bu(_1, 1); - } - // CHECK-LABEL: @vpickve2gr_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[TMP0]], i32 1) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - unsigned int vpickve2gr_hu(v8i16 _1) { - return __builtin_lsx_vpickve2gr_hu(_1, 1); - } - // CHECK-LABEL: @vpickve2gr_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[TMP0]], i32 1) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - unsigned int vpickve2gr_wu(v4i32 _1) { - return __builtin_lsx_vpickve2gr_wu(_1, 1); - } - // CHECK-LABEL: @vpickve2gr_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[_1:%.*]], i32 1) --// CHECK-NEXT: ret i64 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[TMP0]], i32 1) -+// CHECK-NEXT: ret i64 [[TMP1]] - // - unsigned long int vpickve2gr_du(v2i64 _1) { - return __builtin_lsx_vpickve2gr_du(_1, 1); - } - // CHECK-LABEL: @vinsgr2vr_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[_1:%.*]], i32 1, i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[TMP0]], i32 1, i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v16i8 vinsgr2vr_b(v16i8 _1) { - return __builtin_lsx_vinsgr2vr_b(_1, 1, 1); - } - // CHECK-LABEL: @vinsgr2vr_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[_1:%.*]], i32 1, i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[TMP0]], i32 1, i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v8i16 vinsgr2vr_h(v8i16 _1) { - return __builtin_lsx_vinsgr2vr_h(_1, 1, 1); - } - // CHECK-LABEL: @vinsgr2vr_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[_1:%.*]], i32 1, i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[TMP0]], i32 1, i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vinsgr2vr_w(v4i32 _1) { - return __builtin_lsx_vinsgr2vr_w(_1, 1, 1); - } - // CHECK-LABEL: @vinsgr2vr_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[_1:%.*]], i64 1, i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[TMP0]], i64 1, i32 1) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vinsgr2vr_d(v2i64 _1) { - return __builtin_lsx_vinsgr2vr_d(_1, 1, 1); - } - // CHECK-LABEL: @vfadd_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4f32 vfadd_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfadd_s(_1, _2); - } - // CHECK-LABEL: @vfadd_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2f64 vfadd_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfadd_d(_1, _2); - } - // CHECK-LABEL: @vfsub_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4f32 vfsub_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfsub_s(_1, _2); - } - // CHECK-LABEL: @vfsub_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2f64 vfsub_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfsub_d(_1, _2); - } - // CHECK-LABEL: @vfmul_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4f32 vfmul_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfmul_s(_1, _2); - } - // CHECK-LABEL: @vfmul_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2f64 vfmul_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfmul_d(_1, _2); - } - // CHECK-LABEL: @vfdiv_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to 
i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfdiv_s(_1, _2); - } - // CHECK-LABEL: @vfdiv_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfdiv_d(_1, _2); - } - // CHECK-LABEL: @vfcvt_h_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcvt_h_s(_1, _2); - } - // CHECK-LABEL: @vfcvt_s_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcvt_s_d(_1, _2); - } - // CHECK-LABEL: @vfmin_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4f32 vfmin_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfmin_s(_1, _2); - } - // CHECK-LABEL: @vfmin_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 
[[TMP3]] - // - v2f64 vfmin_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfmin_d(_1, _2); - } - // CHECK-LABEL: @vfmina_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4f32 vfmina_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfmina_s(_1, _2); - } - // CHECK-LABEL: @vfmina_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2f64 vfmina_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfmina_d(_1, _2); - } - // CHECK-LABEL: @vfmax_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4f32 vfmax_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfmax_s(_1, _2); - } - // CHECK-LABEL: @vfmax_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2f64 vfmax_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfmax_d(_1, _2); - } - // CHECK-LABEL: @vfmaxa_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4f32 vfmaxa_s(v4f32 
_1, v4f32 _2) { - return __builtin_lsx_vfmaxa_s(_1, _2); - } - // CHECK-LABEL: @vfmaxa_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfmaxa_d(_1, _2); - } - // CHECK-LABEL: @vfclass_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vfclass_s(v4f32 _1) { return __builtin_lsx_vfclass_s(_1); } - // CHECK-LABEL: @vfclass_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vfclass_d(v2f64 _1) { return __builtin_lsx_vfclass_d(_1); } - // CHECK-LABEL: @vfsqrt_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4f32 vfsqrt_s(v4f32 _1) { return __builtin_lsx_vfsqrt_s(_1); } - // CHECK-LABEL: @vfsqrt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vfsqrt_d(v2f64 _1) { return __builtin_lsx_vfsqrt_d(_1); } - // CHECK-LABEL: @vfrecip_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4f32 vfrecip_s(v4f32 _1) { return 
__builtin_lsx_vfrecip_s(_1); } - // CHECK-LABEL: @vfrecip_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vfrecip_d(v2f64 _1) { return __builtin_lsx_vfrecip_d(_1); } - // CHECK-LABEL: @vfrint_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4f32 vfrint_s(v4f32 _1) { return __builtin_lsx_vfrint_s(_1); } - // CHECK-LABEL: @vfrint_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vfrint_d(v2f64 _1) { return __builtin_lsx_vfrint_d(_1); } - // CHECK-LABEL: @vfrsqrt_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4f32 vfrsqrt_s(v4f32 _1) { return __builtin_lsx_vfrsqrt_s(_1); } - // CHECK-LABEL: @vfrsqrt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vfrsqrt_d(v2f64 _1) { return __builtin_lsx_vfrsqrt_d(_1); } - // CHECK-LABEL: @vflogb_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4f32 vflogb_s(v4f32 _1) { return __builtin_lsx_vflogb_s(_1); } - // CHECK-LABEL: @vflogb_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> 
@llvm.loongarch.lsx.vflogb.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vflogb_d(v2f64 _1) { return __builtin_lsx_vflogb_d(_1); } - // CHECK-LABEL: @vfcvth_s_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4f32 vfcvth_s_h(v8i16 _1) { return __builtin_lsx_vfcvth_s_h(_1); } - // CHECK-LABEL: @vfcvth_d_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vfcvth_d_s(v4f32 _1) { return __builtin_lsx_vfcvth_d_s(_1); } - // CHECK-LABEL: @vfcvtl_s_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x float> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4f32 vfcvtl_s_h(v8i16 _1) { return __builtin_lsx_vfcvtl_s_h(_1); } - // CHECK-LABEL: @vfcvtl_d_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vfcvtl_d_s(v4f32 _1) { return __builtin_lsx_vfcvtl_d_s(_1); } - // CHECK-LABEL: @vftint_w_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vftint_w_s(v4f32 _1) { return __builtin_lsx_vftint_w_s(_1); } - // CHECK-LABEL: @vftint_l_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vftint_l_d(v2f64 _1) { return __builtin_lsx_vftint_l_d(_1); }
- // CHECK-LABEL: @vftint_wu_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4u32 vftint_wu_s(v4f32 _1) { return __builtin_lsx_vftint_wu_s(_1); }
- // CHECK-LABEL: @vftint_lu_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2u64 vftint_lu_d(v2f64 _1) { return __builtin_lsx_vftint_lu_d(_1); }
- // CHECK-LABEL: @vftintrz_w_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vftintrz_w_s(v4f32 _1) { return __builtin_lsx_vftintrz_w_s(_1); }
- // CHECK-LABEL: @vftintrz_l_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vftintrz_l_d(v2f64 _1) { return __builtin_lsx_vftintrz_l_d(_1); }
- // CHECK-LABEL: @vftintrz_wu_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4u32 vftintrz_wu_s(v4f32 _1) { return __builtin_lsx_vftintrz_wu_s(_1); }
- // CHECK-LABEL: @vftintrz_lu_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2u64 vftintrz_lu_d(v2f64 _1) { return __builtin_lsx_vftintrz_lu_d(_1); }
- // CHECK-LABEL: @vffint_s_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x float> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4f32 vffint_s_w(v4i32 _1) { return __builtin_lsx_vffint_s_w(_1); }
- // CHECK-LABEL: @vffint_d_l(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x double> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2f64 vffint_d_l(v2i64 _1) { return __builtin_lsx_vffint_d_l(_1); }
- // CHECK-LABEL: @vffint_s_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x float> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4f32 vffint_s_wu(v4u32 _1) { return __builtin_lsx_vffint_s_wu(_1); }
- // CHECK-LABEL: @vffint_d_lu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x double> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2f64 vffint_d_lu(v2u64 _1) { return __builtin_lsx_vffint_d_lu(_1); }
- // CHECK-LABEL: @vandn_v(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vandn_v(v16u8 _1, v16u8 _2) {
- return __builtin_lsx_vandn_v(_1, _2);
- }
- // CHECK-LABEL: @vneg_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vneg_b(v16i8 _1) { return __builtin_lsx_vneg_b(_1); }
- // CHECK-LABEL: @vneg_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vneg_h(v8i16 _1) { return __builtin_lsx_vneg_h(_1); }
- // CHECK-LABEL: @vneg_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vneg_w(v4i32 _1) { return __builtin_lsx_vneg_w(_1); }
- // CHECK-LABEL: @vneg_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vneg_d(v2i64 _1) { return __builtin_lsx_vneg_d(_1); }
- // CHECK-LABEL: @vmuh_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmuh_b(_1, _2); }
- // CHECK-LABEL: @vmuh_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmuh_h(_1, _2); }
- // CHECK-LABEL: @vmuh_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmuh_w(_1, _2); }
- // CHECK-LABEL: @vmuh_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmuh_d(_1, _2); }
- // CHECK-LABEL: @vmuh_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vmuh_bu(v16u8 _1, v16u8 _2) {
- return __builtin_lsx_vmuh_bu(_1, _2);
- }
- // CHECK-LABEL: @vmuh_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vmuh_hu(v8u16 _1, v8u16 _2) {
- return __builtin_lsx_vmuh_hu(_1, _2);
- }
- // CHECK-LABEL: @vmuh_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vmuh_wu(v4u32 _1, v4u32 _2) {
- return __builtin_lsx_vmuh_wu(_1, _2);
- }
- // CHECK-LABEL: @vmuh_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vmuh_du(v2u64 _1, v2u64 _2) {
- return __builtin_lsx_vmuh_du(_1, _2);
- }
- // CHECK-LABEL: @vsllwil_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vsllwil_h_b(v16i8 _1) { return __builtin_lsx_vsllwil_h_b(_1, 1); }
- // CHECK-LABEL: @vsllwil_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vsllwil_w_h(v8i16 _1) { return __builtin_lsx_vsllwil_w_h(_1, 1); }
- // CHECK-LABEL: @vsllwil_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vsllwil_d_w(v4i32 _1) { return __builtin_lsx_vsllwil_d_w(_1, 1); }
- // CHECK-LABEL: @vsllwil_hu_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8u16 vsllwil_hu_bu(v16u8 _1) {
- return __builtin_lsx_vsllwil_hu_bu(_1, 1);
- }
- // CHECK-LABEL: @vsllwil_wu_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4u32 vsllwil_wu_hu(v8u16 _1) {
- return __builtin_lsx_vsllwil_wu_hu(_1, 1);
- }
- // CHECK-LABEL: @vsllwil_du_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2u64 vsllwil_du_wu(v4u32 _1) {
- return __builtin_lsx_vsllwil_du_wu(_1, 1);
- }
- // CHECK-LABEL: @vsran_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vsran_b_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vsran_b_h(_1, _2);
- }
- // CHECK-LABEL: @vsran_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vsran_h_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vsran_h_w(_1, _2);
- }
- // CHECK-LABEL: @vsran_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vsran_w_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vsran_w_d(_1, _2);
- }
- // CHECK-LABEL: @vssran_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vssran_b_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vssran_b_h(_1, _2);
- }
- // CHECK-LABEL: @vssran_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vssran_h_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vssran_h_w(_1, _2);
- }
- // CHECK-LABEL: @vssran_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vssran_w_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vssran_w_d(_1, _2);
- }
- // CHECK-LABEL: @vssran_bu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) {
- return __builtin_lsx_vssran_bu_h(_1, _2);
- }
- // CHECK-LABEL: @vssran_hu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) {
- return __builtin_lsx_vssran_hu_w(_1, _2);
- }
- // CHECK-LABEL: @vssran_wu_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) {
- return __builtin_lsx_vssran_wu_d(_1, _2);
- }
- // CHECK-LABEL: @vsrarn_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vsrarn_b_h(_1, _2);
- }
- // CHECK-LABEL: @vsrarn_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vsrarn_h_w(_1, _2);
- }
- // CHECK-LABEL: @vsrarn_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vsrarn_w_d(_1, _2);
- }
- // CHECK-LABEL: @vssrarn_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vssrarn_b_h(_1, _2);
- }
- // CHECK-LABEL: @vssrarn_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vssrarn_h_w(_1, _2);
- }
- // CHECK-LABEL: @vssrarn_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vssrarn_w_d(_1, _2);
- }
- // CHECK-LABEL: @vssrarn_bu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) {
- return __builtin_lsx_vssrarn_bu_h(_1, _2);
- }
- // CHECK-LABEL: @vssrarn_hu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) {
- return __builtin_lsx_vssrarn_hu_w(_1, _2);
- }
- // CHECK-LABEL: @vssrarn_wu_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) {
- return __builtin_lsx_vssrarn_wu_d(_1, _2);
- }
- // CHECK-LABEL: @vsrln_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vsrln_b_h(_1, _2);
- }
- // CHECK-LABEL: @vsrln_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vsrln_h_w(_1, _2);
- }
- // CHECK-LABEL: @vsrln_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vsrln_w_d(_1, _2);
- }
- // CHECK-LABEL: @vssrln_bu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) {
- return __builtin_lsx_vssrln_bu_h(_1, _2);
- }
- // CHECK-LABEL: @vssrln_hu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) {
- return __builtin_lsx_vssrln_hu_w(_1, _2);
- }
- // CHECK-LABEL: @vssrln_wu_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) {
- return __builtin_lsx_vssrln_wu_d(_1, _2);
- }
- // CHECK-LABEL: @vsrlrn_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vsrlrn_b_h(_1, _2);
- }
- // CHECK-LABEL: @vsrlrn_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vsrlrn_h_w(_1, _2);
- }
- // CHECK-LABEL: @vsrlrn_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vsrlrn_w_d(_1, _2);
- }
- // CHECK-LABEL: @vssrlrn_bu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) {
- return __builtin_lsx_vssrlrn_bu_h(_1, _2);
- }
- // CHECK-LABEL: @vssrlrn_hu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) {
- return __builtin_lsx_vssrlrn_hu_w(_1, _2);
- }
- // CHECK-LABEL: @vssrlrn_wu_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) {
- return __builtin_lsx_vssrlrn_wu_d(_1, _2);
- }
- // CHECK-LABEL: @vfrstpi_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vfrstpi_b(_1, _2, 1);
- }
- // CHECK-LABEL: @vfrstpi_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vfrstpi_h(_1, _2, 1);
- }
- // CHECK-LABEL: @vfrstp_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) {
- return __builtin_lsx_vfrstp_b(_1, _2, _3);
- }
- // CHECK-LABEL: @vfrstp_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) {
- return __builtin_lsx_vfrstp_h(_1, _2, _3);
- }
- // CHECK-LABEL: @vshuf4i_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vshuf4i_d(_1, _2, 1);
- }
- // CHECK-LABEL: @vbsrl_v(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vbsrl_v(v16i8 _1) { return __builtin_lsx_vbsrl_v(_1, 1); }
- // CHECK-LABEL: @vbsll_v(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vbsll_v(v16i8 _1) { return __builtin_lsx_vbsll_v(_1, 1); }
- // CHECK-LABEL: @vextrins_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vextrins_b(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vextrins_b(_1, _2, 1);
- }
- // CHECK-LABEL: @vextrins_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vextrins_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vextrins_h(_1, _2, 1);
- }
- // CHECK-LABEL: @vextrins_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vextrins_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vextrins_w(_1, _2, 1);
- }
- // CHECK-LABEL: @vextrins_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vextrins_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vextrins_d(_1, _2, 1);
- }
- // CHECK-LABEL: @vmskltz_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vmskltz_b(v16i8 _1) { return __builtin_lsx_vmskltz_b(_1); }
- // CHECK-LABEL: @vmskltz_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vmskltz_h(v8i16 _1) { return __builtin_lsx_vmskltz_h(_1); }
- // CHECK-LABEL: @vmskltz_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vmskltz_w(v4i32 _1) { return __builtin_lsx_vmskltz_w(_1); }
- // CHECK-LABEL: @vmskltz_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vmskltz_d(v2i64 _1) { return __builtin_lsx_vmskltz_d(_1); }
- // CHECK-LABEL: @vsigncov_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vsigncov_b(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vsigncov_b(_1, _2);
- }
- // CHECK-LABEL: @vsigncov_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vsigncov_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vsigncov_h(_1, _2);
- }
- // CHECK-LABEL: @vsigncov_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vsigncov_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vsigncov_w(_1, _2);
- }
- // CHECK-LABEL: @vsigncov_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vsigncov_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vsigncov_d(_1, _2);
- }
- // CHECK-LABEL: @vfmadd_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x float> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) {
- return __builtin_lsx_vfmadd_s(_1, _2, _3);
- }
- // CHECK-LABEL: @vfmadd_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x double> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) {
- return __builtin_lsx_vfmadd_d(_1, _2, _3);
- }
- // CHECK-LABEL: @vfmsub_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x float> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) {
- return __builtin_lsx_vfmsub_s(_1, _2, _3);
- }
- // CHECK-LABEL: @vfmsub_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x double> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) {
- return __builtin_lsx_vfmsub_d(_1, _2, _3);
- }
- // CHECK-LABEL: @vfnmadd_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x float> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) {
- return __builtin_lsx_vfnmadd_s(_1, _2, _3);
- }
- // CHECK-LABEL: @vfnmadd_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x double> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) {
- return __builtin_lsx_vfnmadd_d(_1, _2, _3);
- }
- // CHECK-LABEL: @vfnmsub_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x float> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) {
- return __builtin_lsx_vfnmsub_s(_1, _2, _3);
- }
- // CHECK-LABEL: @vfnmsub_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x double> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) {
- return __builtin_lsx_vfnmsub_d(_1, _2, _3);
- }
- // CHECK-LABEL: @vftintrne_w_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vftintrne_w_s(v4f32 _1) { return __builtin_lsx_vftintrne_w_s(_1); }
- // CHECK-LABEL: @vftintrne_l_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vftintrne_l_d(v2f64 _1) { return __builtin_lsx_vftintrne_l_d(_1); }
- // CHECK-LABEL: @vftintrp_w_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vftintrp_w_s(v4f32 _1) { return __builtin_lsx_vftintrp_w_s(_1); }
- // CHECK-LABEL: @vftintrp_l_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vftintrp_l_d(v2f64 _1) { return __builtin_lsx_vftintrp_l_d(_1); }
- // CHECK-LABEL: @vftintrm_w_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vftintrm_w_s(v4f32 _1) { return __builtin_lsx_vftintrm_w_s(_1); }
- // CHECK-LABEL: @vftintrm_l_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vftintrm_l_d(v2f64 _1) { return __builtin_lsx_vftintrm_l_d(_1); }
- // CHECK-LABEL: @vftint_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vftint_w_d(v2f64 _1, v2f64 _2) {
- return __builtin_lsx_vftint_w_d(_1, _2);
- }
- // CHECK-LABEL: @vffint_s_l(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x float> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4f32 vffint_s_l(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vffint_s_l(_1, _2);
- }
- // CHECK-LABEL: @vftintrz_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) {
- return __builtin_lsx_vftintrz_w_d(_1, _2);
- }
- // CHECK-LABEL: @vftintrp_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) {
- return __builtin_lsx_vftintrp_w_d(_1, _2);
- }
- // CHECK-LABEL: @vftintrm_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) {
- return __builtin_lsx_vftintrm_w_d(_1, _2);
- }
- // CHECK-LABEL: @vftintrne_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) {
- return __builtin_lsx_vftintrne_w_d(_1, _2);
- }
- // CHECK-LABEL: @vftintl_l_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vftintl_l_s(v4f32 _1) { return __builtin_lsx_vftintl_l_s(_1); }
- // CHECK-LABEL: @vftinth_l_s(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vftinth_l_s(v4f32 _1) { return __builtin_lsx_vftinth_l_s(_1); }
- // CHECK-LABEL: @vffinth_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x double> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2f64 vffinth_d_w(v4i32 _1) { return __builtin_lsx_vffinth_d_w(_1);
} - // CHECK-LABEL: @vffintl_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x double> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2f64 vffintl_d_w(v4i32 _1) { return __builtin_lsx_vffintl_d_w(_1); } - // CHECK-LABEL: @vftintrzl_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrzl_l_s(v4f32 _1) { return __builtin_lsx_vftintrzl_l_s(_1); } - // CHECK-LABEL: @vftintrzh_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrzh_l_s(v4f32 _1) { return __builtin_lsx_vftintrzh_l_s(_1); } - // CHECK-LABEL: @vftintrpl_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrpl_l_s(v4f32 _1) { return __builtin_lsx_vftintrpl_l_s(_1); } - // CHECK-LABEL: @vftintrph_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrph_l_s(v4f32 _1) { return __builtin_lsx_vftintrph_l_s(_1); } - // CHECK-LABEL: @vftintrml_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrml_l_s(v4f32 _1) { return __builtin_lsx_vftintrml_l_s(_1); } - // CHECK-LABEL: @vftintrmh_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrmh_l_s(v4f32 _1) { return __builtin_lsx_vftintrmh_l_s(_1); } - // CHECK-LABEL: @vftintrnel_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrnel_l_s(v4f32 _1) { - return __builtin_lsx_vftintrnel_l_s(_1); - } - // CHECK-LABEL: @vftintrneh_l_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vftintrneh_l_s(v4f32 _1) { - return __builtin_lsx_vftintrneh_l_s(_1); - } - // CHECK-LABEL: @vfrintrne_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> --// CHECK-NEXT: ret <4 x i32> [[TMP1]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vfrintrne_s(v4f32 _1) { return __builtin_lsx_vfrintrne_s(_1); } - // CHECK-LABEL: @vfrintrne_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> --// CHECK-NEXT: ret <2 x i64> [[TMP1]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vfrintrne_d(v2f64 _1) { return __builtin_lsx_vfrintrne_d(_1); } - // CHECK-LABEL: @vfrintrz_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> --// CHECK-NEXT: ret <4 x i32> [[TMP1]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vfrintrz_s(v4f32 
_1) { return __builtin_lsx_vfrintrz_s(_1); } - // CHECK-LABEL: @vfrintrz_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> --// CHECK-NEXT: ret <2 x i64> [[TMP1]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vfrintrz_d(v2f64 _1) { return __builtin_lsx_vfrintrz_d(_1); } - // CHECK-LABEL: @vfrintrp_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> --// CHECK-NEXT: ret <4 x i32> [[TMP1]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vfrintrp_s(v4f32 _1) { return __builtin_lsx_vfrintrp_s(_1); } - // CHECK-LABEL: @vfrintrp_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> --// CHECK-NEXT: ret <2 x i64> [[TMP1]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vfrintrp_d(v2f64 _1) { return __builtin_lsx_vfrintrp_d(_1); } - // CHECK-LABEL: @vfrintrm_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32> --// CHECK-NEXT: ret <4 x i32> [[TMP1]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v4i32 vfrintrm_s(v4f32 _1) { return __builtin_lsx_vfrintrm_s(_1); } - // CHECK-LABEL: @vfrintrm_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[_1:%.*]]) --// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64> --// CHECK-NEXT: ret <2 x i64> [[TMP1]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2i64 vfrintrm_d(v2f64 _1) { return __builtin_lsx_vfrintrm_d(_1); } - // CHECK-LABEL: @vstelm_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: tail call void 
@llvm.loongarch.lsx.vstelm.b(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1, i32 1) - // CHECK-NEXT: ret void - // - void vstelm_b(v16i8 _1, void *_2) { -@@ -3349,7 +4577,8 @@ void vstelm_b(v16i8 _1, void *_2) { - } - // CHECK-LABEL: @vstelm_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[TMP0]], ptr [[_2:%.*]], i32 2, i32 1) - // CHECK-NEXT: ret void - // - void vstelm_h(v8i16 _1, void *_2) { -@@ -3357,7 +4586,8 @@ void vstelm_h(v8i16 _1, void *_2) { - } - // CHECK-LABEL: @vstelm_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[TMP0]], ptr [[_2:%.*]], i32 4, i32 1) - // CHECK-NEXT: ret void - // - void vstelm_w(v4i32 _1, void *_2) { -@@ -3365,7 +4595,8 @@ void vstelm_w(v4i32 _1, void *_2) { - } - // CHECK-LABEL: @vstelm_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[TMP0]], ptr [[_2:%.*]], i32 8, i32 1) - // CHECK-NEXT: ret void - // - void vstelm_d(v2i64 _1, void *_2) { -@@ -3373,1286 +4604,1785 @@ void vstelm_d(v2i64 _1, void *_2) { - } - // CHECK-LABEL: @vaddwev_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { - return __builtin_lsx_vaddwev_d_w(_1, _2); - } - // CHECK-LABEL: @vaddwev_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { - return __builtin_lsx_vaddwev_w_h(_1, _2); - } - // CHECK-LABEL: @vaddwev_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[TMP0]], <16 x 
i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { - return __builtin_lsx_vaddwev_h_b(_1, _2); - } - // CHECK-LABEL: @vaddwod_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { - return __builtin_lsx_vaddwod_d_w(_1, _2); - } - // CHECK-LABEL: @vaddwod_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { - return __builtin_lsx_vaddwod_w_h(_1, _2); - } - // CHECK-LABEL: @vaddwod_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { - return __builtin_lsx_vaddwod_h_b(_1, _2); - } - // CHECK-LABEL: @vaddwev_d_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { - return __builtin_lsx_vaddwev_d_wu(_1, _2); - } - // CHECK-LABEL: @vaddwev_w_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x 
i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { - return __builtin_lsx_vaddwev_w_hu(_1, _2); - } - // CHECK-LABEL: @vaddwev_h_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { - return __builtin_lsx_vaddwev_h_bu(_1, _2); - } - // CHECK-LABEL: @vaddwod_d_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { - return __builtin_lsx_vaddwod_d_wu(_1, _2); - } - // CHECK-LABEL: @vaddwod_w_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { - return __builtin_lsx_vaddwod_w_hu(_1, _2); - } - // CHECK-LABEL: @vaddwod_h_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { - return __builtin_lsx_vaddwod_h_bu(_1, _2); - } - // CHECK-LABEL: @vaddwev_d_wu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: 
ret i128 [[TMP3]] - // - v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) { - return __builtin_lsx_vaddwev_d_wu_w(_1, _2); - } - // CHECK-LABEL: @vaddwev_w_hu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { - return __builtin_lsx_vaddwev_w_hu_h(_1, _2); - } - // CHECK-LABEL: @vaddwev_h_bu_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) { - return __builtin_lsx_vaddwev_h_bu_b(_1, _2); - } - // CHECK-LABEL: @vaddwod_d_wu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) { - return __builtin_lsx_vaddwod_d_wu_w(_1, _2); - } - // CHECK-LABEL: @vaddwod_w_hu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) { - return __builtin_lsx_vaddwod_w_hu_h(_1, _2); - } - // CHECK-LABEL: @vaddwod_h_bu_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// 
CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) { - return __builtin_lsx_vaddwod_h_bu_b(_1, _2); - } - // CHECK-LABEL: @vsubwev_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { - return __builtin_lsx_vsubwev_d_w(_1, _2); - } - // CHECK-LABEL: @vsubwev_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { - return __builtin_lsx_vsubwev_w_h(_1, _2); - } - // CHECK-LABEL: @vsubwev_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { - return __builtin_lsx_vsubwev_h_b(_1, _2); - } - // CHECK-LABEL: @vsubwod_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { - return __builtin_lsx_vsubwod_d_w(_1, _2); - } - // CHECK-LABEL: @vsubwod_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsubwod_w_h(v8i16 
_1, v8i16 _2) { - return __builtin_lsx_vsubwod_w_h(_1, _2); - } - // CHECK-LABEL: @vsubwod_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { - return __builtin_lsx_vsubwod_h_b(_1, _2); - } - // CHECK-LABEL: @vsubwev_d_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { - return __builtin_lsx_vsubwev_d_wu(_1, _2); - } - // CHECK-LABEL: @vsubwev_w_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { - return __builtin_lsx_vsubwev_w_hu(_1, _2); - } - // CHECK-LABEL: @vsubwev_h_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { - return __builtin_lsx_vsubwev_h_bu(_1, _2); - } - // CHECK-LABEL: @vsubwod_d_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { - return 
__builtin_lsx_vsubwod_d_wu(_1, _2); - } - // CHECK-LABEL: @vsubwod_w_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { - return __builtin_lsx_vsubwod_w_hu(_1, _2); - } - // CHECK-LABEL: @vsubwod_h_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { - return __builtin_lsx_vsubwod_h_bu(_1, _2); - } - // CHECK-LABEL: @vaddwev_q_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { - return __builtin_lsx_vaddwev_q_d(_1, _2); - } - // CHECK-LABEL: @vaddwod_q_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { - return __builtin_lsx_vaddwod_q_d(_1, _2); - } - // CHECK-LABEL: @vaddwev_q_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { - return __builtin_lsx_vaddwev_q_du(_1, _2); - } - // 
CHECK-LABEL: @vaddwod_q_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { - return __builtin_lsx_vaddwod_q_du(_1, _2); - } - // CHECK-LABEL: @vsubwev_q_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { - return __builtin_lsx_vsubwev_q_d(_1, _2); - } - // CHECK-LABEL: @vsubwod_q_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { - return __builtin_lsx_vsubwod_q_d(_1, _2); - } - // CHECK-LABEL: @vsubwev_q_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { - return __builtin_lsx_vsubwev_q_du(_1, _2); - } - // CHECK-LABEL: @vsubwod_q_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { - return __builtin_lsx_vsubwod_q_du(_1, _2); - } - // CHECK-LABEL: @vaddwev_q_du_d( - // CHECK-NEXT: entry: 
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) { - return __builtin_lsx_vaddwev_q_du_d(_1, _2); - } - // CHECK-LABEL: @vaddwod_q_du_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) { - return __builtin_lsx_vaddwod_q_du_d(_1, _2); - } - // CHECK-LABEL: @vmulwev_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { - return __builtin_lsx_vmulwev_d_w(_1, _2); - } - // CHECK-LABEL: @vmulwev_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { - return __builtin_lsx_vmulwev_w_h(_1, _2); - } - // CHECK-LABEL: @vmulwev_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { - return __builtin_lsx_vmulwev_h_b(_1, _2); - } - // CHECK-LABEL: @vmulwod_d_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call 
<2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { - return __builtin_lsx_vmulwod_d_w(_1, _2); - } - // CHECK-LABEL: @vmulwod_w_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { - return __builtin_lsx_vmulwod_w_h(_1, _2); - } - // CHECK-LABEL: @vmulwod_h_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { - return __builtin_lsx_vmulwod_h_b(_1, _2); - } - // CHECK-LABEL: @vmulwev_d_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { - return __builtin_lsx_vmulwev_d_wu(_1, _2); - } - // CHECK-LABEL: @vmulwev_w_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { - return __builtin_lsx_vmulwev_w_hu(_1, _2); - } - // CHECK-LABEL: @vmulwev_h_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> 
[[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { - return __builtin_lsx_vmulwev_h_bu(_1, _2); - } - // CHECK-LABEL: @vmulwod_d_wu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { - return __builtin_lsx_vmulwod_d_wu(_1, _2); - } - // CHECK-LABEL: @vmulwod_w_hu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { - return __builtin_lsx_vmulwod_w_hu(_1, _2); - } - // CHECK-LABEL: @vmulwod_h_bu( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { - return __builtin_lsx_vmulwod_h_bu(_1, _2); - } - // CHECK-LABEL: @vmulwev_d_wu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) { - return __builtin_lsx_vmulwev_d_wu_w(_1, _2); - } - // CHECK-LABEL: @vmulwev_w_hu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> 
[[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) { - return __builtin_lsx_vmulwev_w_hu_h(_1, _2); - } - // CHECK-LABEL: @vmulwev_h_bu_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) { - return __builtin_lsx_vmulwev_h_bu_b(_1, _2); - } - // CHECK-LABEL: @vmulwod_d_wu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) { - return __builtin_lsx_vmulwod_d_wu_w(_1, _2); - } - // CHECK-LABEL: @vmulwod_w_hu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) { - return __builtin_lsx_vmulwod_w_hu_h(_1, _2); - } - // CHECK-LABEL: @vmulwod_h_bu_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) { - return __builtin_lsx_vmulwod_h_bu_b(_1, _2); - } - // CHECK-LABEL: @vmulwev_q_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> 
[[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vmulwev_q_d(_1, _2);
- }
- // CHECK-LABEL: @vmulwod_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vmulwod_q_d(_1, _2);
- }
- // CHECK-LABEL: @vmulwev_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) {
- return __builtin_lsx_vmulwev_q_du(_1, _2);
- }
- // CHECK-LABEL: @vmulwod_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) {
- return __builtin_lsx_vmulwod_q_du(_1, _2);
- }
- // CHECK-LABEL: @vmulwev_q_du_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) {
- return __builtin_lsx_vmulwev_q_du_d(_1, _2);
- }
- // CHECK-LABEL: @vmulwod_q_du_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) {
- return __builtin_lsx_vmulwod_q_du_d(_1, _2);
- }
- // CHECK-LABEL: @vhaddw_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vhaddw_q_d(_1, _2);
- }
- // CHECK-LABEL: @vhaddw_qu_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) {
- return __builtin_lsx_vhaddw_qu_du(_1, _2);
- }
- // CHECK-LABEL: @vhsubw_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vhsubw_q_d(_1, _2);
- }
- // CHECK-LABEL: @vhsubw_qu_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) {
- return __builtin_lsx_vhsubw_qu_du(_1, _2);
- }
- // CHECK-LABEL: @vmaddwev_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
- return __builtin_lsx_vmaddwev_d_w(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
- return __builtin_lsx_vmaddwev_w_h(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
- return __builtin_lsx_vmaddwev_h_b(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_d_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
- return __builtin_lsx_vmaddwev_d_wu(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_w_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
- return __builtin_lsx_vmaddwev_w_hu(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_h_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
- return __builtin_lsx_vmaddwev_h_bu(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
- return __builtin_lsx_vmaddwod_d_w(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
- return __builtin_lsx_vmaddwod_w_h(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
- return __builtin_lsx_vmaddwod_h_b(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_d_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
- return __builtin_lsx_vmaddwod_d_wu(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_w_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
- return __builtin_lsx_vmaddwod_w_hu(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_h_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
- return __builtin_lsx_vmaddwod_h_bu(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_d_wu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
- return __builtin_lsx_vmaddwev_d_wu_w(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_w_hu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
- return __builtin_lsx_vmaddwev_w_hu_h(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_h_bu_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
- return __builtin_lsx_vmaddwev_h_bu_b(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_d_wu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
- return __builtin_lsx_vmaddwod_d_wu_w(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_w_hu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
- return __builtin_lsx_vmaddwod_w_hu_h(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_h_bu_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
- return __builtin_lsx_vmaddwod_h_bu_b(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
- return __builtin_lsx_vmaddwev_q_d(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
- return __builtin_lsx_vmaddwod_q_d(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
- return __builtin_lsx_vmaddwev_q_du(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_q_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
- return __builtin_lsx_vmaddwod_q_du(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwev_q_du_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
- return __builtin_lsx_vmaddwev_q_du_d(_1, _2, _3);
- }
- // CHECK-LABEL: @vmaddwod_q_du_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-+// CHECK-NEXT: ret i128 [[TMP4]]
- //
- v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
- return __builtin_lsx_vmaddwod_q_du_d(_1, _2, _3);
- }
- // CHECK-LABEL: @vrotr_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vrotr_b(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vrotr_b(_1, _2);
- }
- // CHECK-LABEL: @vrotr_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vrotr_h(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vrotr_h(_1, _2);
- }
- // CHECK-LABEL: @vrotr_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vrotr_w(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vrotr_w(_1, _2);
- }
- // CHECK-LABEL: @vrotr_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vrotr_d(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vrotr_d(_1, _2);
- }
- // CHECK-LABEL: @vadd_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_q(_1, _2); }
- // CHECK-LABEL: @vsub_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_q(_1, _2); }
- // CHECK-LABEL: @vldrepl_b(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
-+// CHECK-NEXT: ret i128 [[TMP1]]
- //
- v16i8 vldrepl_b(void *_1) { return __builtin_lsx_vldrepl_b(_1, 1); }
- // CHECK-LABEL: @vldrepl_h(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
-+// CHECK-NEXT: ret i128 [[TMP1]]
- //
- v8i16 vldrepl_h(void *_1) { return __builtin_lsx_vldrepl_h(_1, 2); }
- // CHECK-LABEL: @vldrepl_w(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
-+// CHECK-NEXT: ret i128 [[TMP1]]
- //
- v4i32 vldrepl_w(void *_1) { return __builtin_lsx_vldrepl_w(_1, 4); }
- // CHECK-LABEL: @vldrepl_d(
- // CHECK-NEXT: entry:
- // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
-+// CHECK-NEXT: ret i128 [[TMP1]]
- //
- v2i64 vldrepl_d(void *_1) { return __builtin_lsx_vldrepl_d(_1, 8); }
- // CHECK-LABEL: @vmskgez_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vmskgez_b(v16i8 _1) { return __builtin_lsx_vmskgez_b(_1); }
- // CHECK-LABEL: @vmsknz_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vmsknz_b(v16i8 _1) { return __builtin_lsx_vmsknz_b(_1); }
- // CHECK-LABEL: @vexth_h_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vexth_h_b(v16i8 _1) { return __builtin_lsx_vexth_h_b(_1); }
- // CHECK-LABEL: @vexth_w_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vexth_w_h(v8i16 _1) { return __builtin_lsx_vexth_w_h(_1); }
- // CHECK-LABEL: @vexth_d_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vexth_d_w(v4i32 _1) { return __builtin_lsx_vexth_d_w(_1); }
- // CHECK-LABEL: @vexth_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vexth_q_d(v2i64 _1) { return __builtin_lsx_vexth_q_d(_1); }
- // CHECK-LABEL: @vexth_hu_bu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[_1:%.*]])
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8u16 vexth_hu_bu(v16u8 _1) { return __builtin_lsx_vexth_hu_bu(_1); }
- // CHECK-LABEL: @vexth_wu_hu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[_1:%.*]])
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4u32 vexth_wu_hu(v8u16 _1) { return __builtin_lsx_vexth_wu_hu(_1); }
- // CHECK-LABEL: @vexth_du_wu(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2u64 vexth_du_wu(v4u32 _1) { return __builtin_lsx_vexth_du_wu(_1); }
- // CHECK-LABEL: @vexth_qu_du(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2u64 vexth_qu_du(v2u64 _1) { return __builtin_lsx_vexth_qu_du(_1); }
- // CHECK-LABEL: @vrotri_b(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v16i8 vrotri_b(v16i8 _1) { return __builtin_lsx_vrotri_b(_1, 1); }
- // CHECK-LABEL: @vrotri_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v8i16 vrotri_h(v8i16 _1) { return __builtin_lsx_vrotri_h(_1, 1); }
- // CHECK-LABEL: @vrotri_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v4i32 vrotri_w(v4i32 _1) { return __builtin_lsx_vrotri_w(_1, 1); }
- // CHECK-LABEL: @vrotri_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[_1:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[TMP0]], i32 1)
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vrotri_d(v2i64 _1) { return __builtin_lsx_vrotri_d(_1, 1); }
- // CHECK-LABEL: @vextl_q_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[_1:%.*]])
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
- //
- v2i64 vextl_q_d(v2i64 _1) { return __builtin_lsx_vextl_q_d(_1); }
- // CHECK-LABEL: @vsrlni_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vsrlni_b_h(_1, _2, 1);
- }
- // CHECK-LABEL: @vsrlni_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vsrlni_h_w(_1, _2, 1);
- }
- // CHECK-LABEL: @vsrlni_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vsrlni_w_d(_1, _2, 1);
- }
- // CHECK-LABEL: @vsrlni_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vsrlni_d_q(_1, _2, 1);
- }
- // CHECK-LABEL: @vsrlrni_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vsrlrni_b_h(_1, _2, 1);
- }
- // CHECK-LABEL: @vsrlrni_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vsrlrni_h_w(_1, _2, 1);
- }
- // CHECK-LABEL: @vsrlrni_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vsrlrni_w_d(_1, _2, 1);
- }
- // CHECK-LABEL: @vsrlrni_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vsrlrni_d_q(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrlni_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vssrlni_b_h(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrlni_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vssrlni_h_w(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrlni_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vssrlni_w_d(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrlni_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vssrlni_d_q(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrlni_bu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) {
- return __builtin_lsx_vssrlni_bu_h(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrlni_hu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) {
- return __builtin_lsx_vssrlni_hu_w(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrlni_wu_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) {
- return __builtin_lsx_vssrlni_wu_d(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrlni_du_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) {
- return __builtin_lsx_vssrlni_du_q(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrlrni_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vssrlrni_b_h(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrlrni_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vssrlrni_h_w(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrlrni_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vssrlrni_w_d(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrlrni_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vssrlrni_d_q(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrlrni_bu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) {
- return __builtin_lsx_vssrlrni_bu_h(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrlrni_hu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) {
- return __builtin_lsx_vssrlrni_hu_w(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrlrni_wu_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) {
- return __builtin_lsx_vssrlrni_wu_d(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrlrni_du_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) {
- return __builtin_lsx_vssrlrni_du_q(_1, _2, 1);
- }
- // CHECK-LABEL: @vsrani_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vsrani_b_h(_1, _2, 1);
- }
- // CHECK-LABEL: @vsrani_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vsrani_h_w(_1, _2, 1);
- }
- // CHECK-LABEL: @vsrani_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vsrani_w_d(_1, _2, 1);
- }
- // CHECK-LABEL: @vsrani_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vsrani_d_q(_1, _2, 1);
- }
- // CHECK-LABEL: @vsrarni_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vsrarni_b_h(_1, _2, 1);
- }
- // CHECK-LABEL: @vsrarni_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vsrarni_h_w(_1, _2, 1);
- }
- // CHECK-LABEL: @vsrarni_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vsrarni_w_d(_1, _2, 1);
- }
- // CHECK-LABEL: @vsrarni_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vsrarni_d_q(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrani_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vssrani_b_h(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrani_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vssrani_h_w(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrani_w_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) {
- return __builtin_lsx_vssrani_w_d(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrani_d_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) {
- return __builtin_lsx_vssrani_d_q(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrani_bu_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) {
- return __builtin_lsx_vssrani_bu_h(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrani_hu_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) {
- return __builtin_lsx_vssrani_hu_w(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrani_wu_d(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <4 x i32> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) {
- return __builtin_lsx_vssrani_wu_d(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrani_du_q(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <2 x i64> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) {
- return __builtin_lsx_vssrani_du_q(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrarni_b_h(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) {
- return __builtin_lsx_vssrarni_b_h(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrarni_h_w(
- // CHECK-NEXT: entry:
--// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
--// CHECK-NEXT: ret <8 x i16> [[TMP0]]
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
-+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
-+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-+// CHECK-NEXT: ret i128 [[TMP3]]
- //
- v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) {
- return __builtin_lsx_vssrarni_h_w(_1, _2, 1);
- }
- // CHECK-LABEL: @vssrarni_w_d(
- // CHECK-NEXT: entry:
--//
CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { - return __builtin_lsx_vssrarni_w_d(_1, _2, 1); - } - // CHECK-LABEL: @vssrarni_d_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { - return __builtin_lsx_vssrarni_d_q(_1, _2, 1); - } - // CHECK-LABEL: @vssrarni_bu_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { - return __builtin_lsx_vssrarni_bu_h(_1, _2, 1); - } - // CHECK-LABEL: @vssrarni_hu_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { - return __builtin_lsx_vssrarni_hu_w(_1, _2, 1); - } - // CHECK-LABEL: @vssrarni_wu_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { - return __builtin_lsx_vssrarni_wu_d(_1, _2, 1); - } - // 
CHECK-LABEL: @vssrarni_du_q( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { - return __builtin_lsx_vssrarni_du_q(_1, _2, 1); - } - // CHECK-LABEL: @vpermi_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vpermi_w(v4i32 _1, v4i32 _2) { - return __builtin_lsx_vpermi_w(_1, _2, 1); -@@ -4660,79 +6390,107 @@ v4i32 vpermi_w(v4i32 _1, v4i32 _2) { - // CHECK-LABEL: @vld( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 -+// CHECK-NEXT: ret i128 [[TMP1]] - // - v16i8 vld(void *_1) { return __builtin_lsx_vld(_1, 1); } - // CHECK-LABEL: @vst( - // CHECK-NEXT: entry: --// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1) - // CHECK-NEXT: ret void - // - void vst(v16i8 _1, void *_2) { return __builtin_lsx_vst(_1, _2, 1); } - // CHECK-LABEL: @vssrlrn_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { - return __builtin_lsx_vssrlrn_b_h(_1, _2); - } - // CHECK-LABEL: @vssrlrn_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 
[[TMP3]] - // - v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { - return __builtin_lsx_vssrlrn_h_w(_1, _2); - } - // CHECK-LABEL: @vssrlrn_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { - return __builtin_lsx_vssrlrn_w_d(_1, _2); - } - // CHECK-LABEL: @vssrln_b_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { - return __builtin_lsx_vssrln_b_h(_1, _2); - } - // CHECK-LABEL: @vssrln_h_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { - return __builtin_lsx_vssrln_h_w(_1, _2); - } - // CHECK-LABEL: @vssrln_w_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { - return __builtin_lsx_vssrln_w_d(_1, _2); - } - // CHECK-LABEL: @vorn_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __builtin_lsx_vorn_v(_1, _2); } - // 
CHECK-LABEL: @vldi( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 -+// CHECK-NEXT: ret i128 [[TMP1]] - // - v2i64 vldi() { return __builtin_lsx_vldi(1); } - // CHECK-LABEL: @vshuf_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) -+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -+// CHECK-NEXT: ret i128 [[TMP4]] - // - v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { - return __builtin_lsx_vshuf_b(_1, _2, _3); -@@ -4740,429 +6498,575 @@ v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { - // CHECK-LABEL: @vldx( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 -+// CHECK-NEXT: ret i128 [[TMP1]] - // - v16i8 vldx(void *_1) { return __builtin_lsx_vldx(_1, 1); } - // CHECK-LABEL: @vstx( - // CHECK-NEXT: entry: --// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i64 1) - // CHECK-NEXT: ret void - // - void vstx(v16i8 _1, void *_2) { return __builtin_lsx_vstx(_1, _2, 1); } - // CHECK-LABEL: @vextl_qu_du( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] - // - v2u64 vextl_qu_du(v2u64 _1) { return __builtin_lsx_vextl_qu_du(_1); } - // CHECK-LABEL: @bnz_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bnz_b(v16u8 _1) { return __builtin_lsx_bnz_b(_1); } - // CHECK-LABEL: @bnz_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bnz_d(v2u64 _1) { return __builtin_lsx_bnz_d(_1); } - // CHECK-LABEL: @bnz_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 
@llvm.loongarch.lsx.bnz.h(<8 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bnz_h(v8u16 _1) { return __builtin_lsx_bnz_h(_1); } - // CHECK-LABEL: @bnz_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bnz_v(v16u8 _1) { return __builtin_lsx_bnz_v(_1); } - // CHECK-LABEL: @bnz_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bnz_w(v4u32 _1) { return __builtin_lsx_bnz_w(_1); } - // CHECK-LABEL: @bz_b( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bz_b(v16u8 _1) { return __builtin_lsx_bz_b(_1); } - // CHECK-LABEL: @bz_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bz_d(v2u64 _1) { return __builtin_lsx_bz_d(_1); } - // CHECK-LABEL: @bz_h( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bz_h(v8u16 _1) { return __builtin_lsx_bz_h(_1); } - // CHECK-LABEL: @bz_v( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bz_v(v16u8 _1) { return __builtin_lsx_bz_v(_1); } - // CHECK-LABEL: @bz_w( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[_1:%.*]]) --// CHECK-NEXT: ret i32 [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[TMP0]]) -+// CHECK-NEXT: ret i32 [[TMP1]] - // - int bz_w(v4u32 _1) { return __builtin_lsx_bz_w(_1); } - // CHECK-LABEL: @vfcmp_caf_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_caf_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_caf_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_caf_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_ceq_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_ceq_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_ceq_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_ceq_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_cle_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_cle_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_cle_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call 
<4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_cle_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_clt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_clt_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_clt_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_clt_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_cne_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_cne_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_cne_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_cne_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_cor_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_cor_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_cor_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_cor_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_cueq_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_cueq_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_cueq_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_cueq_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_cule_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_cule_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_cule_s( - // CHECK-NEXT: entry: --// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_cule_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_cult_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_cult_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_cult_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_cult_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_cun_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_cun_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_cune_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_cune_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_cune_s( 
- // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_cune_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_cun_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_cun_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_saf_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_saf_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_saf_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_saf_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_seq_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_seq_d(_1, _2); - } - // CHECK-LABEL: 
@vfcmp_seq_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_seq_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_sle_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_sle_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_sle_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_sle_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_slt_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_slt_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_slt_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_slt_s(_1, _2); - } - // 
CHECK-LABEL: @vfcmp_sne_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_sne_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_sne_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_sne_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_sor_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_sor_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_sor_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_sor_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_sueq_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_sueq_d(_1, 
_2); - } - // CHECK-LABEL: @vfcmp_sueq_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_sueq_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_sule_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_sule_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_sule_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_sule_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_sult_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_sult_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_sult_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { - return 
__builtin_lsx_vfcmp_sult_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_sun_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_sun_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_sune_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { - return __builtin_lsx_vfcmp_sune_d(_1, _2); - } - // CHECK-LABEL: @vfcmp_sune_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_sune_s(_1, _2); - } - // CHECK-LABEL: @vfcmp_sun_s( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -+// CHECK-NEXT: ret i128 [[TMP3]] - // - v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { - return __builtin_lsx_vfcmp_sun_s(_1, _2); -@@ -5170,24 +7074,28 @@ v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { - // CHECK-LABEL: @vrepli_b( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) --// CHECK-NEXT: ret <16 x i8> [[TMP0]] -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 -+// CHECK-NEXT: ret i128 [[TMP1]] - // - v16i8 vrepli_b() { return __builtin_lsx_vrepli_b(1); } - // CHECK-LABEL: @vrepli_d( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) --// CHECK-NEXT: ret <2 x i64> [[TMP0]] -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 
-+// CHECK-NEXT: ret i128 [[TMP1]] - // - v2i64 vrepli_d() { return __builtin_lsx_vrepli_d(1); } - // CHECK-LABEL: @vrepli_h( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) --// CHECK-NEXT: ret <8 x i16> [[TMP0]] -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 -+// CHECK-NEXT: ret i128 [[TMP1]] - // - v8i16 vrepli_h() { return __builtin_lsx_vrepli_h(1); } - // CHECK-LABEL: @vrepli_w( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) --// CHECK-NEXT: ret <4 x i32> [[TMP0]] -+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 -+// CHECK-NEXT: ret i128 [[TMP1]] - // - v4i32 vrepli_w() { return __builtin_lsx_vrepli_w(1); } --- -2.20.1 - diff --git a/0009-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch b/0009-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch deleted file mode 100644 index d858146d5926d899b0b1697f7ba7231e9a1f5eed..0000000000000000000000000000000000000000 --- a/0009-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch +++ /dev/null @@ -1,639 +0,0 @@ -From 2ff5a2bd0a7d14c675cbc5f0e11bf6916b960b20 Mon Sep 17 00:00:00 2001 -From: Ami-zhang <96056515+Ami-zhang@users.noreply.github.com> -Date: Thu, 28 Sep 2023 15:20:27 +0800 -Subject: [PATCH 1/9] [Driver] Support -fsanitize=cfi-icall on loongarch64 - (#67310) - -(cherry picked from commit 55accc82bec48acae769b086ad9a5dc29da77f02) ---- - clang/lib/Driver/ToolChain.cpp | 3 ++- - clang/test/Driver/fsanitize.c | 1 + - 2 files changed, 3 insertions(+), 1 deletion(-) - -diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp -index 0146d8af3549..69811c095594 100644 ---- a/clang/lib/Driver/ToolChain.cpp -+++ b/clang/lib/Driver/ToolChain.cpp -@@ -1273,7 +1273,8 @@ SanitizerMask ToolChain::getSupportedSanitizers() const { - if (getTriple().getArch() == llvm::Triple::x86 || - getTriple().getArch() == llvm::Triple::x86_64 || - getTriple().getArch() == llvm::Triple::arm || getTriple().isWasm() || -- getTriple().isAArch64() || getTriple().isRISCV()) -+ getTriple().isAArch64() || getTriple().isRISCV() || -+ getTriple().isLoongArch64()) - Res |= SanitizerKind::CFIICall; - if (getTriple().getArch() == llvm::Triple::x86_64 || - getTriple().isAArch64(64) || getTriple().isRISCV()) -diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c -index 9442f6b91471..4a525d75ea11 100644 ---- a/clang/test/Driver/fsanitize.c -+++ b/clang/test/Driver/fsanitize.c -@@ -600,6 +600,7 @@ - // RUN: %clang --target=aarch64_be -fvisibility=hidden -fsanitize=cfi -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI - // RUN: %clang --target=riscv32 -fvisibility=hidden -fsanitize=cfi -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI - // RUN: %clang --target=riscv64 -fvisibility=hidden -fsanitize=cfi -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI -+// RUN: %clang --target=loongarch64 -fvisibility=hidden -fsanitize=cfi -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI - // CHECK-CFI: -emit-llvm-bc{{.*}}-fsanitize=cfi-derived-cast,cfi-icall,cfi-mfcall,cfi-unrelated-cast,cfi-nvcall,cfi-vcall - // CHECK-CFI-NOMFCALL: -emit-llvm-bc{{.*}}-fsanitize=cfi-derived-cast,cfi-icall,cfi-unrelated-cast,cfi-nvcall,cfi-vcall - // CHECK-CFI-DCAST: -emit-llvm-bc{{.*}}-fsanitize=cfi-derived-cast --- -2.20.1 - - -From acfb50b03b0be3eda5282f26bad34ffc18595b30 Mon Sep 17 00:00:00 2001 -From: 
Nathan Chancellor -Date: Fri, 8 Sep 2023 10:54:35 -0700 -Subject: [PATCH 2/9] [Clang][LoongArch] Generate _mcount instead of mcount - (#65657) - -When building the LoongArch Linux kernel without -`CONFIG_DYNAMIC_FTRACE`, the build fails to link because the mcount -symbol is `mcount`, not `_mcount` like GCC generates and the kernel -expects: - -``` -ld.lld: error: undefined symbol: mcount ->>> referenced by version.c ->>> init/version.o:(early_hostname) in archive vmlinux.a ->>> referenced by do_mounts.c ->>> init/do_mounts.o:(rootfs_init_fs_context) in archive vmlinux.a ->>> referenced by main.c ->>> init/main.o:(__traceiter_initcall_level) in archive vmlinux.a ->>> referenced 97011 more times ->>> did you mean: _mcount ->>> defined in: vmlinux.a(arch/loongarch/kernel/mcount.o) -``` - -Set `MCountName` in `LoongArchTargetInfo` to `_mcount`, which resolves -the build failure. - -(cherry picked from commit cc2b09bee017147527e7bd1eb5272f4f70a7b900) ---- - clang/lib/Basic/Targets/LoongArch.h | 1 + - clang/test/CodeGen/mcount.c | 2 ++ - 2 files changed, 3 insertions(+) - -diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h -index 8f4150b2539d..3313102492cb 100644 ---- a/clang/lib/Basic/Targets/LoongArch.h -+++ b/clang/lib/Basic/Targets/LoongArch.h -@@ -40,6 +40,7 @@ public: - LongDoubleWidth = 128; - LongDoubleAlign = 128; - LongDoubleFormat = &llvm::APFloat::IEEEquad(); -+ MCountName = "_mcount"; - SuitableAlign = 128; - WCharType = SignedInt; - WIntType = UnsignedInt; -diff --git a/clang/test/CodeGen/mcount.c b/clang/test/CodeGen/mcount.c -index 8f994ab4e754..bdd609c1dfc5 100644 ---- a/clang/test/CodeGen/mcount.c -+++ b/clang/test/CodeGen/mcount.c -@@ -7,6 +7,8 @@ - // RUN: %clang_cc1 -pg -triple x86_64-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s - // RUN: %clang_cc1 -pg -triple arm-netbsd-eabi -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s - // RUN: %clang_cc1 -pg -triple aarch64-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s -+// RUN: %clang_cc1 -pg -triple loongarch32 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s -+// RUN: %clang_cc1 -pg -triple loongarch64 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s - // RUN: %clang_cc1 -pg -triple mips-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s - // RUN: %clang_cc1 -pg -triple mips-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s - // RUN: %clang_cc1 -pg -triple mipsel-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s --- -2.20.1 - - -From a841576d7a53d3d8fd61aa854af7d9c2dd204536 Mon Sep 17 00:00:00 2001 -From: Weining Lu -Date: Thu, 26 Oct 2023 11:50:28 +0800 -Subject: [PATCH 3/9] [LoongArch][test] Add some ABI regression tests for empty - struct. NFC - -How empty structs (not as fields of container struct) are passed in C++ -is not explicitly documented in psABI. This patch adds some tests -showing the current handing of clang. Some of the results are different -from gcc. Following patch(es) will try to fix the mismatch. 
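-
-For example, one of the new tests (a sketch; the complete test is in
-abi-lp64d-empty-structs.c below):
-
-  // CHECK-C:   define{{.*}} void @test_s10()
-  // CHECK-CXX: define{{.*}} void @_Z8test_s103s10()
-  struct s10 { };
-  struct s10 test_s10(struct s10 a) { return a; }
-
-clang currently ignores the empty struct when passing it, in both C and
-C++, while g++ does not ignore it (in C++ its size is 1 byte); the next
-patch in the series aligns clang++ with g++.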
- -(cherry picked from commit 8149066fa532d82ff62a0629d5a9fab6bd4da768) ---- - .../LoongArch/abi-lp64d-empty-structs.c | 53 +++++++++++++++++++ - 1 file changed, 53 insertions(+) - -diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c -index fb90bf556c19..d0daafac336e 100644 ---- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c -+++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c -@@ -81,9 +81,62 @@ struct s8 test_s8(struct s8 a) { - return a; - } - -+/// Note: Below tests check how empty structs are passed while above tests check -+/// empty structs as fields of container struct are ignored when flattening -+/// structs to examine whether the container structs can be passed via FARs. -+ - // CHECK-C: define{{.*}} void @test_s9() - // CHECK-CXX: define{{.*}} i64 @_Z7test_s92s9(i64 {{.*}}) - struct s9 { struct empty e; }; - struct s9 test_s9(struct s9 a) { - return a; - } -+ -+// CHECK-C: define{{.*}} void @test_s10() -+// CHECK-CXX: define{{.*}} void @_Z8test_s103s10() -+struct s10 { }; -+struct s10 test_s10(struct s10 a) { -+ return a; -+} -+ -+// CHECK-C: define{{.*}} void @test_s11() -+// CHECK-CXX: define{{.*}} i64 @_Z8test_s113s11(i64 {{.*}}) -+struct s11 { struct { } s; }; -+struct s11 test_s11(struct s11 a) { -+ return a; -+} -+ -+// CHECK-C: define{{.*}} void @test_s12() -+// CHECK-CXX: define{{.*}} void @_Z8test_s123s12() -+struct s12 { int i[0]; }; -+struct s12 test_s12(struct s12 a) { -+ return a; -+} -+ -+// CHECK-C: define{{.*}} void @test_s13() -+// CHECK-CXX: define{{.*}} void @_Z8test_s133s13() -+struct s13 { struct { } s[0]; }; -+struct s13 test_s13(struct s13 a) { -+ return a; -+} -+ -+// CHECK-C: define{{.*}} void @test_s14() -+// CHECK-CXX: define{{.*}} i64 @_Z8test_s143s14(i64 {{.*}}) -+struct s14 { struct { } s[1]; }; -+struct s14 test_s14(struct s14 a) { -+ return a; -+} -+ -+// CHECK-C: define{{.*}} void @test_s15() -+// CHECK-CXX: define{{.*}} void @_Z8test_s153s15() -+struct s15 { int : 0; }; -+struct s15 test_s15(struct s15 a) { -+ return a; -+} -+ -+// CHECK-C: define{{.*}} void @test_s16() -+// CHECK-CXX: define{{.*}} void @_Z8test_s163s16() -+struct s16 { int : 1; }; -+struct s16 test_s16(struct s16 a) { -+ return a; -+} --- -2.20.1 - - -From 6248fa0fc405952a8b907624c27b2dd1ee86a962 Mon Sep 17 00:00:00 2001 -From: Lu Weining -Date: Tue, 31 Oct 2023 21:18:06 +0800 -Subject: [PATCH 4/9] [LoongArch] Fix ABI mismatch with gcc/g++ about empty - structs passing (#70320) - -How empty structs (not as fields of container struct) are passed in C++ -is not explicitly documented in psABI. However, this patch fixes the -mismatch with g++. - -Note that the unnamed bitfield case `struct { int : 1; }` in C is also -fixed. Previously clang regards it as an empty struct and then ignores -it when passing. Now size of the struct is counted; since it's size is -not 0, clang will not ignore it even in C. - -While https://reviews.llvm.org/D156116 fixed the handling of empty -struct when considering eligibility of the container struct for the FP -calling convention ('flattening'), this patch fixes the handling of -passing the empty struct itself. 
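-
-A sketch of two affected cases, adapted from the updated tests in
-abi-lp64d-empty-structs.c:
-
-  struct s10 { };          // size 0 in C (still ignored), size 1 in C++
-  struct s16 { int : 1; }; // size is not 0: no longer ignored, even in C
-
-After this patch clang++ passes s10 as i64, matching g++, and both clang
-and clang++ pass s16 as i64.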
- -Fix https://github.com/llvm/llvm-project/issues/70319 - -(cherry picked from commit 9ca6bf3fb7b7df373723b3275730f101f9ff816b) ---- - clang/lib/CodeGen/Targets/LoongArch.cpp | 10 ++++++---- - clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c | 8 ++++---- - 2 files changed, 10 insertions(+), 8 deletions(-) - -diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp -index 7483bf6d6d1e..bc508a99da9c 100644 ---- a/clang/lib/CodeGen/Targets/LoongArch.cpp -+++ b/clang/lib/CodeGen/Targets/LoongArch.cpp -@@ -308,12 +308,14 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, - CGCXXABI::RAA_DirectInMemory); - } - -- // Ignore empty structs/unions. -- if (isEmptyRecord(getContext(), Ty, true)) -- return ABIArgInfo::getIgnore(); -- - uint64_t Size = getContext().getTypeSize(Ty); - -+ // Ignore empty struct or union whose size is zero, e.g. `struct { }` in C or -+ // `struct { int a[0]; }` in C++. In C++, `struct { }` is empty but it's size -+ // is 1 byte and g++ doesn't ignore it; clang++ matches this behaviour. -+ if (isEmptyRecord(getContext(), Ty, true) && Size == 0) -+ return ABIArgInfo::getIgnore(); -+ - // Pass floating point values via FARs if possible. - if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() && - FRLen >= Size && FARsLeft) { -diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c -index d0daafac336e..281b7b15841a 100644 ---- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c -+++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c -@@ -93,7 +93,7 @@ struct s9 test_s9(struct s9 a) { - } - - // CHECK-C: define{{.*}} void @test_s10() --// CHECK-CXX: define{{.*}} void @_Z8test_s103s10() -+// CHECK-CXX: define{{.*}} i64 @_Z8test_s103s10(i64 {{.*}}) - struct s10 { }; - struct s10 test_s10(struct s10 a) { - return a; -@@ -128,14 +128,14 @@ struct s14 test_s14(struct s14 a) { - } - - // CHECK-C: define{{.*}} void @test_s15() --// CHECK-CXX: define{{.*}} void @_Z8test_s153s15() -+// CHECK-CXX: define{{.*}} i64 @_Z8test_s153s15(i64 {{.*}}) - struct s15 { int : 0; }; - struct s15 test_s15(struct s15 a) { - return a; - } - --// CHECK-C: define{{.*}} void @test_s16() --// CHECK-CXX: define{{.*}} void @_Z8test_s163s16() -+// CHECK-C: define{{.*}} i64 @test_s16(i64 {{.*}}) -+// CHECK-CXX: define{{.*}} i64 @_Z8test_s163s16(i64 {{.*}}) - struct s16 { int : 1; }; - struct s16 test_s16(struct s16 a) { - return a; --- -2.20.1 - - -From 028d0d88cd73c724f954577dc90cbbc2873a6832 Mon Sep 17 00:00:00 2001 -From: Weining Lu -Date: Thu, 2 Nov 2023 09:29:43 +0800 -Subject: [PATCH 5/9] [LoongArch] Pre-commit test for issue #70890 - -(cherry picked from commit 749083b91f31f370cf64831d3e7e6215b6d51442) ---- - .../LoongArch/abi-lp64d-empty-unions.c | 26 +++++++++++++++++++ - 1 file changed, 26 insertions(+) - create mode 100644 clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c - -diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c -new file mode 100644 -index 000000000000..b0607425336e ---- /dev/null -+++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c -@@ -0,0 +1,26 @@ -+// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - | \ -+// RUN: FileCheck --check-prefix=CHECK-C %s -+// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - -x c++ | \ -+// RUN: 
FileCheck --check-prefix=CHECK-CXX %s -+ -+#include -+ -+// CHECK-C: define{{.*}} void @test1() -+// CHECK-CXX: define{{.*}} i64 @_Z5test12u1(i64{{[^,]*}}) -+union u1 { }; -+union u1 test1(union u1 a) { -+ return a; -+} -+ -+struct s1 { -+ union u1 u; -+ int i; -+ float f; -+}; -+ -+// CHECK-C: define{{.*}} { i32, float } @test2(i32{{[^,]*}}, float{{[^,]*}}) -+/// FIXME: This doesn't match g++. -+// CHECK-CXX: define{{.*}} { i32, float } @_Z5test22s1(i32{{[^,]*}}, float{{[^,]*}}) -+struct s1 test2(struct s1 a) { -+ return a; -+} --- -2.20.1 - - -From 8c4371c0e53635a23852d0dc7025b4c48495277b Mon Sep 17 00:00:00 2001 -From: Lu Weining -Date: Sat, 4 Nov 2023 10:04:37 +0800 -Subject: [PATCH 6/9] [LoongArch] Fix ABI mismatch with g++ when handling empty - unions (#71025) - -In g++, empty unions are not ignored like empty structs when flattening -structs to examine whether the structs can be passed via FARs in C++. -This patch aligns clang++ with g++. - -Fix https://github.com/llvm/llvm-project/issues/70890. - -(cherry picked from commit 4253fdc2c462da61cc0deb74a43265665720c828) ---- - clang/lib/CodeGen/Targets/LoongArch.cpp | 7 ++++--- - clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c | 2 +- - clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c | 3 +-- - 3 files changed, 6 insertions(+), 6 deletions(-) - -diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp -index bc508a99da9c..63b9a1fdb988 100644 ---- a/clang/lib/CodeGen/Targets/LoongArch.cpp -+++ b/clang/lib/CodeGen/Targets/LoongArch.cpp -@@ -170,10 +170,11 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper( - // copy constructor are not eligible for the FP calling convention. - if (getRecordArgABI(Ty, CGT.getCXXABI())) - return false; -- if (isEmptyRecord(getContext(), Ty, true, true)) -- return true; - const RecordDecl *RD = RTy->getDecl(); -- // Unions aren't eligible unless they're empty (which is caught above). -+ if (isEmptyRecord(getContext(), Ty, true, true) && -+ (!RD->isUnion() || !isa(RD))) -+ return true; -+ // Unions aren't eligible unless they're empty in C (which is caught above). - if (RD->isUnion()) - return false; - const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); -diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c -index 281b7b15841a..2f7596f0ebdc 100644 ---- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c -+++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c -@@ -3,7 +3,7 @@ - // RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - -x c++ | \ - // RUN: FileCheck --check-prefix=CHECK-CXX %s - --// Fields containing empty structs or unions are ignored when flattening -+// Fields containing empty structs are ignored when flattening - // structs to examine whether the structs can be passed via FARs, even in C++. - // But there is an exception that non-zero-length array of empty structures are - // not ignored in C++. These rules are not documented in psABI -diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c -index b0607425336e..363e37efb646 100644 ---- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c -+++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c -@@ -19,8 +19,7 @@ struct s1 { - }; - - // CHECK-C: define{{.*}} { i32, float } @test2(i32{{[^,]*}}, float{{[^,]*}}) --/// FIXME: This doesn't match g++. 
--// CHECK-CXX: define{{.*}} { i32, float } @_Z5test22s1(i32{{[^,]*}}, float{{[^,]*}}) -+// CHECK-CXX: define{{.*}} [2 x i64] @_Z5test22s1([2 x i64]{{[^,]*}}) - struct s1 test2(struct s1 a) { - return a; - } --- -2.20.1 - - -From 8e855955a009ec398b9f7da88e980dae9d20c420 Mon Sep 17 00:00:00 2001 -From: Fangrui Song -Date: Tue, 14 Nov 2023 00:43:40 -0800 -Subject: [PATCH 7/9] [Driver] Default LoongArch to - -fno-direct-access-external-data for non-PIC (#72221) - -For -fno-pic, if an extern variable is defined in a DSO, a copy -relocation will be needed. However, loongarch*-linux does not and will -not support copy relocations. - -Change Driver to default to -fno-direct-access-external-data for -LoongArch && non-PIC. -Keep Frontend conditions unchanged (-fdirect-access-external-data || --fno-direct-access-external-data && PIC>0 => direct access). - -Fix #71645 - -(cherry picked from commit 47eeee297775347cbdb7624d6a766c2a3eec4a59) ---- - clang/lib/Driver/ToolChains/Clang.cpp | 7 ++++++- - clang/test/Driver/fdirect-access-external-data.c | 6 ++++++ - 2 files changed, 12 insertions(+), 1 deletion(-) - -diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp -index 6b5930990f11..b21aeaee7f5a 100644 ---- a/clang/lib/Driver/ToolChains/Clang.cpp -+++ b/clang/lib/Driver/ToolChains/Clang.cpp -@@ -5632,10 +5632,15 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, - // defaults to -fno-direct-access-external-data. Pass the option if different - // from the default. - if (Arg *A = Args.getLastArg(options::OPT_fdirect_access_external_data, -- options::OPT_fno_direct_access_external_data)) -+ options::OPT_fno_direct_access_external_data)) { - if (A->getOption().matches(options::OPT_fdirect_access_external_data) != - (PICLevel == 0)) - A->render(Args, CmdArgs); -+ } else if (PICLevel == 0 && Triple.isLoongArch()) { -+ // Some targets default to -fno-direct-access-external-data even for -+ // -fno-pic. -+ CmdArgs.push_back("-fno-direct-access-external-data"); -+ } - - if (Args.hasFlag(options::OPT_fno_plt, options::OPT_fplt, false)) { - CmdArgs.push_back("-fno-plt"); -diff --git a/clang/test/Driver/fdirect-access-external-data.c b/clang/test/Driver/fdirect-access-external-data.c -index f132b1b088af..a6da776e6977 100644 ---- a/clang/test/Driver/fdirect-access-external-data.c -+++ b/clang/test/Driver/fdirect-access-external-data.c -@@ -9,6 +9,12 @@ - // RUN: %clang -### -c -target aarch64 %s -fpic 2>&1 | FileCheck %s --check-prefix=DEFAULT - // RUN: %clang -### -c -target aarch64 %s -fpic -fdirect-access-external-data 2>&1 | FileCheck %s --check-prefix=DIRECT - -+/// loongarch* targets default to -fno-direct-access-external-data even for -fno-pic. 
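-+/// This avoids copy relocations, which loongarch*-linux does not support:
-+/// with direct access at -fno-pic, code like
-+///   extern int x;
-+///   int get(void) { return x; }
-+/// would need a copy relocation for `x` whenever `x` is defined in a DSO.
-+/// (`x` and `get` are illustrative names only.)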
-+// RUN: %clang -### -c --target=loongarch64 -fno-pic %s 2>&1 | FileCheck %s --check-prefix=INDIRECT -+// RUN: %clang -### -c --target=loongarch64 -fpie %s 2>&1 | FileCheck %s --check-prefix=DEFAULT -+// RUN: %clang -### -c --target=loongarch32 -fno-pic -fdirect-access-external-data %s 2>&1 | FileCheck %s --check-prefix=DEFAULT -+// RUN: %clang -### -c --target=loongarch32 -fpie -fdirect-access-external-data %s 2>&1 | FileCheck %s --check-prefix=DIRECT -+ - // DEFAULT-NOT: direct-access-external-data" - // DIRECT: "-fdirect-access-external-data" - // INDIRECT: "-fno-direct-access-external-data" --- -2.20.1 - - -From 29409970a5c68e20022a05457127102a66abfead Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Tue, 5 Mar 2024 19:44:28 +0800 -Subject: [PATCH 8/9] [Clang][LoongArch] Precommit test for fix wrong return - value type of __iocsrrd_h. NFC - -(cherry picked from commit aeda1a6e800e0dd6c91c0332b4db95094ad5b301) -(cherry picked from commit a9ba36c7e7d7fa076f201843e3b826b6c6d7f5ef) ---- - clang/test/CodeGen/LoongArch/intrinsic-la32.c | 29 ++++++++++++++----- - clang/test/CodeGen/LoongArch/intrinsic-la64.c | 21 ++++++++++++-- - 2 files changed, 40 insertions(+), 10 deletions(-) - -diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32.c b/clang/test/CodeGen/LoongArch/intrinsic-la32.c -index 93d54f511a9c..6a8d99880be3 100644 ---- a/clang/test/CodeGen/LoongArch/intrinsic-la32.c -+++ b/clang/test/CodeGen/LoongArch/intrinsic-la32.c -@@ -169,8 +169,8 @@ unsigned int cpucfg(unsigned int a) { - - // LA32-LABEL: @rdtime( - // LA32-NEXT: entry: --// LA32-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc !2 --// LA32-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !3 -+// LA32-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]] -+// LA32-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META3:![0-9]+]] - // LA32-NEXT: ret void - // - void rdtime() { -@@ -201,13 +201,28 @@ void loongarch_movgr2fcsr(int a) { - __builtin_loongarch_movgr2fcsr(1, a); - } - --// CHECK-LABEL: @cacop_w( --// CHECK-NEXT: entry: --// CHECK-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A:%.*]], i32 1024) --// CHECK-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A]], i32 1024) --// CHECK-NEXT: ret void -+// LA32-LABEL: @cacop_w( -+// LA32-NEXT: entry: -+// LA32-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A:%.*]], i32 1024) -+// LA32-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A]], i32 1024) -+// LA32-NEXT: ret void - // - void cacop_w(unsigned long int a) { - __cacop_w(1, a, 1024); - __builtin_loongarch_cacop_w(1, a, 1024); - } -+ -+// LA32-LABEL: @iocsrrd_h_result( -+// LA32-NEXT: entry: -+// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) -+// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) -+// LA32-NEXT: [[CONV2:%.*]] = and i32 [[TMP0]], 255 -+// LA32-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[CONV2]] -+// LA32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD]] to i16 -+// LA32-NEXT: ret i16 [[CONV4]] -+// -+unsigned short iocsrrd_h_result(unsigned int a) { -+ unsigned short b = __iocsrrd_h(a); -+ unsigned short c = __builtin_loongarch_iocsrrd_h(a); -+ return b+c; -+} -diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la64.c 
b/clang/test/CodeGen/LoongArch/intrinsic-la64.c -index a740882eef54..48b6a7a3d227 100644 ---- a/clang/test/CodeGen/LoongArch/intrinsic-la64.c -+++ b/clang/test/CodeGen/LoongArch/intrinsic-la64.c -@@ -387,7 +387,7 @@ unsigned int cpucfg(unsigned int a) { - - // CHECK-LABEL: @rdtime_d( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call { i64, i64 } asm sideeffect "rdtime.d $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc !2 -+// CHECK-NEXT: [[TMP0:%.*]] = tail call { i64, i64 } asm sideeffect "rdtime.d $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]] - // CHECK-NEXT: ret void - // - void rdtime_d() { -@@ -396,8 +396,8 @@ void rdtime_d() { - - // CHECK-LABEL: @rdtime( - // CHECK-NEXT: entry: --// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !3 --// CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !4 -+// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META3:![0-9]+]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META4:![0-9]+]] - // CHECK-NEXT: ret void - // - void rdtime() { -@@ -427,3 +427,18 @@ void loongarch_movgr2fcsr(int a) { - __movgr2fcsr(1, a); - __builtin_loongarch_movgr2fcsr(1, a); - } -+ -+// CHECK-LABEL: @iocsrrd_h_result( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) -+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) -+// CHECK-NEXT: [[CONV2:%.*]] = and i32 [[TMP0]], 255 -+// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[CONV2]] -+// CHECK-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD]] to i16 -+// CHECK-NEXT: ret i16 [[CONV4]] -+// -+unsigned short iocsrrd_h_result(unsigned int a) { -+ unsigned short b = __iocsrrd_h(a); -+ unsigned short c = __builtin_loongarch_iocsrrd_h(a); -+ return b+c; -+} --- -2.20.1 - - -From 47425dfdd1582ec652aba1c289f3a80fe25c1a8c Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Wed, 6 Mar 2024 10:03:28 +0800 -Subject: [PATCH 9/9] [Clang][LoongArch] Fix wrong return value type of - __iocsrrd_h (#84100) - -relate: -https: //gcc.gnu.org/pipermail/gcc-patches/2024-February/645016.html -(cherry picked from commit 2f479b811274fede36535e34ecb545ac22e399c3) -(cherry picked from commit 9b9aee16d4dcf1b4af49988ebd7918fa4ce77e44) ---- - clang/lib/Headers/larchintrin.h | 2 +- - clang/test/CodeGen/LoongArch/intrinsic-la32.c | 8 ++++---- - clang/test/CodeGen/LoongArch/intrinsic-la64.c | 8 ++++---- - 3 files changed, 9 insertions(+), 9 deletions(-) - -diff --git a/clang/lib/Headers/larchintrin.h b/clang/lib/Headers/larchintrin.h -index c5c533ee0b8c..24dd29ce91ff 100644 ---- a/clang/lib/Headers/larchintrin.h -+++ b/clang/lib/Headers/larchintrin.h -@@ -156,7 +156,7 @@ extern __inline unsigned char - return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); - } - --extern __inline unsigned char -+extern __inline unsigned short - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - __iocsrrd_h(unsigned int _1) { - return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); -diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32.c b/clang/test/CodeGen/LoongArch/intrinsic-la32.c -index 6a8d99880be3..eb3f8cbe7ac4 100644 ---- a/clang/test/CodeGen/LoongArch/intrinsic-la32.c -+++ 
b/clang/test/CodeGen/LoongArch/intrinsic-la32.c -@@ -215,11 +215,11 @@ void cacop_w(unsigned long int a) { - // LA32-LABEL: @iocsrrd_h_result( - // LA32-NEXT: entry: - // LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) -+// LA32-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP0]] to i16 - // LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) --// LA32-NEXT: [[CONV2:%.*]] = and i32 [[TMP0]], 255 --// LA32-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[CONV2]] --// LA32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD]] to i16 --// LA32-NEXT: ret i16 [[CONV4]] -+// LA32-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 -+// LA32-NEXT: [[CONV3:%.*]] = add i16 [[TMP2]], [[CONV_I]] -+// LA32-NEXT: ret i16 [[CONV3]] - // - unsigned short iocsrrd_h_result(unsigned int a) { - unsigned short b = __iocsrrd_h(a); -diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la64.c b/clang/test/CodeGen/LoongArch/intrinsic-la64.c -index 48b6a7a3d227..50ec358f546e 100644 ---- a/clang/test/CodeGen/LoongArch/intrinsic-la64.c -+++ b/clang/test/CodeGen/LoongArch/intrinsic-la64.c -@@ -431,11 +431,11 @@ void loongarch_movgr2fcsr(int a) { - // CHECK-LABEL: @iocsrrd_h_result( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) -+// CHECK-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP0]] to i16 - // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) --// CHECK-NEXT: [[CONV2:%.*]] = and i32 [[TMP0]], 255 --// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[CONV2]] --// CHECK-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD]] to i16 --// CHECK-NEXT: ret i16 [[CONV4]] -+// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 -+// CHECK-NEXT: [[CONV3:%.*]] = add i16 [[TMP2]], [[CONV_I]] -+// CHECK-NEXT: ret i16 [[CONV3]] - // - unsigned short iocsrrd_h_result(unsigned int a) { - unsigned short b = __iocsrrd_h(a); --- -2.20.1 - diff --git a/0010-Backport-clang-Avoid-Wshadow-warning-when-init-capture-named.patch b/0010-Backport-clang-Avoid-Wshadow-warning-when-init-capture-named.patch deleted file mode 100644 index f53e2b6c702bc7dfd91d8d9f0acf1f259eb746d1..0000000000000000000000000000000000000000 --- a/0010-Backport-clang-Avoid-Wshadow-warning-when-init-capture-named.patch +++ /dev/null @@ -1,255 +0,0 @@ -From 6804a15df7959a14b97d41dd8c069d2d695392e4 Mon Sep 17 00:00:00 2001 -From: Mariya Podchishchaeva -Date: Mon, 12 Feb 2024 12:44:20 +0300 -Subject: [PATCH] [clang] Avoid -Wshadow warning when init-capture named same - as class field (#74512) - -Shadowing warning doesn't make much sense since field is not available -in lambda's body without capturing this. - -Fixes https://github.com/llvm/llvm-project/issues/71976 - -(cherry picked from commit c13b7485b87909fcf739f62cfa382b55407433c0) ---- - clang/docs/ReleaseNotes.rst | 3 + - clang/include/clang/Sema/ScopeInfo.h | 4 +- - clang/lib/Sema/SemaDecl.cpp | 73 +++++++++------ - clang/test/SemaCXX/warn-shadow-in-lambdas.cpp | 92 ++++++++++++++++++- - 4 files changed, 141 insertions(+), 31 deletions(-) - -diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst -index a1143e14562e..5086a56eb496 100644 ---- a/clang/docs/ReleaseNotes.rst -+++ b/clang/docs/ReleaseNotes.rst -@@ -730,6 +730,9 @@ Bug Fixes in This Version - ``thread_local`` instead of ``_Thread_local``. - Fixes (`#70068 `_) and - (`#69167 `_) -+- Clang's ``-Wshadow`` no longer warns when an init-capture is named the same as -+ a class field unless the lambda can capture this. 
-+ Fixes (`#71976 `_) - - Bug Fixes to Compiler Builtins - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -diff --git a/clang/include/clang/Sema/ScopeInfo.h b/clang/include/clang/Sema/ScopeInfo.h -index 26c0387dfc44..f4e1dba4e20d 100644 ---- a/clang/include/clang/Sema/ScopeInfo.h -+++ b/clang/include/clang/Sema/ScopeInfo.h -@@ -915,8 +915,8 @@ public: - /// that were defined in parent contexts. Used to avoid warnings when the - /// shadowed variables are uncaptured by this lambda. - struct ShadowedOuterDecl { -- const VarDecl *VD; -- const VarDecl *ShadowedDecl; -+ const NamedDecl *VD; -+ const NamedDecl *ShadowedDecl; - }; - llvm::SmallVector ShadowingDecls; - -diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp -index 21b5781a71cd..5481bbd22c66 100644 ---- a/clang/lib/Sema/SemaDecl.cpp -+++ b/clang/lib/Sema/SemaDecl.cpp -@@ -8269,28 +8269,40 @@ void Sema::CheckShadow(NamedDecl *D, NamedDecl *ShadowedDecl, - - unsigned WarningDiag = diag::warn_decl_shadow; - SourceLocation CaptureLoc; -- if (isa(D) && isa(ShadowedDecl) && NewDC && -- isa(NewDC)) { -+ if (isa(D) && NewDC && isa(NewDC)) { - if (const auto *RD = dyn_cast(NewDC->getParent())) { - if (RD->isLambda() && OldDC->Encloses(NewDC->getLexicalParent())) { -- if (RD->getLambdaCaptureDefault() == LCD_None) { -- // Try to avoid warnings for lambdas with an explicit capture list. -+ if (const auto *VD = dyn_cast(ShadowedDecl)) { - const auto *LSI = cast(getCurFunction()); -- // Warn only when the lambda captures the shadowed decl explicitly. -- CaptureLoc = getCaptureLocation(LSI, cast(ShadowedDecl)); -- if (CaptureLoc.isInvalid()) -- WarningDiag = diag::warn_decl_shadow_uncaptured_local; -- } else { -- // Remember that this was shadowed so we can avoid the warning if the -- // shadowed decl isn't captured and the warning settings allow it. -+ if (RD->getLambdaCaptureDefault() == LCD_None) { -+ // Try to avoid warnings for lambdas with an explicit capture -+ // list. Warn only when the lambda captures the shadowed decl -+ // explicitly. -+ CaptureLoc = getCaptureLocation(LSI, VD); -+ if (CaptureLoc.isInvalid()) -+ WarningDiag = diag::warn_decl_shadow_uncaptured_local; -+ } else { -+ // Remember that this was shadowed so we can avoid the warning if -+ // the shadowed decl isn't captured and the warning settings allow -+ // it. -+ cast(getCurFunction()) -+ ->ShadowingDecls.push_back({D, VD}); -+ return; -+ } -+ } -+ if (isa(ShadowedDecl)) { -+ // If lambda can capture this, then emit default shadowing warning, -+ // Otherwise it is not really a shadowing case since field is not -+ // available in lambda's body. -+ // At this point we don't know that lambda can capture this, so -+ // remember that this was shadowed and delay until we know. - cast(getCurFunction()) -- ->ShadowingDecls.push_back( -- {cast(D), cast(ShadowedDecl)}); -+ ->ShadowingDecls.push_back({D, ShadowedDecl}); - return; - } - } -- -- if (cast(ShadowedDecl)->hasLocalStorage()) { -+ if (const auto *VD = dyn_cast(ShadowedDecl); -+ VD && VD->hasLocalStorage()) { - // A variable can't shadow a local variable in an enclosing scope, if - // they are separated by a non-capturing declaration context. - for (DeclContext *ParentDC = NewDC; -@@ -8337,19 +8349,28 @@ void Sema::CheckShadow(NamedDecl *D, NamedDecl *ShadowedDecl, - /// when these variables are captured by the lambda. 
- void Sema::DiagnoseShadowingLambdaDecls(const LambdaScopeInfo *LSI) { - for (const auto &Shadow : LSI->ShadowingDecls) { -- const VarDecl *ShadowedDecl = Shadow.ShadowedDecl; -+ const NamedDecl *ShadowedDecl = Shadow.ShadowedDecl; - // Try to avoid the warning when the shadowed decl isn't captured. -- SourceLocation CaptureLoc = getCaptureLocation(LSI, ShadowedDecl); - const DeclContext *OldDC = ShadowedDecl->getDeclContext(); -- Diag(Shadow.VD->getLocation(), CaptureLoc.isInvalid() -- ? diag::warn_decl_shadow_uncaptured_local -- : diag::warn_decl_shadow) -- << Shadow.VD->getDeclName() -- << computeShadowedDeclKind(ShadowedDecl, OldDC) << OldDC; -- if (!CaptureLoc.isInvalid()) -- Diag(CaptureLoc, diag::note_var_explicitly_captured_here) -- << Shadow.VD->getDeclName() << /*explicitly*/ 0; -- Diag(ShadowedDecl->getLocation(), diag::note_previous_declaration); -+ if (const auto *VD = dyn_cast(ShadowedDecl)) { -+ SourceLocation CaptureLoc = getCaptureLocation(LSI, VD); -+ Diag(Shadow.VD->getLocation(), -+ CaptureLoc.isInvalid() ? diag::warn_decl_shadow_uncaptured_local -+ : diag::warn_decl_shadow) -+ << Shadow.VD->getDeclName() -+ << computeShadowedDeclKind(ShadowedDecl, OldDC) << OldDC; -+ if (CaptureLoc.isValid()) -+ Diag(CaptureLoc, diag::note_var_explicitly_captured_here) -+ << Shadow.VD->getDeclName() << /*explicitly*/ 0; -+ Diag(ShadowedDecl->getLocation(), diag::note_previous_declaration); -+ } else if (isa(ShadowedDecl)) { -+ Diag(Shadow.VD->getLocation(), -+ LSI->isCXXThisCaptured() ? diag::warn_decl_shadow -+ : diag::warn_decl_shadow_uncaptured_local) -+ << Shadow.VD->getDeclName() -+ << computeShadowedDeclKind(ShadowedDecl, OldDC) << OldDC; -+ Diag(ShadowedDecl->getLocation(), diag::note_previous_declaration); -+ } - } - } - -diff --git a/clang/test/SemaCXX/warn-shadow-in-lambdas.cpp b/clang/test/SemaCXX/warn-shadow-in-lambdas.cpp -index bda6a65c0216..d54b394df4eb 100644 ---- a/clang/test/SemaCXX/warn-shadow-in-lambdas.cpp -+++ b/clang/test/SemaCXX/warn-shadow-in-lambdas.cpp -@@ -1,6 +1,6 @@ --// RUN: %clang_cc1 -std=c++14 -verify -fsyntax-only -Wshadow -D AVOID %s --// RUN: %clang_cc1 -std=c++14 -verify -fsyntax-only -Wshadow -Wshadow-uncaptured-local %s --// RUN: %clang_cc1 -std=c++14 -verify -fsyntax-only -Wshadow-all %s -+// RUN: %clang_cc1 -std=c++14 -verify=expected,cxx14 -fsyntax-only -Wshadow -D AVOID %s -+// RUN: %clang_cc1 -std=c++14 -verify=expected,cxx14 -fsyntax-only -Wshadow -Wshadow-uncaptured-local %s -+// RUN: %clang_cc1 -std=c++14 -verify=expected,cxx14 -fsyntax-only -Wshadow-all %s - // RUN: %clang_cc1 -std=c++17 -verify -fsyntax-only -Wshadow-all %s - // RUN: %clang_cc1 -std=c++20 -verify -fsyntax-only -Wshadow-all %s - -@@ -179,3 +179,89 @@ void f() { - #endif - } - } -+ -+namespace GH71976 { -+#ifdef AVOID -+struct A { -+ int b = 5; -+ int foo() { -+ return [b = b]() { return b; }(); // no -Wshadow diagnostic, init-capture does not shadow b due to not capturing this -+ } -+}; -+ -+struct B { -+ int a; -+ void foo() { -+ auto b = [a = this->a] {}; // no -Wshadow diagnostic, init-capture does not shadow a due to not capturing his -+ } -+}; -+ -+struct C { -+ int b = 5; -+ int foo() { -+ return [a = b]() { -+ return [=, b = a]() { // no -Wshadow diagnostic, init-capture does not shadow b due to outer lambda -+ return b; -+ }(); -+ }(); -+ } -+}; -+ -+#else -+struct A { -+ int b = 5; // expected-note {{previous}} -+ int foo() { -+ return [b = b]() { return b; }(); // expected-warning {{declaration shadows a field}} -+ } -+}; -+ -+struct B { -+ int a; // 
expected-note {{previous}} -+ void foo() { -+ auto b = [a = this->a] {}; // expected-warning {{declaration shadows a field}} -+ } -+}; -+ -+struct C { -+ int b = 5; // expected-note {{previous}} -+ int foo() { -+ return [a = b]() { -+ return [=, b = a]() { // expected-warning {{declaration shadows a field}} -+ return b; -+ }(); -+ }(); -+ } -+}; -+ -+struct D { -+ int b = 5; // expected-note {{previous}} -+ int foo() { -+ return [b = b, this]() { return b; }(); // expected-warning {{declaration shadows a field}} -+ } -+}; -+ -+struct E { -+ int b = 5; -+ int foo() { -+ return [a = b]() { // expected-note {{previous}} -+ return [=, a = a]() { // expected-warning {{shadows a local}} -+ return a; -+ }(); -+ }(); -+ } -+}; -+ -+#endif -+ -+struct S { -+ int a ; -+}; -+ -+int foo() { -+ auto [a] = S{0}; // expected-note {{previous}} \ -+ // cxx14-warning {{decomposition declarations are a C++17 extension}} -+ [a = a] () { // expected-warning {{declaration shadows a structured binding}} -+ }(); -+} -+ -+} --- -2.33.0 - diff --git a/0011-Add-the-support-for-classic-flang.patch b/0011-Add-the-support-for-classic-flang.patch deleted file mode 100644 index ec80f313f8d4b22cc1cd0cf453ddef1d6dfc338b..0000000000000000000000000000000000000000 --- a/0011-Add-the-support-for-classic-flang.patch +++ /dev/null @@ -1,4031 +0,0 @@ -From 3d36d3891f2a654d5ec2a0ce3c2ff47b597d1c42 Mon Sep 17 00:00:00 2001 -From: luofeng14 -Date: Mon, 11 Mar 2024 21:25:03 +0800 -Subject: [PATCH] sync classic flang patch - ---- - clang/CMakeLists.txt | 5 + - clang/include/clang/Basic/CodeGenOptions.h | 3 + - .../clang/Basic/DiagnosticDriverKinds.td | 6 + - clang/include/clang/Basic/MacroBuilder.h | 3 +- - clang/include/clang/Basic/Sanitizers.def | 2 + - clang/include/clang/Driver/CMakeLists.txt | 13 +- - clang/include/clang/Driver/Options.td | 197 ++- - clang/include/clang/Driver/ToolChain.h | 17 + - clang/include/clang/Driver/Types.def | 7 + - clang/include/clang/Driver/Types.h | 8 + - clang/include/clang/Frontend/Utils.h | 32 + - clang/lib/CodeGen/BackendUtil.cpp | 6 + - clang/lib/CodeGen/CGDebugInfo.cpp | 10 +- - clang/lib/Driver/CMakeLists.txt | 8 +- - clang/lib/Driver/Driver.cpp | 60 +- - clang/lib/Driver/OffloadBundler.cpp | 4 + - clang/lib/Driver/ToolChain.cpp | 36 +- - clang/lib/Driver/ToolChains/Clang.cpp | 60 + - clang/lib/Driver/ToolChains/ClassicFlang.cpp | 1217 +++++++++++++++++ - clang/lib/Driver/ToolChains/ClassicFlang.h | 49 + - clang/lib/Driver/ToolChains/CommonArgs.cpp | 69 +- - clang/lib/Driver/ToolChains/CommonArgs.h | 20 + - clang/lib/Driver/ToolChains/Cuda.cpp | 35 + - clang/lib/Driver/ToolChains/Cuda.h | 5 + - clang/lib/Driver/ToolChains/Darwin.cpp | 4 + - clang/lib/Driver/ToolChains/Gnu.cpp | 4 + - clang/lib/Driver/ToolChains/Linux.cpp | 190 +++ - clang/lib/Driver/ToolChains/Linux.h | 5 + - clang/lib/Driver/ToolChains/MSVC.cpp | 115 ++ - clang/lib/Driver/ToolChains/MSVC.h | 11 + - clang/lib/Driver/ToolChains/MinGW.cpp | 4 + - clang/lib/Driver/Types.cpp | 50 + - clang/lib/Frontend/InitPreprocessor.cpp | 20 - - clang/test/CMakeLists.txt | 1 + - .../test/CodeGen/libpgmath-logfun-aarch64.ll | 58 + - clang/test/CodeGen/libpgmath-logfun-x86_64.ll | 57 + - clang/test/Driver/autocomplete.c | 2 +- - clang/test/Driver/emit-flang-attrs.f90 | 58 + - .../test/Driver/flang/Inputs/llvm-ir-input.ll | 0 - .../flang/classic-flang-emit-flang-llvm.f95 | 10 + - .../flang/classic-flang-fp-contract.f95 | 27 + - .../flang/classic-flang-must-preprocess.F | 12 + - .../flang/classic-flang-must-preprocess.F95 | 12 + - 
.../test/Driver/flang/classic-flang-version.f | 3 + - .../flang/classic-flang-vscale-mbits.f95 | 28 + - .../Driver/flang/classic-flang-vscale.f95 | 28 + - clang/test/Driver/flang/classic-flang.f | 26 + - clang/test/Driver/flang/classic-flang.f95 | 120 ++ - clang/test/Driver/flang/flang.f90 | 2 + - clang/test/Driver/flang/flang_ucase.F90 | 2 + - clang/test/Driver/flang/llvm-ir-input.f | 7 + - .../Driver/flang/multiple-inputs-mixed.f90 | 2 + - clang/test/Driver/flang/multiple-inputs.f90 | 2 + - clang/test/Driver/flang/reassoc.f90 | 59 + - clang/test/Driver/fortran-phases.f90 | 119 ++ - clang/test/Driver/fortran-preprocessor.f90 | 48 + - clang/test/Driver/fortran.f95 | 2 + - clang/test/Driver/gfortran.f90 | 1 + - clang/test/Driver/lit.local.cfg | 3 + - clang/test/lit.cfg.py | 3 + - clang/test/lit.site.cfg.py.in | 1 + - clang/tools/driver/CMakeLists.txt | 2 +- - 62 files changed, 2933 insertions(+), 37 deletions(-) - create mode 100644 clang/lib/Driver/ToolChains/ClassicFlang.cpp - create mode 100644 clang/lib/Driver/ToolChains/ClassicFlang.h - mode change 100644 => 100755 clang/lib/Driver/Types.cpp - create mode 100644 clang/test/CodeGen/libpgmath-logfun-aarch64.ll - create mode 100644 clang/test/CodeGen/libpgmath-logfun-x86_64.ll - create mode 100644 clang/test/Driver/emit-flang-attrs.f90 - create mode 100644 clang/test/Driver/flang/Inputs/llvm-ir-input.ll - create mode 100644 clang/test/Driver/flang/classic-flang-emit-flang-llvm.f95 - create mode 100644 clang/test/Driver/flang/classic-flang-fp-contract.f95 - create mode 100644 clang/test/Driver/flang/classic-flang-must-preprocess.F - create mode 100644 clang/test/Driver/flang/classic-flang-must-preprocess.F95 - create mode 100644 clang/test/Driver/flang/classic-flang-version.f - create mode 100644 clang/test/Driver/flang/classic-flang-vscale-mbits.f95 - create mode 100644 clang/test/Driver/flang/classic-flang-vscale.f95 - create mode 100644 clang/test/Driver/flang/classic-flang.f - create mode 100644 clang/test/Driver/flang/classic-flang.f95 - create mode 100644 clang/test/Driver/flang/llvm-ir-input.f - create mode 100644 clang/test/Driver/flang/reassoc.f90 - create mode 100644 clang/test/Driver/fortran-phases.f90 - create mode 100644 clang/test/Driver/fortran-preprocessor.f90 - -diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt -index d558b0522e82..949f12d3ce8c 100644 ---- a/clang/CMakeLists.txt -+++ b/clang/CMakeLists.txt -@@ -392,6 +392,11 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) - endif() - endif() - -+option(LLVM_ENABLE_CLASSIC_FLANG "Build support for classic Flang instead of the new built-in Flang" OFF) -+if(LLVM_ENABLE_CLASSIC_FLANG) -+ add_definitions( -DENABLE_CLASSIC_FLANG ) -+endif() -+ - option(CLANG_BUILD_TOOLS - "Build the Clang tools. If OFF, just generate build targets." ON) - -diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h -index 14fc94fe27f9..41577b97e030 100644 ---- a/clang/include/clang/Basic/CodeGenOptions.h -+++ b/clang/include/clang/Basic/CodeGenOptions.h -@@ -59,6 +59,9 @@ public: - Accelerate, // Use the Accelerate framework. - LIBMVEC, // GLIBC vector math library. - MASSV, // IBM MASS vector library. -+#ifdef ENABLE_CLASSIC_FLANG -+ PGMATH, // PGI math library. -+#endif - SVML, // Intel short vector math library. - SLEEF, // SLEEF SIMD Library for Evaluating Elementary Functions. - Darwin_libsystem_m, // Use Darwin's libsytem_m vector functions. 
-diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td -index 1b69324d073a..09a1949d7596 100644 ---- a/clang/include/clang/Basic/DiagnosticDriverKinds.td -+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td -@@ -131,6 +131,10 @@ def err_drv_invalid_linker_name : Error< - "invalid linker name in argument '%0'">; - def err_drv_invalid_rtlib_name : Error< - "invalid runtime library name in argument '%0'">; -+def err_drv_invalid_allocatable_mode : Error< -+ "invalid semantic mode for assignments to allocatables in argument '%0'">; -+def err_drv_unsupported_fixed_line_length : Error< -+ "unsupported fixed-format line length in argument '%0'">; - def err_drv_unsupported_rtlib_for_platform : Error< - "unsupported runtime library '%0' for platform '%1'">; - def err_drv_invalid_unwindlib_name : Error< -@@ -354,6 +358,8 @@ def err_drv_negative_columns : Error< - "invalid value '%1' in '%0', value must be 'none' or a positive integer">; - def err_drv_small_columns : Error< - "invalid value '%1' in '%0', value must be '%2' or greater">; -+def err_drv_clang_unsupported_minfo_arg : Error< -+ "'%0' option does not support '%1' value">; - - def err_drv_invalid_malign_branch_EQ : Error< - "invalid argument '%0' to -malign-branch=; each element must be one of: %1">; -diff --git a/clang/include/clang/Basic/MacroBuilder.h b/clang/include/clang/Basic/MacroBuilder.h -index 96e67cbbfa3f..bfc5e38c15e3 100644 ---- a/clang/include/clang/Basic/MacroBuilder.h -+++ b/clang/include/clang/Basic/MacroBuilder.h -@@ -24,9 +24,10 @@ class MacroBuilder { - raw_ostream &Out; - public: - MacroBuilder(raw_ostream &Output) : Out(Output) {} -+ virtual ~MacroBuilder() {} - - /// Append a \#define line for macro of the form "\#define Name Value\n". 
-- void defineMacro(const Twine &Name, const Twine &Value = "1") { -+ virtual void defineMacro(const Twine &Name, const Twine &Value = "1") { - Out << "#define " << Name << ' ' << Value << '\n'; - } - -diff --git a/clang/include/clang/Basic/Sanitizers.def b/clang/include/clang/Basic/Sanitizers.def -index c2137e3f61f6..4f0a2bf332ef 100644 ---- a/clang/include/clang/Basic/Sanitizers.def -+++ b/clang/include/clang/Basic/Sanitizers.def -@@ -107,6 +107,8 @@ SANITIZER("signed-integer-overflow", SignedIntegerOverflow) - SANITIZER("unreachable", Unreachable) - SANITIZER("vla-bound", VLABound) - SANITIZER("vptr", Vptr) -+// fortran contiguous pointer checks -+SANITIZER("discontiguous", Discontiguous) - - // IntegerSanitizer - SANITIZER("unsigned-integer-overflow", UnsignedIntegerOverflow) -diff --git a/clang/include/clang/Driver/CMakeLists.txt b/clang/include/clang/Driver/CMakeLists.txt -index ea55ba0f1f27..8c0af1528a96 100644 ---- a/clang/include/clang/Driver/CMakeLists.txt -+++ b/clang/include/clang/Driver/CMakeLists.txt -@@ -1,7 +1,14 @@ - set(LLVM_TARGET_DEFINITIONS Options.td) -+ -+set(CLANG_DRIVER_OPTIONS) - if (BUILD_FOR_OPENEULER) --tablegen(LLVM Options.inc -gen-opt-parser-defs -DBUILD_FOR_OPENEULER) --else() --tablegen(LLVM Options.inc -gen-opt-parser-defs) -+ list(APPEND CLANG_DRIVER_OPTIONS -DBUILD_FOR_OPENEULER ) - endif() -+ -+if (LLVM_ENABLE_CLASSIC_FLANG) -+ list(APPEND CLANG_DRIVER_OPTIONS -DENABLE_CLASSIC_FLANG ) -+endif() -+ -+tablegen(LLVM Options.inc ${CLANG_DRIVER_OPTIONS} -gen-opt-parser-defs ) -+ - add_public_tablegen_target(ClangDriverOptions) -diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td -index d4f7315bf8cb..365285966a67 100644 ---- a/clang/include/clang/Driver/Options.td -+++ b/clang/include/clang/Driver/Options.td -@@ -590,7 +590,11 @@ class InternalDriverOpt : Group, - Flags<[NoXarchOption, HelpHidden]>; - def driver_mode : Joined<["--"], "driver-mode=">, Group, - Flags<[CoreOption, NoXarchOption, HelpHidden]>, -+#ifdef ENABLE_CLASSIC_FLANG -+ HelpText<"Set the driver mode to one of: 'gcc', 'g++', 'cpp', 'cl', or 'flang'">; -+#else - HelpText<"Set the driver mode to either 'gcc', 'g++', 'cpp', or 'cl'">; -+#endif - def rsp_quoting : Joined<["--"], "rsp-quoting=">, Group, - Flags<[CoreOption, NoXarchOption, HelpHidden]>, - HelpText<"Set the rsp quoting to either 'posix', or 'windows'">; -@@ -1183,6 +1187,10 @@ def d_Flag : Flag<["-"], "d">, Group; - def d_Joined : Joined<["-"], "d">, Group; - def emit_ast : Flag<["-"], "emit-ast">, Flags<[CoreOption]>, - HelpText<"Emit Clang AST files for source inputs">; -+#ifdef ENABLE_CLASSIC_FLANG -+def emit_flang_llvm : Flag<["-"], "emit-flang-llvm">, -+ HelpText<"Emit Flang LLVM files for source inputs">; -+#endif - def emit_llvm : Flag<["-"], "emit-llvm">, Flags<[CC1Option, FC1Option, FlangOption]>, Group, - HelpText<"Use the LLVM representation for assembler and object files">; - def emit_interface_stubs : Flag<["-"], "emit-interface-stubs">, Flags<[CC1Option]>, Group, -@@ -2640,10 +2648,17 @@ def fno_experimental_isel : Flag<["-"], "fno-experimental-isel">, Group; - def fveclib : Joined<["-"], "fveclib=">, Group, Flags<[CC1Option]>, - HelpText<"Use the given vector functions library">, -+#ifdef ENABLE_CLASSIC_FLANG -+ Values<"Accelerate,libmvec,MASSV,PGMATH,SVML,SLEEF,Darwin_libsystem_m,ArmPL,none">, -+ NormalizedValuesScope<"CodeGenOptions">, -+ NormalizedValues<["Accelerate", "LIBMVEC", "MASSV", "PGMATH", "SVML", "SLEEF", -+ "Darwin_libsystem_m", "ArmPL", "NoLibrary"]>, -+#else - 
Values<"Accelerate,libmvec,MASSV,SVML,SLEEF,Darwin_libsystem_m,ArmPL,none">, - NormalizedValuesScope<"CodeGenOptions">, - NormalizedValues<["Accelerate", "LIBMVEC", "MASSV", "SVML", "SLEEF", - "Darwin_libsystem_m", "ArmPL", "NoLibrary"]>, -+#endif - MarshallingInfoEnum, "NoLibrary">; - def fno_lax_vector_conversions : Flag<["-"], "fno-lax-vector-conversions">, Group, - Alias, AliasArgs<["none"]>; -@@ -5382,6 +5397,20 @@ def module_dir : JoinedOrSeparate<["-"], "module-dir">, MetaVarName<"">, - It is also added to the list of directories to be searched by an USE statement. - The default is the current directory.}]>; - -+#ifdef ENABLE_CLASSIC_FLANG -+// Define a group for Fortran source format options. -+def fortran_format_Group : OptionGroup<"Fortran format Group">, Group; -+def ffixed_form : Flag<["-"], "ffixed-form">, Group, -+ HelpText<"Process source files in fixed form">; -+def fno_fixed_form : Flag<["-"], "fno-fixed-form">, Group, -+ HelpText<"Disable fixed-form format for Fortran">; -+def ffree_form : Flag<["-"], "ffree-form">, Group, -+ HelpText<"Process source files in free form">; -+def fno_free_form : Flag<["-"], "fno-free-form">, Group, -+ HelpText<"Disable free-form format for Fortran">; -+def ffixed_line_length_VALUE : Joined<["-"], "ffixed-line-length-">, Group, -+ HelpText<"Set line length in fixed-form format Fortran, current supporting only 72 and 132 characters">; -+#else - def ffixed_form : Flag<["-"], "ffixed-form">, Group, - HelpText<"Process source files in fixed form">; - def ffree_form : Flag<["-"], "ffree-form">, Group, -@@ -5391,6 +5420,7 @@ def ffixed_line_length_EQ : Joined<["-"], "ffixed-line-length=">, Group - DocBrief<[{Set column after which characters are ignored in typical fixed-form lines in the source - file}]>; - def ffixed_line_length_VALUE : Joined<["-"], "ffixed-line-length-">, Group, Alias; -+#endif - def fconvert_EQ : Joined<["-"], "fconvert=">, Group, - HelpText<"Set endian conversion of data for unformatted files">; - def fopenacc : Flag<["-"], "fopenacc">, Group, -@@ -5399,8 +5429,14 @@ def fdefault_double_8 : Flag<["-"],"fdefault-double-8">, Group, - HelpText<"Set the default double precision kind to an 8 byte wide type">; - def fdefault_integer_8 : Flag<["-"],"fdefault-integer-8">, Group, - HelpText<"Set the default integer and logical kind to an 8 byte wide type">; -+#ifdef ENABLE_CLASSIC_FLANG -+def fno_default_integer_8 : Flag<["-"], "fno-default-integer-8">, Group; -+#endif - def fdefault_real_8 : Flag<["-"],"fdefault-real-8">, Group, - HelpText<"Set the default real kind to an 8 byte wide type">; -+#ifdef ENABLE_CLASSIC_FLANG -+def fno_default_real_8 : Flag<["-"], "fno-default-real-8">, Group; -+#endif - def flarge_sizes : Flag<["-"],"flarge-sizes">, Group, - HelpText<"Use INTEGER(KIND=8) for the result type in size-related intrinsics">; - -@@ -5410,7 +5446,17 @@ def fintrinsic_modules_path : Separate<["-"], "fintrinsic-modules-path">, Group - HelpText<"Specify where to find the compiled intrinsic modules">, - DocBrief<[{This option specifies the location of pre-compiled intrinsic modules, - if they are not in the default location expected by the compiler.}]>; -- -+#ifdef ENABLE_CLASSIC_FLANG -+def fbackslash : Flag<["-"], "fbackslash">, Group, -+ HelpText<"Specify that backslash in string introduces an escape character">, -+ DocBrief<[{Change the interpretation of backslashes in string literals from -+a single backslash character to "C-style" escape characters.}]>; -+def fno_backslash : Flag<["-"], "fno-backslash">, Group; -+// Add the 
options -f(no-)implicit-none and -f(no-)automatic for compatibility -+// reason. They are not implemented yet in Classic Flang for now. -+defm implicit_none : BooleanFFlag<"implicit-none">, Group; -+def fno_automatic : Flag<["-"], "fno-automatic">, Group; -+#else - defm backslash : OptInFC1FFlag<"backslash", "Specify that backslash in string introduces an escape character">; - defm xor_operator : OptInFC1FFlag<"xor-operator", "Enable .XOR. as a synonym of .NEQV.">; - defm logical_abbreviations : OptInFC1FFlag<"logical-abbreviations", "Enable logical abbreviations">; -@@ -5419,6 +5465,7 @@ defm underscoring : OptInFC1FFlag<"underscoring", "Appends one trailing undersco - - def fno_automatic : Flag<["-"], "fno-automatic">, Group, - HelpText<"Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE">; -+#endif - - defm stack_arrays : BoolOptionWithoutMarshalling<"f", "stack-arrays", - PosFlag, -@@ -5426,12 +5473,19 @@ defm stack_arrays : BoolOptionWithoutMarshalling<"f", "stack-arrays", - defm loop_versioning : BoolOptionWithoutMarshalling<"f", "version-loops-for-stride", - PosFlag, - NegFlag>; -+ - } // let Flags = [FC1Option, FlangOption, FlangOnlyOption] - -+#ifdef ENABLE_CLASSIC_FLANG -+def J : JoinedOrSeparate<["-"], "J">, -+ Flags<[RenderJoined]>, -+ Group; -+#else - def J : JoinedOrSeparate<["-"], "J">, - Flags<[RenderJoined, FlangOption, FC1Option, FlangOnlyOption]>, - Group, - Alias; -+#endif - - //===----------------------------------------------------------------------===// - // FC1 Options -@@ -7458,6 +7512,147 @@ def fcgl : DXCFlag<"fcgl">, Alias; - def enable_16bit_types : DXCFlag<"enable-16bit-types">, Alias, - HelpText<"Enable 16-bit types and disable min precision types." - "Available in HLSL 2018 and shader model 6.2.">; -+ -+#ifdef ENABLE_CLASSIC_FLANG -+// Classic Flang options that we recognize in the driver and pass along when -+// invoking flang1/flang2 to compile Fortran code. 
-+def flang_rt_Group : OptionGroup<"Flang runtime library Group">; -+def pgi_fortran_Group : OptionGroup<"PGI Fortran compatibility Group">, -+ Flags<[HelpHidden]>; -+ -+// Classic Flang-specific options -+multiclass BooleanKFlag { -+ def _on : Flag<["-"], "K"#name>; -+ def _off : Flag<["-"], "Kno"#name>; -+} -+ -+multiclass BooleanMFlag { -+ def _on : Flag<["-"], "M"#name>; -+ def _off : Flag<["-"], "Mno"#name>; -+} -+ -+def Mfixed : Flag<["-"], "Mfixed">, Group, -+ HelpText<"Force fixed-form format Fortran">, -+ Flags<[HelpHidden]>; -+def Mfree_on: Flag<["-"], "Mfree">, Group, -+ HelpText<"Enable free-form format for Fortran">, -+ Flags<[HelpHidden]>; -+def Mfree_off: Flag<["-"], "Mnofree">, Group, -+ HelpText<"Disable free-form format for Fortran">, -+ Flags<[HelpHidden]>; -+def Mfreeform_on: Flag<["-"], "Mfreeform">, Group, -+ HelpText<"Enable free-form format for Fortran">, -+ Flags<[HelpHidden]>; -+def Mfreeform_off: Flag<["-"], "Mnofreeform">, Group, -+ HelpText<"Disable free-form format for Fortran">, -+ Flags<[HelpHidden]>; -+ -+def Minfo_EQ : CommaJoined<["-"], "Minfo=">, -+ HelpText<"Diagnostic information about successful optimizations">, -+ Values<"all,vect,inline">; -+def Minfoall : Flag<["-"], "Minfo">, -+ HelpText<"Diagnostic information about all successful optimizations">; -+def Mneginfo_EQ : CommaJoined<["-"], "Mneginfo=">, -+ HelpText<"Diagnostic information about missed optimizations">, -+ Values<"all,vect,inline">; -+def Mneginfoall : Flag<["-"], "Mneginfo">, -+ HelpText<"Diagnostic information about all missed optimizations">; -+ -+def Mipa: Joined<["-"], "Mipa">, Group; -+def Mstackarrays: Joined<["-"], "Mstack_arrays">, Group; -+def pc: JoinedOrSeparate<["-"], "pc">, Group; -+def Mfprelaxed: Joined<["-"], "Mfprelaxed">, Group; -+def Mnofprelaxed: Joined<["-"], "Mnofprelaxed">, Group; -+defm Mstride0: BooleanMFlag<"stride0">, Group; -+defm Mrecursive: BooleanMFlag<"recursive">, Group; -+defm Mreentrant: BooleanMFlag<"reentrant">, Group; -+defm Mbounds: BooleanMFlag<"bounds">, Group; -+def Mdaz_on: Flag<["-"], "Mdaz">, Group, -+ HelpText<"Treat denormalized numbers as zero">; -+def Mdaz_off: Flag<["-"], "Mnodaz">, Group, -+ HelpText<"Disable treating denormalized numbers as zero">; -+def Kieee_on : Flag<["-"], "Kieee">, Group, -+ HelpText<"Enable IEEE division">; -+def Kieee_off : Flag<["-"], "Knoieee">, Group, -+ HelpText<"Disable IEEE division">; -+def Mextend : Flag<["-"], "Mextend">, Group, -+ HelpText<"Allow lines up to 132 characters in Fortran sources">; -+def Mpreprocess : Flag<["-"], "Mpreprocess">, Group, -+ HelpText<"Preprocess Fortran files">; -+def Mstandard: Flag<["-"], "Mstandard">, Group, -+ HelpText<"Check Fortran standard conformance">; -+def Mchkptr: Flag<["-"], "Mchkptr">, Group; -+def Mwritable_constants: Flag<["-"], "Mwritable-constants">, Group, -+ HelpText<"Store constants in the writable data segment">; -+defm Minline: BooleanMFlag<"inline">, Group; -+def fma: Flag<["-"], "fma">, Group, -+ HelpText<"Enable generation of FMA instructions">; -+def nofma: Flag<["-"], "nofma">, Group, -+ HelpText<"Disable generation of FMA instructions">; -+defm Mfma: BooleanMFlag<"fma">, Group, -+ HelpText<"Enable generation of FMA instructions">; -+def mp: Flag<["-"], "mp">, Group, -+ HelpText<"Enable OpenMP">; -+def nomp: Flag<["-"], "nomp">, Group, -+ HelpText<"Do not link with OpenMP library libomp">; -+def Mflushz_on: Flag<["-"], "Mflushz">, Group, -+ HelpText<"Set SSE to flush-to-zero mode">; -+def Mflushz_off: Flag<["-"], "Mnoflushz">, Group, -+ 
HelpText<"Disabling setting SSE to flush-to-zero mode">; -+def Msave_on: Flag<["-"], "Msave">, Group, -+ HelpText<"Assume all Fortran variables have SAVE attribute">; -+def Msave_off: Flag<["-"], "Mnosave">, Group, -+ HelpText<"Assume no Fortran variables have SAVE attribute">; -+def Mcache_align_on: Flag<["-"], "Mcache_align">, Group, -+ HelpText<"Align large objects on cache-line boundaries">; -+def Mcache_align_off: Flag<["-"], "Mnocache_align">, Group, -+ HelpText<"Disable aligning large objects on cache-line boundaries">; -+def ModuleDir : Separate<["-"], "module">, Group, -+ HelpText<"Fortran module path">; -+def Minform_EQ : Joined<["-"], "Minform=">, -+ HelpText<"Set error level of messages to display">; -+def Mallocatable_EQ : Joined<["-"], "Mallocatable=">, -+ HelpText<"Select semantics for assignments to allocatables (F03 or F95)">; -+def Mbyteswapio: Flag<["-"], "Mbyteswapio">, Group, -+ HelpText<"Swap byte-order for unformatted input/output">; -+def byteswapio: Flag<["-"], "byteswapio">, Group, -+ HelpText<"Swap byte-order for unformatted input/output">; -+def Mbackslash: Flag<["-"], "Mbackslash">, Group, -+ HelpText<"Treat backslash like any other character in character strings">; -+def Mnobackslash: Flag<["-"], "Mnobackslash">, Group, -+ HelpText<"Treat backslash as C-style escape character">; -+def staticFlangLibs: Flag<["-"], "static-flang-libs">, Group, -+ HelpText<"Link using static Flang libraries">; -+def noFlangLibs: Flag<["-"], "no-flang-libs">, Group, -+ HelpText<"Do not link against Flang libraries">; -+def r8: Flag<["-"], "r8">, Group, -+ HelpText<"Treat REAL as REAL*8">; -+def i8: Flag<["-"], "i8">, Group, -+ HelpText<"Treat INTEGER and LOGICAL as INTEGER*8 and LOGICAL*8">; -+def no_fortran_main: Flag<["-"], "fno-fortran-main">, Group, -+ HelpText<"Don't link in Fortran main">; -+def Mnomain: Flag<["-"], "Mnomain">, Group, -+ HelpText<"Don't link in Fortran main">; -+def frelaxed_math : Flag<["-"], "frelaxed-math">, Group, -+ HelpText<"Use relaxed Math intrinsic functions">; -+def Memit_dwarf_common_blocks_name: Flag<["-"], "Memit-dwarf-common-blocks-name">, -+ Group, HelpText<"Emit COMMON blocks name in DWARF">; -+def Munixlogical: Flag<["-"], "Munixlogical">, Group, -+ HelpText<"Use unixlogical for all loigical operations">; -+ -+// Flang internal debug options -+def Mx_EQ : Joined<["-"], "Mx,">, Group; -+def My_EQ : Joined<["-"], "My,">, Group; -+def Hx_EQ : Joined<["-"], "Hx,">, Group; -+def Hy_EQ : Joined<["-"], "Hy,">, Group; -+def Wm_EQ : Joined<["-"], "Wm,">, Group; -+ -+def Mq_EQ : Joined<["-"], "Mq,">, Group; -+def Hq_EQ : Joined<["-"], "Hq,">, Group; -+def Mqq_EQ : Joined<["-"], "Mqq,">, Group; -+def Hqq_EQ : Joined<["-"], "Hqq,">, Group; -+def Wh_EQ : Joined<["-"], "Wh,">, Group; -+#endif - def hlsl_entrypoint : Option<["-"], "hlsl-entry", KIND_SEPARATE>, - Group, - Flags<[CC1Option]>, -diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h -index 2e74507f7126..735e72491488 100644 ---- a/clang/include/clang/Driver/ToolChain.h -+++ b/clang/include/clang/Driver/ToolChain.h -@@ -651,6 +651,16 @@ public: - AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args) const; - -+#ifdef ENABLE_CLASSIC_FLANG -+ /// \brief Add the flang arguments for system include paths. -+ /// -+ /// This routine is responsible for adding the -stdinc argument to -+ /// include headers and module files from standard system header directories. 
-+ virtual void -+ AddFlangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, -+ llvm::opt::ArgStringList &Flang1Args) const { } -+#endif -+ - /// Add options that need to be passed to cc1 for this target. - virtual void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, -@@ -757,6 +767,13 @@ public: - virtual void AddHIPRuntimeLibArgs(const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs) const {} - -+#ifdef ENABLE_CLASSIC_FLANG -+ /// AddFortranStdlibLibArgs - Add the system specific linker arguments to use -+ /// for the given Fortran runtime library type. -+ virtual void AddFortranStdlibLibArgs(const llvm::opt::ArgList &Args, -+ llvm::opt::ArgStringList &CmdArgs) const; -+#endif -+ - /// Return sanitizers which are available in this toolchain. - virtual SanitizerMask getSupportedSanitizers() const; - -diff --git a/clang/include/clang/Driver/Types.def b/clang/include/clang/Driver/Types.def -index aaea3ec0f9c8..2f241b02676b 100644 ---- a/clang/include/clang/Driver/Types.def -+++ b/clang/include/clang/Driver/Types.def -@@ -77,8 +77,15 @@ TYPE("c++-module-cpp-output", PP_CXXModule, INVALID, "iim", phases - TYPE("ada", Ada, INVALID, nullptr, phases::Compile, phases::Backend, phases::Assemble, phases::Link) - TYPE("assembler", PP_Asm, INVALID, "s", phases::Assemble, phases::Link) - TYPE("assembler-with-cpp", Asm, PP_Asm, "S", phases::Preprocess, phases::Assemble, phases::Link) -+#ifdef ENABLE_CLASSIC_FLANG -+TYPE("f77", PP_F_FixedForm, INVALID, "f", phases::Compile, phases::Backend, phases::Assemble, phases::Link) -+TYPE("f77-cpp-input", F_FixedForm, PP_F_FixedForm, "F", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) -+TYPE("f95", PP_F_FreeForm, INVALID, "f95", phases::Compile, phases::Backend, phases::Assemble, phases::Link) -+TYPE("f95-cpp-input", F_FreeForm, PP_F_FreeForm, "F95", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) -+#else - TYPE("f95", PP_Fortran, INVALID, "i", phases::Compile, phases::Backend, phases::Assemble, phases::Link) - TYPE("f95-cpp-input", Fortran, PP_Fortran, nullptr, phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) -+#endif - TYPE("java", Java, INVALID, nullptr, phases::Compile, phases::Backend, phases::Assemble, phases::Link) - - // LLVM IR/LTO types. We define separate types for IR and LTO because LTO -diff --git a/clang/include/clang/Driver/Types.h b/clang/include/clang/Driver/Types.h -index 4a21af3534de..3a8b785a4c39 100644 ---- a/clang/include/clang/Driver/Types.h -+++ b/clang/include/clang/Driver/Types.h -@@ -95,6 +95,14 @@ namespace types { - /// isOpenCL - Is this an "OpenCL" input. - bool isOpenCL(ID Id); - -+#ifdef ENABLE_CLASSIC_FLANG -+ /// isFreeFormFortran -- is it a free form layout Fortran input -+ bool isFreeFormFortran(ID Id); -+ -+ /// isFixedFormFortran -- is it a fixed form layout Fortran input -+ bool isFixedFormFortran(ID Id); -+#endif -+ - /// isHLSL - Is this an HLSL input. 
-   bool isHLSL(ID Id);
- 
-diff --git a/clang/include/clang/Frontend/Utils.h b/clang/include/clang/Frontend/Utils.h
-index 143cf4359f00..c51c617c1379 100644
---- a/clang/include/clang/Frontend/Utils.h
-+++ b/clang/include/clang/Frontend/Utils.h
-@@ -17,8 +17,10 @@
- #include "clang/Basic/LLVM.h"
- #include "clang/Driver/OptionUtils.h"
- #include "clang/Frontend/DependencyOutputOptions.h"
-+#include "clang/Basic/TargetInfo.h"
- #include "llvm/ADT/ArrayRef.h"
- #include "llvm/ADT/IntrusiveRefCntPtr.h"
-+#include "llvm/ADT/StringExtras.h"
- #include "llvm/ADT/StringMap.h"
- #include "llvm/ADT/StringRef.h"
- #include "llvm/ADT/StringSet.h"
-@@ -31,6 +33,12 @@
- #include 
- #include 
-+
-+namespace llvm {
-+
-+class StringRef;
-+
-+} // namespace llvm
-+
- namespace clang {
- 
- class ASTReader;
-@@ -39,6 +47,7 @@ class CompilerInvocation;
- class DiagnosticsEngine;
- class ExternalSemaSource;
- class FrontendOptions;
-+class MacroBuilder;
- class PCHContainerReader;
- class Preprocessor;
- class PreprocessorOptions;
-@@ -54,6 +63,29 @@ void InitializePreprocessor(Preprocessor &PP, const PreprocessorOptions &PPOpts,
- void DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
-                               const PreprocessorOutputOptions &Opts);
- 
-+/// DefineTypeSize - Emit a macro to the predefines buffer that declares a macro
-+/// named MacroName with the max value for a type with width 'TypeWidth', a
-+/// signedness of 'isSigned', and with a value suffix of 'ValSuffix' (e.g. LL).
-+template <typename T>
-+static void DefineTypeSize(const Twine &MacroName, unsigned TypeWidth,
-+                           StringRef ValSuffix, bool isSigned,
-+                           T &Builder) {
-+  static_assert(std::is_base_of<MacroBuilder, T>::value, "Illegal T value");
-+  llvm::APInt MaxVal = isSigned ? llvm::APInt::getSignedMaxValue(TypeWidth)
-+                                : llvm::APInt::getMaxValue(TypeWidth);
-+  Builder.defineMacro(MacroName, toString(MaxVal, 10, isSigned) + ValSuffix);
-+}
-+
-+/// DefineTypeSize - An overloaded helper that uses TargetInfo to determine
-+/// the width, suffix, and signedness of the given type
-+template <typename T>
-+static void DefineTypeSize(const Twine &MacroName, TargetInfo::IntType Ty,
-+                           const TargetInfo &TI, T &Builder) {
-+  static_assert(std::is_base_of<MacroBuilder, T>::value, "Illegal T value");
-+  DefineTypeSize(MacroName, TI.getTypeWidth(Ty), TI.getTypeConstantSuffix(Ty),
-+                 TI.isTypeSigned(Ty), Builder);
-+}
-+
- /// An interface for collecting the dependencies of a compilation. Users should
- /// use \c attachToPreprocessor and \c attachToASTReader to get all of the
- /// dependencies.
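The two DefineTypeSize templates above compute an integer type's maximum value and hand it to any builder derived from MacroBuilder; the driver later reroutes those definitions into "-def NAME=VALUE" arguments for flang1. A minimal standalone sketch of that flow follows; DefCollector is a hypothetical stand-in for clang's MacroBuilder, and the arithmetic only mirrors APInt::getSignedMaxValue/getMaxValue for widths up to 64 bits:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-in for a MacroBuilder-derived sink: it records
// NAME=VALUE definitions instead of writing clang's predefines buffer.
struct DefCollector {
  std::vector<std::string> Defs;
  void defineMacro(const std::string &Name, const std::string &Value) {
    Defs.push_back(Name + "=" + Value);
  }
};

// The same arithmetic DefineTypeSize delegates to llvm::APInt: the maximum
// value of a TypeWidth-bit integer, signed or unsigned (valid up to 64 bits).
static std::string maxValueString(unsigned TypeWidth, bool IsSigned) {
  std::uint64_t Max =
      IsSigned ? (std::uint64_t{1} << (TypeWidth - 1)) - 1
               : (TypeWidth == 64 ? ~std::uint64_t{0}
                                  : (std::uint64_t{1} << TypeWidth) - 1);
  return std::to_string(Max);
}

int main() {
  DefCollector B;
  // Mirrors DefineTypeSize("__LONG_MAX__", TargetInfo::SignedLong, ...) on an
  // LP64 target: width 64, signed, constant suffix "L".
  B.defineMacro("__LONG_MAX__", maxValueString(64, true) + "L");
  std::cout << B.Defs[0] << '\n'; // __LONG_MAX__=9223372036854775807L
}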
-diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
-index 483f3e787a78..f962d60870d1 100644
---- a/clang/lib/CodeGen/BackendUtil.cpp
-+++ b/clang/lib/CodeGen/BackendUtil.cpp
-@@ -272,6 +272,12 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
-     TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV,
-                                              TargetTriple);
-     break;
-+#ifdef ENABLE_CLASSIC_FLANG
-+  case CodeGenOptions::PGMATH:
-+    TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::PGMATH,
-+                                             TargetTriple);
-+    break;
-+#endif
-   case CodeGenOptions::SVML:
-     TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML,
-                                              TargetTriple);
-diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
-index d8eb2aecb87a..3b140860cce4 100644
---- a/clang/lib/CodeGen/CGDebugInfo.cpp
-+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
-@@ -3980,7 +3980,8 @@ CGDebugInfo::getGlobalVariableForwardDeclaration(const VarDecl *VD) {
-   auto Align = getDeclAlignIfRequired(VD, CGM.getContext());
-   auto *GV = DBuilder.createTempGlobalVariableFwdDecl(
-       DContext, Name, LinkageName, Unit, Line, getOrCreateType(T, Unit),
--      !VD->isExternallyVisible(), nullptr, TemplateParameters, Align);
-+      !VD->isExternallyVisible(), nullptr, TemplateParameters,
-+      llvm::DINode::FlagZero, Align);
-   FwdDeclReplaceMap.emplace_back(
-       std::piecewise_construct,
-       std::make_tuple(cast<VarDecl>(VD->getCanonicalDecl())),
-@@ -5467,7 +5468,7 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var,
-           Var->hasLocalLinkage(), true,
-           Expr.empty() ? nullptr : DBuilder.createExpression(Expr),
-           getOrCreateStaticDataMemberDeclarationOrNull(D), TemplateParameters,
--          Align, Annotations);
-+          llvm::DINode::FlagZero, Align, Annotations);
-       Var->addDebugInfo(GVE);
-     }
-     DeclCache[D->getCanonicalDecl()].reset(GVE);
-@@ -5563,7 +5564,7 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) {
-     GV.reset(DBuilder.createGlobalVariableExpression(
-         DContext, Name, StringRef(), Unit, getLineNumber(VD->getLocation()), Ty,
-         true, true, InitExpr, getOrCreateStaticDataMemberDeclarationOrNull(VarD),
--        TemplateParameters, Align));
-+        TemplateParameters, llvm::DINode::FlagZero, Align));
- }
- 
- void CGDebugInfo::EmitExternalVariable(llvm::GlobalVariable *Var,
-@@ -5581,7 +5582,8 @@ void CGDebugInfo::EmitExternalVariable(llvm::GlobalVariable *Var,
-   llvm::DIGlobalVariableExpression *GVE =
-       DBuilder.createGlobalVariableExpression(
-           DContext, Name, StringRef(), Unit, getLineNumber(D->getLocation()),
--          Ty, false, false, nullptr, nullptr, nullptr, Align);
-+          Ty, false, false, nullptr, nullptr, nullptr, llvm::DINode::FlagZero,
-+          Align);
-   Var->addDebugInfo(GVE);
- }
- 
-diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt
-index a6bd2d41e797..ac30007588b1 100644
---- a/clang/lib/Driver/CMakeLists.txt
-+++ b/clang/lib/Driver/CMakeLists.txt
-@@ -14,6 +14,12 @@ if(WIN32)
-   set(system_libs version)
- endif()
- 
-+if(LLVM_ENABLE_CLASSIC_FLANG)
-+  set(TOOLCHAINS_FLANG_CPP ToolChains/ClassicFlang.cpp)
-+else()
-+  set(TOOLCHAINS_FLANG_CPP ToolChains/Flang.cpp)
-+endif()
-+
- add_clang_library(clangDriver
-   Action.cpp
-   Compilation.cpp
-@@ -56,7 +62,7 @@ add_clang_library(clangDriver
-   ToolChains/Cuda.cpp
-   ToolChains/Darwin.cpp
-   ToolChains/DragonFly.cpp
--  ToolChains/Flang.cpp
-+  ${TOOLCHAINS_FLANG_CPP}
-   ToolChains/FreeBSD.cpp
-   ToolChains/Fuchsia.cpp
-   ToolChains/Gnu.cpp
-diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
-index 
a7e4edb3ab5a..8bfee5928fe5 100644 ---- a/clang/lib/Driver/Driver.cpp -+++ b/clang/lib/Driver/Driver.cpp -@@ -377,10 +377,14 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL, - (PhaseArg = DAL.getLastArg(options::OPT_fmodule_header, - options::OPT_fmodule_header_EQ))) { - FinalPhase = phases::Precompile; -+ - // -{fsyntax-only,-analyze,emit-ast} only run up to the compiler. - } else if ((PhaseArg = DAL.getLastArg(options::OPT_fsyntax_only)) || - (PhaseArg = DAL.getLastArg(options::OPT_print_supported_cpus)) || - (PhaseArg = DAL.getLastArg(options::OPT_module_file_info)) || -+#ifdef ENABLE_CLASSIC_FLANG -+ (PhaseArg = DAL.getLastArg(options::OPT_emit_flang_llvm)) || -+#endif - (PhaseArg = DAL.getLastArg(options::OPT_verify_pch)) || - (PhaseArg = DAL.getLastArg(options::OPT_rewrite_objc)) || - (PhaseArg = DAL.getLastArg(options::OPT_rewrite_legacy_objc)) || -@@ -1983,7 +1987,11 @@ void Driver::PrintHelp(bool ShowHidden) const { - - void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const { - if (IsFlangMode()) { -+#ifdef ENABLE_CLASSIC_FLANG -+ OS << getClangToolFullVersion("flang") << '\n'; -+#else - OS << getClangToolFullVersion("flang-new") << '\n'; -+#endif - } else { - // FIXME: The following handlers should use a callback mechanism, we don't - // know what the client would like to do. -@@ -2665,7 +2673,15 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args, - // stdin must be handled specially. - if (memcmp(Value, "-", 2) == 0) { - if (IsFlangMode()) { -+#ifdef ENABLE_CLASSIC_FLANG -+ // If running with -E, treat as needing preprocessing -+ if (!Args.hasArgNoClaim(options::OPT_E)) -+ Ty = types::TY_PP_F_FreeForm; -+ else -+ Ty = types::TY_F_FreeForm; -+#else - Ty = types::TY_Fortran; -+#endif - } else { - // If running with -E, treat as a C input (this changes the - // builtin macros, for example). This may be overridden by -ObjC -@@ -2687,6 +2703,16 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args, - // idea of what .s is. - if (const char *Ext = strrchr(Value, '.')) - Ty = TC.LookupTypeForExtension(Ext + 1); -+#ifdef ENABLE_CLASSIC_FLANG -+ // If called with -E, treat the inputs as needing preprocessing -+ // regardless of extension -+ if (IsFlangMode() && Args.hasArgNoClaim(options::OPT_E)) { -+ if (Ty == types::TY_PP_F_FreeForm) -+ Ty = types::TY_F_FreeForm; -+ else if (Ty == types::TY_PP_F_FixedForm) -+ Ty = types::TY_F_FixedForm; -+ } -+#endif - - if (Ty == types::TY_INVALID) { - if (IsCLMode() && (Args.hasArgNoClaim(options::OPT_E) || CCGenDiagnostics)) -@@ -4011,6 +4037,14 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args, - if (InputArg->isClaimed()) - continue; - -+#ifdef ENABLE_CLASSIC_FLANG -+ // If the input is detected as already preprocessed (e.g. has the .f95 -+ // extension), and the user specifies -E, preprocess the file anyway. -+ if (IsFlangMode() && InitialPhase == phases::Compile && -+ FinalPhase == phases::Preprocess) -+ continue; -+#endif -+ - // Claim here to avoid the more general unused warning. 
-     InputArg->claim();
- 
-@@ -4745,6 +4779,10 @@ Action *Driver::ConstructPhaseAction(
-       return C.MakeAction<CompileJobAction>(Input, types::TY_Nothing);
-     if (Args.hasArg(options::OPT_extract_api))
-       return C.MakeAction<ExtractAPIJobAction>(Input, types::TY_API_INFO);
-+#ifdef ENABLE_CLASSIC_FLANG
-+    if (IsFlangMode())
-+      return C.MakeAction<CompileJobAction>(Input, types::TY_LLVM_IR);
-+#endif
-     return C.MakeAction<CompileJobAction>(Input, types::TY_LLVM_BC);
-   }
-   case phases::Backend: {
-@@ -5199,6 +5237,12 @@ class ToolSelector final {
-     if (!T->hasIntegratedBackend() && !(OutputIsLLVM && T->canEmitIR()))
-       return nullptr;
- 
-+#ifdef ENABLE_CLASSIC_FLANG
-+    // Classic Flang is not integrated with the backend.
-+    if (C.getDriver().IsFlangMode() && !T->hasIntegratedAssembler())
-+      return nullptr;
-+#endif
-+
-     if (T->canEmitIR() && ((SaveTemps && !InputIsBitcode) || EmbedBitcode))
-       return nullptr;
- 
-@@ -5214,8 +5258,17 @@ class ToolSelector final {
-   /// are appended to \a CollapsedOffloadAction.
-   void combineWithPreprocessor(const Tool *T, ActionList &Inputs,
-                                ActionList &CollapsedOffloadAction) {
-+#ifdef ENABLE_CLASSIC_FLANG
-+    // flang1 always combines preprocessing and compilation.
-+    // Do not return early even when -save-temps is used.
-+    if (!T || !T->hasIntegratedCPP() ||
-+        (strcmp(T->getName(), "classic-flang") &&
-+         !canCollapsePreprocessorAction()))
-+      return;
-+#else
-     if (!T || !canCollapsePreprocessorAction() || !T->hasIntegratedCPP())
-       return;
-+#endif
- 
-     // Attempt to get a preprocessor action dependence.
-     ActionList PreprocessJobOffloadActions;
-@@ -6418,8 +6471,11 @@ bool Driver::ShouldUseFlangCompiler(const JobAction &JA) const {
-     return false;
- 
-   // And say "no" if this is not a kind of action flang understands.
--  if (!isa<PreprocessJobAction>(JA) && !isa<CompileJobAction>(JA) &&
--      !isa<BackendJobAction>(JA))
-+  if (!isa<PreprocessJobAction>(JA) && !isa<CompileJobAction>(JA)
-+#ifndef ENABLE_CLASSIC_FLANG
-+      && !isa<BackendJobAction>(JA)
-+#endif
-+      )
-     return false;
- 
-   return true;
-diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp
-index 0ddfb07fdad5..acc4e9128cb1 100644
---- a/clang/lib/Driver/OffloadBundler.cpp
-+++ b/clang/lib/Driver/OffloadBundler.cpp
-@@ -864,6 +864,10 @@ CreateFileHandler(MemoryBuffer &FirstInput,
-     return std::make_unique<TextFileHandler>(/*Comment=*/"#");
-   if (FilesType == "ll")
-     return std::make_unique<TextFileHandler>(/*Comment=*/";");
-+#ifdef ENABLE_CLASSIC_FLANG
-+  if (FilesType == "f95")
-+    return std::make_unique<TextFileHandler>(/*Comment=*/"!");
-+#endif
-   if (FilesType == "bc")
-     return std::make_unique<BinaryFileHandler>(BundlerConfig);
-   if (FilesType == "s")
-diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
-index 0146d8af3549..3d92b7c2a3bd 100644
---- a/clang/lib/Driver/ToolChain.cpp
-+++ b/clang/lib/Driver/ToolChain.cpp
-@@ -10,6 +10,9 @@
- #include "ToolChains/Arch/AArch64.h"
- #include "ToolChains/Arch/ARM.h"
- #include "ToolChains/Clang.h"
-+#ifdef ENABLE_CLASSIC_FLANG
-+#include "ToolChains/ClassicFlang.h"
-+#endif
- #include "ToolChains/CommonArgs.h"
- #include "ToolChains/Flang.h"
- #include "ToolChains/InterfaceStubs.h"
-@@ -441,7 +444,11 @@ Tool *ToolChain::getClang() const {
- 
- Tool *ToolChain::getFlang() const {
-   if (!Flang)
-+#ifdef ENABLE_CLASSIC_FLANG
-+    Flang.reset(new tools::ClassicFlang(*this));
-+#else
-     Flang.reset(new tools::Flang(*this));
-+#endif
-   return Flang.get();
- }
- 
-@@ -864,13 +871,13 @@ std::string ToolChain::GetStaticLibToolPath() const {
- 
- types::ID ToolChain::LookupTypeForExtension(StringRef Ext) const {
-   types::ID id = types::lookupTypeForExtension(Ext);
--
-+#ifndef ENABLE_CLASSIC_FLANG
-   // Flang always runs the preprocessor and has no notion of "preprocessed
- 
// fortran". Here, TY_PP_Fortran is coerced to TY_Fortran to avoid treating - // them differently. - if (D.IsFlangMode() && id == types::TY_PP_Fortran) - id = types::TY_Fortran; -- -+#endif - return id; - } - -@@ -1223,6 +1230,31 @@ void ToolChain::AddCCKextLibArgs(const ArgList &Args, - CmdArgs.push_back("-lcc_kext"); - } - -+#ifdef ENABLE_CLASSIC_FLANG -+void ToolChain::AddFortranStdlibLibArgs(const ArgList &Args, -+ ArgStringList &CmdArgs) const { -+ bool StaticFlangLibs = false; -+ if (Args.hasArg(options::OPT_staticFlangLibs)) { -+ StaticFlangLibs = true; -+ Args.ClaimAllArgs(options::OPT_staticFlangLibs); -+ } -+ -+ if (StaticFlangLibs && !Args.hasArg(options::OPT_static)) -+ CmdArgs.push_back("-Bstatic"); -+ CmdArgs.push_back("-lflang"); -+ CmdArgs.push_back("-lflangrti"); -+ CmdArgs.push_back("-lpgmath"); -+ if (StaticFlangLibs && !Args.hasArg(options::OPT_static)) -+ CmdArgs.push_back("-Bdynamic"); -+ -+ // Always link Fortran executables with pthreads. -+ CmdArgs.push_back("-lpthread"); -+ -+ if (!Triple.isOSDarwin()) -+ CmdArgs.push_back("-lrt"); -+} -+#endif -+ - bool ToolChain::isFastMathRuntimeAvailable(const ArgList &Args, - std::string &Path) const { - // Do not check for -fno-fast-math or -fno-unsafe-math when -Ofast passed -diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp -index 793af55a1e5f..c5ba8a690687 100644 ---- a/clang/lib/Driver/ToolChains/Clang.cpp -+++ b/clang/lib/Driver/ToolChains/Clang.cpp -@@ -5367,6 +5367,66 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, - } - A->render(Args, CmdArgs); - } -+#ifdef ENABLE_CLASSIC_FLANG -+ // Use PGMATH for Classic Flang by default. -+ else { -+ CmdArgs.push_back("-fveclib=PGMATH"); -+ } -+ -+ std::string PassRemarkVal(""), PassRemarkOpt(""); -+ if (Args.getLastArg(options::OPT_Minfoall)) { -+ PassRemarkVal = ".*"; -+ Args.ClaimAllArgs(options::OPT_Minfoall); -+ } else if (Arg *A = Args.getLastArg(options::OPT_Minfo_EQ)) { -+ for (StringRef val : A->getValues()) { -+ if (val.equals("all")) { -+ PassRemarkVal = ".*"; -+ break; -+ } else if (val.equals("inline") || val.equals("vect")) { -+ PassRemarkVal += PassRemarkVal.empty() ? "" : "|"; -+ PassRemarkVal += val; -+ } else { -+ D.Diag(diag::err_drv_clang_unsupported_minfo_arg) -+ << A->getOption().getName() -+ << val.str(); -+ break; -+ } -+ } -+ } -+ PassRemarkOpt = "-pass-remarks=" + PassRemarkVal; -+ CmdArgs.push_back("-mllvm"); -+ CmdArgs.push_back(Args.MakeArgString(PassRemarkOpt)); -+ Args.ClaimAllArgs(options::OPT_Minfo_EQ); -+ PassRemarkVal.clear(); -+ PassRemarkOpt.clear(); -+ -+ if (Args.getLastArg(options::OPT_Mneginfoall)) { -+ PassRemarkVal = ".*"; -+ Args.ClaimAllArgs(options::OPT_Mneginfoall); -+ } else if (Arg *A = Args.getLastArg(options::OPT_Mneginfo_EQ)) { -+ for (StringRef val : A->getValues()) { -+ if (val.equals("all")) { -+ PassRemarkVal = ".*"; -+ break; -+ } else if (val.equals("inline") || val.equals("vect")) { -+ PassRemarkVal += PassRemarkVal.empty() ? 
"" : "|"; -+ PassRemarkVal += val; -+ } else { -+ D.Diag(diag::err_drv_clang_unsupported_minfo_arg) -+ << A->getOption().getName() -+ << val.str(); -+ break; -+ } -+ } -+ } -+ PassRemarkOpt = "-pass-remarks-missed=" + PassRemarkVal; -+ CmdArgs.push_back("-mllvm"); -+ CmdArgs.push_back(Args.MakeArgString(PassRemarkOpt)); -+ PassRemarkOpt = "-pass-remarks-analysis=" + PassRemarkVal; -+ CmdArgs.push_back("-mllvm"); -+ CmdArgs.push_back(Args.MakeArgString(PassRemarkOpt)); -+ Args.ClaimAllArgs(options::OPT_Mneginfo_EQ); -+#endif - - if (Args.hasFlag(options::OPT_fmerge_all_constants, - options::OPT_fno_merge_all_constants, false)) -diff --git a/clang/lib/Driver/ToolChains/ClassicFlang.cpp b/clang/lib/Driver/ToolChains/ClassicFlang.cpp -new file mode 100644 -index 000000000000..b8be0be20b07 ---- /dev/null -+++ b/clang/lib/Driver/ToolChains/ClassicFlang.cpp -@@ -0,0 +1,1217 @@ -+//===-- ClassicFlang.cpp - Flang+LLVM ToolChain Implementations -*- C++ -*-===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+ -+#include "ClassicFlang.h" -+#include "CommonArgs.h" -+#include "clang/Driver/InputInfo.h" -+#include "clang/Basic/CharInfo.h" -+#include "clang/Basic/LangOptions.h" -+#include "clang/Basic/MacroBuilder.h" -+#include "clang/Basic/ObjCRuntime.h" -+#include "clang/Basic/TargetInfo.h" -+#include "clang/Basic/TargetOptions.h" -+#include "clang/Basic/Version.h" -+#include "clang/Config/config.h" -+#include "clang/Driver/DriverDiagnostic.h" -+#include "clang/Driver/Options.h" -+#include "clang/Driver/SanitizerArgs.h" -+#include "clang/Driver/XRayArgs.h" -+#include "clang/Frontend/CompilerInstance.h" -+#include "clang/Frontend/Utils.h" -+#include "llvm/ADT/StringExtras.h" -+#include "llvm/Option/ArgList.h" -+#include "llvm/Support/CodeGen.h" -+#include "llvm/Support/Compression.h" -+#include "llvm/Support/FileSystem.h" -+#include "llvm/Support/Path.h" -+#include "llvm/Support/Process.h" -+#include "llvm/Support/YAMLParser.h" -+#include "llvm/TargetParser/TargetParser.h" -+ -+#ifdef LLVM_ON_UNIX -+#include // For getuid(). 
-+#endif -+ -+using namespace clang::driver; -+using namespace clang::driver::tools; -+using namespace clang; -+using namespace llvm::opt; -+ -+class ClassicFlangMacroBuilder : public MacroBuilder { -+ ArgStringList &CmdArgs; -+ const ArgList &DriverArgs; -+ public: -+ ClassicFlangMacroBuilder(ArgStringList &UpperCmdArgs, const ArgList &DriverArgs, llvm::raw_string_ostream &Output) -+ : MacroBuilder(Output), CmdArgs(UpperCmdArgs), DriverArgs(DriverArgs) { -+ } -+ virtual void defineMacro(const Twine &Name, const Twine &Value = "1") override { -+ CmdArgs.push_back("-def"); -+ CmdArgs.push_back(DriverArgs.MakeArgString(Name + Twine('=') + Value)); -+ } -+}; -+ -+void ClassicFlang::ConstructJob(Compilation &C, const JobAction &JA, -+ const InputInfo &Output, const InputInfoList &Inputs, -+ const ArgList &Args, const char *LinkingOutput) const { -+ const Driver &D = getToolChain().getDriver(); -+ const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); -+ ArgStringList CommonCmdArgs; -+ ArgStringList UpperCmdArgs; -+ ArgStringList LowerCmdArgs; -+ SmallString<256> Stem; -+ std::string OutFile; -+ bool NeedIEEE = true; -+ bool NeedFastMath = false; -+ bool NeedRelaxedMath = false; -+ bool AssociativeMath = false; -+ bool SignedZeros = true; -+ -+ // Check number of inputs for sanity. We need at least one input. -+ assert(Inputs.size() >= 1 && "Must have at least one input."); -+ -+ /***** Process file arguments to both parts *****/ -+ const InputInfo &Input = Inputs[0]; -+ types::ID InputType = Input.getType(); -+ // Check file type sanity -+ assert(types::isAcceptedByFlang(InputType) && "Can only accept Fortran"); -+ -+ if (Args.hasArg(options::OPT_fsyntax_only, options::OPT_E)) { -+ // For -fsyntax-only and -E produce temp files only -+ Stem = C.getDriver().GetTemporaryPath("", ""); -+ } else { -+ OutFile = Output.getFilename(); -+ Stem = llvm::sys::path::filename(OutFile); -+ llvm::sys::path::replace_extension(Stem, ""); -+ } -+ -+#ifdef ENABLE_CLASSIC_FLANG -+ if (Args.hasArg(options::OPT_emit_flang_llvm)) { -+ // -emit-flang-llvm only supports asm output so claim -S to prevent warning -+ Args.ClaimAllArgs(options::OPT_S); -+ } -+#endif -+ -+ // Add input file name to the compilation line -+ UpperCmdArgs.push_back(Input.getBaseInput()); -+ -+ // Add temporary output for ILM -+ const char * ILMFile = Args.MakeArgString(Stem + ".ilm"); -+ LowerCmdArgs.push_back(ILMFile); -+ C.addTempFile(ILMFile); -+ -+ // Generate -cmdline -+ std::string CmdLine("'+flang"); -+ // ignore the first argument which reads "--driver-mode=fortran" -+ for (unsigned i = 1; i < Args.getNumInputArgStrings(); ++i) { -+ CmdLine.append(" "); -+ CmdLine.append(Args.getArgString(i)); -+ } -+ CmdLine.append("'"); -+ -+ CommonCmdArgs.push_back("-cmdline"); -+ CommonCmdArgs.push_back(Args.MakeArgString(CmdLine)); -+ -+ /***** Process common args *****/ -+ -+ // Add "inform level" flag -+ if (Args.hasArg(options::OPT_Minform_EQ)) { -+ // Parse arguments to set its value -+ for (Arg *A : Args.filtered(options::OPT_Minform_EQ)) { -+ A->claim(); -+ CommonCmdArgs.push_back("-inform"); -+ CommonCmdArgs.push_back(A->getValue(0)); -+ } -+ } else { -+ // Default value -+ CommonCmdArgs.push_back("-inform"); -+ CommonCmdArgs.push_back("warn"); -+ } -+ -+ for (auto Arg : Args.filtered(options::OPT_Msave_on)) { -+ Arg->claim(); -+ CommonCmdArgs.push_back("-save"); -+ } -+ -+ for (auto Arg : Args.filtered(options::OPT_Msave_off)) { -+ Arg->claim(); -+ CommonCmdArgs.push_back("-nosave"); -+ } -+ -+ // Treat denormalized numbers as 
zero: On
-+  for (auto Arg : Args.filtered(options::OPT_Mdaz_on)) {
-+    Arg->claim();
-+    CommonCmdArgs.push_back("-x");
-+    CommonCmdArgs.push_back("129");
-+    CommonCmdArgs.push_back("4");
-+    CommonCmdArgs.push_back("-y");
-+    CommonCmdArgs.push_back("129");
-+    CommonCmdArgs.push_back("0x400");
-+  }
-+
-+  // Treat denormalized numbers as zero: Off
-+  for (auto Arg : Args.filtered(options::OPT_Mdaz_off)) {
-+    Arg->claim();
-+    CommonCmdArgs.push_back("-y");
-+    CommonCmdArgs.push_back("129");
-+    CommonCmdArgs.push_back("4");
-+    CommonCmdArgs.push_back("-x");
-+    CommonCmdArgs.push_back("129");
-+    CommonCmdArgs.push_back("0x400");
-+  }
-+
-+  // Store constants in writable data segment
-+  for (auto Arg : Args.filtered(options::OPT_Mwritable_constants)) {
-+    Arg->claim();
-+    LowerCmdArgs.push_back("-x");
-+    LowerCmdArgs.push_back("183");
-+    LowerCmdArgs.push_back("0x20000000");
-+  }
-+
-+  // Bounds checking: On
-+  for (auto Arg : Args.filtered(options::OPT_Mbounds_on)) {
-+    Arg->claim();
-+    CommonCmdArgs.push_back("-x");
-+    CommonCmdArgs.push_back("70");
-+    CommonCmdArgs.push_back("2");
-+  }
-+
-+  // Bounds checking: Off
-+  for (auto Arg : Args.filtered(options::OPT_Mbounds_off)) {
-+    Arg->claim();
-+    CommonCmdArgs.push_back("-y");
-+    CommonCmdArgs.push_back("70");
-+    CommonCmdArgs.push_back("2");
-+  }
-+
-+  // Generate code allowing recursive subprograms
-+  for (auto Arg : Args.filtered(options::OPT_Mrecursive_on)) {
-+    Arg->claim();
-+    CommonCmdArgs.push_back("-recursive");
-+  }
-+
-+  // Disable recursive subprograms
-+  for (auto Arg : Args.filtered(options::OPT_Mrecursive_off)) {
-+    Arg->claim();
-+    CommonCmdArgs.push_back("-norecursive");
-+  }
-+
-+  // Enable generating reentrant code (disable optimizations that inhibit it)
-+  for (auto Arg : Args.filtered(options::OPT_Mreentrant_on)) {
-+    Arg->claim();
-+    CommonCmdArgs.push_back("-reentrant");
-+  }
-+
-+  // Allow optimizations inhibiting reentrancy
-+  for (auto Arg : Args.filtered(options::OPT_Mreentrant_off)) {
-+    Arg->claim();
-+    CommonCmdArgs.push_back("-noreentrant");
-+  }
-+
-+  // Swap byte order for unformatted IO
-+  for (auto Arg : Args.filtered(options::OPT_Mbyteswapio, options::OPT_byteswapio)) {
-+    Arg->claim();
-+    CommonCmdArgs.push_back("-x");
-+    CommonCmdArgs.push_back("125");
-+    CommonCmdArgs.push_back("2");
-+  }
-+
-+  // Contiguous pointer checks
-+  if (Arg *A = Args.getLastArg(options::OPT_fsanitize_EQ)) {
-+    for (StringRef val : A->getValues()) {
-+      if (val.equals("discontiguous") || val.equals("undefined")) {
-+        // -x 54 0x40 -x 54 0x80 -x 54 0x200
-+        UpperCmdArgs.push_back("-x");
-+        UpperCmdArgs.push_back("54");
-+        UpperCmdArgs.push_back("0x2c0");
-+
-+        // -fsanitize=discontiguous has no meaning in LLVM, only the flang driver
-+        // needs to recognize it. However -fsanitize=undefined needs to be passed
-+        // on for further processing by the non-flang part of the driver.
-+        if (val.equals("discontiguous"))
-+          A->claim();
-+        break;
-+      }
-+    }
-+  }
-+
-+  // Treat backslashes as regular characters
-+  for (auto Arg : Args.filtered(options::OPT_fno_backslash, options::OPT_Mbackslash)) {
-+    Arg->claim();
-+    CommonCmdArgs.push_back("-x");
-+    CommonCmdArgs.push_back("124");
-+    CommonCmdArgs.push_back("0x40");
-+  }
-+
-+  // Treat backslashes as C-style escape characters
-+  for (auto Arg : Args.filtered(options::OPT_fbackslash, options::OPT_Mnobackslash)) {
-+    Arg->claim();
-+    CommonCmdArgs.push_back("-y");
-+    CommonCmdArgs.push_back("124");
-+    CommonCmdArgs.push_back("0x40");
-+  }
-+
-+  // Handle OpenMP options
-+  if (auto *A = Args.getLastArg(options::OPT_mp, options::OPT_nomp,
-+                                options::OPT_fopenmp, options::OPT_fno_openmp)) {
-+    for (auto Arg : Args.filtered(options::OPT_mp, options::OPT_nomp)) {
-+      Arg->claim();
-+    }
-+    for (auto Arg : Args.filtered(options::OPT_fopenmp,
-+                                  options::OPT_fno_openmp)) {
-+      Arg->claim();
-+    }
-+
-+    if (A->getOption().matches(options::OPT_mp) ||
-+        A->getOption().matches(options::OPT_fopenmp)) {
-+
-+      CommonCmdArgs.push_back("-mp");
-+
-+      // Allocate threadprivate data local to the thread
-+      CommonCmdArgs.push_back("-x");
-+      CommonCmdArgs.push_back("69");
-+      CommonCmdArgs.push_back("0x200");
-+
-+      // Use the 'fair' schedule as the default static schedule
-+      // for parallel do loops
-+      CommonCmdArgs.push_back("-x");
-+      CommonCmdArgs.push_back("69");
-+      CommonCmdArgs.push_back("0x400");
-+
-+      // Disable use of native atomic instructions
-+      // for OpenMP atomics pending either a named
-+      // option or a libatomic bundled with flang.
-+      UpperCmdArgs.push_back("-x");
-+      UpperCmdArgs.push_back("69");
-+      UpperCmdArgs.push_back("0x1000");
-+    }
-+  }
-+
-+  // Align large objects on cache lines
-+  for (auto Arg : Args.filtered(options::OPT_Mcache_align_on)) {
-+    Arg->claim();
-+    CommonCmdArgs.push_back("-x");
-+    CommonCmdArgs.push_back("119");
-+    CommonCmdArgs.push_back("0x10000000");
-+    LowerCmdArgs.push_back("-x");
-+    LowerCmdArgs.push_back("129");
-+    LowerCmdArgs.push_back("0x40000000");
-+  }
-+
-+  // Disable special alignment of large objects
-+  for (auto Arg : Args.filtered(options::OPT_Mcache_align_off)) {
-+    Arg->claim();
-+    CommonCmdArgs.push_back("-y");
-+    CommonCmdArgs.push_back("119");
-+    CommonCmdArgs.push_back("0x10000000");
-+    LowerCmdArgs.push_back("-y");
-+    LowerCmdArgs.push_back("129");
-+    LowerCmdArgs.push_back("0x40000000");
-+  }
-+
-+  // -Mstack_arrays
-+  for (auto Arg : Args.filtered(options::OPT_Mstackarrays)) {
-+    Arg->claim();
-+    CommonCmdArgs.push_back("-x");
-+    CommonCmdArgs.push_back("54");
-+    CommonCmdArgs.push_back("8");
-+  }
-+
-+  // -Memit-dwarf-common-blocks-name, only add xbit to flang2.
-+  for (auto Arg : Args.filtered(options::OPT_Memit_dwarf_common_blocks_name)) {
-+    Arg->claim();
-+    LowerCmdArgs.push_back("-x");
-+    LowerCmdArgs.push_back("183");
-+    LowerCmdArgs.push_back("0x40000000");
-+  }
-+
-+  // -Munixlogical, only add xbit to flang2.
-+  for (auto Arg : Args.filtered(options::OPT_Munixlogical)) {
-+    Arg->claim();
-+    LowerCmdArgs.push_back("-x");
-+    LowerCmdArgs.push_back("125");
-+    LowerCmdArgs.push_back("0x8");
-+  }
-+
-+  // The last -g/-gdwarfX argument should be taken.
-+  Arg *GArg = Args.getLastArg(options::OPT_g_Flag);
-+  Arg *GDwarfArg = Args.getLastArg(options::OPT_gdwarf_2,
-+                                   options::OPT_gdwarf_3,
-+                                   options::OPT_gdwarf_4,
-+                                   options::OPT_gdwarf_5);
-+
-+  if (GArg || GDwarfArg) {
-+
-+    for (auto Arg : Args.filtered(options::OPT_g_Flag, options::OPT_gdwarf_2,
-+                                  options::OPT_gdwarf_3, options::OPT_gdwarf_4,
-+                                  options::OPT_gdwarf_5)) {
-+      Arg->claim();
-+    }
-+
-+    CommonCmdArgs.push_back("-x");
-+    CommonCmdArgs.push_back("120");
-+
-+    if (!GDwarfArg) // -g without -gdwarf-X produces default (DWARFv4)
-+      CommonCmdArgs.push_back("0x1000000");
-+    else if (GDwarfArg->getOption().matches(options::OPT_gdwarf_2)) // -gdwarf-2
-+      CommonCmdArgs.push_back("0x200");
-+    else if (GDwarfArg->getOption().matches(options::OPT_gdwarf_3)) // -gdwarf-3
-+      CommonCmdArgs.push_back("0x4000");
-+    else if (GDwarfArg->getOption().matches(options::OPT_gdwarf_4)) // -gdwarf-4
-+      CommonCmdArgs.push_back("0x1000000");
-+    else if (GDwarfArg->getOption().matches(options::OPT_gdwarf_5)) // -gdwarf-5
-+      CommonCmdArgs.push_back("0x2000000");
-+    // Handle `-gpubnames` option separately.
-+    for (auto Arg : Args.filtered(options::OPT_gpubnames)) {
-+      Arg->claim();
-+      CommonCmdArgs.push_back("-x");
-+      CommonCmdArgs.push_back("120");
-+      CommonCmdArgs.push_back("0x40000000"); // -gpubnames
-+    }
-+  }
-+
-+  // -Mipa has no effect
-+  if (Arg *A = Args.getLastArg(options::OPT_Mipa)) {
-+    D.Diag(diag::warn_drv_clang_unsupported)
-+        << A->getAsString(Args);
-+  }
-+
-+  // -Minline has no effect
-+  if (Arg *A = Args.getLastArg(options::OPT_Minline_on)) {
-+    D.Diag(diag::warn_drv_clang_unsupported)
-+        << A->getAsString(Args);
-+  }
-+
-+  // Handle -fdefault-real-8 (and its alias, -r8) and -fno-default-real-8
-+  if (Arg *A = Args.getLastArg(options::OPT_r8,
-+                               options::OPT_fdefault_real_8,
-+                               options::OPT_fno_default_real_8)) {
-+    const char *fl;
-+    // For -f version add -x flag, for -fno add -y
-+    if (A->getOption().matches(options::OPT_fno_default_real_8)) {
-+      fl = "-y";
-+    } else {
-+      fl = "-x";
-+    }
-+
-+    for (Arg *A : Args.filtered(options::OPT_r8,
-+                                options::OPT_fdefault_real_8,
-+                                options::OPT_fno_default_real_8)) {
-+      A->claim();
-+    }
-+
-+    UpperCmdArgs.push_back(fl);
-+    UpperCmdArgs.push_back("124");
-+    UpperCmdArgs.push_back("0x8");
-+    UpperCmdArgs.push_back(fl);
-+    UpperCmdArgs.push_back("124");
-+    UpperCmdArgs.push_back("0x80000");
-+  }
-+
-+  // Process and claim -i8/-fdefault-integer-8/-fno-default-integer-8 argument
-+  if (Arg *A = Args.getLastArg(options::OPT_i8,
-+                               options::OPT_fdefault_integer_8,
-+                               options::OPT_fno_default_integer_8)) {
-+    const char *fl;
-+
-+    if (A->getOption().matches(options::OPT_fno_default_integer_8)) {
-+      fl = "-y";
-+    } else {
-+      fl = "-x";
-+    }
-+
-+    for (Arg *A : Args.filtered(options::OPT_i8,
-+                                options::OPT_fdefault_integer_8,
-+                                options::OPT_fno_default_integer_8)) {
-+      A->claim();
-+    }
-+
-+    UpperCmdArgs.push_back(fl);
-+    UpperCmdArgs.push_back("124");
-+    UpperCmdArgs.push_back("0x10");
-+  }
-+
-+  // Pass an arbitrary flag for first part of Fortran frontend
-+  for (Arg *A : Args.filtered(options::OPT_Wh_EQ)) {
-+    A->claim();
-+    StringRef Value = A->getValue();
-+    SmallVector<StringRef, 8> PassArgs;
-+    Value.split(PassArgs, StringRef(","));
-+    for (StringRef PassArg : PassArgs) {
-+      UpperCmdArgs.push_back(Args.MakeArgString(PassArg));
-+    }
-+  }
-+
-+  // Flush to zero mode
-+  // Disabled by default, but can be enabled by a switch
-+  if (Args.hasArg(options::OPT_Mflushz_on)) {
-+    // For -Mflushz set -x 129 2 for second part of Fortran frontend
-+    for (Arg *A:
Args.filtered(options::OPT_Mflushz_on)) { -+ A->claim(); -+ LowerCmdArgs.push_back("-x"); -+ LowerCmdArgs.push_back("129"); -+ LowerCmdArgs.push_back("2"); -+ } -+ } else { -+ LowerCmdArgs.push_back("-y"); -+ LowerCmdArgs.push_back("129"); -+ LowerCmdArgs.push_back("2"); -+ for (Arg *A: Args.filtered(options::OPT_Mflushz_off)) { -+ A->claim(); -+ } -+ } -+ -+ // For -fPIC set -x 62 8 for second part of Fortran frontend -+ for (Arg *A: Args.filtered(options::OPT_fPIC)) { -+ A->claim(); -+ LowerCmdArgs.push_back("-x"); -+ LowerCmdArgs.push_back("62"); -+ LowerCmdArgs.push_back("8"); -+ } -+ -+ StringRef OptOStr("0"); -+ if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { -+ if (A->getOption().matches(options::OPT_O4)) { -+ OptOStr = "4"; // FIXME what should this be? -+ } else if (A->getOption().matches(options::OPT_Ofast)) { -+ OptOStr = "2"; // FIXME what should this be? -+ } else if (A->getOption().matches(options::OPT_O0)) { -+ // intentionally do nothing -+ } else { -+ assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag"); -+ StringRef S(A->getValue()); -+ if ((S == "s") || (S == "z")) { -+ // -Os = size; -Oz = more size -+ OptOStr = "2"; // FIXME -Os|-Oz => -opt ? -+ } else if ((S == "1") || (S == "2") || (S == "3")) { -+ OptOStr = S; -+ } else { -+ OptOStr = "4"; -+ } -+ } -+ } -+ unsigned OptLevel = std::stoi(OptOStr.str()); -+ -+ if (Args.hasArg(options::OPT_g_Group)) { -+ // pass -g to lower and upper -+ CommonCmdArgs.push_back("-debug"); -+ } -+ -+ /* Pick the last among conflicting flags, if a positive and negative flag -+ exists for ex. "-ffast-math -fno-fast-math" they get nullified. Also any -+ previously overwritten flag remains that way. -+ For ex. "-Kieee -ffast-math -fno-fast-math". -Kieee gets overwritten by -+ -ffast-math which then gets negated by -fno-fast-math, finally behaving as -+ if none of those flags were passed. 
-+ */ -+ for(Arg *A: Args.filtered(options::OPT_ffast_math, options::OPT_fno_fast_math, -+ options::OPT_Ofast, options::OPT_Kieee_off, -+ options::OPT_Kieee_on, options::OPT_frelaxed_math, -+ options::OPT_fassociative_math, -+ options::OPT_fno_associative_math, -+ options::OPT_fsigned_zeros, -+ options::OPT_fno_signed_zeros)) { -+ if (A->getOption().matches(options::OPT_ffast_math) || -+ A->getOption().matches(options::OPT_Ofast)) { -+ NeedIEEE = NeedRelaxedMath = false; -+ NeedFastMath = true; -+ } else if (A->getOption().matches(options::OPT_Kieee_on)) { -+ NeedFastMath = NeedRelaxedMath = AssociativeMath = false; -+ NeedIEEE = SignedZeros = true; -+ } else if (A->getOption().matches(options::OPT_frelaxed_math)) { -+ NeedFastMath = NeedIEEE = false; -+ NeedRelaxedMath = true; -+ } else if (A->getOption().matches(options::OPT_fno_fast_math)) { -+ NeedFastMath = false; -+ } else if (A->getOption().matches(options::OPT_Kieee_off)) { -+ NeedIEEE = false; -+ } else if (A->getOption().matches(options::OPT_fassociative_math)) { -+ AssociativeMath = true; -+ NeedIEEE = SignedZeros = false; -+ } else if (A->getOption().matches(options::OPT_fno_associative_math)) { -+ AssociativeMath = false; -+ } else if (A->getOption().matches(options::OPT_fsigned_zeros)) { -+ SignedZeros = true; -+ AssociativeMath = false; -+ } else if (A->getOption().matches(options::OPT_fno_signed_zeros)) { -+ SignedZeros = NeedIEEE = false; -+ } -+ A->claim(); -+ } -+ -+ // fp-contract=fast is the default -+ bool EnableFPContraction = true; -+ if (Arg *A = Args.getLastArg(options::OPT_ffp_contract, -+ options::OPT_Mfma_on, -+ options::OPT_fma, -+ options::OPT_Mfma_off, -+ options::OPT_nofma)) { -+ auto Opt = A->getOption(); -+ if (Opt.matches(options::OPT_ffp_contract)) { -+ StringRef Val = A->getValue(); -+ if ((Val == "fast") || (Val == "on")) { -+ EnableFPContraction = true; -+ } else if (Val == "off") { -+ EnableFPContraction = false; -+ } else { -+ D.Diag(diag::err_drv_unsupported_option_argument) -+ << A->getOption().getName() << Val; -+ } -+ } else if(Opt.matches(options::OPT_Mfma_on) || -+ Opt.matches(options::OPT_fma)) { -+ EnableFPContraction = true; -+ } else { -+ EnableFPContraction = false; -+ } -+ } -+ -+ if(OptLevel == 0) -+ EnableFPContraction = false; -+ -+ // Emit contract math instructions. 
-+ // Step 1 : Generate fma instructions in flang (can override with fma flag) -+ // Step 2 : Propagate fma contract information to LLVM to further -+ // exploit contraction opportunities -+ if (EnableFPContraction) { -+ LowerCmdArgs.push_back("-x"); -+ LowerCmdArgs.push_back("172"); -+ LowerCmdArgs.push_back("0x40000000"); -+ LowerCmdArgs.push_back("-x"); -+ LowerCmdArgs.push_back("179"); -+ LowerCmdArgs.push_back("1"); -+ // Step 2 -+ LowerCmdArgs.push_back("-x"); -+ LowerCmdArgs.push_back("216"); -+ LowerCmdArgs.push_back("0x1000"); -+ } else { -+ LowerCmdArgs.push_back("-x"); -+ LowerCmdArgs.push_back("171"); -+ LowerCmdArgs.push_back("0x40000000"); -+ LowerCmdArgs.push_back("-x"); -+ LowerCmdArgs.push_back("178"); -+ LowerCmdArgs.push_back("1"); -+ } -+ -+ if (NeedFastMath) { -+ // Lower: -x 216 1 -+ LowerCmdArgs.push_back("-x"); -+ LowerCmdArgs.push_back("216"); -+ LowerCmdArgs.push_back("1"); -+ // Common: -ieee 0 -+ CommonCmdArgs.push_back("-ieee"); -+ CommonCmdArgs.push_back("0"); -+ } else if (NeedIEEE) { -+ // Common: -y 129 2 -+ CommonCmdArgs.push_back("-y"); -+ CommonCmdArgs.push_back("129"); -+ CommonCmdArgs.push_back("2"); -+ // Lower: -x 6 0x100 -+ LowerCmdArgs.push_back("-x"); -+ LowerCmdArgs.push_back("6"); -+ LowerCmdArgs.push_back("0x100"); -+ // Lower: -x 42 0x400000 -+ LowerCmdArgs.push_back("-x"); -+ LowerCmdArgs.push_back("42"); -+ LowerCmdArgs.push_back("0x400000"); -+ // Lower: -y 129 4 -+ LowerCmdArgs.push_back("-y"); -+ LowerCmdArgs.push_back("129"); -+ LowerCmdArgs.push_back("4"); -+ // Lower: -x 129 0x400 -+ LowerCmdArgs.push_back("-x"); -+ LowerCmdArgs.push_back("129"); -+ LowerCmdArgs.push_back("0x400"); -+ // Lower: -y 216 1 (OPT_fno_fast_math) -+ LowerCmdArgs.push_back("-y"); -+ LowerCmdArgs.push_back("216"); -+ LowerCmdArgs.push_back("1"); -+ // Common: -ieee 1 -+ CommonCmdArgs.push_back("-ieee"); -+ CommonCmdArgs.push_back("1"); -+ } else if (NeedRelaxedMath) { -+ // Lower: -x 15 0x400 -+ LowerCmdArgs.push_back("-x"); -+ LowerCmdArgs.push_back("15"); -+ LowerCmdArgs.push_back("0x400"); -+ // Lower: -y 216 1 (OPT_fno_fast_math) -+ LowerCmdArgs.push_back("-y"); -+ LowerCmdArgs.push_back("216"); -+ LowerCmdArgs.push_back("1"); -+ // Common: -ieee 0 -+ CommonCmdArgs.push_back("-ieee"); -+ CommonCmdArgs.push_back("0"); -+ } else { -+ // Common: -ieee 0 -+ CommonCmdArgs.push_back("-ieee"); -+ CommonCmdArgs.push_back("0"); -+ } -+ -+ /***** Upper part of the Fortran frontend *****/ -+ -+ // TODO do we need to invoke this under GDB sometimes? 
-+  const char *UpperExec = Args.MakeArgString(getToolChain().GetProgramPath("flang1"));
-+
-+  UpperCmdArgs.push_back("-opt"); UpperCmdArgs.push_back(Args.MakeArgString(OptOStr));
-+  UpperCmdArgs.push_back("-terse"); UpperCmdArgs.push_back("1");
-+  UpperCmdArgs.push_back("-inform"); UpperCmdArgs.push_back("warn");
-+  UpperCmdArgs.push_back("-nohpf");
-+  UpperCmdArgs.push_back("-nostatic");
-+  UpperCmdArgs.append(CommonCmdArgs.begin(), CommonCmdArgs.end()); // Append common arguments
-+  UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("19"); UpperCmdArgs.push_back("0x400000");
-+  UpperCmdArgs.push_back("-quad");
-+  UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("68"); UpperCmdArgs.push_back("0x1");
-+  UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("59"); UpperCmdArgs.push_back("4");
-+  UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("15"); UpperCmdArgs.push_back("2");
-+  UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("49"); UpperCmdArgs.push_back("0x400004");
-+  UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("51"); UpperCmdArgs.push_back("0x20");
-+  UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("57"); UpperCmdArgs.push_back("0x4c");
-+  UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("58"); UpperCmdArgs.push_back("0x10000");
-+  UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("124"); UpperCmdArgs.push_back("0x1000");
-+  UpperCmdArgs.push_back("-tp"); UpperCmdArgs.push_back("px");
-+  UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("57"); UpperCmdArgs.push_back("0xfb0000");
-+  UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("58"); UpperCmdArgs.push_back("0x78031040");
-+  UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("47"); UpperCmdArgs.push_back("0x08");
-+  UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("48"); UpperCmdArgs.push_back("4608");
-+  UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("49"); UpperCmdArgs.push_back("0x100");
-+  if (OptLevel >= 2) {
-+    UpperCmdArgs.push_back("-x");
-+    UpperCmdArgs.push_back("70");
-+    UpperCmdArgs.push_back("0x6c00");
-+    UpperCmdArgs.push_back("-x");
-+    UpperCmdArgs.push_back("119");
-+    UpperCmdArgs.push_back("0x10000000");
-+    UpperCmdArgs.push_back("-x");
-+    UpperCmdArgs.push_back("129");
-+    UpperCmdArgs.push_back("2");
-+    UpperCmdArgs.push_back("-x");
-+    UpperCmdArgs.push_back("47");
-+    UpperCmdArgs.push_back("0x400000");
-+    UpperCmdArgs.push_back("-x");
-+    UpperCmdArgs.push_back("52");
-+    UpperCmdArgs.push_back("2");
-+  }
-+
-+  // Add system include arguments.
-+  getToolChain().AddFlangSystemIncludeArgs(Args, UpperCmdArgs);
-+
-+  // Use clang's predefined macros
-+  DiagnosticsEngine DE(new DiagnosticIDs(), new DiagnosticOptions, new IgnoringDiagConsumer());
-+  std::shared_ptr<TargetOptions> TO = std::make_shared<TargetOptions>();
-+  TO->Triple = Triple.getTriple();
-+  std::shared_ptr<TargetInfo> TI(clang::TargetInfo::CreateTargetInfo(DE, TO));
-+  std::string PredefineBuffer;
-+  llvm::raw_string_ostream Predefines(PredefineBuffer);
-+  ClassicFlangMacroBuilder Builder(UpperCmdArgs, Args, Predefines);
-+
-+  LangOptions LO;
-+  VersionTuple VT = getToolChain().computeMSVCVersion(&D, Args);
-+  if (!VT.empty()) {
-+    // Set the MSCompatibility version. Subminor version has 5 decimal digits.
-+    // Minor and major versions have 2 decimal digits each.
-+ LO.MSCompatibilityVersion = VT.getMajor() * 10000000 + -+ VT.getMinor().value_or(0) * 100000 + -+ VT.getSubminor().value_or(0); -+ } -+ -+ // Define Target specific macros like __linux__ -+ TI->getTargetDefines(LO, Builder); -+ -+ Builder.defineMacro("__SIZE_TYPE__", -+ TargetInfo::getTypeName(TI->getSizeType())); -+ Builder.defineMacro( -+ "__PTRDIFF_TYPE__", -+ TargetInfo::getTypeName(TI->getPtrDiffType(LangAS::Default))); -+ -+ if (TI->getPointerWidth(LangAS::Default) == 64 && TI->getLongWidth() == 64 -+ && TI->getIntWidth() == 32) { -+ Builder.defineMacro("_LP64"); -+ Builder.defineMacro("__LP64__"); -+ } -+ -+ if (TI->getPointerWidth(LangAS::Default) == 32 && TI->getLongWidth() == 32 -+ && TI->getIntWidth() == 32) { -+ Builder.defineMacro("_ILP32"); -+ Builder.defineMacro("__ILP32__"); -+ } -+ -+ DefineTypeSize("__LONG_MAX__", TargetInfo::SignedLong, *TI, Builder); -+ -+ // Add additional predefined macros -+ switch (Triple.getArch()) { -+ case llvm::Triple::aarch64: -+ UpperCmdArgs.push_back("-def"); UpperCmdArgs.push_back("__ARM_ARCH__=8"); -+ break; -+ case llvm::Triple::x86_64: -+ UpperCmdArgs.push_back("-def"); UpperCmdArgs.push_back("__amd_64__amd64__"); -+ UpperCmdArgs.push_back("-def"); UpperCmdArgs.push_back("__k8"); -+ UpperCmdArgs.push_back("-def"); UpperCmdArgs.push_back("__k8__"); -+ break; -+ default: /* generic 64-bit */ -+ ; -+ } -+ UpperCmdArgs.push_back("-def"); UpperCmdArgs.push_back("__THROW="); -+ UpperCmdArgs.push_back("-def"); UpperCmdArgs.push_back("__extension__="); -+ UpperCmdArgs.push_back("-def"); UpperCmdArgs.push_back("__PGLLVM__"); -+ -+ // Enable preprocessor -+ if (Args.hasArg(options::OPT_Mpreprocess) || -+ Args.hasArg(options::OPT_cpp) || -+ Args.hasArg(options::OPT_E) || -+ types::getPreprocessedType(InputType) != types::TY_INVALID) { -+ UpperCmdArgs.push_back("-preprocess"); -+ for (auto Arg : Args.filtered(options::OPT_Mpreprocess, options::OPT_cpp, options::OPT_E)) { -+ Arg->claim(); -+ } -+ -+ // When -E option is provided, run only the fortran preprocessor. 
-+    // Only in -E mode, consume -P if it exists
-+    if (Args.hasArg(options::OPT_E)) {
-+      UpperCmdArgs.push_back("-es");
-+      // Line marker mode is disabled
-+      if (Args.hasArg(options::OPT_P)) {
-+        Args.ClaimAllArgs(options::OPT_P);
-+      } else {
-+        // -pp enables line marker mode in fortran preprocessor
-+        UpperCmdArgs.push_back("-pp");
-+      }
-+    }
-+  }
-+
-+  // Enable standards checking
-+  if (Args.hasArg(options::OPT_Mstandard)) {
-+    UpperCmdArgs.push_back("-standard");
-+    for (auto Arg : Args.filtered(options::OPT_Mstandard)) {
-+      Arg->claim();
-+    }
-+  }
-+
-+  // Free or fixed form file
-+  if (Args.hasArg(options::OPT_fortran_format_Group)) {
-+    // Override file name suffix, scan arguments for that
-+    for (Arg *A : Args.filtered(options::OPT_fortran_format_Group)) {
-+      A->claim();
-+      switch (A->getOption().getID()) {
-+      default:
-+        llvm_unreachable("missed a case");
-+      case options::OPT_ffixed_form:
-+      case options::OPT_fno_free_form:
-+      case options::OPT_Mfixed:
-+      case options::OPT_Mfree_off:
-+      case options::OPT_Mfreeform_off:
-+        UpperCmdArgs.push_back("-nofreeform");
-+        break;
-+      case options::OPT_ffree_form:
-+      case options::OPT_fno_fixed_form:
-+      case options::OPT_Mfree_on:
-+      case options::OPT_Mfreeform_on:
-+        UpperCmdArgs.push_back("-freeform");
-+        break;
-+      }
-+    }
-+  } else {
-+    // Deduce format from file name suffix
-+    if (types::isFreeFormFortran(InputType)) {
-+      UpperCmdArgs.push_back("-freeform");
-+    } else {
-+      UpperCmdArgs.push_back("-nofreeform");
-+    }
-+  }
-+
-+  // Extend lines to 132 characters
-+  for (auto Arg : Args.filtered(options::OPT_Mextend)) {
-+    Arg->claim();
-+    UpperCmdArgs.push_back("-extend");
-+  }
-+
-+  for (auto Arg : Args.filtered(options::OPT_ffixed_line_length_VALUE)) {
-+    StringRef Value = Arg->getValue();
-+    if (Value == "72") {
-+      Arg->claim();
-+    } else if (Value == "132") {
-+      Arg->claim();
-+      UpperCmdArgs.push_back("-extend");
-+    } else {
-+      D.Diag(diag::err_drv_unsupported_fixed_line_length)
-+          << Arg->getAsString(Args);
-+    }
-+  }
-+
-+  // Add user-defined include directories
-+  for (auto Arg : Args.filtered(options::OPT_I)) {
-+    Arg->claim();
-+    UpperCmdArgs.push_back("-idir");
-+    UpperCmdArgs.push_back(Arg->getValue(0));
-+  }
-+
-+  // Add user-defined module directories
-+  for (auto Arg : Args.filtered(options::OPT_ModuleDir, options::OPT_J)) {
-+    Arg->claim();
-+    UpperCmdArgs.push_back("-moddir");
-+    UpperCmdArgs.push_back(Arg->getValue(0));
-+  }
-+
-+  // "Define" preprocessor flags
-+  for (auto Arg : Args.filtered(options::OPT_D)) {
-+    Arg->claim();
-+    UpperCmdArgs.push_back("-def");
-+    UpperCmdArgs.push_back(Arg->getValue(0));
-+  }
-+
-+  // "Undefine" preprocessor flags
-+  for (auto Arg : Args.filtered(options::OPT_U)) {
-+    Arg->claim();
-+    UpperCmdArgs.push_back("-undef");
-+    UpperCmdArgs.push_back(Arg->getValue(0));
-+  }
-+
-+  UpperCmdArgs.push_back("-vect"); UpperCmdArgs.push_back("48");
-+
-+  // Semantics for assignments to allocatables
-+  if (Arg *A = Args.getLastArg(options::OPT_Mallocatable_EQ)) {
-+    // Argument is passed explicitly
-+    StringRef Value = A->getValue();
-+    if (Value == "03") { // Enable Fortran 2003 semantics
-+      UpperCmdArgs.push_back("-x"); // Set XBIT
-+    } else if (Value == "95") { // Enable Fortran 95 semantics
-+      UpperCmdArgs.push_back("-y"); // Unset XBIT
-+    } else {
-+      D.Diag(diag::err_drv_invalid_allocatable_mode)
-+          << A->getAsString(Args);
-+    }
-+  } else { // No argument passed
-+    UpperCmdArgs.push_back("-x"); // Default is 03
-+  }
-+  UpperCmdArgs.push_back("54"); UpperCmdArgs.push_back("1"); // XBIT value
-+
-+ 
UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("70"); UpperCmdArgs.push_back("0x40000000"); -+ UpperCmdArgs.push_back("-y"); UpperCmdArgs.push_back("163"); UpperCmdArgs.push_back("0xc0000000"); -+ UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("189"); UpperCmdArgs.push_back("0x10"); -+ -+ // Enable NULL pointer checking -+ if (Args.hasArg(options::OPT_Mchkptr)) { -+ UpperCmdArgs.push_back("-x"); -+ UpperCmdArgs.push_back("70"); -+ UpperCmdArgs.push_back("4"); -+ for (auto Arg : Args.filtered(options::OPT_Mchkptr)) { -+ Arg->claim(); -+ } -+ } -+ -+ // Set a -x flag for first part of Fortran frontend -+ for (Arg *A : Args.filtered(options::OPT_Hx_EQ)) { -+ A->claim(); -+ StringRef Value = A->getValue(); -+ auto XFlag = Value.split(","); -+ UpperCmdArgs.push_back("-x"); -+ UpperCmdArgs.push_back(Args.MakeArgString(XFlag.first)); -+ UpperCmdArgs.push_back(Args.MakeArgString(XFlag.second)); -+ } -+ -+ // Set a -y flag for first part of Fortran frontend -+ for (Arg *A : Args.filtered(options::OPT_Hy_EQ)) { -+ A->claim(); -+ StringRef Value = A->getValue(); -+ auto XFlag = Value.split(","); -+ UpperCmdArgs.push_back("-y"); -+ UpperCmdArgs.push_back(Args.MakeArgString(XFlag.first)); -+ UpperCmdArgs.push_back(Args.MakeArgString(XFlag.second)); -+ } -+ -+ // Set a -q (debug) flag for first part of Fortran frontend -+ for (Arg *A : Args.filtered(options::OPT_Hq_EQ)) { -+ A->claim(); -+ StringRef Value = A->getValue(); -+ auto XFlag = Value.split(","); -+ UpperCmdArgs.push_back("-q"); -+ UpperCmdArgs.push_back(Args.MakeArgString(XFlag.first)); -+ UpperCmdArgs.push_back(Args.MakeArgString(XFlag.second)); -+ } -+ -+ // Set a -qq (debug) flag for first part of Fortran frontend -+ for (Arg *A : Args.filtered(options::OPT_Hqq_EQ)) { -+ A->claim(); -+ StringRef Value = A->getValue(); -+ auto XFlag = Value.split(","); -+ UpperCmdArgs.push_back("-qq"); -+ UpperCmdArgs.push_back(Args.MakeArgString(XFlag.first)); -+ UpperCmdArgs.push_back(Args.MakeArgString(XFlag.second)); -+ } -+ -+ const char * STBFile = Args.MakeArgString(Stem + ".stb"); -+ C.addTempFile(STBFile); -+ UpperCmdArgs.push_back("-stbfile"); -+ UpperCmdArgs.push_back(STBFile); -+ -+ const char * ModuleExportFile = Args.MakeArgString(Stem + ".cmod"); -+ C.addTempFile(ModuleExportFile); -+ UpperCmdArgs.push_back("-modexport"); -+ UpperCmdArgs.push_back(ModuleExportFile); -+ -+ const char * ModuleIndexFile = Args.MakeArgString(Stem + ".cmdx"); -+ C.addTempFile(ModuleIndexFile); -+ UpperCmdArgs.push_back("-modindex"); -+ UpperCmdArgs.push_back(ModuleIndexFile); -+ -+ UpperCmdArgs.push_back("-output"); -+ UpperCmdArgs.push_back(ILMFile); -+ -+ SmallString<256> Path; -+ if(Args.getAllArgValues(options::OPT_fopenmp_targets_EQ).size() > 0) { -+ SmallString<128> TargetInfo; -+ Path = llvm::sys::path::parent_path(Output.getFilename()); -+ Arg* Tgts = Args.getLastArg(options::OPT_fopenmp_targets_EQ); -+ assert(Tgts && Tgts->getNumValues() && -+ "OpenMP offloading has to have targets specified."); -+ for (unsigned i = 0; i < Tgts->getNumValues(); ++i) { -+ if (i) -+ TargetInfo += ','; -+ llvm::Triple T(Tgts->getValue(i)); -+ TargetInfo += T.getTriple(); -+ } -+ UpperCmdArgs.push_back("-fopenmp-targets"); -+ UpperCmdArgs.push_back(Args.MakeArgString(TargetInfo.str())); -+ } -+ -+ C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::AtFileUTF8(), UpperExec, UpperCmdArgs, Inputs)); -+ -+ // For -fsyntax-only or -E that is it -+ if (Args.hasArg(options::OPT_fsyntax_only) || -+ Args.hasArg(options::OPT_E)) return; -+ -+ /***** Lower part of 
Fortran frontend *****/ -+ -+ const char *LowerExec = Args.MakeArgString(getToolChain().GetProgramPath("flang2")); -+ -+ // TODO FLANG arg handling -+ LowerCmdArgs.push_back("-fn"); LowerCmdArgs.push_back(Input.getBaseInput()); -+ LowerCmdArgs.push_back("-opt"); LowerCmdArgs.push_back(Args.MakeArgString(OptOStr)); -+ LowerCmdArgs.push_back("-terse"); LowerCmdArgs.push_back("1"); -+ LowerCmdArgs.push_back("-inform"); LowerCmdArgs.push_back("warn"); -+ LowerCmdArgs.append(CommonCmdArgs.begin(), CommonCmdArgs.end()); // Append common arguments -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("68"); LowerCmdArgs.push_back("0x1"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("51"); LowerCmdArgs.push_back("0x20"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("119"); LowerCmdArgs.push_back("0xa10000"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("122"); LowerCmdArgs.push_back("0x40"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("123"); LowerCmdArgs.push_back("0x1000"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("127"); LowerCmdArgs.push_back("4"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("127"); LowerCmdArgs.push_back("17"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("19"); LowerCmdArgs.push_back("0x400000"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("28"); LowerCmdArgs.push_back("0x40000"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("120"); LowerCmdArgs.push_back("0x10000000"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("70"); LowerCmdArgs.push_back("0x8000"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("122"); LowerCmdArgs.push_back("1"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("125"); LowerCmdArgs.push_back("0x20000"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("164"); LowerCmdArgs.push_back("0x800000"); -+ LowerCmdArgs.push_back("-quad"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("59"); LowerCmdArgs.push_back("4"); -+ LowerCmdArgs.push_back("-tp"); LowerCmdArgs.push_back("px"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("120"); LowerCmdArgs.push_back("0x1000"); // debug lite -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("124"); LowerCmdArgs.push_back("0x1400"); -+ LowerCmdArgs.push_back("-y"); LowerCmdArgs.push_back("15"); LowerCmdArgs.push_back("2"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("57"); LowerCmdArgs.push_back("0x3b0000"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("58"); LowerCmdArgs.push_back("0x48000000"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("49"); LowerCmdArgs.push_back("0x100"); -+ LowerCmdArgs.push_back("-astype"); LowerCmdArgs.push_back("0"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("183"); LowerCmdArgs.push_back("4"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("121"); LowerCmdArgs.push_back("0x800"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("54"); LowerCmdArgs.push_back("0x10"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("70"); LowerCmdArgs.push_back("0x40000000"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("249"); LowerCmdArgs.push_back("170"); // LLVM version -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("124"); LowerCmdArgs.push_back("1"); -+ LowerCmdArgs.push_back("-y"); LowerCmdArgs.push_back("163"); LowerCmdArgs.push_back("0xc0000000"); -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("189"); LowerCmdArgs.push_back("0x10"); -+ 
LowerCmdArgs.push_back("-y"); LowerCmdArgs.push_back("189"); LowerCmdArgs.push_back("0x4000000"); -+ -+ if (!SignedZeros) { -+ LowerCmdArgs.push_back("-x"); -+ LowerCmdArgs.push_back("216"); -+ LowerCmdArgs.push_back("0x8"); -+ } -+ if (AssociativeMath) { -+ LowerCmdArgs.push_back("-x"); -+ LowerCmdArgs.push_back("216"); -+ LowerCmdArgs.push_back("0x10"); -+ } -+ -+ // Remove "noinline" attriblute -+ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("183"); LowerCmdArgs.push_back("0x10"); -+ -+ // Add target features -+ std::vector Features; -+ std::string FeatureList = ""; -+ getTargetFeatureList(D, Triple, Args, UpperCmdArgs, false, Features); -+ if (!Features.empty()) { -+ for (auto Feature : unifyTargetFeatures(Features)) { -+ if (!FeatureList.empty()) -+ FeatureList += ','; -+ FeatureList += Feature; -+ } -+ -+ LowerCmdArgs.push_back("-target_features"); -+ LowerCmdArgs.push_back(Args.MakeArgString(FeatureList)); -+ } -+ -+ // Add vscale range -+ unsigned vscaleMin = 0; -+ unsigned vscaleMax = 0; -+ bool hasSVE = false; -+ if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ)) { -+ StringRef Val = A->getValue(); -+ if (Val.equals("128") || Val.equals("256") || Val.equals("512") || -+ Val.equals("1024") || Val.equals("2048") || Val.equals("128+") || -+ Val.equals("256+") || Val.equals("512+") || Val.equals("1024+") || -+ Val.equals("2048+")) { -+ unsigned Bits = 0; -+ if (Val.endswith("+")) -+ Val = Val.substr(0, Val.size() - 1); -+ else { -+ bool Invalid = Val.getAsInteger(10, Bits); (void)Invalid; -+ assert(!Invalid && "Failed to parse value"); -+ vscaleMax = Bits / 128; -+ } -+ -+ bool Invalid = Val.getAsInteger(10, Bits); (void)Invalid; -+ assert(!Invalid && "Failed to parse value"); -+ vscaleMin = Bits / 128; -+ } else if (!Val.equals("scalable")) -+ getToolChain().getDriver().Diag(diag::warn_drv_clang_unsupported) -+ << A->getOption().getName() << Val; -+ } -+ for (auto Feature : unifyTargetFeatures(Features)) { -+ if (Feature.startswith("+sve")) { -+ hasSVE = true; -+ break; -+ } -+ } -+ if (vscaleMin || vscaleMax) { -+ LowerCmdArgs.push_back("-vscale_range_min"); -+ LowerCmdArgs.push_back(Args.MakeArgString( -+ std::to_string(vscaleMin ? 
vscaleMin : 1))); -+ LowerCmdArgs.push_back("-vscale_range_max"); -+ LowerCmdArgs.push_back(Args.MakeArgString(std::to_string(vscaleMax))); -+ } else { -+ if (hasSVE) { -+ LowerCmdArgs.push_back("-vscale_range_min"); -+ LowerCmdArgs.push_back(Args.MakeArgString(std::to_string(1))); -+ LowerCmdArgs.push_back("-vscale_range_max"); -+ LowerCmdArgs.push_back(Args.MakeArgString(std::to_string(16))); -+ } -+ } -+ -+ // Set a -x flag for second part of Fortran frontend -+ for (Arg *A : Args.filtered(options::OPT_Mx_EQ)) { -+ A->claim(); -+ StringRef Value = A->getValue(); -+ auto XFlag = Value.split(","); -+ LowerCmdArgs.push_back("-x"); -+ LowerCmdArgs.push_back(Args.MakeArgString(XFlag.first)); -+ LowerCmdArgs.push_back(Args.MakeArgString(XFlag.second)); -+ } -+ -+ // Set a -y flag for second part of Fortran frontend -+ for (Arg *A : Args.filtered(options::OPT_My_EQ)) { -+ A->claim(); -+ StringRef Value = A->getValue(); -+ auto XFlag = Value.split(","); -+ LowerCmdArgs.push_back("-y"); -+ LowerCmdArgs.push_back(Args.MakeArgString(XFlag.first)); -+ LowerCmdArgs.push_back(Args.MakeArgString(XFlag.second)); -+ } -+ -+ // Set a -q (debug) flag for second part of Fortran frontend -+ for (Arg *A : Args.filtered(options::OPT_Mq_EQ)) { -+ A->claim(); -+ StringRef Value = A->getValue(); -+ auto XFlag = Value.split(","); -+ LowerCmdArgs.push_back("-q"); -+ LowerCmdArgs.push_back(Args.MakeArgString(XFlag.first)); -+ LowerCmdArgs.push_back(Args.MakeArgString(XFlag.second)); -+ } -+ -+ // Set a -qq (debug) flag for second part of Fortran frontend -+ for (Arg *A : Args.filtered(options::OPT_Mqq_EQ)) { -+ A->claim(); -+ StringRef Value = A->getValue(); -+ auto XFlag = Value.split(","); -+ LowerCmdArgs.push_back("-qq"); -+ LowerCmdArgs.push_back(Args.MakeArgString(XFlag.first)); -+ LowerCmdArgs.push_back(Args.MakeArgString(XFlag.second)); -+ } -+ -+ // Pass an arbitrary flag for second part of Fortran frontend -+ for (Arg *A : Args.filtered(options::OPT_Wm_EQ)) { -+ A->claim(); -+ StringRef Value = A->getValue(); -+ SmallVector PassArgs; -+ Value.split(PassArgs, StringRef(",")); -+ for (StringRef PassArg : PassArgs) { -+ LowerCmdArgs.push_back(Args.MakeArgString(PassArg)); -+ } -+ } -+ -+ LowerCmdArgs.push_back("-stbfile"); -+ LowerCmdArgs.push_back(STBFile); -+ -+ Path = llvm::sys::path::parent_path(Output.getFilename()); -+ bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP); -+ -+ /* OpenMP GPU Offload */ -+ if(Args.getAllArgValues(options::OPT_fopenmp_targets_EQ).size() > 0) { -+ SmallString<128> TargetInfo;//("-fopenmp-targets "); -+ SmallString<256> TargetInfoAsm;//("-fopenmp-targets-asm "); -+ -+ Arg* Tgts = Args.getLastArg(options::OPT_fopenmp_targets_EQ); -+ assert(Tgts && Tgts->getNumValues() && -+ "OpenMP offloading has to have targets specified."); -+ for (unsigned i = 0; i < Tgts->getNumValues(); ++i) { -+ if (i) -+ TargetInfo += ','; -+ // We need to get the string from the triple because it may be not exactly -+ // the same as the one we get directly from the arguments. -+ llvm::Triple T(Tgts->getValue(i)); -+ TargetInfo += T.getTriple(); -+ // We also need to give a output file -+ TargetInfoAsm += Path; -+ TargetInfoAsm += "/"; -+ TargetInfoAsm += Stem; -+ TargetInfoAsm += "-"; -+ TargetInfoAsm += T.getTriple(); -+ TargetInfoAsm += ".ll"; -+ } -+ // The driver is aware that flang2 can generate multiple files at the same time. -+ // We mimic it here by exchanging the output files. -+ // The driver always uses the output file of -asm. 
-+ LowerCmdArgs.push_back("-fopenmp-targets"); -+ LowerCmdArgs.push_back(Args.MakeArgString(TargetInfo.str())); -+ if(IsOpenMPDevice) { -+ LowerCmdArgs.push_back("-fopenmp-targets-asm"); -+ LowerCmdArgs.push_back(Args.MakeArgString(OutFile)); -+ LowerCmdArgs.push_back("-asm"); -+ LowerCmdArgs.push_back(Args.MakeArgString(TargetInfoAsm.str())); -+ } else { -+ LowerCmdArgs.push_back("-fopenmp-targets-asm"); -+ LowerCmdArgs.push_back(Args.MakeArgString(TargetInfoAsm.str())); -+ LowerCmdArgs.push_back("-asm"); -+ LowerCmdArgs.push_back(Args.MakeArgString(OutFile)); -+ } -+ } else { -+ LowerCmdArgs.push_back("-asm"); -+ LowerCmdArgs.push_back(Args.MakeArgString(OutFile)); -+ } -+ -+ bool IsWindowsMSVC = getToolChain().getTriple().isWindowsMSVCEnvironment(); -+ if (IsWindowsMSVC && !Args.hasArg(options::OPT_noFlangLibs)) { -+ getToolChain().AddFortranStdlibLibArgs(Args, LowerCmdArgs); -+ for (auto Arg : Args.filtered(options::OPT_noFlangLibs)) { -+ Arg->claim(); -+ } -+ } -+ -+ C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::AtFileUTF8(), LowerExec, LowerCmdArgs, Inputs)); -+} -+ -diff --git a/clang/lib/Driver/ToolChains/ClassicFlang.h b/clang/lib/Driver/ToolChains/ClassicFlang.h -new file mode 100644 -index 000000000000..5864e0776601 ---- /dev/null -+++ b/clang/lib/Driver/ToolChains/ClassicFlang.h -@@ -0,0 +1,49 @@ -+//===--- ClassicFlang.h - Flang ToolChain Implementations -------*- C++ -*-===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+ -+#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ClassicFlang_H -+#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ClassicFlang_H -+ -+#include "MSVC.h" -+#include "clang/Driver/Driver.h" -+#include "clang/Driver/Tool.h" -+#include "clang/Driver/Types.h" -+#include "llvm/Frontend/Debug/Options.h" -+#include "llvm/Option/Option.h" -+#include "llvm/Support/raw_ostream.h" -+#include "llvm/TargetParser/Triple.h" -+ -+namespace clang { -+namespace driver { -+ -+namespace tools { -+ -+/// \brief Flang Fortran frontend -+class LLVM_LIBRARY_VISIBILITY ClassicFlang : public Tool { -+public: -+ ClassicFlang(const ToolChain &TC) -+ : Tool("classic-flang", -+ "Fortran frontend to LLVM", TC) {} -+ -+ bool hasGoodDiagnostics() const override { return true; } -+ bool hasIntegratedAssembler() const override { return false; } -+ bool hasIntegratedCPP() const override { return true; } -+ -+ void ConstructJob(Compilation &C, const JobAction &JA, -+ const InputInfo &Output, const InputInfoList &Inputs, -+ const llvm::opt::ArgList &TCArgs, -+ const char *LinkingOutput) const override; -+}; -+ -+} // end namespace tools -+ -+} // end namespace driver -+} // end namespace clang -+ -+#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ClassicFlang_H -diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp -index 0d6907b8e5c7..1ccc83a468ce 100644 ---- a/clang/lib/Driver/ToolChains/CommonArgs.cpp -+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp -@@ -146,6 +146,20 @@ static bool shouldIgnoreUnsupportedTargetFeature(const Arg &TargetFeatureArg, - return TargetFeatureArg.getOption().matches(options::OPT_mno_cumode); - } - -+#ifdef ENABLE_CLASSIC_FLANG -+/// \brief Determine if Fortran "main" object is needed -+bool tools::needFortranMain(const Driver &D, const ArgList &Args) { -+ return (needFortranLibs(D, Args) && 
!Args.hasArg(options::OPT_Mnomain) && -+ !Args.hasArg(options::OPT_no_fortran_main)); -+} -+ -+/// \brief Determine if Fortran link libraies are needed -+bool tools::needFortranLibs(const Driver &D, const ArgList &Args) { -+ return (D.IsFlangMode() && !Args.hasArg(options::OPT_nostdlib) && -+ !Args.hasArg(options::OPT_noFlangLibs)); -+} -+#endif -+ - void tools::addPathIfExists(const Driver &D, const Twine &Path, - ToolChain::path_list &Paths) { - if (D.getVFS().exists(Path)) -@@ -256,6 +270,9 @@ void tools::AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, - const ArgList &Args, ArgStringList &CmdArgs, - const JobAction &JA) { - const Driver &D = TC.getDriver(); -+#ifdef ENABLE_CLASSIC_FLANG -+ bool SeenFirstLinkerInput = false; -+#endif - - // Add extra linker input arguments which are not treated as inputs - // (constructed via -Xarch_). -@@ -289,6 +306,15 @@ void tools::AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, - if (II.isNothing()) - continue; - -+#ifdef ENABLE_CLASSIC_FLANG -+ // Add Fortan "main" before the first linker input -+ if (!SeenFirstLinkerInput) { -+ if (needFortranMain(D, Args)) { -+ CmdArgs.push_back("-lflangmain"); -+ } -+ SeenFirstLinkerInput = true; -+ } -+#endif - // Otherwise, this is a linker input argument. - const Arg &A = II.getInputArg(); - -@@ -300,6 +326,16 @@ void tools::AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, - else - A.renderAsInput(Args, CmdArgs); - } -+#ifdef ENABLE_CLASSIC_FLANG -+ if (!SeenFirstLinkerInput && needFortranMain(D, Args)) { -+ CmdArgs.push_back("-lflangmain"); -+ } -+ -+ // Claim "no Fortran main" arguments -+ for (auto Arg : Args.filtered(options::OPT_no_fortran_main, options::OPT_Mnomain)) { -+ Arg->claim(); -+ } -+#endif - } - - void tools::addLinkerCompressDebugSectionsOption( -@@ -489,10 +525,18 @@ static void getWebAssemblyTargetFeatures(const Driver &D, - options::OPT_m_wasm_Features_Group); - } - -+#ifndef ENABLE_CLASSIC_FLANG - void tools::getTargetFeatures(const Driver &D, const llvm::Triple &Triple, - const ArgList &Args, ArgStringList &CmdArgs, - bool ForAS, bool IsAux) { - std::vector Features; -+#else -+void tools::getTargetFeatureList(const Driver &D, -+ const llvm::Triple &Triple, -+ const ArgList &Args, ArgStringList &CmdArgs, -+ bool ForAS, -+ std::vector &Features) { -+#endif - switch (Triple.getArch()) { - default: - break; -@@ -567,6 +611,15 @@ void tools::getTargetFeatures(const Driver &D, const llvm::Triple &Triple, - loongarch::getLoongArchTargetFeatures(D, Triple, Args, Features); - break; - } -+#ifdef ENABLE_CLASSIC_FLANG -+} -+ -+void tools::getTargetFeatures(const Driver &D, const llvm::Triple &Triple, -+ const ArgList &Args, ArgStringList &CmdArgs, -+ bool ForAS, bool IsAux) { -+ std::vector Features; -+ getTargetFeatureList(D, Triple, Args, CmdArgs, ForAS, Features); -+#endif - - for (auto Feature : unifyTargetFeatures(Features)) { - CmdArgs.push_back(IsAux ? 
"-aux-target-feature" : "-target-feature"); -@@ -889,7 +942,11 @@ bool tools::addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC, - const ArgList &Args, bool ForceStaticHostRuntime, - bool IsOffloadingHost, bool GompNeedsRT) { - if (!Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, -- options::OPT_fno_openmp, false)) -+ options::OPT_fno_openmp, false) -+#ifdef ENABLE_CLASSIC_FLANG -+ && !Args.hasFlag(options::OPT_mp, options::OPT_nomp, false) -+#endif -+ ) - return false; - - Driver::OpenMPRuntimeKind RTKind = TC.getDriver().getOpenMPRuntime(Args); -@@ -934,7 +991,16 @@ bool tools::addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC, - } - - void tools::addFortranRuntimeLibs(const ToolChain &TC, -+#ifdef ENABLE_CLASSIC_FLANG -+ const llvm::opt::ArgList &Args, -+#endif - llvm::opt::ArgStringList &CmdArgs) { -+#ifdef ENABLE_CLASSIC_FLANG -+ if (needFortranLibs(TC.getDriver(), Args)) -+ TC.AddFortranStdlibLibArgs(Args, CmdArgs); -+ else -+ Args.ClaimAllArgs(options::OPT_noFlangLibs); -+#else - if (TC.getTriple().isKnownWindowsMSVCEnvironment()) { - CmdArgs.push_back("Fortran_main.lib"); - CmdArgs.push_back("FortranRuntime.lib"); -@@ -944,6 +1010,7 @@ void tools::addFortranRuntimeLibs(const ToolChain &TC, - CmdArgs.push_back("-lFortranRuntime"); - CmdArgs.push_back("-lFortranDecimal"); - } -+#endif - } - - void tools::addFortranRuntimeLibraryPath(const ToolChain &TC, -diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h -index 6a8de0f1c36d..bd5cb1bb866e 100644 ---- a/clang/lib/Driver/ToolChains/CommonArgs.h -+++ b/clang/lib/Driver/ToolChains/CommonArgs.h -@@ -23,6 +23,12 @@ namespace clang { - namespace driver { - namespace tools { - -+#ifdef ENABLE_CLASSIC_FLANG -+bool needFortranLibs(const Driver &D, const llvm::opt::ArgList &Args); -+ -+bool needFortranMain(const Driver &D, const llvm::opt::ArgList &Args); -+#endif -+ - void addPathIfExists(const Driver &D, const Twine &Path, - ToolChain::path_list &Paths); - -@@ -131,6 +137,9 @@ bool addOpenMPRuntime(llvm::opt::ArgStringList &CmdArgs, const ToolChain &TC, - - /// Adds Fortran runtime libraries to \p CmdArgs. - void addFortranRuntimeLibs(const ToolChain &TC, -+#ifdef ENABLE_CLASSIC_FLANG -+ const llvm::opt::ArgList &Args, -+#endif - llvm::opt::ArgStringList &CmdArgs); - - /// Adds the path for the Fortran runtime libraries to \p CmdArgs. -@@ -173,6 +182,17 @@ void AddTargetFeature(const llvm::opt::ArgList &Args, - std::string getCPUName(const Driver &D, const llvm::opt::ArgList &Args, - const llvm::Triple &T, bool FromAs = false); - -+#ifdef ENABLE_CLASSIC_FLANG -+// Helper function extracted from upstream getTargetFeatures. Classic Flang -+// uses this helper to render the target feature options for the Fortran -+// frontend. 
-+void getTargetFeatureList(const Driver &D, -+ const llvm::Triple &Triple, -+ const llvm::opt::ArgList &Args, -+ llvm::opt::ArgStringList &CmdArgs, -+ bool ForAS, std::vector &Features); -+#endif -+ - void getTargetFeatures(const Driver &D, const llvm::Triple &Triple, - const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs, bool ForAS, -diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp -index 3a577650eb08..36fddca2d177 100644 ---- a/clang/lib/Driver/ToolChains/Cuda.cpp -+++ b/clang/lib/Driver/ToolChains/Cuda.cpp -@@ -1034,3 +1034,38 @@ VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D, - const ArgList &Args) const { - return HostTC.computeMSVCVersion(D, Args); - } -+ -+#ifdef ENABLE_CLASSIC_FLANG -+static void AddFlangSysIncludeArg(const ArgList &DriverArgs, -+ ArgStringList &Flang1Args, -+ ToolChain::path_list IncludePathList) { -+ std::string ArgValue; // Path argument value -+ -+ // Make up argument value consisting of paths separated by colons -+ bool first = true; -+ for (auto P : IncludePathList) { -+ if (first) { -+ first = false; -+ } else { -+ ArgValue += ":"; -+ } -+ ArgValue += P; -+ } -+ -+ // Add the argument -+ Flang1Args.push_back("-stdinc"); -+ Flang1Args.push_back(DriverArgs.MakeArgString(ArgValue)); -+} -+ -+void CudaToolChain::AddFlangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, -+ llvm::opt::ArgStringList &Flang1Args) const { -+ path_list IncludePathList; -+ const Driver &D = getDriver(); -+ if (DriverArgs.hasArg(options::OPT_nostdinc)) -+ return; -+ SmallString<128> P(D.InstalledDir); -+ llvm::sys::path::append(P, "../include"); -+ IncludePathList.push_back(P.c_str()); -+ AddFlangSysIncludeArg(DriverArgs, Flang1Args, IncludePathList); -+} -+#endif -diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h -index 39df6e06fb26..3eded1f53e42 100644 ---- a/clang/lib/Driver/ToolChains/Cuda.h -+++ b/clang/lib/Driver/ToolChains/Cuda.h -@@ -214,6 +214,11 @@ public: - llvm::opt::ArgStringList &CC1Args) const override; - void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args) const override; -+#ifdef ENABLE_CLASSIC_FLANG -+ void -+ AddFlangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, -+ llvm::opt::ArgStringList &Flang1Args) const override; -+#endif - - SanitizerMask getSupportedSanitizers() const override; - -diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp -index 65bd6c6a7eb3..3b8e4d7e133a 100644 ---- a/clang/lib/Driver/ToolChains/Darwin.cpp -+++ b/clang/lib/Driver/ToolChains/Darwin.cpp -@@ -679,7 +679,11 @@ void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA, - // to generate executables. - if (getToolChain().getDriver().IsFlangMode()) { - addFortranRuntimeLibraryPath(getToolChain(), Args, CmdArgs); -+#ifdef ENABLE_CLASSIC_FLANG -+ addFortranRuntimeLibs(getToolChain(), Args, CmdArgs); -+#else - addFortranRuntimeLibs(getToolChain(), CmdArgs); -+#endif - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) -diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp -index 0bb5433a658a..076a2d4a21d8 100644 ---- a/clang/lib/Driver/ToolChains/Gnu.cpp -+++ b/clang/lib/Driver/ToolChains/Gnu.cpp -@@ -578,7 +578,11 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, - // AddRuntTimeLibs). 
- if (D.IsFlangMode()) {
- addFortranRuntimeLibraryPath(ToolChain, Args, CmdArgs);
-+#ifdef ENABLE_CLASSIC_FLANG
-+ addFortranRuntimeLibs(ToolChain, Args, CmdArgs);
-+#else
- addFortranRuntimeLibs(ToolChain, CmdArgs);
-+#endif
- CmdArgs.push_back("-lm");
- }
-
-diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp
-index 1ba222bf83b1..7f0714676a9d 100644
---- a/clang/lib/Driver/ToolChains/Linux.cpp
-+++ b/clang/lib/Driver/ToolChains/Linux.cpp
-@@ -608,6 +608,196 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const {
- return "/" + LibDir + "/" + Loader;
- }
-
-+#ifdef ENABLE_CLASSIC_FLANG
-+/// Convert path list to Fortran frontend argument
-+static void AddFlangSysIncludeArg(const ArgList &DriverArgs,
-+ ArgStringList &Flang1Args,
-+ ToolChain::path_list IncludePathList) {
-+ std::string ArgValue; // Path argument value
-+
-+ // Make up argument value consisting of paths separated by colons
-+ bool first = true;
-+ for (auto P : IncludePathList) {
-+ if (first) {
-+ first = false;
-+ } else {
-+ ArgValue += ":";
-+ }
-+ ArgValue += P;
-+ }
-+
-+ // Add the argument
-+ Flang1Args.push_back("-stdinc");
-+ Flang1Args.push_back(DriverArgs.MakeArgString(ArgValue));
-+}
-+
-+void Linux::AddFlangSystemIncludeArgs(const ArgList &DriverArgs,
-+ ArgStringList &Flang1Args) const {
-+ path_list IncludePathList;
-+ const Driver &D = getDriver();
-+ std::string SysRoot = computeSysRoot();
-+
-+ if (DriverArgs.hasArg(options::OPT_nostdinc))
-+ return;
-+
-+ {
-+ SmallString<128> P(D.InstalledDir);
-+ llvm::sys::path::append(P, "../include");
-+ IncludePathList.push_back(P.c_str());
-+ }
-+
-+ if (!DriverArgs.hasArg(options::OPT_nostdlibinc))
-+ IncludePathList.push_back(SysRoot + "/usr/local/include");
-+
-+ if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
-+ SmallString<128> P(D.ResourceDir);
-+ llvm::sys::path::append(P, "include");
-+ IncludePathList.push_back(P.c_str());
-+ }
-+
-+ if (DriverArgs.hasArg(options::OPT_nostdlibinc)) {
-+ AddFlangSysIncludeArg(DriverArgs, Flang1Args, IncludePathList);
-+ return;
-+ }
-+
-+ // Check for configure-time C include directories.
-+ StringRef CIncludeDirs(C_INCLUDE_DIRS);
-+ if (CIncludeDirs != "") {
-+ SmallVector<StringRef, 5> dirs;
-+ CIncludeDirs.split(dirs, ":");
-+ for (StringRef dir : dirs) {
-+ StringRef Prefix =
-+ llvm::sys::path::is_absolute(dir) ? StringRef(SysRoot) : "";
-+ IncludePathList.push_back(Prefix.str() + dir.str());
-+ }
-+ AddFlangSysIncludeArg(DriverArgs, Flang1Args, IncludePathList);
-+ return;
-+ }
-+
-+ // Lacking those, try to detect the correct set of system includes for the
-+ // target triple.
-+
-+ // Add include directories specific to the selected multilib set and multilib.
-+ if (GCCInstallation.isValid()) {
-+ const auto &Callback = Multilibs.includeDirsCallback();
-+ if (Callback) {
-+ for (const auto &Path : Callback(GCCInstallation.getMultilib()))
-+ addExternCSystemIncludeIfExists(
-+ DriverArgs, Flang1Args, GCCInstallation.getInstallPath() + Path);
-+ }
-+ }
-+
-+ // Implement generic Debian multiarch support.
-+ const StringRef X86_64MultiarchIncludeDirs[] = {
-+ "/usr/include/x86_64-linux-gnu",
-+
-+ // FIXME: These are older forms of multiarch. It's not clear that they're
-+ // in use in any released version of Debian, so we should consider
-+ // removing them.
-+ "/usr/include/i686-linux-gnu/64", "/usr/include/i486-linux-gnu/64"};
-+ const StringRef X86MultiarchIncludeDirs[] = {
-+ "/usr/include/i386-linux-gnu",
-+
-+ // FIXME: These are older forms of multiarch. It's not clear that they're
-+ // in use in any released version of Debian, so we should consider
-+ // removing them.
-+ "/usr/include/x86_64-linux-gnu/32", "/usr/include/i686-linux-gnu",
-+ "/usr/include/i486-linux-gnu"};
-+ const StringRef AArch64MultiarchIncludeDirs[] = {
-+ "/usr/include/aarch64-linux-gnu"};
-+ const StringRef ARMMultiarchIncludeDirs[] = {
-+ "/usr/include/arm-linux-gnueabi"};
-+ const StringRef ARMHFMultiarchIncludeDirs[] = {
-+ "/usr/include/arm-linux-gnueabihf"};
-+ const StringRef MIPSMultiarchIncludeDirs[] = {"/usr/include/mips-linux-gnu"};
-+ const StringRef MIPSELMultiarchIncludeDirs[] = {
-+ "/usr/include/mipsel-linux-gnu"};
-+ const StringRef MIPS64MultiarchIncludeDirs[] = {
-+ "/usr/include/mips64-linux-gnu", "/usr/include/mips64-linux-gnuabi64"};
-+ const StringRef MIPS64ELMultiarchIncludeDirs[] = {
-+ "/usr/include/mips64el-linux-gnu",
-+ "/usr/include/mips64el-linux-gnuabi64"};
-+ const StringRef PPCMultiarchIncludeDirs[] = {
-+ "/usr/include/powerpc-linux-gnu"};
-+ const StringRef PPC64MultiarchIncludeDirs[] = {
-+ "/usr/include/powerpc64-linux-gnu"};
-+ const StringRef PPC64LEMultiarchIncludeDirs[] = {
-+ "/usr/include/powerpc64le-linux-gnu"};
-+ const StringRef SparcMultiarchIncludeDirs[] = {
-+ "/usr/include/sparc-linux-gnu"};
-+ const StringRef Sparc64MultiarchIncludeDirs[] = {
-+ "/usr/include/sparc64-linux-gnu"};
-+ ArrayRef<StringRef> MultiarchIncludeDirs;
-+ switch (getTriple().getArch()) {
-+ case llvm::Triple::x86_64:
-+ MultiarchIncludeDirs = X86_64MultiarchIncludeDirs;
-+ break;
-+ case llvm::Triple::x86:
-+ MultiarchIncludeDirs = X86MultiarchIncludeDirs;
-+ break;
-+ case llvm::Triple::aarch64:
-+ case llvm::Triple::aarch64_be:
-+ MultiarchIncludeDirs = AArch64MultiarchIncludeDirs;
-+ break;
-+ case llvm::Triple::arm:
-+ if (getTriple().getEnvironment() == llvm::Triple::GNUEABIHF)
-+ MultiarchIncludeDirs = ARMHFMultiarchIncludeDirs;
-+ else
-+ MultiarchIncludeDirs = ARMMultiarchIncludeDirs;
-+ break;
-+ case llvm::Triple::mips:
-+ MultiarchIncludeDirs = MIPSMultiarchIncludeDirs;
-+ break;
-+ case llvm::Triple::mipsel:
-+ MultiarchIncludeDirs = MIPSELMultiarchIncludeDirs;
-+ break;
-+ case llvm::Triple::mips64:
-+ MultiarchIncludeDirs = MIPS64MultiarchIncludeDirs;
-+ break;
-+ case llvm::Triple::mips64el:
-+ MultiarchIncludeDirs = MIPS64ELMultiarchIncludeDirs;
-+ break;
-+ case llvm::Triple::ppc:
-+ MultiarchIncludeDirs = PPCMultiarchIncludeDirs;
-+ break;
-+ case llvm::Triple::ppc64:
-+ MultiarchIncludeDirs = PPC64MultiarchIncludeDirs;
-+ break;
-+ case llvm::Triple::ppc64le:
-+ MultiarchIncludeDirs = PPC64LEMultiarchIncludeDirs;
-+ break;
-+ case llvm::Triple::sparc:
-+ MultiarchIncludeDirs = SparcMultiarchIncludeDirs;
-+ break;
-+ case llvm::Triple::sparcv9:
-+ MultiarchIncludeDirs = Sparc64MultiarchIncludeDirs;
-+ break;
-+ default:
-+ break;
-+ }
-+ for (StringRef Dir : MultiarchIncludeDirs) {
-+ if (llvm::sys::fs::exists(SysRoot + Dir)) {
-+ IncludePathList.push_back(SysRoot + Dir.str());
-+ break;
-+ }
-+ }
-+
-+ if (getTriple().getOS() == llvm::Triple::RTEMS) {
-+ AddFlangSysIncludeArg(DriverArgs, Flang1Args, IncludePathList);
-+ return;
-+ }
-+
-+ // Add an include of '/include' directly. This isn't provided by default by
-+ // system GCCs, but is often used with cross-compiling GCCs, and harmless to
-+ // add even when Clang is acting as-if it were a system compiler.
-+ IncludePathList.push_back(SysRoot + "/include");
-+
-+ IncludePathList.push_back(SysRoot + "/usr/include");
-+
-+ AddFlangSysIncludeArg(DriverArgs, Flang1Args, IncludePathList);
-+}
-+#endif
-+
- void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
- ArgStringList &CC1Args) const {
- const Driver &D = getDriver();
-diff --git a/clang/lib/Driver/ToolChains/Linux.h b/clang/lib/Driver/ToolChains/Linux.h
-index 524391743090..b6db4160811d 100644
---- a/clang/lib/Driver/ToolChains/Linux.h
-+++ b/clang/lib/Driver/ToolChains/Linux.h
-@@ -27,6 +27,11 @@ public:
- const llvm::Triple &TargetTriple,
- StringRef SysRoot) const override;
-
-+#ifdef ENABLE_CLASSIC_FLANG
-+ void
-+ AddFlangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
-+ llvm::opt::ArgStringList &Flang1Args) const override;
-+#endif
- void
- AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
- llvm::opt::ArgStringList &CC1Args) const override;
-diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp
-index a9fe9da4620f..1562e1a4b8eb 100644
---- a/clang/lib/Driver/ToolChains/MSVC.cpp
-+++ b/clang/lib/Driver/ToolChains/MSVC.cpp
-@@ -131,7 +131,11 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
-
- if (C.getDriver().IsFlangMode()) {
- addFortranRuntimeLibraryPath(TC, Args, CmdArgs);
-+#ifdef ENABLE_CLASSIC_FLANG
-+ addFortranRuntimeLibs(TC, Args, CmdArgs);
-+#else
- addFortranRuntimeLibs(TC, CmdArgs);
-+#endif
-
- // Inform the MSVC linker that we're generating a console application, i.e.
- // one with `main` as the "user-defined" entry point. The `main` function is
-@@ -263,6 +267,13 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
- }
- }
-
-+#ifdef ENABLE_CLASSIC_FLANG
-+ if (C.getDriver().IsFlangMode()) {
-+ CmdArgs.push_back(Args.MakeArgString(std::string("-libpath:") +
-+ TC.getDriver().Dir + "/../lib"));
-+ }
-+#endif
-+
- // Add compiler-rt lib in case if it was explicitly
- // specified as an argument for --rtlib option.
- if (!Args.hasArg(options::OPT_nostdlib)) {
-@@ -510,6 +521,74 @@ void MSVCToolChain::AddHIPRuntimeLibArgs(const ArgList &Args,
- "amdhip64.lib"});
- }
-
-+#ifdef ENABLE_CLASSIC_FLANG
-+void MSVCToolChain::AddFortranStdlibLibArgs(const ArgList &Args,
-+ ArgStringList &CmdArgs) const {
-+ bool staticFlangLibs = false;
-+ bool useOpenMP = false;
-+
-+ if (Args.hasArg(options::OPT_staticFlangLibs)) {
-+ for (auto *A: Args.filtered(options::OPT_staticFlangLibs)) {
-+ A->claim();
-+ staticFlangLibs = true;
-+ }
-+ }
-+
-+ Arg *A = Args.getLastArg(options::OPT_mp, options::OPT_nomp,
-+ options::OPT_fopenmp, options::OPT_fno_openmp);
-+ if (A &&
-+ (A->getOption().matches(options::OPT_mp) ||
-+ A->getOption().matches(options::OPT_fopenmp))) {
-+ useOpenMP = true;
-+ }
-+
-+ if (needFortranMain(getDriver(), Args)) {
-+ // flangmain is always static
-+ CmdArgs.push_back("-linker");
-+ CmdArgs.push_back("/subsystem:console");
-+ CmdArgs.push_back("-linker");
-+ CmdArgs.push_back("/defaultlib:flangmain.lib");
-+ }
-+
-+ if (staticFlangLibs) {
-+ CmdArgs.push_back("-linker");
-+ CmdArgs.push_back("/defaultlib:libflang.lib");
-+ CmdArgs.push_back("-linker");
-+ CmdArgs.push_back("/defaultlib:libflangrti.lib");
-+ CmdArgs.push_back("-linker");
-+ CmdArgs.push_back("/defaultlib:libpgmath.lib");
-+ } else {
-+ CmdArgs.push_back("-linker");
-+ CmdArgs.push_back("/defaultlib:flang.lib");
-+ CmdArgs.push_back("-linker");
-+ CmdArgs.push_back("/defaultlib:flangrti.lib");
-+ CmdArgs.push_back("-linker");
-+ CmdArgs.push_back("/defaultlib:pgmath.lib");
-+ }
-+ if (useOpenMP) {
-+ CmdArgs.push_back("-linker");
-+ CmdArgs.push_back("/nodefaultlib:vcomp.lib");
-+ CmdArgs.push_back("-linker");
-+ CmdArgs.push_back("/nodefaultlib:vcompd.lib");
-+ CmdArgs.push_back("-linker");
-+ CmdArgs.push_back("/defaultlib:libomp.lib");
-+ }
-+
-+ // Always link Fortran executables with Pthreads
-+ // CmdArgs.push_back("-lpthread");
-+
-+ // These options are added by clang-cl in Clang.cpp for C/C++.
-+ // In clang-cl.exe -MD and -MT control these options, but in
-+ // flang.exe, like clang.exe, these are different options for
-+ // dependency tracking. Let's assume that if somebody needs
-+ // static flang libs, they don't need static C runtime libs.
-+ // FIXME: Use LLVM_USE_CRT_ variable
-+ // to use libcmt.lib or msvcrt.lib
-+ CmdArgs.push_back("-linker");
-+ CmdArgs.push_back("/defaultlib:libcmt.lib");
-+}
-+#endif
-+
- void MSVCToolChain::printVerboseInfo(raw_ostream &OS) const {
- CudaInstallation.print(OS);
- RocmInstallation.print(OS);
-@@ -634,6 +713,42 @@ void MSVCToolChain::AddSystemIncludeWithSubfolder(
- addSystemInclude(DriverArgs, CC1Args, path);
- }
-
-+#ifdef ENABLE_CLASSIC_FLANG
-+/// Convert path list to Fortran frontend argument
-+static void AddFlangSysIncludeArg(const ArgList &DriverArgs,
-+ ArgStringList &Flang1Args,
-+ ToolChain::path_list IncludePathList) {
-+ std::string ArgValue; // Path argument value
-+
-+ // Make up argument value consisting of paths separated by semicolons
-+ bool first = true;
-+ for (auto P : IncludePathList) {
-+ if (first) {
-+ first = false;
-+ } else {
-+ ArgValue += ";";
-+ }
-+ ArgValue += P;
-+ }
-+
-+ // Add the argument
-+ Flang1Args.push_back("-stdinc");
-+ Flang1Args.push_back(DriverArgs.MakeArgString(ArgValue));
-+}
-+
-+void MSVCToolChain::AddFlangSystemIncludeArgs(const ArgList &DriverArgs,
-+ ArgStringList &Flang1Args) const {
-+ path_list IncludePathList;
-+ const Driver &D = getDriver();
-+ if (DriverArgs.hasArg(options::OPT_nostdinc))
-+ return;
-+ SmallString<128> P(D.InstalledDir);
-+ llvm::sys::path::append(P, "../include");
-+ IncludePathList.push_back(P.c_str());
-+ AddFlangSysIncludeArg(DriverArgs, Flang1Args, IncludePathList);
-+}
-+#endif
-+
- void MSVCToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
- ArgStringList &CC1Args) const {
- if (DriverArgs.hasArg(options::OPT_nostdinc))
-diff --git a/clang/lib/Driver/ToolChains/MSVC.h b/clang/lib/Driver/ToolChains/MSVC.h
-index 0f687bc70ae4..46d5af9d114f 100644
---- a/clang/lib/Driver/ToolChains/MSVC.h
-+++ b/clang/lib/Driver/ToolChains/MSVC.h
-@@ -84,6 +84,12 @@ public:
- return VSLayout == llvm::ToolsetLayout::VS2017OrNewer;
- }
-
-+#ifdef ENABLE_CLASSIC_FLANG
-+ void
-+ AddFlangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
-+ llvm::opt::ArgStringList &Flang1Args) const override;
-+#endif
-+
- void
- AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
- llvm::opt::ArgStringList &CC1Args) const override;
-@@ -100,6 +106,11 @@ public:
- void AddHIPRuntimeLibArgs(const llvm::opt::ArgList &Args,
- llvm::opt::ArgStringList &CmdArgs) const override;
-
-+#ifdef ENABLE_CLASSIC_FLANG
-+ void AddFortranStdlibLibArgs(const llvm::opt::ArgList &Args,
-+ llvm::opt::ArgStringList &CmdArgs) const override;
-+#endif
-+
- bool getWindowsSDKLibraryPath(
- const llvm::opt::ArgList &Args, std::string &path) const;
- bool getUniversalCRTLibraryPath(const llvm::opt::ArgList &Args,
-diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp
-index 503dbf3f0dea..264cc7ab06c8 100644
---- a/clang/lib/Driver/ToolChains/MinGW.cpp
-+++ b/clang/lib/Driver/ToolChains/MinGW.cpp
-@@ -240,7 +240,11 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA,
-
- if (C.getDriver().IsFlangMode()) {
- addFortranRuntimeLibraryPath(TC, Args, CmdArgs);
-+#ifdef ENABLE_CLASSIC_FLANG
-+ addFortranRuntimeLibs(TC, Args, CmdArgs);
-+#else
- addFortranRuntimeLibs(TC, CmdArgs);
-+#endif
- }
-
- // TODO: Add profile stuff here
-diff --git a/clang/lib/Driver/Types.cpp b/clang/lib/Driver/Types.cpp
-old mode 100644
-new mode 100755
-index 7d6308d757bc..a10c6e2c6c42
---- a/clang/lib/Driver/Types.cpp
-+++ b/clang/lib/Driver/Types.cpp
-@@ -55,9 +55,11 @@ const char *types::getTypeName(ID Id) {
-
- types::ID types::getPreprocessedType(ID Id) {
- ID PPT = getInfo(Id).PreprocessedType;
-+#ifndef ENABLE_CLASSIC_FLANG
- assert((getInfo(Id).Phases.contains(phases::Preprocess) !=
- (PPT == TY_INVALID)) &&
- "Unexpected Preprocess Type.");
-+#endif
- return PPT;
- }
-
-@@ -134,6 +136,10 @@ bool types::isAcceptedByClang(ID Id) {
- case TY_Asm:
- case TY_C: case TY_PP_C:
- case TY_CL: case TY_CLCXX:
-+#ifdef ENABLE_CLASSIC_FLANG
-+ case TY_F_FreeForm: case TY_PP_F_FreeForm:
-+ case TY_F_FixedForm: case TY_PP_F_FixedForm:
-+#endif
- case TY_CUDA: case TY_PP_CUDA:
- case TY_CUDA_DEVICE:
- case TY_HIP:
-@@ -164,12 +170,20 @@ bool types::isAcceptedByFlang(ID Id) {
- default:
- return false;
-
-+#ifdef ENABLE_CLASSIC_FLANG
-+ case TY_F_FreeForm:
-+ case TY_PP_F_FreeForm:
-+ case TY_F_FixedForm:
-+ case TY_PP_F_FixedForm:
-+ return true;
-+#else
- case TY_Fortran:
- case TY_PP_Fortran:
- return true;
- case TY_LLVM_IR:
- case TY_LLVM_BC:
- return true;
-+#endif
- }
- }
-
-@@ -286,6 +300,22 @@ bool types::isHIP(ID Id) {
- }
- }
-
-+#ifdef ENABLE_CLASSIC_FLANG
-+bool types::isFreeFormFortran(ID Id) {
-+ if (!isAcceptedByFlang(Id))
-+ return false;
-+
-+ return (Id == TY_F_FreeForm || Id == TY_PP_F_FreeForm);
-+}
-+
-+bool types::isFixedFormFortran(ID Id) {
-+ if (!isAcceptedByFlang(Id))
-+ return false;
-+
-+ return (Id == TY_F_FixedForm || Id == TY_PP_F_FixedForm);
-+}
-+#endif
-+
- bool types::isHLSL(ID Id) { return Id == TY_HLSL; }
-
- bool types::isSrcFile(ID Id) {
-@@ -296,8 +326,13 @@ types::ID types::lookupTypeForExtension(llvm::StringRef Ext) {
- return llvm::StringSwitch<types::ID>(Ext)
- .Case("c", TY_C)
- .Case("C", TY_CXX)
-+#ifdef ENABLE_CLASSIC_FLANG
-+ .Case("F", TY_F_FixedForm)
-+ .Case("f", TY_PP_F_FixedForm)
-+#else
- .Case("F", TY_Fortran)
- .Case("f", TY_PP_Fortran)
-+#endif
- .Case("h", TY_CHeader)
- .Case("H", TY_CXXHeader)
- .Case("i", TY_PP_C)
-@@ -331,6 +366,20 @@ types::ID types::lookupTypeForExtension(llvm::StringRef Ext) {
- .Case("cui", TY_PP_CUDA)
- .Case("cxx", TY_CXX)
- .Case("CXX", TY_CXX)
-+#ifdef ENABLE_CLASSIC_FLANG
-+ .Case("for", TY_PP_F_FixedForm)
-+ .Case("FOR", TY_PP_F_FixedForm)
-+ .Case("fpp", TY_F_FixedForm)
-+ .Case("FPP", TY_F_FixedForm)
-+ .Case("f90", TY_PP_F_FreeForm)
-+ .Case("f95", TY_PP_F_FreeForm)
-+ .Case("f03", TY_PP_F_FreeForm)
-+ .Case("f08", TY_PP_F_FreeForm)
-+ .Case("F90", TY_F_FreeForm)
-+ .Case("F95", TY_F_FreeForm)
-+ .Case("F03", TY_F_FreeForm)
-+ .Case("F08", TY_F_FreeForm)
-+#else
- .Case("F03", TY_Fortran)
- .Case("f03", TY_PP_Fortran)
- .Case("F08", TY_Fortran)
-@@ -343,6 +392,7 @@ types::ID types::lookupTypeForExtension(llvm::StringRef Ext) {
- .Case("FOR", TY_PP_Fortran)
- .Case("fpp", TY_Fortran)
- .Case("FPP", TY_Fortran)
-+#endif
- .Case("gch", TY_PCH)
- .Case("hip", TY_HIP)
- .Case("hipi", TY_PP_HIP)
-diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
-index f8fae82fba12..7b01cb01a45e 100644
---- a/clang/lib/Frontend/InitPreprocessor.cpp
-+++ b/clang/lib/Frontend/InitPreprocessor.cpp
-@@ -161,26 +161,6 @@ static void DefineFloatMacros(MacroBuilder &Builder, StringRef Prefix,
- Builder.defineMacro(DefPrefix + "MIN__", Twine(Min)+Ext);
- }
-
--
--/// DefineTypeSize - Emit a macro to the predefines buffer that declares a macro
--/// named MacroName with the max value for a type with width 'TypeWidth' a
--/// signedness of 'isSigned' and with a value suffix of 'ValSuffix' (e.g. LL).
--static void DefineTypeSize(const Twine &MacroName, unsigned TypeWidth,
-- StringRef ValSuffix, bool isSigned,
-- MacroBuilder &Builder) {
-- llvm::APInt MaxVal = isSigned ? llvm::APInt::getSignedMaxValue(TypeWidth)
-- : llvm::APInt::getMaxValue(TypeWidth);
-- Builder.defineMacro(MacroName, toString(MaxVal, 10, isSigned) + ValSuffix);
--}
--
--/// DefineTypeSize - An overloaded helper that uses TargetInfo to determine
--/// the width, suffix, and signedness of the given type
--static void DefineTypeSize(const Twine &MacroName, TargetInfo::IntType Ty,
-- const TargetInfo &TI, MacroBuilder &Builder) {
-- DefineTypeSize(MacroName, TI.getTypeWidth(Ty), TI.getTypeConstantSuffix(Ty),
-- TI.isTypeSigned(Ty), Builder);
--}
--
- static void DefineFmt(const Twine &Prefix, TargetInfo::IntType Ty,
- const TargetInfo &TI, MacroBuilder &Builder) {
- bool IsSigned = TI.isTypeSigned(Ty);
-diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt
-index 31b494f39cce..b88694deb818 100644
---- a/clang/test/CMakeLists.txt
-+++ b/clang/test/CMakeLists.txt
-@@ -10,6 +10,7 @@ llvm_canonicalize_cmake_booleans(
- CLANG_PLUGIN_SUPPORT
- CLANG_SPAWN_CC1
- ENABLE_BACKTRACES
-+ LLVM_ENABLE_CLASSIC_FLANG
- LLVM_ENABLE_ZLIB
- LLVM_ENABLE_ZSTD
- LLVM_ENABLE_PER_TARGET_RUNTIME_DIR
-diff --git a/clang/test/CodeGen/libpgmath-logfun-aarch64.ll b/clang/test/CodeGen/libpgmath-logfun-aarch64.ll
-new file mode 100644
-index 000000000000..141fed29ccd1
---- /dev/null
-+++ b/clang/test/CodeGen/libpgmath-logfun-aarch64.ll
-@@ -0,0 +1,58 @@
-+; REQUIRES: aarch64-registered-target
-+
-+; RUN: %clang -target aarch64-unknown-linux-gnu -Ofast -S %s -o - | FileCheck %s
-+
-+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-+
-+define void @fun_(i64* nocapture %z) local_unnamed_addr #0 {
-+L.entry:
-+ %0 = bitcast i64* %z to i8*
-+ %1 = bitcast i64* %z to float*
-+ %2 = load float, float* %1, align 4
-+ %3 = fpext float %2 to double
-+ %4 = fadd double %3, 5.000000e-01
-+ %5 = tail call double @__pd_log_1(double %4) #1
-+ %6 = fptrunc double %5 to float
-+ %7 = tail call float @__ps_exp_1(float %6) #2
-+ store float %7, float* %1, align 4
-+ %8 = getelementptr i8, i8* %0, i64 4
-+ %9 = bitcast i8* %8 to float*
-+ %10 = load float, float* %9, align 4
-+ %11 = fpext float %10 to double
-+ %12 = fadd double %11, 5.000000e-01
-+ %13 = tail call double @__pd_log_1(double %12) #1
-+ %14 = fptrunc double %13 to float
-+ %15 = tail call float @__ps_exp_1(float %14) #2
-+ store float %15, float* %9, align 4
-+ %16 = getelementptr i64, i64* %z, i64 1
-+ %17 = bitcast i64* %16 to float*
-+ %18 = load float, float* %17, align 4
-+ %19 = fpext float %18 to double
-+ %20 = fadd double %19, 5.000000e-01
-+ %21 = tail call double @__pd_log_1(double %20) #1
-+ %22 = fptrunc double %21 to float
-+ %23 = tail call float @__ps_exp_1(float %22) #2
-+ store float %23, float* %17, align 4
-+ %24 = getelementptr i8, i8* %0, i64 12
-+ %25 = bitcast i8* %24 to float*
-+ %26 = load float, float* %25, align 4
-+ %27 = fpext float %26 to double
-+ %28 = fadd double %27, 5.000000e-01
-+ %29 = tail call double @__pd_log_1(double %28) #1
-+ %30 = fptrunc double %29 to float
-+ %31 = tail call float @__ps_exp_1(float %30) #2
-+ store float %31, float* %25, align 4
-+ ret void
-+
-+; CHECK-NOT: __pd_log_4
-+; CHECK: __pd_log_2
-+; CHECK: __pd_log_2
-+}
-+
-+; Function Attrs: nounwind readnone willreturn
-+declare float @__ps_exp_1(float) #0
-+
-+; Function Attrs: nounwind readnone willreturn
-+declare double @__pd_log_1(double) #0
-+
-+attributes #0 = { nounwind readnone willreturn }
-diff --git a/clang/test/CodeGen/libpgmath-logfun-x86_64.ll b/clang/test/CodeGen/libpgmath-logfun-x86_64.ll
-new file mode 100644
-index 000000000000..3ce1d910947f
---- /dev/null
-+++ b/clang/test/CodeGen/libpgmath-logfun-x86_64.ll
-@@ -0,0 +1,57 @@
-+; REQUIRES: x86-registered-target
-+
-+; RUN: %clang -target x86_64-unknown-linux-gnu -msse -Ofast -S %s -o - | FileCheck %s
-+
-+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-+
-+define void @fun_(i64* nocapture %z) local_unnamed_addr #0 {
-+L.entry:
-+ %0 = bitcast i64* %z to i8*
-+ %1 = bitcast i64* %z to float*
-+ %2 = load float, float* %1, align 4
-+ %3 = fpext float %2 to double
-+ %4 = fadd double %3, 5.000000e-01
-+ %5 = tail call double @__pd_log_1(double %4) #1
-+ %6 = fptrunc double %5 to float
-+ %7 = tail call float @__ps_exp_1(float %6) #2
-+ store float %7, float* %1, align 4
-+ %8 = getelementptr i8, i8* %0, i64 4
-+ %9 = bitcast i8* %8 to float*
-+ %10 = load float, float* %9, align 4
-+ %11 = fpext float %10 to double
-+ %12 = fadd double %11, 5.000000e-01
-+ %13 = tail call double @__pd_log_1(double %12) #1
-+ %14 = fptrunc double %13 to float
-+ %15 = tail call float @__ps_exp_1(float %14) #2
-+ store float %15, float* %9, align 4
-+ %16 = getelementptr i64, i64* %z, i64 1
-+ %17 = bitcast i64* %16 to float*
-+ %18 = load float, float* %17, align 4
-+ %19 = fpext float %18 to double
-+ %20 = fadd double %19, 5.000000e-01
-+ %21 = tail call double @__pd_log_1(double %20) #1
-+ %22 = fptrunc double %21 to float
-+ %23 = tail call float @__ps_exp_1(float %22) #2
-+ store float %23, float* %17, align 4
-+ %24 = getelementptr i8, i8* %0, i64 12
-+ %25 = bitcast i8* %24 to float*
-+ %26 = load float, float* %25, align 4
-+ %27 = fpext float %26 to double
-+ %28 = fadd double %27, 5.000000e-01
-+ %29 = tail call double @__pd_log_1(double %28) #1
-+ %30 = fptrunc double %29 to float
-+ %31 = tail call float @__ps_exp_1(float %30) #2
-+ store float %31, float* %25, align 4
-+ ret void
-+
-+; CHECK-NOT: __pd_log_1
-+; CHECK: __pd_log_4
-+}
-+
-+; Function Attrs: nounwind readnone willreturn
-+declare float @__ps_exp_1(float) #0
-+
-+; Function Attrs: nounwind readnone willreturn
-+declare double @__pd_log_1(double) #0
-+
-+attributes #0 = { nounwind readnone willreturn }
-diff --git a/clang/test/Driver/autocomplete.c b/clang/test/Driver/autocomplete.c
-index d6f57708b67e..8c4921c9dbd5 100644
---- a/clang/test/Driver/autocomplete.c
-+++ b/clang/test/Driver/autocomplete.c
-@@ -85,7 +85,7 @@
- // FVECLIBALL-NEXT: libmvec
- // FVECLIBALL-NEXT: MASSV
- // FVECLIBALL-NEXT: none
--// FVECLIBALL-NEXT: SLEEF
-+// FVECLIBALL: SLEEF
- // FVECLIBALL-NEXT: SVML
- // RUN: %clang --autocomplete=-fshow-overloads= | FileCheck %s -check-prefix=FSOVERALL
- // FSOVERALL: all
-diff --git a/clang/test/Driver/emit-flang-attrs.f90 b/clang/test/Driver/emit-flang-attrs.f90
-new file mode 100644
-index 000000000000..df23a8aa6e87
---- /dev/null
-+++ b/clang/test/Driver/emit-flang-attrs.f90
-@@ -0,0 +1,58 @@
-+! REQUIRES: aarch64-registered-target
-+! REQUIRES: classic_flang
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a -c %s 2>&1 | FileCheck --check-prefix=CHECK-ATTRS-NEON %s
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve -c %s 2>&1 | FileCheck --check-prefix=CHECK-ATTRS-SVE %s
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+nosve -c %s 2>&1 | FileCheck --check-prefix=CHECK-ATTRS-NOSVE %s
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve+nosve -c %s 2>&1 | FileCheck --check-prefix=CHECK-ATTRS-NOSVE %s
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2+nosve2 -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-REVERT
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-aes -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-AES
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-sm4 -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-SM4
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-sha3 -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-SHA3
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-bitperm+nosve2-bitperm -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-BITPERM-REVERT
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2 -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-IMPLY
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+nosve+sve2 -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-CONFLICT-REV
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve+sve2 -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE-SVE2
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-bitperm -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-BITPERM
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+nosve+sve2-aes -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE-SUBFEATURE-CONFLICT-REV
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-sm4+nosve2 -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-SUBFEATURE-CONFLICT
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-bitperm+nosve2-aes -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-SUBFEATURE-MIX
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-sm4+nosve2-sm4 -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-SM4-REVERT
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-sha3+nosve2-sha3 %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-SHA3-REVERT
-+! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-aes+nosve2-aes %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-AES-REVERT
-+
-+! CHECK-ATTRS-NEON: "{{.*}}flang2"
-+! CHECK-ATTRS-NEON-SAME: "-target_features" "+neon,+v8a"
-+! CHECK-ATTRS-SVE: "{{.*}}flang2"
-+! CHECK-ATTRS-SVE-SAME: "-target_features" "+neon,+v8a,+sve"
-+! CHECK-ATTRS-NOSVE: "{{.*}}flang2"
-+! CHECK-ATTRS-NOSVE-SAME: "-target_features" "+neon,+v8a,-sve,-sve2,-sve2-bitperm,-sve2-sha3,-sve2-aes,-sve2-sm4"
-+! CHECK-SVE2-REVERT: "{{.*}}flang2"
-+! CHECK-SVE2-REVERT-SAME: "-target_features" "+neon,+v8a,+sve,-sve2,-sve2-bitperm,-sve2-sha3,-sve2-aes,-sve2-sm4"
-+! CHECK-SVE2-AES: "{{.*}}flang2"
-+! CHECK-SVE2-AES-SAME: "-target_features" "+neon,+v8a,+sve2-aes,+sve,+sve2"
-+! CHECK-SVE2-SM4: "{{.*}}flang2"
-+! CHECK-SVE2-SM4-SAME: "-target_features" "+neon,+v8a,+sve2-sm4,+sve,+sve2"
-+! CHECK-SVE2-SHA3: "{{.*}}flang2"
-+! CHECK-SVE2-SHA3-SAME: "-target_features" "+neon,+v8a,+sve2-sha3,+sve,+sve2"
-+! CHECK-SVE2-BITPERM-REVERT: "{{.*}}flang2"
-+! CHECK-SVE2-BITPERM-REVERT-SAME: "-target_features" "+neon,+v8a,+sve,+sve2,-sve2-bitperm"
-+! CHECK-SVE2-IMPLY: "{{.*}}flang2"
-+! CHECK-SVE2-IMPLY-SAME: "-target_features" "+neon,+v8a,+sve2,+sve"
-+! CHECK-SVE2-CONFLICT-REV: "{{.*}}flang2"
-+! CHECK-SVE2-CONFLICT-REV-SAME: "-target_features" "+neon,+v8a,-sve2-bitperm,-sve2-sha3,-sve2-aes,-sve2-sm4,+sve2,+sve"
-+! CHECK-SVE-SVE2: "{{.*}}flang2"
-+! CHECK-SVE-SVE2-SAME: "-target_features" "+neon,+v8a,+sve2,+sve"
-+! CHECK-SVE2-BITPERM: "{{.*}}flang2"
-+! CHECK-SVE2-BITPERM-SAME: "-target_features" "+neon,+v8a,+sve2-bitperm,+sve,+sve2"
-+! CHECK-SVE-SUBFEATURE-CONFLICT-REV: "{{.*}}flang2"
-+! CHECK-SVE-SUBFEATURE-CONFLICT-REV-SAME: "-target_features" "+neon,+v8a,-sve2-bitperm,-sve2-sha3,-sve2-sm4,+sve2-aes,+sve,+sve2"
-+! CHECK-SVE2-SUBFEATURE-CONFLICT: "{{.*}}flang2"
-+! CHECK-SVE2-SUBFEATURE-CONFLICT-SAME: "-target_features" "+neon,+v8a,+sve,-sve2,-sve2-bitperm,-sve2-sha3,-sve2-aes,-sve2-sm4"
-+! CHECK-SVE2-SUBFEATURE-MIX: "{{.*}}flang2"
-+! CHECK-SVE2-SUBFEATURE-MIX-SAME: "-target_features" "+neon,+v8a,+sve2-bitperm,+sve,+sve2,-sve2-aes"
-+! CHECK-SVE2-SM4-REVERT: "{{.*}}flang2"
-+! CHECK-SVE2-SM4-REVERT-SAME: "-target_features" "+neon,+v8a,+sve,+sve2,-sve2-sm4"
-+! CHECK-SVE2-SHA3-REVERT: "{{.*}}flang2"
-+! CHECK-SVE2-SHA3-REVERT-SAME: "-target_features" "+neon,+v8a,+sve,+sve2,-sve2-sha3"
-+! CHECK-SVE2-AES-REVERT: "{{.*}}flang2"
-+! CHECK-SVE2-AES-REVERT-SAME: "-target_features" "+neon,+v8a,+sve,+sve2,-sve2-aes"
-diff --git a/clang/test/Driver/flang/Inputs/llvm-ir-input.ll b/clang/test/Driver/flang/Inputs/llvm-ir-input.ll
-new file mode 100644
-index 000000000000..e69de29bb2d1
-diff --git a/clang/test/Driver/flang/classic-flang-emit-flang-llvm.f95 b/clang/test/Driver/flang/classic-flang-emit-flang-llvm.f95
-new file mode 100644
-index 000000000000..225207c85db7
---- /dev/null
-+++ b/clang/test/Driver/flang/classic-flang-emit-flang-llvm.f95
-@@ -0,0 +1,10 @@
-+! REQUIRES: classic_flang
-+
-+! Check that the -emit-flang-llvm option dumps LLVM IR pre-optimisation
-+
-+! RUN: %clang --driver-mode=flang -emit-flang-llvm -S -o %t.ll %s -### 2>&1 \
-+! RUN: | FileCheck %s
-+! CHECK-NOT: argument unused during compilation: '-S'
-+! CHECK: "{{.*}}flang1"
-+! CHECK-NEXT: "{{.*}}flang2"
-+! CHECK-NOT: "{{.*}}clang{{.*}}" "-cc1"
-diff --git a/clang/test/Driver/flang/classic-flang-fp-contract.f95 b/clang/test/Driver/flang/classic-flang-fp-contract.f95
-new file mode 100644
-index 000000000000..b181065d1cac
---- /dev/null
-+++ b/clang/test/Driver/flang/classic-flang-fp-contract.f95
-@@ -0,0 +1,27 @@
-+! REQUIRES: classic_flang
-+
-+! RUN: %flang -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT-ABSENCE
-+! RUN: %flang -O1 -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT
-+! RUN: %flang -O2 -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT
-+! RUN: %flang -O3 -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT
-+! RUN: %flang -Ofast -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT
-+! RUN: %flang -ffp-contract=fast -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT-ABSENCE
-+! RUN: %flang -O1 -ffp-contract=fast -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT
-+! RUN: %flang -O2 -ffp-contract=fast -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT
-+! RUN: %flang -O3 -ffp-contract=fast -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT
-+! RUN: %flang -Ofast -ffp-contract=fast -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT
-+! RUN: %flang -ffp-contract=on -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT-ABSENCE
-+! RUN: %flang -O1 -ffp-contract=on -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT
-+! RUN: %flang -O2 -ffp-contract=on -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT
-+! RUN: %flang -O3 -ffp-contract=on -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT
-+! RUN: %flang -Ofast -ffp-contract=on -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT
-+! RUN: %flang -ffp-contract=off -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT-ABSENCE
-+! RUN: %flang -O1 -ffp-contract=off -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT-ABSENCE
-+! RUN: %flang -O2 -ffp-contract=off -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT-ABSENCE
-+! RUN: %flang -O3 -ffp-contract=off -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT-ABSENCE
-+! RUN: %flang -Ofast -ffp-contract=off -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT-ABSENCE
-+
-+! CHECK-FLANG2-FP-CONTRACT: "{{.*}}flang2"
-+! CHECK-FLANG2-FP-CONTRACT-SAME: "-x" "172" "0x40000000" "-x" "179" "1" "-x" "216" "0x1000"
-+! CHECK-FLANG2-FP-CONTRACT-ABSENCE: "{{.*}}flang2"
-+! CHECK-FLANG2-FP-CONTRACT-ABSENCE-SAME: "-x" "171" "0x40000000" "-x" "178" "1"
-diff --git a/clang/test/Driver/flang/classic-flang-must-preprocess.F b/clang/test/Driver/flang/classic-flang-must-preprocess.F
-new file mode 100644
-index 000000000000..d52c1cf8d3c0
---- /dev/null
-+++ b/clang/test/Driver/flang/classic-flang-must-preprocess.F
-@@ -0,0 +1,12 @@
-+! REQUIRES: classic_flang
-+
-+! Check that the driver invokes flang1 correctly for fixed-form Fortran code
-+! which requires preprocessing.
-+
-+! RUN: %clang --driver-mode=flang -target x86_64-unknown-linux-gnu -c %s -### 2>&1 \
-+! RUN: | FileCheck %s
-+! CHECK: "flang1"
-+! CHECK-SAME: "-preprocess"
-+! CHECK-SAME: "-nofreeform"
-+! CHECK-NEXT: "flang2"
-+! CHECK-NEXT: {{clang.* "-cc1"}}
-diff --git a/clang/test/Driver/flang/classic-flang-must-preprocess.F95 b/clang/test/Driver/flang/classic-flang-must-preprocess.F95
-new file mode 100644
-index 000000000000..7d452a535784
---- /dev/null
-+++ b/clang/test/Driver/flang/classic-flang-must-preprocess.F95
-@@ -0,0 +1,12 @@
-+! REQUIRES: classic_flang
-+
-+! Check that the driver invokes flang1 correctly for free-form Fortran code
-+! which requires preprocessing.
-+
-+! RUN: %clang --driver-mode=flang -target x86_64-unknown-linux-gnu -c %s -### 2>&1 \
-+! RUN: | FileCheck %s
-+! CHECK: "flang1"
-+! CHECK-SAME: "-preprocess"
-+! CHECK-SAME: "-freeform"
-+! CHECK-NEXT: "flang2"
-+! CHECK-NEXT: {{clang.* "-cc1"}}
-diff --git a/clang/test/Driver/flang/classic-flang-version.f b/clang/test/Driver/flang/classic-flang-version.f
-new file mode 100644
-index 000000000000..c2082d3af8b7
---- /dev/null
-+++ b/clang/test/Driver/flang/classic-flang-version.f
-@@ -0,0 +1,3 @@
-+! REQUIRES: classic_flang
-+! RUN: %flang --version | FileCheck %s
-+! CHECK: flang version {{.*}} ({{.*}}flang-compiler/classic-flang-llvm-project.git {{.*}})
-diff --git a/clang/test/Driver/flang/classic-flang-vscale-mbits.f95 b/clang/test/Driver/flang/classic-flang-vscale-mbits.f95
-new file mode 100644
-index 000000000000..f0ed43aa027a
---- /dev/null
-+++ b/clang/test/Driver/flang/classic-flang-vscale-mbits.f95
-@@ -0,0 +1,28 @@
-+// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve -msve-vector-bits=128 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE-128 %s
-+// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve -msve-vector-bits=128+ %s 2>&1 | FileCheck -check-prefix=CHECK-SVE-128PLUS %s
-+// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve -msve-vector-bits=256 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE-256 %s
-+// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve -msve-vector-bits=256+ %s 2>&1 | FileCheck -check-prefix=CHECK-SVE-256PLUS %s
-+// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2 -msve-vector-bits=512 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2-512 %s
-+// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2 -msve-vector-bits=512+ %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2-512PLUS %s
-+// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2-sha3 -msve-vector-bits=2048 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2SHA3-2048 %s
-+// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2-sha3 -msve-vector-bits=2048+ %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2SHA3-2048PLUS %s
-+// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2 -msve-vector-bits=scalable %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2-SCALABLE %s
-+
-+// CHECK-SVE-128: "-target_features" "+neon,+v8a,+sve"
-+// CHECK-SVE-128-DAG: "-vscale_range_min" "1" "-vscale_range_max" "1"
-+// CHECK-SVE-128PLUS: "-target_features" "+neon,+v8a,+sve"
-+// CHECK-SVE-128PLUS-DAG: "-vscale_range_min" "1" "-vscale_range_max" "0"
-+// CHECK-SVE-256: "-target_features" "+neon,+v8a,+sve"
-+// CHECK-SVE-256-DAG: "-vscale_range_min" "2" "-vscale_range_max" "2"
-+// CHECK-SVE-256PLUS: "-target_features" "+neon,+v8a,+sve"
-+// CHECK-SVE-256PLUS-DAG: "-vscale_range_min" "2" "-vscale_range_max" "0"
-+// CHECK-SVE2-512: "-target_features" "+neon,+v8a,+sve2,+sve"
-+// CHECK-SVE2-512-DAG: "-vscale_range_min" "4" "-vscale_range_max" "4"
-+// CHECK-SVE2-512PLUS: "-target_features" "+neon,+v8a,+sve2,+sve"
-+// CHECK-SVE2-512PLUS-DAG: "-vscale_range_min" "4" "-vscale_range_max" "0"
-+// CHECK-SVE2SHA3-2048: "-target_features" "+neon,+v8a,+sve2-sha3,+sve,+sve2"
-+// CHECK-SVE2SHA3-2048-DAG: "-vscale_range_min" "16" "-vscale_range_max" "16"
-+// CHECK-SVE2SHA3-2048PLUS: "-target_features" "+neon,+v8a,+sve2-sha3,+sve,+sve2"
-+// CHECK-SVE2SHA3-2048PLUS-DAG: "-vscale_range_min" "16" "-vscale_range_max" "0"
-+// CHECK-SVE2-SCALABLE: "-target_features" "+neon,+v8a,+sve2,+sve"
-+// CHECK-SVE2-SCALABLE-DAG: "-vscale_range_min" "1" "-vscale_range_max" "16"
-diff --git a/clang/test/Driver/flang/classic-flang-vscale.f95 b/clang/test/Driver/flang/classic-flang-vscale.f95
-new file mode 100644
-index 000000000000..8110be594db5
---- /dev/null
-+++ b/clang/test/Driver/flang/classic-flang-vscale.f95
-@@ -0,0 +1,28 @@
-+// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a %s 2>&1 | FileCheck -check-prefix=CHECK-NEON %s
-+// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve %s 2>&1 | FileCheck -check-prefix=CHECK-SVE %s
-march=armv8-a+sve %s 2>&1 | FileCheck -check-prefix=CHECK-SVE %s -+// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2 %s -+// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2-sha3 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2SHA3 %s -+// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve+nosve %s 2>&1 | FileCheck -check-prefix=CHECK-SVE-NOSVE %s -+// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2+nosve2-sha3 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2-NOSVE2SHA3 %s -+// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2-sha3+nosve2 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2SHA3-NOSVE2 %s -+// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2-sha3+nosve %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2SHA3-NOSVE %s -+ -+// CHECK-NEON: "-target_features" "+neon,+v8a" -+// CHECK-NEON-NOT: "-vscale_range_min" -+// CHECK-NEON-NOT: "-vscale_range_max" -+// CHECK-SVE: "-target_features" "+neon,+v8a,+sve" -+// CHECK-SVE-DAG: "-vscale_range_min" "1" "-vscale_range_max" "16" -+// CHECK-SVE2: "-target_features" "+neon,+v8a,+sve2,+sve" -+// CHECK-SVE2-DAG: "-vscale_range_min" "1" "-vscale_range_max" "16" -+// CHECK-SVE2SHA3: "-target_features" "+neon,+v8a,+sve2-sha3,+sve,+sve2" -+// CHECK-SVE2SHA3-DAG: "-vscale_range_min" "1" "-vscale_range_max" "16" -+// CHECK-SVE-NOSVE: "-target_features" "+neon,+v8a,-sve,-sve2,-sve2-bitperm,-sve2-sha3,-sve2-aes,-sve2-sm4" -+// CHECK-SVE-NOSVE-NOT: "-vscale_range_min" -+// CHECK-SVE-NOSVE-NOT: "-vscale_range_max" -+// CHECK-SVE2-NOSVE2SHA3: "-target_features" "+neon,+v8a,+sve2,+sve,-sve2-sha3" -+// CHECK-SVE2-NOSVE2SHA3-DAG: "-vscale_range_min" "1" "-vscale_range_max" "16" -+// CHECK-SVE2SHA3-NOSVE2: "-target_features" "+neon,+v8a,+sve,-sve2,-sve2-bitperm,-sve2-sha3,-sve2-aes,-sve2-sm4" -+// CHECK-SVE2SHA3-NOSVE2-DAG: "-vscale_range_min" "1" "-vscale_range_max" "16" -+// CHECK-SVE2SHA3-NOSVE: "-target_features" "+neon,+v8a,-sve,-sve2,-sve2-bitperm,-sve2-sha3,-sve2-aes,-sve2-sm4" -+// CHECK-SVE2SHA3-NOSVE-NOT: "-vscale_range_min" -+// CHECK-SVE2SHA3-NOSVE-NOT: "-vscale_range_max" -diff --git a/clang/test/Driver/flang/classic-flang.f b/clang/test/Driver/flang/classic-flang.f -new file mode 100644 -index 000000000000..98f74d9626d7 ---- /dev/null -+++ b/clang/test/Driver/flang/classic-flang.f -@@ -0,0 +1,26 @@ -+! REQUIRES: classic_flang -+ -+! Check that the driver invokes flang1 correctly for preprocessed fixed-form -+! Fortran code. -+ -+! RUN: %clang --driver-mode=flang -target x86_64-unknown-linux-gnu -c %s -### 2>&1 \ -+! RUN: | FileCheck %s -+! CHECK: "flang1" -+! CHECK-NOT: "-preprocess" -+! CHECK-SAME: "-nofreeform" -+! CHECK-NEXT: "flang2" -+! CHECK-NEXT: {{clang.* "-cc1"}} -+ -+! Check that the driver invokes flang1 correctly when preprocessing is -+! explicitly requested. -+ -+! RUN: %clang --driver-mode=flang -target x86_64-unknown-linux-gnu -E %s -### 2>&1 \ -+! RUN: | FileCheck --check-prefix=CHECK-PREPROCESS %s -+! CHECK-PREPROCESS: "flang1" -+! CHECK-PREPROCESS-SAME: "-preprocess" -+! CHECK-PREPROCESS-SAME: "-es" -+! CHECK-PREPROCESS-SAME: "-pp" -+! CHECK-PREPROCESS-NOT: "flang1" -+! CHECK-PREPROCESS-NOT: "flang2" -+! CHECK-PREPROCESS-NOT: {{clang.* "-cc1"}} -+! 
CHECK-PREPROCESS-NOT: {{clang.* "-cc1as"}} -diff --git a/clang/test/Driver/flang/classic-flang.f95 b/clang/test/Driver/flang/classic-flang.f95 -new file mode 100644 -index 000000000000..3350bd45550f ---- /dev/null -+++ b/clang/test/Driver/flang/classic-flang.f95 -@@ -0,0 +1,120 @@ -+! REQUIRES: classic_flang -+ -+! Check that the driver invokes flang1 correctly for preprocessed free-form -+! Fortran code. Also check that the backend is invoked correctly. -+ -+! RUN: %clang --driver-mode=flang -target x86_64-unknown-linux-gnu -c %s -### 2>&1 \ -+! RUN: | FileCheck --check-prefix=CHECK-OBJECT %s -+! CHECK-OBJECT: "flang1" -+! CHECK-OBJECT-NOT: "-preprocess" -+! CHECK-OBJECT-SAME: "-freeform" -+! CHECK-OBJECT-NEXT: "flang2" -+! CHECK-OBJECT-SAME: "-asm" [[LLFILE:.*.ll]] -+! CHECK-OBJECT-NEXT: {{clang.* "-cc1"}} -+! CHECK-OBJECT-SAME: "-o" "classic-flang.o" -+! CHECK-OBJECT-SAME: "-x" "ir" -+! CHECK-OBJECT-SAME: [[LLFILE]] -+ -+! Check that the driver invokes flang1 correctly when preprocessing is -+! explicitly requested. -+ -+! RUN: %clang --driver-mode=flang -target x86_64-unknown-linux-gnu -E %s -### 2>&1 \ -+! RUN: | FileCheck --check-prefix=CHECK-PREPROCESS %s -+! CHECK-PREPROCESS: "flang1" -+! CHECK-PREPROCESS-SAME: "-preprocess" -+! CHECK-PREPROCESS-SAME: "-es" -+! CHECK-PREPROCESS-SAME: "-pp" -+! CHECK-PREPROCESS-NOT: "flang1" -+! CHECK-PREPROCESS-NOT: "flang2" -+! CHECK-PREPROCESS-NOT: {{clang.* "-cc1"}} -+! CHECK-PREPROCESS-NOT: {{clang.* "-cc1as"}} -+ -+! Check that the backend job (clang -cc1) is not combined into the compile job -+! (flang2) even if -integrated-as is specified. -+ -+! RUN: %clang --driver-mode=flang -target x86_64-unknown-linux-gnu -integrated-as -S %s -### 2>&1 \ -+! RUN: | FileCheck --check-prefix=CHECK-ASM %s -+! CHECK-ASM: "flang1" -+! CHECK-ASM-NEXT: "flang2" -+! CHECK-ASM-SAME: "-asm" [[LLFILE:.*.ll]] -+! CHECK-ASM-NEXT: {{clang.* "-cc1"}} -+! CHECK-ASM-SAME: "-o" "classic-flang.s" -+! CHECK-ASM-SAME: "-x" "ir" -+! CHECK-ASM-SAME: [[LLFILE]] -+ -+! Check that the linker job is given the correct libraries and library paths. -+ -+! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -mp \ -+! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD,CHECK-MAIN,CHECK-DYNAMIC-FLANG,CHECK-DYNAMIC-OMP %s -+! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -mp -nomp \ -+! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD,CHECK-MAIN,CHECK-DYNAMIC-FLANG,CHECK-NO-OMP %s -+! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -fopenmp \ -+! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD,CHECK-MAIN,CHECK-DYNAMIC-FLANG,CHECK-DYNAMIC-OMP %s -+! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -fopenmp -fno-openmp \ -+! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD,CHECK-MAIN,CHECK-DYNAMIC-FLANG,CHECK-NO-OMP %s -+! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -fopenmp -static-openmp \ -+! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD,CHECK-MAIN,CHECK-DYNAMIC-FLANG,CHECK-STATIC-OMP %s -+! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -fopenmp -static-flang-libs \ -+! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD,CHECK-MAIN,CHECK-STATIC-FLANG,CHECK-DYNAMIC-OMP %s -+! 
RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -static-flang-libs \ -+! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD,CHECK-MAIN,CHECK-STATIC-FLANG,CHECK-NO-OMP %s -+! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -Mnomain \ -+! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD,CHECK-NOMAIN,CHECK-DYNAMIC-FLANG %s -+! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -fno-fortran-main \ -+! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD,CHECK-NOMAIN,CHECK-DYNAMIC-FLANG %s -+! CHECK-LD: "{{.*}}ld{{(.exe)?}}" -+! CHECK-LD-NOT: "-static" -+! CHECK-LD-SAME: "{{[^"]*}}classic-flang-{{[^ ]*}}.o" -+! CHECK-MAIN-SAME: "-lflangmain" -+! CHECK-NOMAIN-NOT: "-lflangmain" -+! CHECK-LD-SAME: "-lfoo" "-L{{[^ ]*[/\\]+}}basic_linux_tree{{[/\\]+}}usr{{[/\\]+}}lib" -+! CHECK-DYNAMIC-FLANG-NOT: "-Bstatic" -+! CHECK-DYNAMIC-FLANG: "-lflang" "-lflangrti" "-lpgmath" "-lpthread" "-lrt" "-lm" -+! CHECK-DYNAMIC-FLANG-NOT: "-Bdynamic" -+! CHECK-STATIC-FLANG: "-Bstatic" "-lflang" "-lflangrti" "-lpgmath" "-Bdynamic" "-lpthread" "-lrt" "-lm" -+! CHECK-DYNAMIC-OMP-NOT: "-Bstatic" -+! CHECK-DYNAMIC-OMP: "-lomp" "-L{{[^ ]*[/\\]+}}basic_linux_tree{{[/\\]+}}usr{{[/\\]+}}lib" -+! CHECK-DYNAMIC-OMP-NOT: "-Bdynamic" -+! CHECK-STATIC-OMP: "-Bstatic" "-lomp" "-Bdynamic" "-L{{[^ ]*[/\\]+}}basic_linux_tree{{[/\\]+}}usr{{[/\\]+}}lib" -+! CHECK-NO-OMP-NOT: "-lomp" -+ -+! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -static -static-flang-libs \ -+! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD-STATIC,CHECK-NO-OMP %s -+! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -static -fopenmp \ -+! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD-STATIC,CHECK-STATIC-BOTH %s -+! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -static -fopenmp -static-openmp \ -+! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD-STATIC,CHECK-STATIC-BOTH %s -+! CHECK-LD-STATIC: "{{.*}}ld{{(.exe)?}}" -+! CHECK-LD-STATIC: "-static" "-o" "a.out" -+! CHECK-LD-STATIC: "{{[^"]*}}classic-flang-{{[^ ]*}}.o" "-lflangmain" "-lfoo" "-L{{[^ ]*[/\\]+}}basic_linux_tree{{[/\\]+}}usr{{[/\\]+}}lib" -+! CHECK-LD-STATIC-NOT: "-Bstatic" -+! CHECK-LD-STATIC: "-lflang" "-lflangrti" "-lpgmath" "-lpthread" "-lrt" "-lm" -+! CHECK-LD-STATIC-NOT: "-Bdynamic" -+! CHECK-STATIC-BOTH-NOT: "-Bstatic" -+! CHECK-STATIC-BOTH: "-lomp" -+! CHECK-STATIC-BOTH-NOT: "-Bdynamic" -+ -+! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -no-flang-libs \ -+! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-NOFLANGLIBS %s -+! CHECK-NOFLANGLIBS: "{{.*}}ld{{(.exe)?}}" -+! CHECK-NOFLANGLIBS-SAME: "{{[^"]*}}classic-flang-{{[^ ]*}}.o" -+! CHECK-NOFLANGLIBS-NOT: "-lflangmain" -+! CHECK-NOFLANGLIBS-SAME: "-lfoo" "-L{{[^ ]*[/\\]+}}basic_linux_tree{{[/\\]+}}usr{{[/\\]+}}lib" -+! CHECK-NOFLANGLIBS-NOT: "-lflang" "-lflangrti" "-lpgmath" -+! CHECK-NOFLANGLIBS: "-lm" "-lgcc" -+! CHECK-NOFLANGLIBS: "-lgcc_s" -+! CHECK-NOFLANGLIBS: "-lc" -+ -+! In Flang mode, we always link with libm, even with -nostdlib. -+! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -nostdlib \ -+! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-NOSTDLIB %s -+! 
CHECK-NOSTDLIB: "{{.*}}ld{{(.exe)?}}" -+! CHECK-NOSTDLIB-SAME: "{{[^"]*}}classic-flang-{{[^ ]*}}.o" -+! CHECK-NOSTDLIB-NOT: "-lflangmain" -+! CHECK-NOSTDLIB-SAME: "-lfoo" "-L{{[^ ]*[/\\]+}}basic_linux_tree{{[/\\]+}}usr{{[/\\]+}}lib" -+! CHECK-NOSTDLIB-NOT: "-lflang" "-lflangrti" "-lpgmath" -+! CHECK-NOSTDLIB-NOT: "-lpthread" "-lrt" -+! CHECK-NOSTDLIB: "-lm" -+! CHECK-NOSTDLIB-NOT: "-lgcc" -+! CHECK-NOSTDLIB-NOT: "-lgcc_s" -+! CHECK-NOSTDLIB-NOT: "-lc" -diff --git a/clang/test/Driver/flang/flang.f90 b/clang/test/Driver/flang/flang.f90 -index 5d8edf6308b0..8caa45f6342c 100644 ---- a/clang/test/Driver/flang/flang.f90 -+++ b/clang/test/Driver/flang/flang.f90 -@@ -1,5 +1,7 @@ - ! Check that flang -fc1 is invoked when in --driver-mode=flang. - -+! UNSUPPORTED: classic_flang -+ - ! This is a copy of flang_ucase.F90 because the driver has logic in it which - ! differentiates between F90 and f90 files. Flang will not treat these files - ! differently. -diff --git a/clang/test/Driver/flang/flang_ucase.F90 b/clang/test/Driver/flang/flang_ucase.F90 -index 50305ee337e1..21f7c469144b 100644 ---- a/clang/test/Driver/flang/flang_ucase.F90 -+++ b/clang/test/Driver/flang/flang_ucase.F90 -@@ -1,5 +1,7 @@ - ! Check that flang -fc1 is invoked when in --driver-mode=flang. - -+! UNSUPPORTED: classic_flang -+ - ! This is a copy of flang.f90 because the driver has logic in it which - ! differentiates between F90 and f90 files. Flang will not treat these files - ! differently. -diff --git a/clang/test/Driver/flang/llvm-ir-input.f b/clang/test/Driver/flang/llvm-ir-input.f -new file mode 100644 -index 000000000000..c34bf28328cb ---- /dev/null -+++ b/clang/test/Driver/flang/llvm-ir-input.f -@@ -0,0 +1,7 @@ -+! Check that LLVM IR input is passed to clang instead of flang1. -+ -+! REQUIRES: classic_flang -+! RUN: %clang --driver-mode=flang -S %S/Inputs/llvm-ir-input.ll -### 2>&1 | FileCheck %s -+ -+! CHECK-NOT: flang1 -+! CHECK: "{{.*}}clang{{.*}}" "-cc1" -diff --git a/clang/test/Driver/flang/multiple-inputs-mixed.f90 b/clang/test/Driver/flang/multiple-inputs-mixed.f90 -index 2395dbecf1fe..8f3918f7cb21 100644 ---- a/clang/test/Driver/flang/multiple-inputs-mixed.f90 -+++ b/clang/test/Driver/flang/multiple-inputs-mixed.f90 -@@ -1,5 +1,7 @@ - ! Check that flang can handle mixed C and fortran inputs. - -+! UNSUPPORTED: classic_flang -+ - ! RUN: %clang --driver-mode=flang -### -fsyntax-only %S/Inputs/one.f90 %S/Inputs/other.c 2>&1 | FileCheck --check-prefixes=CHECK-SYNTAX-ONLY %s - ! CHECK-SYNTAX-ONLY-LABEL: "{{[^"]*}}flang-new{{[^"/]*}}" "-fc1" - ! CHECK-SYNTAX-ONLY: "{{[^"]*}}/Inputs/one.f90" -diff --git a/clang/test/Driver/flang/multiple-inputs.f90 b/clang/test/Driver/flang/multiple-inputs.f90 -index ada999e927a6..4068ac3a0966 100644 ---- a/clang/test/Driver/flang/multiple-inputs.f90 -+++ b/clang/test/Driver/flang/multiple-inputs.f90 -@@ -1,5 +1,7 @@ - ! Check that flang driver can handle multiple inputs at once. - -+! UNSUPPORTED: classic_flang -+ - ! RUN: %clang --driver-mode=flang -### -fsyntax-only %S/Inputs/one.f90 %S/Inputs/two.f90 2>&1 | FileCheck --check-prefixes=CHECK-SYNTAX-ONLY %s - ! CHECK-SYNTAX-ONLY-LABEL: "{{[^"]*}}flang-new{{[^"/]*}}" "-fc1" - ! CHECK-SYNTAX-ONLY: "{{[^"]*}}/Inputs/one.f90" -diff --git a/clang/test/Driver/flang/reassoc.f90 b/clang/test/Driver/flang/reassoc.f90 -new file mode 100644 -index 000000000000..fe42087f7070 ---- /dev/null -+++ b/clang/test/Driver/flang/reassoc.f90 -@@ -0,0 +1,59 @@ -+! REQUIRES: classic_flang -+ -+! Tests for flags which generate nsw, reassoc attributes -+ -+! 
RUN: %flang -Kieee %s -### 2>&1 | FileCheck --check-prefixes=IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -Knoieee %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -ffast-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -fno-fast-math %s -### 2>&1 | FileCheck --check-prefixes=NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -frelaxed-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -fassociative-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_RELAXED,REASSOC_NSZ %s -+! RUN: %flang -fno-associative-math %s -### 2>&1 | FileCheck --check-prefixes=NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -fsigned-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -fno-signed-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NSZ %s -+ -+! RUN: %flang -fno-associative-math -fno-signed-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NSZ %s -+! RUN: %flang -fno-associative-math -fsigned-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -fassociative-math -fno-signed-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_RELAXED,REASSOC_NSZ %s -+! RUN: %flang -fassociative-math -fsigned-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s -+ -+! RUN: %flang -Kieee -fassociative-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_RELAXED,REASSOC_NSZ %s -+! RUN: %flang -Kieee -fno-associative-math %s -### 2>&1 | FileCheck --check-prefixes=IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -Kieee -fsigned-zeros %s -### 2>&1 | FileCheck --check-prefixes=IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -Kieee -fno-signed-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NSZ %s -+! RUN: %flang -ffast-math -fassociative-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_RELAXED,REASSOC_NSZ %s -+! RUN: %flang -ffast-math -fno-associative-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -ffast-math -fsigned-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -ffast-math -fno-signed-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_RELAXED,NO_REASSOC,NSZ %s -+! RUN: %flang -frelaxed-math -fassociative-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,REASSOC_NSZ %s -+! RUN: %flang -frelaxed-math -fno-associative-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -frelaxed-math -fsigned-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -frelaxed-math -fno-signed-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_REASSOC,NSZ %s -+ -+! RUN: %flang -fassociative-math -Kieee %s -### 2>&1 | FileCheck --check-prefixes=IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -fno-associative-math -Kieee %s -### 2>&1 | FileCheck --check-prefixes=IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -fsigned-zeros -Kieee %s -### 2>&1 | FileCheck --check-prefixes=IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s -+! 
RUN: %flang -fno-signed-zeros -Kieee %s -### 2>&1 | FileCheck --check-prefixes=IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -fassociative-math -ffast-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_RELAXED,REASSOC_NSZ %s -+! RUN: %flang -fno-associative-math -ffast-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -fsigned-zeros -ffast-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -fno-signed-zeros -ffast-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_RELAXED,NO_REASSOC,NSZ %s -+! RUN: %flang -fassociative-math -frelaxed-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,REASSOC_NSZ %s -+! RUN: %flang -fno-associative-math -frelaxed-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -fsigned-zeros -frelaxed-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,RELAXED,NO_REASSOC,NO_NSZ %s -+! RUN: %flang -fno-signed-zeros -frelaxed-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_REASSOC,NSZ %s -+ -+! IEEE: {{.*}}flang2{{.*}} "-ieee" "1" -+! NO_IEEE-NOT: {{.*}}flang2{{.*}} "-ieee" "1" -+ -+! FAST: {{.*}}flang2{{.*}} "-x" "216" "1" -+! NO_FAST-NOT: {{.*}}flang2{{.*}} "-x" "216" "1" -+ -+! RELAXED: {{.*}}flang2{{.*}} "-x" "15" "0x400" -+! NO_RELAXED-NOT: {{.*}}flang2{{.*}} "-x" "15" "0x400" -+ -+! REASSOC_NSZ: {{.*}}flang2{{.*}} "-x" "216" "0x8" "-x" "216" "0x10" -+! NO_REASSOC-NOT: {{.*}}flang2{{.*}} "-x" "216" "0x10" -+ -+! NSZ: {{.*}}flang2{{.*}} "-x" "216" "0x8" -+! NO_NSZ-NOT: {{.*}}flang2{{.*}} "-x" "216" "0x8" -diff --git a/clang/test/Driver/fortran-phases.f90 b/clang/test/Driver/fortran-phases.f90 -new file mode 100644 -index 000000000000..57d5f2c92b78 ---- /dev/null -+++ b/clang/test/Driver/fortran-phases.f90 -@@ -0,0 +1,119 @@ -+! Test to see that the correct phases are run for the commandline input -+ -+! REQUIRES: classic_flang -+ -+! RUN: %flang -ccc-print-phases 2>&1 %s | FileCheck %s --check-prefix=LINK-NOPP -+! RUN: %flang -ccc-print-phases -c 2>&1 %s | FileCheck %s --check-prefix=CONLY-NOPP -+! RUN: %flang -ccc-print-phases -S 2>&1 %s | FileCheck %s --check-prefix=AONLY-NOPP -+! RUN: %flang -ccc-print-phases -c -emit-llvm 2>&1 %s | FileCheck %s --check-prefix=LLONLY-NOPP -+! RUN: %flang -ccc-print-phases -S -emit-llvm 2>&1 %s | FileCheck %s --check-prefix=LLONLY-NOPP -+! RUN: %flang -ccc-print-phases -emit-flang-llvm 2>&1 %s | FileCheck %s --check-prefix=FLLONLY-NOPP -+! RUN: %flang -ccc-print-phases -fsyntax-only 2>&1 %s | FileCheck %s --check-prefix=SONLY-NOPP -+! RUN: %flang -ccc-print-phases -E 2>&1 %s | FileCheck %s --check-prefix=PPONLY-NOPP -+ -+! RUN: %flang -ccc-print-phases 2>&1 -x f95-cpp-input %s | FileCheck %s --check-prefix=LINK -+! RUN: %flang -ccc-print-phases 2>&1 -x f95-cpp-input %s | FileCheck %s --check-prefix=LINK -+! RUN: %flang -ccc-print-phases -c 2>&1 -x f95-cpp-input %s | FileCheck %s --check-prefix=CONLY -+! RUN: %flang -ccc-print-phases -S 2>&1 -x f95-cpp-input %s | FileCheck %s --check-prefix=AONLY -+! RUN: %flang -ccc-print-phases -c -emit-llvm 2>&1 -x f95-cpp-input %s | FileCheck %s --check-prefix=LLONLY -+! RUN: %flang -ccc-print-phases -S -emit-llvm 2>&1 -x f95-cpp-input %s | FileCheck %s --check-prefix=LLONLY -+! RUN: %flang -ccc-print-phases -emit-flang-llvm 2>&1 -x f95-cpp-input %s | FileCheck %s --check-prefix=FLLONLY -+! 
RUN: %flang -ccc-print-phases -fsyntax-only 2>&1 -x f95-cpp-input %s | FileCheck %s --check-prefix=SONLY -+! RUN: %flang -ccc-print-phases -E 2>&1 -x f95-cpp-input %s | FileCheck %s --check-prefix=PPONLY -+ -+! LINK-NOPP: 0: input, {{.*}}, f95 -+! LINK-NOPP: 1: compiler, {0}, ir -+! LINK-NOPP: 2: backend, {1}, assembler -+! LINK-NOPP: 3: assembler, {2}, object -+! LINK-NOPP: 4: linker, {3}, image -+ -+! CONLY-NOPP: 0: input, {{.*}}, f95 -+! CONLY-NOPP: 1: compiler, {0}, ir -+! CONLY-NOPP: 2: backend, {1}, assembler -+! CONLY-NOPP: 3: assembler, {2}, object -+! CONLY-NOPP-NOT: {{.*}}: linker, {{{.*}}}, image -+ -+! AONLY-NOPP: 0: input, {{.*}}, f95 -+! AONLY-NOPP: 1: compiler, {0}, ir -+! AONLY-NOPP: 2: backend, {1}, assembler -+! AONLY-NOPP-NOT: {{.*}}: assembler, {{{.*}}}, object -+! AONLY-NOPP-NOT: {{.*}}: linker, {{{.*}}}, image -+ -+! LLONLY-NOPP: 0: input, {{.*}}, f95 -+! LLONLY-NOPP: 1: compiler, {0}, ir -+! LLONLY-NOPP-NOT: {{.*}}: backend, {{{.*}}}, assembler -+! LLONLY-NOPP-NOT: {{.*}}: assembler, {{{.*}}}, object -+! LLONLY-NOPP-NOT: {{.*}}: linker, {{{.*}}}, image -+ -+! FLLONLY-NOPP: 0: input, {{.*}}, f95 -+! FLLONLY-NOPP: 1: compiler, {0}, ir -+! FLLONLY-NOPP-NOT: {{.*}}: backend, {{{.*}}}, assembler -+! FLLONLY-NOPP-NOT: {{.*}}: assembler, {{{.*}}}, object -+! FLLONLY-NOPP-NOT: {{.*}}: linker, {{{.*}}}, image -+ -+! SONLY-NOPP: 0: input, {{.*}}, f95 -+! SONLY-NOPP-NOT: {{.*}}: compiler, {{{.*}}}, ir -+! SONLY-NOPP-NOT: {{.*}}: backend, {{{.*}}}, assembler -+! SONLY-NOPP-NOT: {{.*}}: assembler, {{{.*}}}, object -+! SONLY-NOPP-NOT: {{.*}}: linker, {{{.*}}}, image -+ -+! flang always preprocesses with -E regardless of file extension -+! PPONLY-NOPP: 0: input, {{.*}}, f95 -+! PPONLY-NOPP: 1: preprocessor, {0}, f95 -+! PPONLY-NOPP-NOT: {{.*}}: compiler, {{{.*}}}, ir -+! PPONLY-NOPP-NOT: {{.*}}: backend, {{{.*}}}, assembler -+! PPONLY-NOPP-NOT: {{.*}}: assembler, {{{.*}}}, object -+! PPONLY-NOPP-NOT: {{.*}}: linker, {{{.*}}}, image -+ -+! LINK: 0: input, {{.*}}, f95-cpp-input -+! LINK: 1: preprocessor, {0}, f95 -+! LINK: 2: compiler, {1}, ir -+! LINK: 3: backend, {2}, assembler -+! LINK: 4: assembler, {3}, object -+! LINK: 5: linker, {4}, image -+ -+! CONLY: 0: input, {{.*}}, f95-cpp-input -+! CONLY: 1: preprocessor, {0}, f95 -+! CONLY: 2: compiler, {1}, ir -+! CONLY: 3: backend, {2}, assembler -+! CONLY: 4: assembler, {3}, object -+! CONLY-NOT: {{.*}}: linker, {{{.*}}}, image -+ -+! AONLY: 0: input, {{.*}}, f95-cpp-input -+! AONLY: 1: preprocessor, {0}, f95 -+! AONLY: 2: compiler, {1}, ir -+! AONLY: 3: backend, {2}, assembler -+! AONLY-NOT: {{.*}}: assembler, {{{.*}}}, object -+! AONLY-NOT: {{.*}}: linker, {{{.*}}}, image -+ -+! LLONLY: 0: input, {{.*}}, f95-cpp-input -+! LLONLY: 1: preprocessor, {0}, f95 -+! LLONLY: 2: compiler, {1}, ir -+! LLONLY-NOT: {{.*}}: backend, {{{.*}}}, assembler -+! LLONLY-NOT: {{.*}}: assembler, {{{.*}}}, object -+! LLONLY-NOT: {{.*}}: linker, {{{.*}}}, image -+ -+! FLLONLY: 0: input, {{.*}}, f95-cpp-input -+! FLLONLY: 1: preprocessor, {0}, f95 -+! FLLONLY: 2: compiler, {1}, ir -+! FLLONLY-NOT: {{.*}}: backend, {{{.*}}}, assembler -+! FLLONLY-NOT: {{.*}}: assembler, {{{.*}}}, object -+! FLLONLY-NOT: {{.*}}: linker, {{{.*}}}, image -+ -+! SONLY: 0: input, {{.*}}, f95-cpp-input -+! SONLY: 1: preprocessor, {0}, f95 -+! SONLY-NOT: {{.*}}: compiler, {{{.*}}}, ir -+! SONLY-NOT: {{.*}}: backend, {{{.*}}}, assembler -+! SONLY-NOT: {{.*}}: assembler, {{{.*}}}, object -+! SONLY-NOT: {{.*}}: linker, {{{.*}}}, image -+ -+! 
PPONLY: 0: input, {{.*}}, f95-cpp-input
-+! PPONLY: 1: preprocessor, {0}, f95
-+! PPONLY-NOT: {{.*}}: compiler, {{{.*}}}, ir
-+! PPONLY-NOT: {{.*}}: backend, {{{.*}}}, assembler
-+! PPONLY-NOT: {{.*}}: assembler, {{{.*}}}, object
-+! PPONLY-NOT: {{.*}}: linker, {{{.*}}}, image
-+
-+program hello
-+  write(*, *) "Hello"
-+end program hello
-diff --git a/clang/test/Driver/fortran-preprocessor.f90 b/clang/test/Driver/fortran-preprocessor.f90
-new file mode 100644
-index 000000000000..d9e5620145ac
---- /dev/null
-+++ b/clang/test/Driver/fortran-preprocessor.f90
-@@ -0,0 +1,48 @@
-+! REQUIRES: classic_flang
-+
-+! -cpp should preprocess as it goes, regardless of input file extension
-+! RUN: %flang -cpp -c -DHELLO="hello all" -### %s 2>&1 | FileCheck %s --check-prefixes=ALL,CPP,PP
-+! RUN: %flang -cpp -c -DHELLO="hello all" -### -x f95-cpp-input %s 2>&1 | FileCheck %s --check-prefixes=ALL,CPP,PP
-+! -E should preprocess then stop, regardless of input file extension
-+! RUN: %flang -E -DHELLO="hello all" -### %s 2>&1 | FileCheck %s --check-prefixes=ALL,E,PPONLY
-+! RUN: %flang -E -DHELLO="hello all" -### -x f95-cpp-input %s 2>&1 | FileCheck %s --check-prefixes=ALL,E,PPONLY
-+! -cpp and -E are redundant
-+! RUN: %flang -E -cpp -DHELLO="hello all" -### %s 2>&1 | FileCheck %s --check-prefixes=ALL,E,PPONLY
-+
-+! Don't link when given linker input
-+! RUN: %flang -E -cpp -Wl,-rpath=blah -### %s 2>&1 | FileCheck %s --check-prefixes=ALL,E,PPONLY
-+
-+! Explicitly test this nonsense case causing a bug with LLVM 13/14
-+! RUN: %flang -E -traditional-cpp -DHELLO="hello all" -x f95-cpp-input -### %s 2>&1 | FileCheck %s --check-prefixes=ALL,E,PPONLY
-+
-+! Test -save-temps does not break things (same codepath as -traditional-cpp bug above)
-+! RUN: %flang -E -DHELLO="hello all" -save-temps -### %s 2>&1 | FileCheck %s --check-prefixes=ALL,E,PPONLY
-+! RUN: %flang -E -DHELLO="hello all" -save-temps -### -x f95-cpp-input %s 2>&1 | FileCheck %s --check-prefixes=ALL,E,PPONLY
-+! RUN: %flang -cpp -c -DHELLO="hello all" -save-temps -### %s 2>&1 | FileCheck %s --check-prefixes=ALL,CPP,PP
-+! RUN: %flang -cpp -c -DHELLO="hello all" -save-temps -### -x f95-cpp-input %s 2>&1 | FileCheck %s --check-prefixes=ALL,CPP,PP
-+
-+! Test for the correct cmdline flags
-+! Consume up to flang1 line
-+! ALL-LABEL: "{{.*}}flang1"
-+! CPP-NOT: "-es"
-+! CPP: "-preprocess"
-+! CPP-NOT: "-es"
-+
-+! E-DAG: "-es"
-+! E-DAG: "-preprocess"
-+
-+! CPP should continue to build object
-+! PP: "{{.*}}flang2"
-+! PPONLY-NOT: "{{.*}}flang2"
-+
-+! flang1 and flang2 should only be called at most once!
-+! ALL-NOT: "{{.*}}flang1"
-+! ALL-NOT: "{{.*}}flang2"
-+
-+! These commands should never call a linker!
-+! ALL-NOT: "{{.*}}ld"
-+
-+program hello
-+  write(*, *) HELLO
-+end program hello
-+
-diff --git a/clang/test/Driver/fortran.f95 b/clang/test/Driver/fortran.f95
-index db3ff2da17e8..e364180a1f9c 100644
---- a/clang/test/Driver/fortran.f95
-+++ b/clang/test/Driver/fortran.f95
-@@ -1,6 +1,8 @@
- ! Check that the clang driver can invoke gcc to compile Fortran when in
- ! --driver-mode=clang. This is legacy behaviour - see also --driver-mode=flang.
- 
-+! UNSUPPORTED: classic_flang
-+
- ! RUN: %clang -target x86_64-unknown-linux-gnu -integrated-as -c %s -### 2>&1 \
- ! RUN:   | FileCheck --check-prefix=CHECK-OBJECT %s
- ! 
CHECK-OBJECT: gcc -diff --git a/clang/test/Driver/gfortran.f90 b/clang/test/Driver/gfortran.f90 -index c985428650ec..1276703b1f6e 100644 ---- a/clang/test/Driver/gfortran.f90 -+++ b/clang/test/Driver/gfortran.f90 -@@ -1,4 +1,5 @@ - ! XFAIL: * -+! UNSUPPORTED: classic_flang - ! Test that Clang can forward all of the flags which are documented as - ! being supported by gfortran to GCC when falling back to GCC for - ! a fortran input file. -diff --git a/clang/test/Driver/lit.local.cfg b/clang/test/Driver/lit.local.cfg -index 6370e9f92d89..5ec1f9a8fc71 100644 ---- a/clang/test/Driver/lit.local.cfg -+++ b/clang/test/Driver/lit.local.cfg -@@ -12,6 +12,9 @@ config.suffixes = [ - ".f90", - ".F90", - ".f95", -+ '.F95', -+ '.f', -+ '.F', - ".cu", - ".rs", - ".cl", -diff --git a/clang/test/lit.cfg.py b/clang/test/lit.cfg.py -index 68e038475a5c..bb6fb1ae5146 100644 ---- a/clang/test/lit.cfg.py -+++ b/clang/test/lit.cfg.py -@@ -395,3 +395,6 @@ if "system-aix" in config.available_features: - # possibly be present in system and user configuration files, so disable - # default configs for the test runs. - config.environment["CLANG_NO_DEFAULT_CONFIG"] = "1" -+ -+if config.use_classic_flang: -+ config.available_features.add("classic_flang") -diff --git a/clang/test/lit.site.cfg.py.in b/clang/test/lit.site.cfg.py.in -index ef75770a2c3c..bc278740e0d0 100644 ---- a/clang/test/lit.site.cfg.py.in -+++ b/clang/test/lit.site.cfg.py.in -@@ -41,6 +41,7 @@ config.llvm_external_lit = path(r"@LLVM_EXTERNAL_LIT@") - config.standalone_build = @CLANG_BUILT_STANDALONE@ - config.ppc_linux_default_ieeelongdouble = @PPC_LINUX_DEFAULT_IEEELONGDOUBLE@ - config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@ -+config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@ - - import lit.llvm - lit.llvm.initialize(lit_config, config) -diff --git a/clang/tools/driver/CMakeLists.txt b/clang/tools/driver/CMakeLists.txt -index 2182486f93a5..c269561fde69 100644 ---- a/clang/tools/driver/CMakeLists.txt -+++ b/clang/tools/driver/CMakeLists.txt -@@ -59,7 +59,7 @@ endif() - add_dependencies(clang clang-resource-headers) - - if(NOT CLANG_LINKS_TO_CREATE) -- set(CLANG_LINKS_TO_CREATE clang++ clang-cl clang-cpp) -+ set(CLANG_LINKS_TO_CREATE clang++ clang-cl clang-cpp flang) - endif() - - if (CLANG_ENABLE_HLSL) --- -2.24.3 (Apple Git-128) - diff --git a/0012-Fix-declaration-definition-mismatch-for-classic-flang.patch b/0012-Fix-declaration-definition-mismatch-for-classic-flang.patch deleted file mode 100644 index 8d75040e963446d7f18c200f0592ae04c882a517..0000000000000000000000000000000000000000 --- a/0012-Fix-declaration-definition-mismatch-for-classic-flang.patch +++ /dev/null @@ -1,326 +0,0 @@ -From bf04bad2c43a69730e1cdf4dcd2d06ab786bda33 Mon Sep 17 00:00:00 2001 -From: luofeng14 -Date: Wed, 17 Apr 2024 10:49:19 +0800 -Subject: [PATCH] Fix declaration definition mismatch for classic flang - ---- - .../clang/Basic/DiagnosticDriverKinds.td | 4 ++++ - clang/include/clang/Basic/MacroBuilder.h | 7 ++++++- - clang/include/clang/Basic/Sanitizers.def | 2 ++ - clang/include/clang/Driver/Options.td | 1 - - clang/include/clang/Frontend/Utils.h | 10 ++++++++- - clang/lib/CodeGen/CGDebugInfo.cpp | 21 +++++++++++++++---- - clang/lib/Driver/Driver.cpp | 1 - - clang/lib/Frontend/InitPreprocessor.cpp | 21 +++++++++++++++++++ - .../test/CodeGen/libpgmath-logfun-aarch64.ll | 1 + - clang/test/CodeGen/libpgmath-logfun-x86_64.ll | 2 +- - clang/test/Driver/autocomplete.c | 2 +- - .../flang/classic-flang-vscale-mbits.f95 | 2 ++ - 
.../Driver/flang/classic-flang-vscale.f95 | 2 ++ - clang/tools/driver/CMakeLists.txt | 6 +++++- - 14 files changed, 71 insertions(+), 11 deletions(-) - -diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td -index 09a1949d7596..62a60e65c6a4 100644 ---- a/clang/include/clang/Basic/DiagnosticDriverKinds.td -+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td -@@ -131,10 +131,12 @@ def err_drv_invalid_linker_name : Error< - "invalid linker name in argument '%0'">; - def err_drv_invalid_rtlib_name : Error< - "invalid runtime library name in argument '%0'">; -+#ifdef ENABLE_CLASSIC_FLANG - def err_drv_invalid_allocatable_mode : Error< - "invalid semantic mode for assignments to allocatables in argument '%0'">; - def err_drv_unsupported_fixed_line_length : Error< - "unsupported fixed-format line length in argument '%0'">; -+#endif - def err_drv_unsupported_rtlib_for_platform : Error< - "unsupported runtime library '%0' for platform '%1'">; - def err_drv_invalid_unwindlib_name : Error< -@@ -358,8 +360,10 @@ def err_drv_negative_columns : Error< - "invalid value '%1' in '%0', value must be 'none' or a positive integer">; - def err_drv_small_columns : Error< - "invalid value '%1' in '%0', value must be '%2' or greater">; -+#ifdef ENABLE_CLASSIC_FLANG - def err_drv_clang_unsupported_minfo_arg : Error< - "'%0' option does not support '%1' value">; -+#endif - - def err_drv_invalid_malign_branch_EQ : Error< - "invalid argument '%0' to -malign-branch=; each element must be one of: %1">; -diff --git a/clang/include/clang/Basic/MacroBuilder.h b/clang/include/clang/Basic/MacroBuilder.h -index bfc5e38c15e3..54e9ea643394 100644 ---- a/clang/include/clang/Basic/MacroBuilder.h -+++ b/clang/include/clang/Basic/MacroBuilder.h -@@ -24,10 +24,15 @@ class MacroBuilder { - raw_ostream &Out; - public: - MacroBuilder(raw_ostream &Output) : Out(Output) {} -+ #ifdef ENABLE_CLASSIC_FLANG - virtual ~MacroBuilder() {} -- -+ #endif - /// Append a \#define line for macro of the form "\#define Name Value\n". 
-+ #ifdef ENABLE_CLASSIC_FLANG - virtual void defineMacro(const Twine &Name, const Twine &Value = "1") { -+ #else -+ void defineMacro(const Twine &Name, const Twine &Value = "1") { -+ #endif - Out << "#define " << Name << ' ' << Value << '\n'; - } - -diff --git a/clang/include/clang/Basic/Sanitizers.def b/clang/include/clang/Basic/Sanitizers.def -index 4f0a2bf332ef..a8fcba677ee5 100644 ---- a/clang/include/clang/Basic/Sanitizers.def -+++ b/clang/include/clang/Basic/Sanitizers.def -@@ -107,8 +107,10 @@ SANITIZER("signed-integer-overflow", SignedIntegerOverflow) - SANITIZER("unreachable", Unreachable) - SANITIZER("vla-bound", VLABound) - SANITIZER("vptr", Vptr) -+#ifdef ENABLE_CLASSIC_FLANG - // fortran contiguous pointer checks - SANITIZER("discontiguous", Discontiguous) -+#endif - - // IntegerSanitizer - SANITIZER("unsigned-integer-overflow", UnsignedIntegerOverflow) -diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td -index 365285966a67..dab35f22e4ac 100644 ---- a/clang/include/clang/Driver/Options.td -+++ b/clang/include/clang/Driver/Options.td -@@ -5473,7 +5473,6 @@ defm stack_arrays : BoolOptionWithoutMarshalling<"f", "stack-arrays", - defm loop_versioning : BoolOptionWithoutMarshalling<"f", "version-loops-for-stride", - PosFlag, - NegFlag>; -- - } // let Flags = [FC1Option, FlangOption, FlangOnlyOption] - - #ifdef ENABLE_CLASSIC_FLANG -diff --git a/clang/include/clang/Frontend/Utils.h b/clang/include/clang/Frontend/Utils.h -index c51c617c1379..4ac76dd348c6 100644 ---- a/clang/include/clang/Frontend/Utils.h -+++ b/clang/include/clang/Frontend/Utils.h -@@ -17,10 +17,12 @@ - #include "clang/Basic/LLVM.h" - #include "clang/Driver/OptionUtils.h" - #include "clang/Frontend/DependencyOutputOptions.h" -+#ifdef ENABLE_CLASSIC_FLANG - #include "clang/Basic/TargetInfo.h" -+#include "llvm/ADT/StringExtras.h" -+#endif - #include "llvm/ADT/ArrayRef.h" - #include "llvm/ADT/IntrusiveRefCntPtr.h" --#include "llvm/ADT/StringExtras.h" - #include "llvm/ADT/StringMap.h" - #include "llvm/ADT/StringRef.h" - #include "llvm/ADT/StringSet.h" -@@ -33,11 +35,13 @@ - #include - #include - -+#ifdef ENABLE_CLASSIC_FLANG - namespace llvm { - - class StringRef; - - } // namespace llvm -+#endif - - namespace clang { - -@@ -47,7 +51,9 @@ class CompilerInvocation; - class DiagnosticsEngine; - class ExternalSemaSource; - class FrontendOptions; -+#ifdef ENABLE_CLASSIC_FLANG - class MacroBuilder; -+#endif - class PCHContainerReader; - class Preprocessor; - class PreprocessorOptions; -@@ -63,6 +69,7 @@ void InitializePreprocessor(Preprocessor &PP, const PreprocessorOptions &PPOpts, - void DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, - const PreprocessorOutputOptions &Opts); - -+#ifdef ENABLE_CLASSIC_FLANG - /// DefineTypeSize - Emit a macro to the predefines buffer that declares a macro - /// named MacroName with the max value for a type with width 'TypeWidth' a - /// signedness of 'isSigned' and with a value suffix of 'ValSuffix' (e.g. LL). -@@ -85,6 +92,7 @@ static void DefineTypeSize(const Twine &MacroName, TargetInfo::IntType Ty, - DefineTypeSize(MacroName, TI.getTypeWidth(Ty), TI.getTypeConstantSuffix(Ty), - TI.isTypeSigned(Ty), Builder); - } -+#endif - - /// An interface for collecting the dependencies of a compilation. 
Users should
- /// use \c attachToPreprocessor and \c attachToASTReader to get all of the
-diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
-index 3b140860cce4..30d62ae8faf6 100644
---- a/clang/lib/CodeGen/CGDebugInfo.cpp
-+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
-@@ -3981,7 +3981,10 @@ CGDebugInfo::getGlobalVariableForwardDeclaration(const VarDecl *VD) {
-   auto *GV = DBuilder.createTempGlobalVariableFwdDecl(
-       DContext, Name, LinkageName, Unit, Line, getOrCreateType(T, Unit),
-       !VD->isExternallyVisible(), nullptr, TemplateParameters,
--      llvm::DINode::FlagZero, Align);
-+#ifdef ENABLE_CLASSIC_FLANG
-+      llvm::DINode::FlagZero,
-+#endif
-+      Align);
-   FwdDeclReplaceMap.emplace_back(
-       std::piecewise_construct,
-       std::make_tuple(cast<VarDecl>(VD->getCanonicalDecl())),
-@@ -5468,7 +5471,10 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var,
-         Var->hasLocalLinkage(), true,
-         Expr.empty() ? nullptr : DBuilder.createExpression(Expr),
-         getOrCreateStaticDataMemberDeclarationOrNull(D), TemplateParameters,
--        llvm::DINode::FlagZero, Align, Annotations);
-+#ifdef ENABLE_CLASSIC_FLANG
-+        llvm::DINode::FlagZero,
-+#endif
-+        Align, Annotations);
-     Var->addDebugInfo(GVE);
-   }
-   DeclCache[D->getCanonicalDecl()].reset(GVE);
-@@ -5564,7 +5570,11 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) {
-   GV.reset(DBuilder.createGlobalVariableExpression(
-       DContext, Name, StringRef(), Unit, getLineNumber(VD->getLocation()), Ty,
-       true, true, InitExpr, getOrCreateStaticDataMemberDeclarationOrNull(VarD),
--      TemplateParameters, llvm::DINode::FlagZero, Align));
-+      TemplateParameters,
-+#ifdef ENABLE_CLASSIC_FLANG
-+      llvm::DINode::FlagZero,
-+#endif
-+      Align));
- }
- 
- void CGDebugInfo::EmitExternalVariable(llvm::GlobalVariable *Var,
-@@ -5582,7 +5592,10 @@ void CGDebugInfo::EmitExternalVariable(llvm::GlobalVariable *Var,
-   llvm::DIGlobalVariableExpression *GVE =
-       DBuilder.createGlobalVariableExpression(
-           DContext, Name, StringRef(), Unit, getLineNumber(D->getLocation()),
--          Ty, false, false, nullptr, nullptr, nullptr, llvm::DINode::FlagZero,
-+          Ty, false, false, nullptr, nullptr, nullptr,
-+#ifdef ENABLE_CLASSIC_FLANG
-+          llvm::DINode::FlagZero,
-+#endif
-           Align);
-   Var->addDebugInfo(GVE);
- }
-diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
-index 8bfee5928fe5..819d7703b2e7 100644
---- a/clang/lib/Driver/Driver.cpp
-+++ b/clang/lib/Driver/Driver.cpp
-@@ -377,7 +377,6 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
-              (PhaseArg = DAL.getLastArg(options::OPT_fmodule_header,
-                                         options::OPT_fmodule_header_EQ))) {
-     FinalPhase = phases::Precompile;
--
-   // -{fsyntax-only,-analyze,emit-ast} only run up to the compiler.
-   } else if ((PhaseArg = DAL.getLastArg(options::OPT_fsyntax_only)) ||
-              (PhaseArg = DAL.getLastArg(options::OPT_print_supported_cpus)) ||
-diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
-index 7b01cb01a45e..05682ae9f135 100644
---- a/clang/lib/Frontend/InitPreprocessor.cpp
-+++ b/clang/lib/Frontend/InitPreprocessor.cpp
-@@ -161,6 +161,27 @@ static void DefineFloatMacros(MacroBuilder &Builder, StringRef Prefix,
-   Builder.defineMacro(DefPrefix + "MIN__", Twine(Min)+Ext);
- }
- 
-+#ifndef ENABLE_CLASSIC_FLANG
-+/// DefineTypeSize - Emit a macro to the predefines buffer that declares a macro
-+/// named MacroName with the max value for a type with width 'TypeWidth' a
-+/// signedness of 'isSigned' and with a value suffix of 'ValSuffix' (e.g. LL). 
-+static void DefineTypeSize(const Twine &MacroName, unsigned TypeWidth, -+ StringRef ValSuffix, bool isSigned, -+ MacroBuilder &Builder) { -+ llvm::APInt MaxVal = isSigned ? llvm::APInt::getSignedMaxValue(TypeWidth) -+ : llvm::APInt::getMaxValue(TypeWidth); -+ Builder.defineMacro(MacroName, toString(MaxVal, 10, isSigned) + ValSuffix); -+} -+ -+/// DefineTypeSize - An overloaded helper that uses TargetInfo to determine -+/// the width, suffix, and signedness of the given type -+static void DefineTypeSize(const Twine &MacroName, TargetInfo::IntType Ty, -+ const TargetInfo &TI, MacroBuilder &Builder) { -+ DefineTypeSize(MacroName, TI.getTypeWidth(Ty), TI.getTypeConstantSuffix(Ty), -+ TI.isTypeSigned(Ty), Builder); -+} -+#endif -+ - static void DefineFmt(const Twine &Prefix, TargetInfo::IntType Ty, - const TargetInfo &TI, MacroBuilder &Builder) { - bool IsSigned = TI.isTypeSigned(Ty); -diff --git a/clang/test/CodeGen/libpgmath-logfun-aarch64.ll b/clang/test/CodeGen/libpgmath-logfun-aarch64.ll -index 141fed29ccd1..be05a472016c 100644 ---- a/clang/test/CodeGen/libpgmath-logfun-aarch64.ll -+++ b/clang/test/CodeGen/libpgmath-logfun-aarch64.ll -@@ -1,3 +1,4 @@ -+; REQUIRES: classic_flang - ; REQUIRES: aarch64-registered-target - - ; RUN: %clang -target aarch64-unknown-linux-gnu -Ofast -S %s -o - | FileCheck %s -diff --git a/clang/test/CodeGen/libpgmath-logfun-x86_64.ll b/clang/test/CodeGen/libpgmath-logfun-x86_64.ll -index 3ce1d910947f..c89e538f34e3 100644 ---- a/clang/test/CodeGen/libpgmath-logfun-x86_64.ll -+++ b/clang/test/CodeGen/libpgmath-logfun-x86_64.ll -@@ -1,5 +1,5 @@ - ; REQUIRES: x86-registered-target -- -+; REQUIRES: classic_flang - ; RUN: %clang -target x86_64-unknown-linux-gnu -msse -Ofast -S %s -o - | FileCheck %s - - target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -diff --git a/clang/test/Driver/autocomplete.c b/clang/test/Driver/autocomplete.c -index 8c4921c9dbd5..d6f57708b67e 100644 ---- a/clang/test/Driver/autocomplete.c -+++ b/clang/test/Driver/autocomplete.c -@@ -85,7 +85,7 @@ - // FVECLIBALL-NEXT: libmvec - // FVECLIBALL-NEXT: MASSV - // FVECLIBALL-NEXT: none --// FVECLIBALL: SLEEF -+// FVECLIBALL-NEXT: SLEEF - // FVECLIBALL-NEXT: SVML - // RUN: %clang --autocomplete=-fshow-overloads= | FileCheck %s -check-prefix=FSOVERALL - // FSOVERALL: all -diff --git a/clang/test/Driver/flang/classic-flang-vscale-mbits.f95 b/clang/test/Driver/flang/classic-flang-vscale-mbits.f95 -index f0ed43aa027a..089993be7b04 100644 ---- a/clang/test/Driver/flang/classic-flang-vscale-mbits.f95 -+++ b/clang/test/Driver/flang/classic-flang-vscale-mbits.f95 -@@ -1,3 +1,5 @@ -+! REQUIRES: classic_flang -+ - // RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve -msve-vector-bits=128 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE-128 %s - // RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve -msve-vector-bits=128+ %s 2>&1 | FileCheck -check-prefix=CHECK-SVE-128PLUS %s - // RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve -msve-vector-bits=256 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE-256 %s -diff --git a/clang/test/Driver/flang/classic-flang-vscale.f95 b/clang/test/Driver/flang/classic-flang-vscale.f95 -index 8110be594db5..b8496975758f 100644 ---- a/clang/test/Driver/flang/classic-flang-vscale.f95 -+++ b/clang/test/Driver/flang/classic-flang-vscale.f95 -@@ -1,3 +1,5 @@ -+! 
REQUIRES: classic_flang -+ - // RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a %s 2>&1 | FileCheck -check-prefix=CHECK-NEON %s - // RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve %s 2>&1 | FileCheck -check-prefix=CHECK-SVE %s - // RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2 %s -diff --git a/clang/tools/driver/CMakeLists.txt b/clang/tools/driver/CMakeLists.txt -index c269561fde69..465b05c9c0e9 100644 ---- a/clang/tools/driver/CMakeLists.txt -+++ b/clang/tools/driver/CMakeLists.txt -@@ -59,7 +59,11 @@ endif() - add_dependencies(clang clang-resource-headers) - - if(NOT CLANG_LINKS_TO_CREATE) -- set(CLANG_LINKS_TO_CREATE clang++ clang-cl clang-cpp flang) -+ if (LLVM_ENABLE_CLASSIC_FLANG) -+ set(CLANG_LINKS_TO_CREATE clang++ clang-cl clang-cpp flang) -+ else() -+ set(CLANG_LINKS_TO_CREATE clang++ clang-cl clang-cpp) -+ endif() - endif() - - if (CLANG_ENABLE_HLSL) --- -2.24.3 (Apple Git-128) - diff --git a/0013-Ignored-option-Wa-generate-missing-build-notes.patch b/0013-Ignored-option-Wa-generate-missing-build-notes.patch deleted file mode 100644 index 1d6c807c611492c04c3228f1d2a6ebe0390317e7..0000000000000000000000000000000000000000 --- a/0013-Ignored-option-Wa-generate-missing-build-notes.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 2ce94f822eb951dcff23036f204a98900a4fb805 Mon Sep 17 00:00:00 2001 -From: wangqiang -Date: Wed, 24 Apr 2024 11:21:02 +0800 -Subject: [PATCH] Ignored option `-Wa,--generate-missing-build-notes=` - ---- - clang/lib/Driver/ToolChains/Clang.cpp | 6 ++++++ - .../Driver/test-generate-missing-build-notes.cpp | 13 +++++++++++++ - 2 files changed, 19 insertions(+) - create mode 100644 clang/test/Driver/test-generate-missing-build-notes.cpp - -diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp -index 6b5930990..704a32912 100644 ---- a/clang/lib/Driver/ToolChains/Clang.cpp -+++ b/clang/lib/Driver/ToolChains/Clang.cpp -@@ -2731,6 +2731,12 @@ static void CollectArgsForIntegratedAssembler(Compilation &C, - CmdArgs.push_back(Value.data()); - } else if (Value == "--version") { - D.PrintVersion(C, llvm::outs()); -+ #ifdef BUILD_FOR_OPENEULER -+ } else if (Value.startswith("--generate-missing-build-notes=") && -+ Args.hasFlag(options::OPT_fgcc_compatible, -+ options::OPT_fno_gcc_compatible, false)) { -+ // Do nothing. 
-+ #endif - } else { - D.Diag(diag::err_drv_unsupported_option_argument) - << A->getSpelling() << Value; -diff --git a/clang/test/Driver/test-generate-missing-build-notes.cpp b/clang/test/Driver/test-generate-missing-build-notes.cpp -new file mode 100644 -index 000000000..efd5251e6 ---- /dev/null -+++ b/clang/test/Driver/test-generate-missing-build-notes.cpp -@@ -0,0 +1,13 @@ -+// REQUIRES: build_for_openeuler -+// RUN: %clang -### -fgcc-compatible -Wa,--generate-missing-build-notes=yes %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ERROR %s -+// RUN: %clang -### -fgcc-compatible -Wa,--generate-missing-build-notes=no %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ERROR %s -+// CHECK-NO-ERROR-NOT: --generate-missing-build-notes= -+// RUN: %clang -### -Wa,--generate-missing-build-notes=yes %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR %s -+// RUN: %clang -### -Wa,--generate-missing-build-notes=no %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR %s -+// RUN: %clang -### -fno-gcc-compatible -Wa,--generate-missing-build-notes=yes %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR %s -+// RUN: %clang -### -fno-gcc-compatible -Wa,--generate-missing-build-notes=no %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR %s -+// CHECK-ERROR: error: unsupported argument '--generate-missing-build-notes= -+ -+int main() { -+ return 1; -+} --- -2.33.0 - diff --git a/0014-Update-llvm-lit-config-to-support-build_for_openeule.patch b/0014-Update-llvm-lit-config-to-support-build_for_openeule.patch deleted file mode 100644 index dc92eeaf79afa3cfb51f6850eef3f92bd9e3c4df..0000000000000000000000000000000000000000 --- a/0014-Update-llvm-lit-config-to-support-build_for_openeule.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 5721be433ddee5f60d4a9434df43a023f1ec4c0e Mon Sep 17 00:00:00 2001 -From: wangqiang -Date: Sun, 28 Apr 2024 14:30:34 +0800 -Subject: [PATCH] Update llvm-lit config to support build_for_openeuler - ---- - clang/test/lit.site.cfg.py.in | 1 + - 1 files changed, 1 insertions(+) - -diff --git a/clang/test/lit.site.cfg.py.in b/clang/test/lit.site.cfg.py.in -index ef75770a2..ae5309180 100644 ---- a/clang/test/lit.site.cfg.py.in -+++ b/clang/test/lit.site.cfg.py.in -@@ -42,6 +42,7 @@ config.llvm_external_lit = path(r"@LLVM_EXTERNAL_LIT@") - config.ppc_linux_default_ieeelongdouble = @PPC_LINUX_DEFAULT_IEEELONGDOUBLE@ - config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@ - config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@ -+config.build_for_openeuler = @BUILD_FOR_OPENEULER@ - - import lit.llvm - lit.llvm.initialize(lit_config, config) - --- -2.33.0 - diff --git a/0015-Backport-Defer-the-instantiation-of-explicit-specifier-until-.patch b/0015-Backport-Defer-the-instantiation-of-explicit-specifier-until-.patch deleted file mode 100644 index c7b713e2406176ee9724171b0f47419c23f5ef6c..0000000000000000000000000000000000000000 --- a/0015-Backport-Defer-the-instantiation-of-explicit-specifier-until-.patch +++ /dev/null @@ -1,259 +0,0 @@ -From c2668403868559918b54671d3d31527fb2f04486 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?=E5=88=98=E9=9B=A8=E5=9F=B9?= -Date: Wed, 1 Nov 2023 21:45:48 +0800 -Subject: [PATCH] Defer the instantiation of explicit-specifier until - constraint checking completes (#70548) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Modifications: - -- Skip the instantiation of the explicit-specifier during Decl -substitution if we are deducing template arguments and the -explicit-specifier is value dependent. 
-
-- Instantiate the explicit-specifier after
-constraint checking completes.
-
-- Make `instantiateExplicitSpecifier` a member function in order to
-instantiate the explicit-specifier in different stages.
-
-This PR doesn’t defer the instantiation of the explicit specifier for
-deduction guides, because I’m not familiar with deduction guides yet.
-I’ll dig into it after this PR.
-
-According to my local test, GCC 13 tuple works with this PR.
-
-Fixes #59827.
-
----------
-
-Co-authored-by: Erich Keane
----
- docs/ReleaseNotes.rst                         |  4 ++
- include/clang/Sema/Sema.h                     |  3 ++
- lib/Sema/SemaTemplateDeduction.cpp            | 53 +++++++++++++++++++
- lib/Sema/SemaTemplateInstantiateDecl.cpp      | 40 +++++++++-----
- test/SemaCXX/cxx2a-explicit-bool-deferred.cpp | 31 +++++++++++
- 5 files changed, 117 insertions(+), 14 deletions(-)
- create mode 100644 test/SemaCXX/cxx2a-explicit-bool-deferred.cpp
-
-diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
-index 5086a56e..05dad41c 100644
---- a/clang/docs/ReleaseNotes.rst
-+++ b/clang/docs/ReleaseNotes.rst
-@@ -860,6 +860,10 @@ Bug Fixes to C++ Support
-   (`#64172 <https://github.com/llvm/llvm-project/issues/64172>`_) and
-   (`#64723 <https://github.com/llvm/llvm-project/issues/64723>`_).
- 
-+- Clang now defers the instantiation of explicit specifier until constraint checking
-+  completes (except deduction guides). Fixes:
-+  (`#59827 <https://github.com/llvm/llvm-project/issues/59827>`_)
-+
- Bug Fixes to AST Handling
- ^^^^^^^^^^^^^^^^^^^^^^^^^
- 
-diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
-index 3752a23f..b2ab6d0f 100644
---- a/clang/include/clang/Sema/Sema.h
-+++ b/clang/include/clang/Sema/Sema.h
-@@ -10293,6 +10293,9 @@ public:
-                               const CXXConstructorDecl *Tmpl,
-                               const MultiLevelTemplateArgumentList &TemplateArgs);
- 
-+  ExplicitSpecifier instantiateExplicitSpecifier(
-+      const MultiLevelTemplateArgumentList &TemplateArgs, ExplicitSpecifier ES);
-+
-   NamedDecl *FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D,
-                                   const MultiLevelTemplateArgumentList &TemplateArgs,
-                                   bool FindingInstantiatedContext = false);
-diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
-index 31ea7be2..58dd1b78 100644
---- a/clang/lib/Sema/SemaTemplateDeduction.cpp
-+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
-@@ -3546,6 +3546,48 @@ static unsigned getPackIndexForParam(Sema &S,
-   llvm_unreachable("parameter index would not be produced from template");
- }
- 
-+// if `Specialization` is a `CXXConstructorDecl` or `CXXConversionDecl`,
-+// we'll try to instantiate and update its explicit specifier after constraint
-+// checking.
-+static Sema::TemplateDeductionResult instantiateExplicitSpecifierDeferred(
-+    Sema &S, FunctionDecl *Specialization,
-+    const MultiLevelTemplateArgumentList &SubstArgs,
-+    TemplateDeductionInfo &Info, FunctionTemplateDecl *FunctionTemplate,
-+    ArrayRef<TemplateArgument> DeducedArgs) {
-+  auto GetExplicitSpecifier = [](FunctionDecl *D) {
-+    return isa<CXXConstructorDecl>(D)
-+               ? cast<CXXConstructorDecl>(D)->getExplicitSpecifier()
-+               : cast<CXXConversionDecl>(D)->getExplicitSpecifier();
-+  };
-+  auto SetExplicitSpecifier = [](FunctionDecl *D, ExplicitSpecifier ES) {
-+    isa<CXXConstructorDecl>(D)
-+        ? cast<CXXConstructorDecl>(D)->setExplicitSpecifier(ES)
-+        : cast<CXXConversionDecl>(D)->setExplicitSpecifier(ES);
-+  };
-+
-+  ExplicitSpecifier ES = GetExplicitSpecifier(Specialization);
-+  Expr *ExplicitExpr = ES.getExpr();
-+  if (!ExplicitExpr)
-+    return Sema::TDK_Success;
-+  if (!ExplicitExpr->isValueDependent())
-+    return Sema::TDK_Success;
-+
-+  Sema::InstantiatingTemplate Inst(
-+      S, Info.getLocation(), FunctionTemplate, DeducedArgs,
-+      Sema::CodeSynthesisContext::DeducedTemplateArgumentSubstitution, Info);
-+  if (Inst.isInvalid())
-+    return Sema::TDK_InstantiationDepth;
-+  Sema::SFINAETrap Trap(S);
-+  const ExplicitSpecifier InstantiatedES =
-+      S.instantiateExplicitSpecifier(SubstArgs, ES);
-+  if (InstantiatedES.isInvalid() || Trap.hasErrorOccurred()) {
-+    Specialization->setInvalidDecl(true);
-+    return Sema::TDK_SubstitutionFailure;
-+  }
-+  SetExplicitSpecifier(Specialization, InstantiatedES);
-+  return Sema::TDK_Success;
-+}
-+
- /// Finish template argument deduction for a function template,
- /// checking the deduced template arguments for completeness and forming
- /// the function template specialization.
-@@ -3675,6 +3717,17 @@ Sema::TemplateDeductionResult Sema::FinishTemplateArgumentDeduction(
-     }
-   }
- 
-+  // We skipped the instantiation of the explicit-specifier during the
-+  // substitution of `FD` before. So, we try to instantiate it back if
-+  // `Specialization` is either a constructor or a conversion function.
-+  if (isa<CXXConstructorDecl, CXXConversionDecl>(Specialization)) {
-+    if (TDK_Success != instantiateExplicitSpecifierDeferred(
-+                           *this, Specialization, SubstArgs, Info,
-+                           FunctionTemplate, DeducedArgs)) {
-+      return TDK_SubstitutionFailure;
-+    }
-+  }
-+
-   if (OriginalCallArgs) {
-     // C++ [temp.deduct.call]p4:
-     //   In general, the deduction process attempts to find template argument
-diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
-index f78d46f5..a40510ce 100644
---- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
-+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
-@@ -555,18 +555,16 @@ static void instantiateDependentAMDGPUFlatWorkGroupSizeAttr(
-   S.addAMDGPUFlatWorkGroupSizeAttr(New, Attr, MinExpr, MaxExpr);
- }
- 
--static ExplicitSpecifier
--instantiateExplicitSpecifier(Sema &S,
--                             const MultiLevelTemplateArgumentList &TemplateArgs,
--                             ExplicitSpecifier ES, FunctionDecl *New) {
-+ExplicitSpecifier Sema::instantiateExplicitSpecifier(
-+    const MultiLevelTemplateArgumentList &TemplateArgs, ExplicitSpecifier ES) {
-   if (!ES.getExpr())
-     return ES;
-   Expr *OldCond = ES.getExpr();
-   Expr *Cond = nullptr;
-   {
-     EnterExpressionEvaluationContext Unevaluated(
--        S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
--    ExprResult SubstResult = S.SubstExpr(OldCond, TemplateArgs);
-+        *this, Sema::ExpressionEvaluationContext::ConstantEvaluated);
-+    ExprResult SubstResult = SubstExpr(OldCond, TemplateArgs);
-     if (SubstResult.isInvalid()) {
-       return ExplicitSpecifier::Invalid();
-     }
-@@ -574,7 +572,7 @@ instantiateExplicitSpecifier(Sema &S,
-   }
-   ExplicitSpecifier Result(Cond, ES.getKind());
-   if (!Cond->isTypeDependent())
--    S.tryResolveExplicitSpecifier(Result);
-+    tryResolveExplicitSpecifier(Result);
-   return Result;
- }
- 
-@@ -2065,8 +2063,8 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl(
- 
-   ExplicitSpecifier InstantiatedExplicitSpecifier;
-   if (auto *DGuide = dyn_cast<CXXDeductionGuideDecl>(D)) {
--    InstantiatedExplicitSpecifier = instantiateExplicitSpecifier(
--        SemaRef, TemplateArgs, DGuide->getExplicitSpecifier(), DGuide);
-+    InstantiatedExplicitSpecifier = SemaRef.instantiateExplicitSpecifier(
-+        TemplateArgs, DGuide->getExplicitSpecifier());
-     if (InstantiatedExplicitSpecifier.isInvalid())
-       return nullptr;
-   }
-@@ -2440,11 +2438,25 @@ Decl *TemplateDeclInstantiator::VisitCXXMethodDecl(
-     }
-   }
- 
--  ExplicitSpecifier InstantiatedExplicitSpecifier =
--      instantiateExplicitSpecifier(SemaRef, TemplateArgs,
--                                   ExplicitSpecifier::getFromDecl(D), D);
--  if (InstantiatedExplicitSpecifier.isInvalid())
--    return nullptr;
-+  auto InstantiatedExplicitSpecifier = ExplicitSpecifier::getFromDecl(D);
-+  // deduction guides need this
-+  const bool CouldInstantiate =
-+      InstantiatedExplicitSpecifier.getExpr() == nullptr ||
-+      !InstantiatedExplicitSpecifier.getExpr()->isValueDependent();
-+
-+  // Delay the instantiation of the explicit-specifier until after the
-+  // constraints are checked during template argument deduction.
-+  if (CouldInstantiate ||
-+      SemaRef.CodeSynthesisContexts.back().Kind !=
-+          Sema::CodeSynthesisContext::DeducedTemplateArgumentSubstitution) {
-+    InstantiatedExplicitSpecifier = SemaRef.instantiateExplicitSpecifier(
-+        TemplateArgs, InstantiatedExplicitSpecifier);
-+
-+    if (InstantiatedExplicitSpecifier.isInvalid())
-+      return nullptr;
-+  } else {
-+    InstantiatedExplicitSpecifier.setKind(ExplicitSpecKind::Unresolved);
-+  }
- 
-   // Implicit destructors/constructors created for local classes in
-   // DeclareImplicit* (see SemaDeclCXX.cpp) might not have an associated TSI.
-diff --git a/clang/test/SemaCXX/cxx2a-explicit-bool-deferred.cpp b/clang/test/SemaCXX/cxx2a-explicit-bool-deferred.cpp
-new file mode 100644
-index 00000000..4d667008
---- /dev/null
-+++ b/clang/test/SemaCXX/cxx2a-explicit-bool-deferred.cpp
-@@ -0,0 +1,31 @@
-+// RUN: %clang_cc1 -fsyntax-only -verify -std=c++2a %s
-+
-+template <typename T, typename U> struct is_same {
-+  static constexpr bool value = false;
-+};
-+
-+template <typename T> struct is_same<T, T> {
-+  static constexpr bool value = true;
-+};
-+
-+template <typename T, typename U>
-+concept SameHelper = is_same<T, U>::value;
-+template <typename T, typename U>
-+concept same_as = SameHelper<T, U> && SameHelper<U, T>;
-+
-+namespace deferred_instantiation {
-+template <class X> constexpr X do_not_instantiate() { return nullptr; }
-+
-+struct T {
-+  template <same_as<float> X> explicit(do_not_instantiate<X>()) T(X) {}
-+
-+  T(int) {}
-+};
-+
-+T t(5);
-+// expected-error@17{{cannot initialize}}
-+// expected-note@20{{in instantiation of function template specialization}}
-+// expected-note@30{{while substituting deduced template arguments}}
-+// expected-note@30{{in instantiation of function template specialization}}
-+T t2(5.0f);
-+} // namespace deferred_instantiation
--- 
-2.33.0
-
diff --git a/0016-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch b/0016-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch
deleted file mode 100644
index 1e361b0c3e825f5ff311e4c4fac476799c66c780..0000000000000000000000000000000000000000
--- a/0016-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch
+++ /dev/null
@@ -1,739 +0,0 @@
-From a9863e2b6e6783aa9be0b9d1d187084fd4b32a3a Mon Sep 17 00:00:00 2001
-From: Muhammad Asif Manzoor
-Date: Thu, 21 Mar 2024 12:50:38 -0400
-Subject: Add BiSheng Autotuner support for LLVM compiler
-
-Automatic tuning is an iterative process that optimizes a given program
-by manipulating compilation options for optimal performance. BiSheng
-Autotuner provides a resumable interface for the tuning process. BiSheng
-Autotuner can tune 1) individual code segments/blocks (fine grain tuning)
-like loops, callsites, instructions, etc. and 2) entire modules/programs
-(coarse grain tuning) for compiler flags, pass ordering, etc.
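To make the two granularities concrete, the sketch below shows what a tunable code region looks like from the compiler's point of view. It is a minimal illustration only: the saxpy example and the pragma-style annotation are hypothetical and do not reflect the exact opportunity-file format or interface used by BiSheng Autotuner.

// Fine-grain tuning: the compiler reports this loop as a tunable code
// region (identified by function, file, and line), and the autotuner
// iteratively proposes values for parameters such as the unroll count.
void saxpy(int n, float a, const float *x, float *y) {
  // Conceptually, each tuning iteration behaves as if the source carried
  // "#pragma unroll(U)" here, with U re-proposed by the autotuner until
  // the measured run time stops improving.
  for (int i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];
}
// Coarse-grain tuning instead treats the whole module or program as one
// region and searches over compiler flags and pass ordering.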
-This patch enables LLVM compiler to extract tuneable code regions and then apply -suggested configuration (by Autotuner) to find out the optimal configurations. ---- - clang/include/clang/Basic/CMakeLists.txt | 15 ++ - .../clang/Basic/DiagnosticDriverKinds.td | 9 ++ - .../clang/Basic/DiagnosticFrontendKinds.td | 8 + - clang/include/clang/Driver/CMakeLists.txt | 6 +- - clang/include/clang/Driver/Driver.h | 36 +++++ - clang/include/clang/Driver/Options.td | 13 ++ - clang/lib/CodeGen/BackendUtil.cpp | 58 +++++++ - clang/lib/Driver/Driver.cpp | 82 ++++++++++ - clang/lib/Driver/ToolChains/Clang.cpp | 21 +++ - clang/lib/Driver/ToolChains/CommonArgs.cpp | 113 ++++++++++++++ - clang/lib/Driver/ToolChains/CommonArgs.h | 8 + - clang/lib/Driver/ToolChains/Gnu.cpp | 34 ++++ - .../ExecuteCompilerInvocation.cpp | 27 ++++ - .../autotune_datadir/baseline-config.yaml | 9 ++ - .../autotune_datadir/random-config.yaml | 9 ++ - .../BaselineConfig/apply-baseline-config.c | 32 ++++ - .../test/Autotuning/Driver/Inputs/config.yaml | 3 + - .../Autotuning/Driver/Inputs/template.yaml | 9 ++ - .../Driver/autotune-generate-pipeline.c | 146 ++++++++++++++++++ - .../Driver/autotune-pipeline-thin-lto.c | 42 +++++ - .../Autotuning/Driver/autotune-pipeline.c | 131 ++++++++++++++++ - .../test/Autotuning/GenerateOpp/generate.cpp | 25 +++ - .../Inputs/template.yaml | 9 ++ - .../IncrementalCompilation/Inputs/test1.c | 3 + - .../IncrementalCompilation/Inputs/test2.c | 17 ++ - .../IncrementalCompilation/Inputs/test3.c | 6 + - .../inc-compile-generate-input.cpp | 44 ++++++ - .../Inputs/datadir/corse_grain_config.yaml | 1 + - .../LTO/Inputs/datadir/fine_grain_a.out.yaml | 4 + - .../LTO/Inputs/datadir/fine_grain_output.yaml | 1 + - .../LTO/apply_config_coarse_grain.cpp | 41 +++++ - .../LTO/apply_config_fine_grain.cpp | 58 +++++++ - .../Autotuning/LTO/generate_opportunity.cpp | 56 +++++++ - .../PhaseOrdering/Inputs/template.yaml | 8 + - .../Autotuning/PhaseOrdering/pass-order.cpp | 48 ++++++ - 42 files changed, 1170 insertions(+), 1 deletion(-) - -diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt -index f010e04f62cd..e449d2790597 100644 ---- a/clang/include/clang/Basic/CMakeLists.txt -+++ b/clang/include/clang/Basic/CMakeLists.txt -@@ -1,6 +1,12 @@ -+set(CLANG_BASIC_OPTIONS) -+if(LLVM_ENABLE_AUTOTUNER) -+ list(APPEND CLANG_BASIC_OPTIONS "-DENABLE_AUTOTUNER") -+endif() -+ - macro(clang_diag_gen component) - clang_tablegen(Diagnostic${component}Kinds.inc - -gen-clang-diags-defs -clang-component=${component} -+ ${CLANG_BASIC_OPTIONS} - SOURCE Diagnostic.td - TARGET ClangDiagnostic${component}) - endmacro(clang_diag_gen) -@@ -18,20 +24,24 @@ clang_diag_gen(Refactoring) - clang_diag_gen(Sema) - clang_diag_gen(Serialization) - clang_tablegen(DiagnosticGroups.inc -gen-clang-diag-groups -+ ${CLANG_BASIC_OPTIONS} - SOURCE Diagnostic.td - TARGET ClangDiagnosticGroups) - - clang_tablegen(DiagnosticIndexName.inc -gen-clang-diags-index-name -+ ${CLANG_BASIC_OPTIONS} - SOURCE Diagnostic.td - TARGET ClangDiagnosticIndexName) - - clang_tablegen(AttrList.inc -gen-clang-attr-list - -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ -+ ${CLANG_BASIC_OPTIONS} - SOURCE Attr.td - TARGET ClangAttrList) - - clang_tablegen(AttrSubMatchRulesList.inc -gen-clang-attr-subject-match-rule-list - -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ -+ ${CLANG_BASIC_OPTIONS} - SOURCE Attr.td - TARGET ClangAttrSubjectMatchRuleList) - -@@ -43,6 +53,7 @@ clang_tablegen(AttrTokenKinds.inc -gen-clang-attr-token-kinds - - 
clang_tablegen(AttrHasAttributeImpl.inc -gen-clang-attr-has-attribute-impl - -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ -+ ${CLANG_BASIC_OPTIONS} - SOURCE Attr.td - TARGET ClangAttrHasAttributeImpl - ) -@@ -67,15 +78,19 @@ clang_tablegen(arm_mve_builtin_aliases.inc -gen-arm-mve-builtin-aliases - SOURCE arm_mve.td - TARGET ClangARMMveBuiltinAliases) - clang_tablegen(arm_sve_builtins.inc -gen-arm-sve-builtins -+ ${CLANG_BASIC_OPTIONS} - SOURCE arm_sve.td - TARGET ClangARMSveBuiltins) - clang_tablegen(arm_sve_builtin_cg.inc -gen-arm-sve-builtin-codegen -+ ${CLANG_BASIC_OPTIONS} - SOURCE arm_sve.td - TARGET ClangARMSveBuiltinCG) - clang_tablegen(arm_sve_typeflags.inc -gen-arm-sve-typeflags -+ ${CLANG_BASIC_OPTIONS} - SOURCE arm_sve.td - TARGET ClangARMSveTypeFlags) - clang_tablegen(arm_sve_sema_rangechecks.inc -gen-arm-sve-sema-rangechecks -+ ${CLANG_BASIC_OPTIONS} - SOURCE arm_sve.td - TARGET ClangARMSveSemaRangeChecks) - clang_tablegen(arm_sme_builtins.inc -gen-arm-sme-builtins -diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td -index 37586242953f..6b68bc458b93 100644 ---- a/clang/include/clang/Basic/DiagnosticDriverKinds.td -+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td -@@ -248,6 +248,15 @@ def err_drv_cannot_read_config_file : Error< - "cannot read configuration file '%0': %1">; - def err_drv_arg_requires_bitcode_input: Error< - "option '%0' requires input to be LLVM bitcode">; -+#ifdef ENABLE_AUTOTUNER -+def err_drv_autotune_generic : Error<"%0">; -+def err_drv_autotune_disabled_O0 : Error< -+ "-fautotune/-fautotune-generate should not be enabled at -O0">; -+def err_drv_autotune_incorrect_env : Error< -+ "incorrect argument '%0' in environment variable used">; -+def err_drv_autotune_no_filter_types : Error< -+ "no types added for filtering with %0">; -+#endif - - def err_target_unsupported_arch - : Error<"the target architecture '%0' is not supported by the target '%1'">; -diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td -index 9ed9a88fa3d6..11022962ae9e 100644 ---- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td -+++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td -@@ -346,4 +346,12 @@ def warn_profile_data_misexpect : Warning< - def err_extract_api_ignores_file_not_found : - Error<"file '%0' specified by '--extract-api-ignores=' not found">, DefaultFatal; - -+#ifdef ENABLE_AUTOTUNER -+let CategoryName = "AutoTuning Issues" in { -+def err_auto_tuning_error_reading : Error<"'%0'">; -+def err_auto_tuning_error_dumping : Error<"'%0'">; -+def err_unable_to_create_pass : Error< -+ "cannot create pass '%0' from AutoTuning input file">; -+} // end of autoTuning issue category -+#endif - } -diff --git a/clang/include/clang/Driver/CMakeLists.txt b/clang/include/clang/Driver/CMakeLists.txt -index 8c0af1528a96..56fff6a2504e 100644 ---- a/clang/include/clang/Driver/CMakeLists.txt -+++ b/clang/include/clang/Driver/CMakeLists.txt -@@ -8,7 +8,11 @@ endif() - if (LLVM_ENABLE_CLASSIC_FLANG) - list(APPEND CLANG_DRIVER_OPTIONS -DENABLE_CLASSIC_FLANG ) - endif() -- -+ -+if (LLVM_ENABLE_AUTOTUNER) -+ list(APPEND CLANG_DRIVER_OPTIONS "-DENABLE_AUTOTUNER" ) -+endif() -+ - tablegen(LLVM Options.inc ${CLANG_DRIVER_OPTIONS} -gen-opt-parser-defs ) - - add_public_tablegen_target(ClangDriverOptions) -diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h -index e3e98bad9912..dcecb473b516 100644 ---- 
a/clang/include/clang/Driver/Driver.h -+++ b/clang/include/clang/Driver/Driver.h -@@ -72,6 +72,14 @@ enum ModuleHeaderMode { - HeaderMode_System - }; - -+#if defined(ENABLE_AUTOTUNER) -+enum AutoTuneKind { -+ AutoTuneNone, -+ AutoTuneGenerate, -+ AutoTuneNext, -+}; -+#endif -+ - /// Driver - Encapsulate logic for constructing compilation processes - /// from a set of gcc-driver-like command line arguments. - class Driver { -@@ -119,6 +127,11 @@ class Driver { - /// LTO mode selected via -f(no-offload-)?lto(=.*)? options. - LTOKind OffloadLTOMode; - -+#if defined(ENABLE_AUTOTUNER) -+ /// AutoTune mode selected via -fautotune or -fautotune-generate option -+ AutoTuneKind AutoTuneMode; -+#endif -+ - public: - enum OpenMPRuntimeKind { - /// An unknown OpenMP runtime. We can't generate effective OpenMP code -@@ -191,6 +204,21 @@ public: - /// Information about the host which can be overridden by the user. - std::string HostBits, HostMachine, HostSystem, HostRelease; - -+#if defined(ENABLE_AUTOTUNER) -+ /// The path to the llvm-autotune data directory. -+ std::string AutoTuneDirDataPath; -+ /// Path for project base directory. Base directory is removed from absolute -+ /// path and relative path is used as (coarse-grain) code region name. This -+ /// allow to port a config file from one machine/location to another. -+ std::string AutoTuneProjectDir; -+ -+ /// Whether to prepare the compiler to produce additional metadata -+ /// that will be consumed by Autotuner's ML model -+ bool IsMLTuningEnabled; -+ -+ std::string AutoTuneOptions; -+#endif -+ - /// The file to log CC_PRINT_PROC_STAT_FILE output to, if enabled. - std::string CCPrintStatReportFilename; - -@@ -705,6 +733,14 @@ public: - return IsOffload ? OffloadLTOMode : LTOMode; - } - -+#if defined(ENABLE_AUTOTUNER) -+ /// Returns true if we are performing any kind of AutoTune. -+ bool isUsingAutoTune() const { return AutoTuneMode != AutoTuneNone; } -+ -+ /// Get the specific kind of AutoTune being performed. -+ AutoTuneKind getAutoTuneMode() const { return AutoTuneMode; } -+#endif -+ - private: - - /// Tries to load options from configuration files. -diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td -index c5cc66c58f25..71d6ed66ab96 100644 ---- a/clang/include/clang/Driver/Options.td -+++ b/clang/include/clang/Driver/Options.td -@@ -1795,6 +1795,19 @@ def fmemory_profile_use_EQ : Joined<["-"], "fmemory-profile-use=">, - Group, Flags<[CC1Option, CoreOption]>, MetaVarName<"">, - HelpText<"Use memory profile for profile-guided memory optimization">, - MarshallingInfoString>; -+#ifdef ENABLE_AUTOTUNER -+// Auto-tuning flags. 
-+def fautotune : Flag<["-"], "fautotune">, Group<f_Group>,
-+ HelpText<"Auto-tune with the compiler configuration under 'autotune_datadir' (overridden by AUTOTUNE_DATADIR env var)">;
-+def fautotune_EQ : Joined<["-"], "fautotune=">, Group<f_Group>,
-+ HelpText<"Auto-tune with the compiler configuration of the specified id under 'autotune_datadir' (overridden by AUTOTUNE_DATADIR env var)">;
-+def fautotune_generate : Flag<["-"], "fautotune-generate">, Group<f_Group>,
-+ HelpText<"Generate initial compiler configuration for Function/Loop code regions under 'autotune_datadir' (overridden by AUTOTUNE_DATADIR env var)">;
-+def fautotune_generate_EQ : CommaJoined<["-"], "fautotune-generate=">, Group<f_Group>,
-+ HelpText<"Generate initial compiler configuration for the given comma-separated list of code regions under 'autotune_datadir' (overridden by AUTOTUNE_DATADIR env var)">, Values<"Other,Function,Loop,MachineBasicBlock">;
-+def fautotune_rank : Flag<["-"], "fautotune-rank">, Group<f_Group>,
-+ HelpText<"Generate files necessary for ML-guided ranking">;
-+#endif
-
- #ifdef BUILD_FOR_OPENEULER
- def fgcc_compatible : Flag<["-"], "fgcc-compatible">, Group<f_Group>,
-diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
-index f962d60870d1..cef5e0d16ba7 100644
---- a/clang/lib/CodeGen/BackendUtil.cpp
-+++ b/clang/lib/CodeGen/BackendUtil.cpp
-@@ -88,6 +88,10 @@ using namespace llvm;
- llvm::PassPluginLibraryInfo get##Ext##PluginInfo();
- #include "llvm/Support/Extension.def"
-
-+#if defined(ENABLE_AUTOTUNER)
-+#include "llvm/Analysis/AutotuningDump.h"
-+#endif
-+
- namespace llvm {
- extern cl::opt<bool> DebugInfoCorrelate;
-
-@@ -1021,6 +1025,27 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
- });
- }
-
-+#if defined(ENABLE_AUTOTUNER)
-+ bool Changed = false;
-+ // If autotuning is enabled (for applying configuration), use AutoTuner
-+ // generated pass ordering instead of passes in compilation pipeline. Passes
-+ // before and after the compilation pipeline will be intact.
-+ if (autotuning::Engine.isEnabled()) {
-+ std::vector<std::string> PassesList;
-+ Changed = autotuning::Engine.lookUpGlobalParams("OptPass", PassesList);
-+ if (Changed && PassesList.size()) {
-+ std::string PassPipeline = "";
-+ for (auto PassName : PassesList)
-+ PassPipeline.append(PassName + ",");
-+ PassPipeline.pop_back();
-+
-+ if (auto Err = PB.parsePassPipeline(MPM, PassPipeline))
-+ errs() << "AutoTuner: cannot add pass:" << toString(std::move(Err))
-+ << "\n";
-+ }
-+ }
-+ if (!Changed) {
-+#endif
- if (IsThinLTO || (IsLTO && CodeGenOpts.UnifiedLTO)) {
- MPM = PB.buildThinLTOPreLinkDefaultPipeline(Level);
- } else if (IsLTO) {
-@@ -1028,6 +1053,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
- } else {
- MPM = PB.buildPerModuleDefaultPipeline(Level);
- }
-+#if defined(ENABLE_AUTOTUNER)
-+ }
-+#endif
- }
-
- // Add a verifier pass if requested. We don't have to do this if the action
-@@ -1078,6 +1106,12 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
- }
- }
-
-+#if defined(ENABLE_AUTOTUNER)
-+ // Please ensure this pass is added after all optimization passes.
-+ if (autotuning::Engine.isEnabled())
-+ MPM.addPass(RequireAnalysisPass<AutotuningDumpAnalysis, Module>());
-+#endif
-+
- // Now that we have all of the passes ready, run them.
- {
- PrettyStackTraceString CrashInfo("Optimizer");
-@@ -1125,6 +1159,22 @@ void EmitAssemblyHelper::RunCodegenPipeline(
- void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
- std::unique_ptr<raw_pwrite_stream> OS) {
- TimeRegion Region(CodeGenOpts.TimePasses ?
&CodeGenerationTime : nullptr); -+ -+#if defined(ENABLE_AUTOTUNER) -+ // AUTO-TUNING - auto-tuning initialization for this module. -+ // Initialize it before parsing command-line options because we want to -+ // overwrite the llvm options using the config file. -+ if (Error E = autotuning::Engine.init(TheModule->getModuleIdentifier())) { -+ Diags.Report(diag::err_auto_tuning_error_reading) << toString(std::move(E)); -+ return; -+ } -+ if (autotuning::Engine.isEnabled() && autotuning::Engine.isParseInput() && -+ (autotuning::Engine.LLVMParams.size() || -+ autotuning::Engine.ProgramParams.size())) -+ llvm::cl::ParseAutoTunerOptions(autotuning::Engine.LLVMParams, -+ autotuning::Engine.ProgramParams); -+#endif -+ - setCommandLineOpts(CodeGenOpts); - - bool RequiresCodeGen = actionRequiresCodeGen(Action); -@@ -1142,6 +1192,14 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, - RunOptimizationPipeline(Action, OS, ThinLinkOS); - RunCodegenPipeline(Action, OS, DwoOS); - -+#if defined(ENABLE_AUTOTUNER) -+ // AUTO-TUNING - auto-tuning finalization for this module -+ if (Error E = autotuning::Engine.finalize()) { -+ Diags.Report(diag::err_auto_tuning_error_dumping) << toString(std::move(E)); -+ return; -+ } -+#endif -+ - if (ThinLinkOS) - ThinLinkOS->keep(); - if (DwoOS) -diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp -index 819d7703b2e7..bd9db7714f95 100644 ---- a/clang/lib/Driver/Driver.cpp -+++ b/clang/lib/Driver/Driver.cpp -@@ -104,6 +104,14 @@ - #if LLVM_ON_UNIX - #include // getpid - #endif -+#if defined(ENABLE_AUTOTUNER) -+// Constant definition for environment variable to enable AutoTuner and set -+// the mode to generate opportunities or apply configurations. -+const std::string AutoTuneModeStr = "AUTOTUNE_MODE"; -+// Constant definition for environment variable to specify the project base -+// directory. -+const std::string AutoTunePrjDirStr = "AUTOTUNE_PROJECT_DIR"; -+#endif - - using namespace clang::driver; - using namespace clang; -@@ -200,6 +208,9 @@ Driver::Driver(StringRef ClangExecutable, StringRef TargetTriple, - SaveTemps(SaveTempsNone), BitcodeEmbed(EmbedNone), - Offload(OffloadHostDevice), CXX20HeaderType(HeaderMode_None), - ModulesModeCXX20(false), LTOMode(LTOK_None), -+#if defined(ENABLE_AUTOTUNER) -+ AutoTuneMode(AutoTuneNone), -+#endif - ClangExecutable(ClangExecutable), SysRoot(DEFAULT_SYSROOT), - DriverTitle(Title), CCCPrintBindings(false), CCPrintOptions(false), - CCLogDiagnostics(false), CCGenDiagnostics(false), -@@ -1379,6 +1390,77 @@ Compilation *Driver::BuildCompilation(ArrayRef ArgList) { - - setLTOMode(Args); - -+#if defined(ENABLE_AUTOTUNER) -+ // Process -fautotune and -fautotune-generate flags. -+ bool IsAutoTuneGenerate = Args.hasArg(options::OPT_fautotune_generate, -+ options::OPT_fautotune_generate_EQ); -+ bool IsAutoTune = -+ Args.hasArg(options::OPT_fautotune, options::OPT_fautotune_EQ); -+ // Check if the environment variable AUTOTUNE_MODE is used instead of -+ // -fautotune-generate/-fautotune. 
-+ if (!IsAutoTuneGenerate && !IsAutoTune) { -+ if (std::optional MaybeMode = -+ llvm::sys::Process::GetEnv(AutoTuneModeStr)) { -+ StringRef Mode = *MaybeMode; -+ StringRef OrgMode = *MaybeMode; -+ if (Mode.consume_front("-fautotune-generate")) { -+ if (Mode.empty() || Mode.startswith("=")) -+ IsAutoTuneGenerate = true; -+ else -+ Diags.Report(diag::err_drv_autotune_incorrect_env) << OrgMode; -+ } else if (Mode.consume_front("-fautotune")) { -+ if (Mode.empty() || Mode.startswith("=")) -+ IsAutoTune = true; -+ else -+ Diags.Report(diag::err_drv_autotune_incorrect_env) << OrgMode; -+ } else { -+ Diags.Report(diag::err_drv_autotune_incorrect_env) << OrgMode; -+ } -+ -+ if (Mode.consume_front("=")) { -+ if (Mode.empty()) -+ Diags.Report(diag::err_drv_autotune_no_filter_types) -+ << (IsAutoTuneGenerate ? "-fautotune-generate=" : "-fautotune="); -+ -+ AutoTuneOptions = Mode.str(); -+ } -+ } -+ } -+ -+ IsMLTuningEnabled = Args.hasArg(options::OPT_fautotune_rank); -+ -+ if (IsAutoTuneGenerate && IsAutoTune) -+ Diags.Report(diag::err_drv_argument_not_allowed_with) -+ << "-fautotune" -+ << "-fautotune-generate"; -+ -+ if (IsMLTuningEnabled && !(IsAutoTuneGenerate || IsAutoTune)) -+ Diags.Report(diag::err_drv_argument_only_allowed_with) -+ << "-fautotune-rank" -+ << "-fautotune or -fautotune-generate"; -+ -+ if (IsAutoTuneGenerate || IsAutoTune) { -+ // Check if the environment variable AUTOTUNE_DATADIR is set. -+ if (std::optional MaybePath = -+ llvm::sys::Process::GetEnv("AUTOTUNE_DATADIR")) -+ AutoTuneDirDataPath = *MaybePath; -+ else -+ AutoTuneDirDataPath = "autotune_datadir"; -+ -+ // Check if the environment variable AUTOTUNE_PROJECT_DIR is set. -+ if (std::optional MaybeProjectDIR = -+ llvm::sys::Process::GetEnv(AutoTunePrjDirStr)) -+ AutoTuneProjectDir = *MaybeProjectDIR; -+ else -+ AutoTuneProjectDir = ""; -+ -+ if (IsAutoTuneGenerate) -+ AutoTuneMode = AutoTuneGenerate; -+ if (IsAutoTune) -+ AutoTuneMode = AutoTuneNext; -+ } -+#endif -+ - // Process -fembed-bitcode= flags. - if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) { - StringRef Name = A->getValue(); -diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp -index a4af991b5ff3..933661685117 100644 ---- a/clang/lib/Driver/ToolChains/Clang.cpp -+++ b/clang/lib/Driver/ToolChains/Clang.cpp -@@ -5990,6 +5990,27 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, - if (!Triple.isNVPTX() && !Triple.isAMDGCN()) - addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs); - -+#if defined(ENABLE_AUTOTUNER) -+ // Add Auto-tuning options. -+ if (C.getDriver().isUsingAutoTune()) { -+ Arg *A = Args.getLastArg(options::OPT_O_Group); -+ if (!A || A->getOption().matches(options::OPT_O0)) -+ D.Diag(clang::diag::err_drv_autotune_disabled_O0); -+ -+ // Enable debug info when Auto-tuning options are specified. 
-+ CmdArgs.push_back("-debug-info-kind=line-tables-only"); -+ if (!D.AutoTuneProjectDir.empty()) { -+ CmdArgs.push_back("-mllvm"); -+ CmdArgs.push_back(Args.MakeArgString(Twine("-autotuning-project-dir=") + -+ D.AutoTuneProjectDir)); -+ } -+ if (D.getAutoTuneMode() == AutoTuneKind::AutoTuneGenerate) -+ AddAutoTuningOpportunities(Args, D, CmdArgs); -+ else if (D.getAutoTuneMode() == AutoTuneKind::AutoTuneNext) -+ AddAutoTuningInput(Args, D, CmdArgs); -+ } -+#endif -+ - Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ); - - if (getLastProfileSampleUseArg(Args) && -diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp -index 1ccc83a468ce..e01b21e102b1 100644 ---- a/clang/lib/Driver/ToolChains/CommonArgs.cpp -+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp -@@ -2429,6 +2429,119 @@ void tools::addMachineOutlinerArgs(const Driver &D, - } - } - -+#if defined(ENABLE_AUTOTUNER) -+static bool isAcceptableThinLTOCodeRegion(StringRef CR) { -+ if ((CR.equals("CallSite") || CR.equals("Loop") || CR.equals("Function") || -+ CR.equals("MachineBasicBlock"))) -+ return false; -+ return true; -+} -+ -+static bool processOpportunitiesOptions(StringRef CR, bool IsThinLTO, -+ std::string &CodeRegionsFilterStr) { -+ // Check if the argument has a valid value. -+ if (!(CR.equals("Other") || CR.equals("LLVMParam") || CR.equals("CallSite") || -+ CR.equals("Function") || CR.equals("Loop") || -+ CR.equals("MachineBasicBlock") || CR.equals("Switch") || -+ CR.equals("ProgramParam"))) -+ return false; -+ -+ // Disable fine grain tuning for thin LTO during link time optimization. -+ if (IsThinLTO && !isAcceptableThinLTOCodeRegion(CR)) { -+ llvm::errs() -+ << "error: fine-grained autotuning not supported in ThinLTO mode\n"; -+ return false; -+ } -+ -+ if (!CodeRegionsFilterStr.empty()) -+ CodeRegionsFilterStr += ','; -+ CodeRegionsFilterStr += CR; -+ return true; -+} -+ -+// Add AutoTuner options for generating tuning opporutnities. -+// IsThinLTO will only be true during link time optimization for -flto=thin. -+void tools::AddAutoTuningOpportunities(const ArgList &Args, const Driver &D, -+ ArgStringList &CmdArgs, bool IsThinLTO) { -+ // Dump CodeRegions into opportunity files. -+ CmdArgs.push_back("-mllvm"); -+ SmallString<128> OppPath = StringRef(D.AutoTuneDirDataPath); -+ llvm::sys::path::append(OppPath, "opp"); -+ StringRef RawTypeFilterStr = D.AutoTuneOptions; -+ CmdArgs.push_back(Args.MakeArgString(Twine("-auto-tuning-opp=") + OppPath)); -+ if (D.IsMLTuningEnabled) { -+ // Baseline config is -1 -+ CmdArgs.push_back("-mllvm"); -+ CmdArgs.push_back(Args.MakeArgString(Twine("-auto-tuning-config-id=-1"))); -+ } -+ // Filter CodeRegions by type. 
-+ std::string CodeRegionsFilterStr; -+ if (Arg *A = Args.getLastArg(options::OPT_fautotune_generate_EQ)) { -+ for (StringRef CR : A->getValues()) { -+ if (!processOpportunitiesOptions(CR, IsThinLTO, CodeRegionsFilterStr)) -+ D.Diag(diag::err_drv_unsupported_option_argument) -+ << A->getOption().getName() << CR; -+ } -+ } else if (!RawTypeFilterStr.empty()) { -+ SmallVector TypeFilters; -+ RawTypeFilterStr.split(TypeFilters, ','); -+ for (StringRef CR : TypeFilters) { -+ if (!processOpportunitiesOptions(CR, IsThinLTO, CodeRegionsFilterStr)) -+ D.Diag(diag::err_drv_unsupported_option_argument) -+ << "fautotune-generate" << CR; -+ } -+ } else { -+ if (IsThinLTO) -+ D.Diag(diag::err_drv_autotune_generic) -+ << "AutoTuner: no valid code region type specified for ThinLTO mode"; -+ // Otherwise by default, dump CodeRegions of Function and Loop type. -+ CodeRegionsFilterStr = "CallSite,Function,Loop"; -+ } -+ CmdArgs.push_back("-mllvm"); -+ CmdArgs.push_back( -+ Args.MakeArgString("-auto-tuning-type-filter=" + CodeRegionsFilterStr)); -+} -+ -+static bool processInputOptions(StringRef Options, SmallString<128> &Path, -+ const ArgList &Args, const Driver &D, -+ llvm::opt::ArgStringList &CmdArgs) { -+ unsigned Value = 0; -+ // Check if the argument is an integer type. -+ if (Options.getAsInteger(10, Value)) -+ return false; -+ llvm::sys::path::append(Path, "config-" + Twine(Value) + ".yaml"); -+ if (D.IsMLTuningEnabled) { -+ CmdArgs.push_back("-mllvm"); -+ CmdArgs.push_back( -+ Args.MakeArgString(Twine("-auto-tuning-config-id=" + Twine(Value)))); -+ } -+ return true; -+} -+ -+void tools::AddAutoTuningInput(const ArgList &Args, const Driver &D, -+ llvm::opt::ArgStringList &CmdArgs) { -+ SmallString<128> InputPath = StringRef(D.AutoTuneDirDataPath); -+ StringRef RawOptionsStr = D.AutoTuneOptions; -+ -+ if (Arg *A = Args.getLastArg(options::OPT_fautotune_EQ)) { -+ if (!processInputOptions(StringRef(A->getValue()), InputPath, Args, D, -+ CmdArgs)) -+ D.Diag(diag::err_drv_invalid_int_value) -+ << A->getAsString(Args) << A->getValue(); -+ } else if (!RawOptionsStr.empty()) { -+ if (!processInputOptions(RawOptionsStr, InputPath, Args, D, CmdArgs)) -+ D.Diag(diag::err_drv_invalid_int_value) -+ << "-fautotune=" + RawOptionsStr.str() << RawOptionsStr; -+ } else { -+ llvm::sys::path::append(InputPath, "config.yaml"); -+ } -+ CmdArgs.push_back("-mllvm"); -+ CmdArgs.push_back( -+ Args.MakeArgString(Twine("-auto-tuning-input=") + InputPath)); -+ setenv("AUTOTUNE_INPUT", Args.MakeArgString(InputPath), 1); -+} -+#endif -+ - void tools::addOpenMPDeviceRTL(const Driver &D, - const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, -diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h -index bd5cb1bb866e..36103655c522 100644 ---- a/clang/lib/Driver/ToolChains/CommonArgs.h -+++ b/clang/lib/Driver/ToolChains/CommonArgs.h -@@ -240,6 +240,14 @@ void addMachineOutlinerArgs(const Driver &D, const llvm::opt::ArgList &Args, - const llvm::Triple &Triple, bool IsLTO, - const StringRef PluginOptPrefix = ""); - -+#if defined(ENABLE_AUTOTUNER) -+void AddAutoTuningOpportunities(const llvm::opt::ArgList &Args, const Driver &D, -+ llvm::opt::ArgStringList &CmdArgs, -+ bool isThinLTO = false); -+void AddAutoTuningInput(const llvm::opt::ArgList &Args, const Driver &D, -+ llvm::opt::ArgStringList &CmdArgs); -+#endif -+ - void addOpenMPDeviceRTL(const Driver &D, const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - StringRef BitcodeSuffix, const 
llvm::Triple &Triple); -diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp -index 42050dced99c..91a9eda9d78c 100644 ---- a/clang/lib/Driver/ToolChains/Gnu.cpp -+++ b/clang/lib/Driver/ToolChains/Gnu.cpp -@@ -682,6 +682,40 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, - - Args.AddAllArgs(CmdArgs, options::OPT_T); - -+#if defined(ENABLE_AUTOTUNER) -+ // AutoTuner related features will only be enabled for LTO build during -+ // linking phase. Otherwise, non LTO build will require lld linker -+ // unnecessarily (other linkers do not support AutoTuner). -+ if (D.isUsingAutoTune() && D.isUsingLTO()) { -+ bool LinkerIsLLD = false; -+ (void) ToolChain.GetLinkerPath(&LinkerIsLLD); -+ // AutoTuner support is only available for LLD Linker. -+ if (!LinkerIsLLD) -+ D.Diag(clang::diag::err_drv_lto_without_lld); -+ -+ bool IsThinLTO = D.getLTOMode() == LTOK_Thin; -+ if (!D.AutoTuneProjectDir.empty()) { -+ CmdArgs.push_back("-mllvm"); -+ CmdArgs.push_back(Args.MakeArgString(Twine("-autotuning-project-dir=") + -+ D.AutoTuneProjectDir)); -+ } -+ // Enable tuning of callsites cause all of the callsites will have local -+ // linkage during LTO and they are not tuned by default. -+ CmdArgs.push_back(Args.MakeArgString("-mllvm")); -+ CmdArgs.push_back( -+ Args.MakeArgString("-auto-tuning-enable-local-callsite-tuning=true")); -+ if (D.getAutoTuneMode() == AutoTuneKind::AutoTuneGenerate) { -+ AddAutoTuningOpportunities(Args, D, CmdArgs, IsThinLTO); -+ } else if (D.getAutoTuneMode() == AutoTuneKind::AutoTuneNext) { -+ AddAutoTuningInput(Args, D, CmdArgs); -+ if (IsThinLTO) { -+ CmdArgs.push_back("-mllvm"); -+ CmdArgs.push_back("-autotuning-thin-lto=true"); -+ } -+ } -+ } -+#endif -+ - const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); - C.addCommand(std::make_unique(JA, *this, - ResponseFileSupport::AtFileCurCP(), -diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp -index 310f67774a66..92beeef9bd5e 100644 ---- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp -+++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp -@@ -222,6 +222,33 @@ bool ExecuteCompilerInvocation(CompilerInstance *Clang) { - // This should happen AFTER plugins have been loaded! - if (!Clang->getFrontendOpts().LLVMArgs.empty()) { - unsigned NumArgs = Clang->getFrontendOpts().LLVMArgs.size(); -+#if defined(ENABLE_AUTOTUNER) -+ // Both incremental compilation (for AutoTuner) and 'opt-bisect-limit' -+ // changes the behavior of compilation pipeline. If incremental compilation -+ // is used along with 'opt-bisect-limit' then 'opt-bisect-limit' is -+ // preferred and incremental compilation is disabled. 
-+ unsigned BisectLimitFound = 0;
-+ unsigned CompileModeFound = 0;
-+ for (unsigned Idx = 0; Idx != NumArgs; ++Idx) {
-+ if (Clang->getFrontendOpts().LLVMArgs[Idx].find("-opt-bisect-limit=") !=
-+ std::string::npos)
-+ BisectLimitFound = Idx;
-+ if (Clang->getFrontendOpts().LLVMArgs[Idx].find(
-+ "-auto-tuning-compile-mode=") != std::string::npos)
-+ CompileModeFound = Idx;
-+ if (BisectLimitFound && CompileModeFound)
-+ break;
-+ }
-+ if (BisectLimitFound && CompileModeFound &&
-+ Clang->getFrontendOpts().LLVMArgs[CompileModeFound].compare(
-+ "-auto-tuning-compile-mode=Inactive") != 0) {
-+ Clang->getFrontendOpts().LLVMArgs[CompileModeFound] =
-+ "-auto-tuning-compile-mode=Inactive";
-+ llvm::errs() << "AutoTunerCompile: Incremental compilation cannot work "
-+ "with '-opt-bisect-limit' flag.\n"
-+ "Disabling incremental compilation.\n";
-+ }
-+#endif
- auto Args = std::make_unique<const char*[]>(NumArgs + 2);
- Args[0] = "clang (LLVM option parsing)";
- for (unsigned i = 0; i != NumArgs; ++i)
---
-2.33.0
-
diff --git a/0017-fix-for-missing-DENABLE_AUTOTUNER.patch b/0017-fix-for-missing-DENABLE_AUTOTUNER.patch
deleted file mode 100644
index 6f5a7e47b9d99e627b9f73920cf738d5f50038cb..0000000000000000000000000000000000000000
--- a/0017-fix-for-missing-DENABLE_AUTOTUNER.patch
+++ /dev/null
@@ -1,17 +0,0 @@
-diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
-index 949f12d3ce8c..98fcb6ea1a07 100644
---- a/clang/CMakeLists.txt
-+++ b/clang/CMakeLists.txt
-@@ -322,6 +322,10 @@ if (BUILD_FOR_OPENEULER)
- add_definitions( -DBUILD_FOR_OPENEULER )
- endif()
-
-+if (LLVM_ENABLE_AUTOTUNER)
-+ add_definitions( -DENABLE_AUTOTUNER )
-+endif()
-+
- # Determine HOST_LINK_VERSION on Darwin.
- set(HOST_LINK_VERSION)
- if (APPLE AND NOT CMAKE_LINKER MATCHES ".*lld.*")
-
--
-
\ No newline at end of file
diff --git a/0018-backport-Clang-Fix-build-with-GCC-14-on-ARM-78704.patch b/0018-backport-Clang-Fix-build-with-GCC-14-on-ARM-78704.patch
deleted file mode 100644
index 27d577a28187e4295c5e7b3ff137ace66e99da50..0000000000000000000000000000000000000000
--- a/0018-backport-Clang-Fix-build-with-GCC-14-on-ARM-78704.patch
+++ /dev/null
@@ -1,64 +0,0 @@
-From 505323d49f4621e5f7210d99fd52dd33a6223fa8 Mon Sep 17 00:00:00 2001
-From: eastb233
-Date: Tue, 3 Sep 2024 11:59:57 +0800
-Subject: [PATCH] [backport][Clang] Fix build with GCC 14 on ARM (#78704)
-
-Reference: https://github.com/llvm/llvm-project/pull/78704
-
-GCC 14 defines `__arm_streaming` as a macro expanding to
-`[[arm::streaming]]`. Due to the nested macro use, this gets expanded
-prior to concatenation.
-
-It doesn't look like C++ has a really clean way to prevent macro
-expansion. The best I have found is to use `EMPTY ## X` where `EMPTY` is
-an empty macro argument, so this is the hack I'm implementing here.
-
-Fixes https://github.com/llvm/llvm-project/issues/78691.
----
- clang/include/clang/Basic/TokenKinds.def | 2 +-
- clang/include/clang/Basic/TokenKinds.h | 2 +-
- clang/utils/TableGen/ClangAttrEmitter.cpp | 2 +-
- 3 files changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
-index ef0dad0f2dcd..afd101b007b4 100644
---- a/clang/include/clang/Basic/TokenKinds.def
-+++ b/clang/include/clang/Basic/TokenKinds.def
-@@ -753,7 +753,7 @@ KEYWORD(__builtin_sycl_unique_stable_name, KEYSYCL)
-
- // Keywords defined by Attr.td.
- #ifndef KEYWORD_ATTRIBUTE --#define KEYWORD_ATTRIBUTE(X) KEYWORD(X, KEYALL) -+#define KEYWORD_ATTRIBUTE(X, EMPTY) KEYWORD(EMPTY ## X, KEYALL) - #endif - #include "clang/Basic/AttrTokenKinds.inc" - -diff --git a/clang/include/clang/Basic/TokenKinds.h b/clang/include/clang/Basic/TokenKinds.h -index e4857405bc7f..ff117bd5afc5 100644 ---- a/clang/include/clang/Basic/TokenKinds.h -+++ b/clang/include/clang/Basic/TokenKinds.h -@@ -109,7 +109,7 @@ bool isPragmaAnnotation(TokenKind K); - - inline constexpr bool isRegularKeywordAttribute(TokenKind K) { - return (false --#define KEYWORD_ATTRIBUTE(X) || (K == tok::kw_##X) -+#define KEYWORD_ATTRIBUTE(X, ...) || (K == tok::kw_##X) - #include "clang/Basic/AttrTokenKinds.inc" - ); - } -diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp -index b5813c6abc2b..79db17501b64 100644 ---- a/clang/utils/TableGen/ClangAttrEmitter.cpp -+++ b/clang/utils/TableGen/ClangAttrEmitter.cpp -@@ -3430,7 +3430,7 @@ void EmitClangAttrTokenKinds(RecordKeeper &Records, raw_ostream &OS) { - "RegularKeyword attributes with arguments are not " - "yet supported"); - OS << "KEYWORD_ATTRIBUTE(" -- << S.getSpellingRecord().getValueAsString("Name") << ")\n"; -+ << S.getSpellingRecord().getValueAsString("Name") << ", )\n"; - } - OS << "#undef KEYWORD_ATTRIBUTE\n"; - } --- -2.38.1.windows.1 - diff --git a/0019-AArch64-Support-HiSilicon-s-HIP09-Processor.patch b/0019-AArch64-Support-HiSilicon-s-HIP09-Processor.patch deleted file mode 100644 index 11a8c1712ef9d769c48e43aa73e1a4a9fecafc86..0000000000000000000000000000000000000000 --- a/0019-AArch64-Support-HiSilicon-s-HIP09-Processor.patch +++ /dev/null @@ -1,176 +0,0 @@ -From cac43828d26b178807d194b4bd7c5df69603df29 Mon Sep 17 00:00:00 2001 -From: xiajingze -Date: Wed, 31 Jul 2024 18:37:29 +0800 -Subject: [PATCH] [AArch64] Support HiSilicon's HIP09 Processor - -Signed-off-by: xiajingze ---- - clang/test/CMakeLists.txt | 1 ++ - clang/test/Driver/aarch64-hip09.c | 9 ++ - .../test/Misc/target-invalid-cpu-note-hip09.c | 97 +++++++++++++++++++ - clang/test/Misc/target-invalid-cpu-note.c | 1 + - clang/test/lit.site.cfg.py.in | 4 + - 5 files changed, 112 insertions(+), 0 deletion(-) - create mode 100644 clang/test/Driver/aarch64-hip09.c - create mode 100644 clang/test/Misc/target-invalid-cpu-note-hip09.c - -diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt -index b88694deb..25e4e1f30 100644 ---- a/clang/test/CMakeLists.txt -+++ b/clang/test/CMakeLists.txt -@@ -19,6 +19,7 @@ llvm_canonicalize_cmake_booleans( - LLVM_WITH_Z3 - PPC_LINUX_DEFAULT_IEEELONGDOUBLE - LLVM_TOOL_LLVM_DRIVER_BUILD -+ LLVM_ENABLE_AARCH64_HIP09 - ) - - configure_lit_site_cfg( -diff --git a/clang/test/Driver/aarch64-hip09.c b/clang/test/Driver/aarch64-hip09.c -new file mode 100644 -index 000000000..156be3f38 ---- /dev/null -+++ b/clang/test/Driver/aarch64-hip09.c -@@ -0,0 +1,9 @@ -+// REQUIRES: enable_enable_aarch64_hip09 -+// RUN: %clang -target aarch64_be -mcpu=hip09 -### -c %s 2>&1 | FileCheck -check-prefix=hip09-BE %s -+// RUN: %clang -target aarch64 -mbig-endian -mcpu=hip09 -### -c %s 2>&1 | FileCheck -check-prefix=hip09-BE %s -+// RUN: %clang -target aarch64_be -mbig-endian -mcpu=hip09 -### -c %s 2>&1 | FileCheck -check-prefix=hip09-BE %s -+// RUN: %clang -target aarch64_be -mtune=hip09 -### -c %s 2>&1 | FileCheck -check-prefix=hip09-BE-TUNE %s -+// RUN: %clang -target aarch64 -mbig-endian -mtune=hip09 -### -c %s 2>&1 | FileCheck -check-prefix=hip09-BE-TUNE %s -+// RUN: %clang -target aarch64_be 
-mbig-endian -mtune=hip09 -### -c %s 2>&1 | FileCheck -check-prefix=hip09-BE-TUNE %s -+// hip09-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "hip09" -+// hip09-BE-TUNE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" -diff --git a/clang/test/Misc/target-invalid-cpu-note-hip09.c b/clang/test/Misc/target-invalid-cpu-note-hip09.c -new file mode 100644 -index 000000000..f2561a089 ---- /dev/null -+++ b/clang/test/Misc/target-invalid-cpu-note-hip09.c -@@ -0,0 +1,97 @@ -+// REQUIRES: enable_enable_aarch64_hip09 -+// Use CHECK-NEXT instead of multiple CHECK-SAME to ensure we will fail if there is anything extra in the output. -+// RUN: not %clang_cc1 -triple armv5--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix ARM -+// ARM: error: unknown target CPU 'not-a-cpu' -+// ARM-NEXT: note: valid target CPU values are: arm8, arm810, strongarm, strongarm110, strongarm1100, strongarm1110, arm7tdmi, arm7tdmi-s, arm710t, arm720t, arm9, arm9tdmi, arm920, arm920t, arm922t, arm940t, ep9312, arm10tdmi, arm1020t, arm9e, arm946e-s, arm966e-s, arm968e-s, arm10e, arm1020e, arm1022e, arm926ej-s, arm1136j-s, arm1136jf-s, mpcore, mpcorenovfp, arm1176jz-s, arm1176jzf-s, arm1156t2-s, arm1156t2f-s, cortex-m0, cortex-m0plus, cortex-m1, sc000, cortex-a5, cortex-a7, cortex-a8, cortex-a9, cortex-a12, cortex-a15, cortex-a17, krait, cortex-r4, cortex-r4f, cortex-r5, cortex-r7, cortex-r8, cortex-r52, sc300, cortex-m3, cortex-m4, cortex-m7, cortex-m23, cortex-m33, cortex-m35p, cortex-m55, cortex-m85, cortex-a32, cortex-a35, cortex-a53, cortex-a55, cortex-a57, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-x1, cortex-x1c, neoverse-n1, neoverse-n2, neoverse-v1, cyclone, exynos-m3, exynos-m4, exynos-m5, kryo, iwmmxt, xscale, swift{{$}} -+ -+// RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64 -+// AARCH64: error: unknown target CPU 'not-a-cpu' -+// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, hip09, grace{{$}} -+ -+// RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64 -+// TUNE_AARCH64: error: unknown target CPU 'not-a-cpu' -+// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, 
apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, hip09, grace{{$}} -+ -+// RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86 -+// X86: error: unknown target CPU 'not-a-cpu' -+// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}} -+ -+// RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64 -+// X86_64: error: unknown target CPU 'not-a-cpu' -+// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}} -+ -+// RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86 -+// TUNE_X86: error: unknown target CPU 'not-a-cpu' -+// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}} -+ -+// RUN: not %clang_cc1 -triple x86_64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86_64 -+// TUNE_X86_64: error: unknown 
target CPU 'not-a-cpu' -+// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}} -+ -+// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX -+// NVPTX: error: unknown target CPU 'not-a-cpu' -+// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151{{$}} -+ -+// RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600 -+// R600: error: unknown target CPU 'not-a-cpu' -+// R600-NEXT: note: valid target CPU values are: r600, rv630, rv635, r630, rs780, rs880, rv610, rv620, rv670, rv710, rv730, rv740, rv770, cedar, palm, cypress, hemlock, juniper, redwood, sumo, sumo2, barts, caicos, aruba, cayman, turks{{$}} -+ -+// RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN -+// AMDGCN: error: unknown target CPU 'not-a-cpu' -+// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151{{$}} -+ -+// RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM -+// WEBASM: error: unknown target CPU 'not-a-cpu' -+// WEBASM-NEXT: note: valid target CPU values are: mvp, bleeding-edge, generic{{$}} -+ -+// RUN: not %clang_cc1 -triple systemz--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix SYSTEMZ -+// SYSTEMZ: error: unknown target CPU 'not-a-cpu' -+// SYSTEMZ-NEXT: note: valid target CPU values are: arch8, z10, arch9, z196, arch10, zEC12, arch11, z13, arch12, z14, arch13, z15, arch14, z16{{$}} -+ -+// RUN: not %clang_cc1 -triple sparc--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s 
--check-prefix SPARC -+// SPARC: error: unknown target CPU 'not-a-cpu' -+// SPARC-NEXT: note: valid target CPU values are: v8, supersparc, sparclite, f934, hypersparc, sparclite86x, sparclet, tsc701, v9, ultrasparc, ultrasparc3, niagara, niagara2, niagara3, niagara4, ma2100, ma2150, ma2155, ma2450, ma2455, ma2x5x, ma2080, ma2085, ma2480, ma2485, ma2x8x, myriad2, myriad2.1, myriad2.2, myriad2.3, leon2, at697e, at697f, leon3, ut699, gr712rc, leon4, gr740{{$}} -+ -+// RUN: not %clang_cc1 -triple sparcv9--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix SPARCV9 -+// SPARCV9: error: unknown target CPU 'not-a-cpu' -+// SPARCV9-NEXT: note: valid target CPU values are: v9, ultrasparc, ultrasparc3, niagara, niagara2, niagara3, niagara4{{$}} -+ -+// RUN: not %clang_cc1 -triple powerpc--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix PPC -+// PPC: error: unknown target CPU 'not-a-cpu' -+// PPC-NEXT: note: valid target CPU values are: generic, 440, 450, 601, 602, 603, 603e, 603ev, 604, 604e, 620, 630, g3, 7400, g4, 7450, g4+, 750, 8548, 970, g5, a2, e500, e500mc, e5500, power3, pwr3, power4, pwr4, power5, pwr5, power5x, pwr5x, power6, pwr6, power6x, pwr6x, power7, pwr7, power8, pwr8, power9, pwr9, power10, pwr10, powerpc, ppc, ppc32, powerpc64, ppc64, powerpc64le, ppc64le, future{{$}} -+ -+// RUN: not %clang_cc1 -triple mips--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix MIPS -+// MIPS: error: unknown target CPU 'not-a-cpu' -+// MIPS-NEXT: note: valid target CPU values are: mips1, mips2, mips3, mips4, mips5, mips32, mips32r2, mips32r3, mips32r5, mips32r6, mips64, mips64r2, mips64r3, mips64r5, mips64r6, octeon, octeon+, p5600{{$}} -+ -+// RUN: not %clang_cc1 -triple lanai--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix LANAI -+// LANAI: error: unknown target CPU 'not-a-cpu' -+// LANAI-NEXT: note: valid target CPU values are: v11{{$}} -+ -+// RUN: not %clang_cc1 -triple hexagon--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix HEXAGON -+// HEXAGON: error: unknown target CPU 'not-a-cpu' -+// HEXAGON-NEXT: note: valid target CPU values are: hexagonv5, hexagonv55, hexagonv60, hexagonv62, hexagonv65, hexagonv66, hexagonv67, hexagonv67t, hexagonv68, hexagonv69, hexagonv71, hexagonv71t, hexagonv73{{$}} -+ -+// RUN: not %clang_cc1 -triple bpf--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix BPF -+// BPF: error: unknown target CPU 'not-a-cpu' -+// BPF-NEXT: note: valid target CPU values are: generic, v1, v2, v3, probe{{$}} -+ -+// RUN: not %clang_cc1 -triple avr--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AVR -+// AVR: error: unknown target CPU 'not-a-cpu' -+// AVR-NEXT: note: valid target CPU values are: avr1, at90s1200, attiny11, attiny12, attiny15, attiny28, avr2, at90s2313, at90s2323, at90s2333, at90s2343, attiny22, attiny26, at86rf401, at90s4414, at90s4433, at90s4434, at90s8515, at90c8534, at90s8535, avr25, ata5272, ata6616c, attiny13, attiny13a, attiny2313, attiny2313a, attiny24, attiny24a, attiny4313, attiny44, attiny44a, attiny84, attiny84a, attiny25, attiny45, attiny85, attiny261, attiny261a, attiny441, attiny461, attiny461a, attiny841, attiny861, attiny861a, attiny87, attiny43u, attiny48, attiny88, attiny828, avr3, at43usb355, at76c711, avr31, atmega103, at43usb320, avr35, attiny167, at90usb82, at90usb162, ata5505, ata6617c, ata664251, atmega8u2, atmega16u2, atmega32u2, attiny1634, avr4, atmega8, 
ata6289, atmega8a, ata6285, ata6286, ata6612c, atmega48, atmega48a, atmega48pa, atmega48pb, atmega48p, atmega88, atmega88a, atmega88p, atmega88pa, atmega88pb, atmega8515, atmega8535, atmega8hva, at90pwm1, at90pwm2, at90pwm2b, at90pwm3, at90pwm3b, at90pwm81, avr5, ata5702m322, ata5782, ata5790, ata5790n, ata5791, ata5795, ata5831, ata6613c, ata6614q, ata8210, ata8510, atmega16, atmega16a, atmega161, atmega162, atmega163, atmega164a, atmega164p, atmega164pa, atmega165, atmega165a, atmega165p, atmega165pa, atmega168, atmega168a, atmega168p, atmega168pa, atmega168pb, atmega169, atmega169a, atmega169p, atmega169pa, atmega32, atmega32a, atmega323, atmega324a, atmega324p, atmega324pa, atmega324pb, atmega325, atmega325a, atmega325p, atmega325pa, atmega3250, atmega3250a, atmega3250p, atmega3250pa, atmega328, atmega328p, atmega328pb, atmega329, atmega329a, atmega329p, atmega329pa, atmega3290, atmega3290a, atmega3290p, atmega3290pa, atmega406, atmega64, atmega64a, atmega640, atmega644, atmega644a, atmega644p, atmega644pa, atmega645, atmega645a, atmega645p, atmega649, atmega649a, atmega649p, atmega6450, atmega6450a, atmega6450p, atmega6490, atmega6490a, atmega6490p, atmega64rfr2, atmega644rfr2, atmega16hva, atmega16hva2, atmega16hvb, atmega16hvbrevb, atmega32hvb, atmega32hvbrevb, atmega64hve, atmega64hve2, at90can32, at90can64, at90pwm161, at90pwm216, at90pwm316, atmega32c1, atmega64c1, atmega16m1, atmega32m1, atmega64m1, atmega16u4, atmega32u4, atmega32u6, at90usb646, at90usb647, at90scr100, at94k, m3000, avr51, atmega128, atmega128a, atmega1280, atmega1281, atmega1284, atmega1284p, atmega128rfa1, atmega128rfr2, atmega1284rfr2, at90can128, at90usb1286, at90usb1287, avr6, atmega2560, atmega2561, atmega256rfr2, atmega2564rfr2, avrxmega2, atxmega16a4, atxmega16a4u, atxmega16c4, atxmega16d4, atxmega32a4, atxmega32a4u, atxmega32c3, atxmega32c4, atxmega32d3, atxmega32d4, atxmega32e5, atxmega16e5, atxmega8e5, avrxmega4, atxmega64a3, atxmega64a3u, atxmega64a4u, atxmega64b1, atxmega64b3, atxmega64c3, atxmega64d3, atxmega64d4, avrxmega5, atxmega64a1, atxmega64a1u, avrxmega6, atxmega128a3, atxmega128a3u, atxmega128b1, atxmega128b3, atxmega128c3, atxmega128d3, atxmega128d4, atxmega192a3, atxmega192a3u, atxmega192c3, atxmega192d3, atxmega256a3, atxmega256a3u, atxmega256a3b, atxmega256a3bu, atxmega256c3, atxmega256d3, atxmega384c3, atxmega384d3, avrxmega7, atxmega128a1, atxmega128a1u, atxmega128a4u, avrtiny, attiny4, attiny5, attiny9, attiny10, attiny20, attiny40, attiny102, attiny104, avrxmega3, attiny202, attiny402, attiny204, attiny404, attiny804, attiny1604, attiny406, attiny806, attiny1606, attiny807, attiny1607, attiny212, attiny412, attiny214, attiny414, attiny814, attiny1614, attiny416, attiny816, attiny1616, attiny3216, attiny417, attiny817, attiny1617, attiny3217, attiny1624, attiny1626, attiny1627, atmega808, atmega809, atmega1608, atmega1609, atmega3208, atmega3209, atmega4808, atmega4809 -+ -+// RUN: not %clang_cc1 -triple riscv32 -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix RISCV32 -+// RISCV32: error: unknown target CPU 'not-a-cpu' -+// RISCV32-NEXT: note: valid target CPU values are: generic-rv32, rocket-rv32, sifive-e20, sifive-e21, sifive-e24, sifive-e31, sifive-e34, sifive-e76, syntacore-scr1-base, syntacore-scr1-max{{$}} -+ -+// RUN: not %clang_cc1 -triple riscv64 -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix RISCV64 -+// RISCV64: error: unknown target CPU 'not-a-cpu' -+// RISCV64-NEXT: note: valid target CPU values are: generic-rv64, 
rocket-rv64, sifive-s21, sifive-s51, sifive-s54, sifive-s76, sifive-u54, sifive-u74, sifive-x280{{$}} -+ -+// RUN: not %clang_cc1 -triple riscv32 -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE-RISCV32 -+// TUNE-RISCV32: error: unknown target CPU 'not-a-cpu' -+// TUNE-RISCV32-NEXT: note: valid target CPU values are: generic-rv32, rocket-rv32, sifive-e20, sifive-e21, sifive-e24, sifive-e31, sifive-e34, sifive-e76, syntacore-scr1-base, syntacore-scr1-max, generic, rocket, sifive-7-series{{$}} -+ -+// RUN: not %clang_cc1 -triple riscv64 -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE-RISCV64 -+// TUNE-RISCV64: error: unknown target CPU 'not-a-cpu' -+// TUNE-RISCV64-NEXT: note: valid target CPU values are: generic-rv64, rocket-rv64, sifive-s21, sifive-s51, sifive-s54, sifive-s76, sifive-u54, sifive-u74, sifive-x280, generic, rocket, sifive-7-series{{$}} -diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c -index cd1b0bc15..466b26263 100644 ---- a/clang/test/Misc/target-invalid-cpu-note.c -+++ b/clang/test/Misc/target-invalid-cpu-note.c -@@ -1,3 +1,4 @@ -+// UNSUPPORTED: enable_enable_aarch64_hip09 - // Use CHECK-NEXT instead of multiple CHECK-SAME to ensure we will fail if there is anything extra in the output. - // RUN: not %clang_cc1 -triple armv5--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix ARM - // ARM: error: unknown target CPU 'not-a-cpu' -diff --git a/clang/test/lit.site.cfg.py.in b/clang/test/lit.site.cfg.py.in -index 89b7cafdc..7728be7d4 100644 ---- a/clang/test/lit.site.cfg.py.in -+++ b/clang/test/lit.site.cfg.py.in -@@ -44,9 +44,13 @@ config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@ - config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@ - config.build_for_openeuler = @BUILD_FOR_OPENEULER@ -+config.enable_enable_aarch64_hip09 = @LLVM_ENABLE_AARCH64_HIP09@ - - import lit.llvm - lit.llvm.initialize(lit_config, config) - -+if config.enable_enable_aarch64_hip09: -+ config.available_features.add("enable_enable_aarch64_hip09") -+ - # Let the main config do the real work. - lit_config.load_config( - config, os.path.join(config.clang_src_dir, "test/lit.cfg.py")) --- -2.19.1 - diff --git a/0020-Backport-LoongArch-fix-and-add-some-new-support.patch b/0020-Backport-LoongArch-fix-and-add-some-new-support.patch deleted file mode 100644 index bb7566c0fbdf39beea761d29f7eece7680464e31..0000000000000000000000000000000000000000 --- a/0020-Backport-LoongArch-fix-and-add-some-new-support.patch +++ /dev/null @@ -1,1715 +0,0 @@ -From 0cae10595a7521e2c430c605c1f830570b3c9682 Mon Sep 17 00:00:00 2001 -From: Lu Weining -Date: Thu, 30 Nov 2023 14:08:45 +0800 -Subject: [PATCH 1/9] [Driver] Support -mcmodel= for LoongArch (#72514) - -7e42545 rejects unsupported mcmodel options, but normal/medium/extreme -should be supported models for LoongArch according to [gcc -document](https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html). - -The mappings among `gcc`, `clang driver`, `clang cc1` and `LLVM (i.e. 
-llc --code-model=)` are:
-
-| gcc | clang driver | clang cc1 | LLVM |
-| ------------- | ------------------ | ----------------- | -------------- |
-| normal | normal | small | small |
-| medium | medium | medium | medium |
-| extreme | extreme | large | large |
-
-(cherry picked from commit 1296d20adfb0978afe38d67efab9818079d870ca)
----
- clang/lib/Driver/ToolChains/Clang.cpp | 38 ++++++++++++++++++++-------
- clang/test/Driver/mcmodel.c | 15 +++++++++++
- 2 files changed, 44 insertions(+), 9 deletions(-)
-
-diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
-index fac4f03d6193..4e5f689498d6 100644
---- a/clang/lib/Driver/ToolChains/Clang.cpp
-+++ b/clang/lib/Driver/ToolChains/Clang.cpp
-@@ -5773,18 +5773,38 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
-
- if (Arg *A = Args.getLastArg(options::OPT_mcmodel_EQ)) {
- StringRef CM = A->getValue();
-- if (CM == "small" || CM == "kernel" || CM == "medium" || CM == "large" ||
-- CM == "tiny") {
-- if (Triple.isOSAIX() && CM == "medium")
-- CmdArgs.push_back("-mcmodel=large");
-- else if (Triple.isAArch64() && (CM == "kernel" || CM == "medium"))
-+ if (Triple.isLoongArch()) {
-+ bool Ok = false;
-+ if (CM == "extreme" &&
-+ Args.hasFlagNoClaim(options::OPT_fplt, options::OPT_fno_plt, false))
-+ D.Diag(diag::err_drv_argument_not_allowed_with)
-+ << A->getAsString(Args) << "-fplt";
-+ Ok = CM == "normal" || CM == "medium" || CM == "extreme";
-+ // Convert to LLVM recognizable names.
-+ if (Ok) {
-+ CM = llvm::StringSwitch<StringRef>(CM)
-+ .Case("normal", "small")
-+ .Case("extreme", "large")
-+ .Default(CM);
-+ CmdArgs.push_back(Args.MakeArgString("-mcmodel=" + CM));
-+ } else {
- D.Diag(diag::err_drv_invalid_argument_to_option)
- << CM << A->getOption().getName();
-- else
-- A->render(Args, CmdArgs);
-+ }
- } else {
-- D.Diag(diag::err_drv_invalid_argument_to_option)
-- << CM << A->getOption().getName();
-+ if (CM == "small" || CM == "kernel" || CM == "medium" || CM == "large" ||
-+ CM == "tiny") {
-+ if (Triple.isOSAIX() && CM == "medium")
-+ CmdArgs.push_back("-mcmodel=large");
-+ else if (Triple.isAArch64() && (CM == "kernel" || CM == "medium"))
-+ D.Diag(diag::err_drv_invalid_argument_to_option)
-+ << CM << A->getOption().getName();
-+ else
-+ A->render(Args, CmdArgs);
-+ } else {
-+ D.Diag(diag::err_drv_invalid_argument_to_option)
-+ << CM << A->getOption().getName();
-+ }
- }
- }
-
-diff --git a/clang/test/Driver/mcmodel.c b/clang/test/Driver/mcmodel.c
-index 63b432036159..4aada126cf06 100644
---- a/clang/test/Driver/mcmodel.c
-+++ b/clang/test/Driver/mcmodel.c
-@@ -8,6 +8,14 @@
- // RUN: not %clang -c -mcmodel=lager %s 2>&1 | FileCheck --check-prefix=INVALID %s
- // RUN: not %clang -c --target=aarch64 -mcmodel=medium %s 2>&1 | FileCheck --check-prefix=AARCH64-MEDIUM %s
- // RUN: not %clang -c --target=aarch64 -mcmodel=kernel %s 2>&1 | FileCheck --check-prefix=AARCH64-KERNEL %s
-+// RUN: %clang --target=loongarch64 -### -S -mcmodel=normal %s 2>&1 | FileCheck --check-prefix=SMALL %s
-+// RUN: %clang --target=loongarch64 -### -S -mcmodel=medium %s 2>&1 | FileCheck --check-prefix=MEDIUM %s
-+// RUN: %clang --target=loongarch64 -### -S -mcmodel=extreme %s 2>&1 | FileCheck --check-prefix=LARGE %s
-+// RUN: not %clang -c --target=loongarch64 -mcmodel=tiny %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-TINY %s
-+// RUN: not %clang -c --target=loongarch64 -mcmodel=small %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-SMALL %s
-+// RUN: not %clang -c --target=loongarch64 -mcmodel=kernel %s 2>&1
| FileCheck --check-prefix=ERR-LOONGARCH64-KERNEL %s -+// RUN: not %clang -c --target=loongarch64 -mcmodel=large %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-LARGE %s -+// RUN: not %clang -c --target=loongarch64 -mcmodel=extreme -fplt %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-PLT-EXTREME %s - - // TINY: "-mcmodel=tiny" - // SMALL: "-mcmodel=small" -@@ -20,3 +28,10 @@ - - // AARCH64-MEDIUM: error: invalid argument 'medium' to -mcmodel= - // AARCH64-KERNEL: error: invalid argument 'kernel' to -mcmodel= -+ -+// ERR-LOONGARCH64-TINY: error: invalid argument 'tiny' to -mcmodel= -+// ERR-LOONGARCH64-SMALL: error: invalid argument 'small' to -mcmodel= -+// ERR-LOONGARCH64-KERNEL: error: invalid argument 'kernel' to -mcmodel= -+// ERR-LOONGARCH64-LARGE: error: invalid argument 'large' to -mcmodel= -+ -+// ERR-LOONGARCH64-PLT-EXTREME: error: invalid argument '-mcmodel=extreme' not allowed with '-fplt' --- -2.20.1 - - -From b0e5225dea19a71b0c2f2168c117ac5032c2d18a Mon Sep 17 00:00:00 2001 -From: Zhaoxin Yang -Date: Tue, 9 Jul 2024 14:13:19 +0800 -Subject: [PATCH 2/9] [LoongArch][clang] Add support for option `-msimd=` and - macro `__loongarch_simd_width`. (#97984) - -(cherry picked from commit 626c7ce33f850831949e4e724016ddbff3a34990) ---- - .../clang/Basic/DiagnosticDriverKinds.td | 2 + - clang/include/clang/Driver/Options.td | 3 + - clang/lib/Basic/Targets/LoongArch.cpp | 8 +- - .../lib/Driver/ToolChains/Arch/LoongArch.cpp | 29 ++++ - clang/test/Driver/loongarch-msimd.c | 129 ++++++++++++++++++ - clang/test/Preprocessor/init-loongarch.c | 3 + - 6 files changed, 172 insertions(+), 2 deletions(-) - create mode 100644 clang/test/Driver/loongarch-msimd.c - -diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td -index 6b68bc458b93..060f96118364 100644 ---- a/clang/include/clang/Basic/DiagnosticDriverKinds.td -+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td -@@ -757,6 +757,8 @@ def err_drv_loongarch_wrong_fpu_width_for_lasx : Error< - "wrong fpu width; LASX depends on 64-bit FPU.">; - def err_drv_loongarch_invalid_simd_option_combination : Error< - "invalid option combination; LASX depends on LSX.">; -+def err_drv_loongarch_invalid_msimd_EQ : Error< -+ "invalid argument '%0' to -msimd=; must be one of: none, lsx, lasx">; - - def err_drv_expand_response_file : Error< - "failed to expand response file: %0">; -diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td -index 344c8bd49da7..530bb53ea9b5 100644 ---- a/clang/include/clang/Driver/Options.td -+++ b/clang/include/clang/Driver/Options.td -@@ -4236,6 +4236,9 @@ def mlasx : Flag<["-"], "mlasx">, Group, - HelpText<"Enable Loongson Advanced SIMD Extension (LASX).">; - def mno_lasx : Flag<["-"], "mno-lasx">, Group, - HelpText<"Disable Loongson Advanced SIMD Extension (LASX).">; -+def msimd_EQ : Joined<["-"], "msimd=">, Group, -+ Flags<[TargetSpecific]>, -+ HelpText<"Select the SIMD extension(s) to be enabled in LoongArch either 'none', 'lsx', 'lasx'.">; - def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. 
To activate they need to be patched in.">, - Flags<[CC1Option]>, Group, - MarshallingInfoFlag>; -diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp -index 88537989a051..913404240916 100644 ---- a/clang/lib/Basic/Targets/LoongArch.cpp -+++ b/clang/lib/Basic/Targets/LoongArch.cpp -@@ -208,10 +208,14 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, - TuneCPU = ArchName; - Builder.defineMacro("__loongarch_tune", Twine('"') + TuneCPU + Twine('"')); - -- if (HasFeatureLSX) -+ if (HasFeatureLASX) { -+ Builder.defineMacro("__loongarch_simd_width", "256"); - Builder.defineMacro("__loongarch_sx", Twine(1)); -- if (HasFeatureLASX) - Builder.defineMacro("__loongarch_asx", Twine(1)); -+ } else if (HasFeatureLSX) { -+ Builder.defineMacro("__loongarch_simd_width", "128"); -+ Builder.defineMacro("__loongarch_sx", Twine(1)); -+ } - - StringRef ABI = getABI(); - if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") -diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp -index 31153a67ad28..2d9c3f810a06 100644 ---- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp -+++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp -@@ -207,6 +207,35 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, - } else /*-mno-lasx*/ - Features.push_back("-lasx"); - } -+ -+ // Select lsx/lasx feature determined by -msimd=. -+ // Option -msimd= has lower priority than -m[no-]lsx and -m[no-]lasx. -+ if (const Arg *A = Args.getLastArg(options::OPT_msimd_EQ)) { -+ StringRef MSIMD = A->getValue(); -+ if (MSIMD == "lsx") { -+ // Option -msimd=lsx depends on 64-bit FPU. -+ // -m*-float and -mfpu=none/0/32 conflict with -mlsx. -+ if (llvm::find(Features, "-d") != Features.end()) -+ D.Diag(diag::err_drv_loongarch_wrong_fpu_width) << /*LSX*/ 0; -+ // The previous option does not contain feature -lsx. -+ else if (llvm::find(Features, "-lsx") == Features.end()) -+ Features.push_back("+lsx"); -+ } else if (MSIMD == "lasx") { -+ // Option -msimd=lasx depends on 64-bit FPU and LSX. -+ // -m*-float and -mfpu=none/0/32 conflict with -mlsx. -+ if (llvm::find(Features, "-d") != Features.end()) -+ D.Diag(diag::err_drv_loongarch_wrong_fpu_width) << /*LASX*/ 1; -+ else if (llvm::find(Features, "-lsx") != Features.end()) -+ D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination); -+ // The previous option does not contain feature -lasx. -+ else if (llvm::find(Features, "-lasx") == Features.end()) { -+ Features.push_back("+lsx"); -+ Features.push_back("+lasx"); -+ } -+ } else if (MSIMD != "none") { -+ D.Diag(diag::err_drv_loongarch_invalid_msimd_EQ) << MSIMD; -+ } -+ } - } - - std::string loongarch::postProcessTargetCPUString(const std::string &CPU, -diff --git a/clang/test/Driver/loongarch-msimd.c b/clang/test/Driver/loongarch-msimd.c -new file mode 100644 -index 000000000000..984f3e8bf2bf ---- /dev/null -+++ b/clang/test/Driver/loongarch-msimd.c -@@ -0,0 +1,129 @@ -+/// Test -msimd options. 
-+
-+/// COM: -msimd=none
-+// RUN: %clang --target=loongarch64 -mlasx -msimd=none -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,LASX
-+// RUN: %clang --target=loongarch64 -mlasx -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,LASX
-+
-+// RUN: %clang --target=loongarch64 -msimd=none -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=none -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mno-lasx -msimd=none -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+
-+// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,NOLASX
-+
-+
-+/// COM: -msimd=lsx
-+// RUN: %clang --target=loongarch64 -mlasx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,LASX
-+// RUN: %clang --target=loongarch64 -mlasx -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,LASX
-+
-+// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+
-+// RUN: %clang --target=loongarch64 -msimd=lsx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mno-lasx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,NOLASX
-+
-+
-+/// COM: -msimd=lasx
-+// RUN: %clang --target=loongarch64 -msimd=lasx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,LASX
-+// RUN: %clang --target=loongarch64 -mlasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,LASX
-+// RUN: %clang --target=loongarch64 -mlasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,LASX
-+// RUN: %clang --target=loongarch64 -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,LASX
-+
-+// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mno-lasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+
-+// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,NOLASX
-+// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
-+// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
-+// RUN: FileCheck %s --check-prefixes=LSX,NOLASX
-+
-+
-+// LSX: "-target-feature" "+lsx"
-+// LASX: "-target-feature" "+lasx"
-+// NOLSX-NOT: "-target-feature" "+lsx"
-+// NOLASX-NOT: "-target-feature" "+lasx"
-diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c
-index e235a7283021..154ad82e0f8c 100644
---- a/clang/test/Preprocessor/init-loongarch.c
-+++ b/clang/test/Preprocessor/init-loongarch.c
-@@ -817,6 +817,7 @@
- // RUN: %clang --target=loongarch64 -mno-lasx -mlsx -x c -E -dM %s -o - \
- // RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s
- // MLSX-NOT: #define __loongarch_asx
-+// MLSX: #define __loongarch_simd_width 128
- // MLSX: #define __loongarch_sx 1
- 
- // RUN: %clang --target=loongarch64 -mlasx -x c -E -dM %s -o - \
-@@ -828,6 +829,7 @@
- // RUN: %clang --target=loongarch64 -mlasx -mlsx -x c -E -dM %s -o - \
- // RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s
- // MLASX: #define __loongarch_asx 1
-+// MLASX: #define __loongarch_simd_width 256
- // MLASX: #define __loongarch_sx 1
- 
- // RUN: %clang --target=loongarch64 -mno-lsx -x c -E -dM %s -o - \
-@@ -841,4 +843,5 @@
- // RUN: %clang --target=loongarch64 -mno-lasx -x c -E -dM %s -o - \
- // RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s
- // MNO-LSX-NOT: #define __loongarch_asx
-+// MNO-LSX-NOT: #define __loongarch_simd_width
- // MNO-LSX-NOT: #define __loongarch_sx
--- 
-2.20.1
-
-
-From b2f8e92e88bf63e54ace9b2f9b2aa77dcf0c50c4 Mon Sep 17 00:00:00 2001
-From: Zhaoxin Yang
-Date: Thu, 11 Jul 2024 17:43:38 +0800
-Subject: [PATCH 3/9] [LoongArch][clang] Modify `loongarch-msimd.c` to avoid
- `grep -o`. NFC (#98442)
-
-Address buildbot failure:
-https://lab.llvm.org/buildbot/#/builders/64/builds/250/steps/6/logs/FAIL__Clang__loongarch-msimd_c
-
-(cherry picked from commit 74b933c28e777fdc04e50f5f96e4f7a4ad1e79a6)
----
- clang/test/Driver/loongarch-msimd.c | 42 +++--------------------------
- 1 file changed, 4 insertions(+), 38 deletions(-)
-
-diff --git a/clang/test/Driver/loongarch-msimd.c b/clang/test/Driver/loongarch-msimd.c
-index 984f3e8bf2bf..cd463300c874 100644
---- a/clang/test/Driver/loongarch-msimd.c
-+++ b/clang/test/Driver/loongarch-msimd.c
-@@ -2,128 +2,94 @@
- 
- /// COM: -msimd=none
- // RUN: %clang --target=loongarch64 -mlasx -msimd=none -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,LASX
- // RUN: %clang --target=loongarch64 -mlasx -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,LASX
- 
- // RUN: %clang --target=loongarch64 -msimd=none -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
- // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=none -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
- // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
- // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
- // RUN: %clang --target=loongarch64 -mlsx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
- // RUN: %clang --target=loongarch64 -mno-lasx -msimd=none -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
- // RUN: %clang --target=loongarch64 -mno-lasx -mlsx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
- // RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
- // RUN: %clang --target=loongarch64 -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
- 
- // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,NOLASX
- // RUN: %clang --target=loongarch64 -mno-lasx -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,NOLASX
- // RUN: %clang --target=loongarch64 -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,NOLASX
- 
- 
- /// COM: -msimd=lsx
- // RUN: %clang --target=loongarch64 -mlasx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,LASX
- // RUN: %clang --target=loongarch64 -mlasx -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,LASX
- 
- // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
- // RUN: %clang --target=loongarch64 -mlsx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
- // RUN: %clang --target=loongarch64 -mno-lasx -mlsx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
- // RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
- // RUN: %clang --target=loongarch64 -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
- // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
- 
- // RUN: %clang --target=loongarch64 -msimd=lsx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,NOLASX
- // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,NOLASX
- // RUN: %clang --target=loongarch64 -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,NOLASX
- // RUN: %clang --target=loongarch64 -mno-lasx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,NOLASX
- // RUN: %clang --target=loongarch64 -mno-lasx -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,NOLASX
- 
- 
- /// COM: -msimd=lasx
- // RUN: %clang --target=loongarch64 -msimd=lasx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,LASX
- // RUN: %clang --target=loongarch64 -mlasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,LASX
- // RUN: %clang --target=loongarch64 -mlasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,LASX
- // RUN: %clang --target=loongarch64 -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,LASX
- 
- // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
- // RUN: %clang --target=loongarch64 -mno-lasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
- 
- // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,NOLASX
- // RUN: %clang --target=loongarch64 -mno-lasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,NOLASX
- // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \
---// RUN: grep -o '"-target-feature" "+[[:alnum:]]\+"' | sort -r | \
- // RUN: FileCheck %s --check-prefixes=LSX,NOLASX
- 
- 
---// LSX: "-target-feature" "+lsx"
---// LASX: "-target-feature" "+lasx"
-+// NOLSX-NOT: "-target-feature" "+lsx"
-+// NOLASX-NOT: "-target-feature" "+lasx"
-+// LSX-DAG: "-target-feature" "+lsx"
-+// LASX-DAG: "-target-feature" "+lasx"
- // NOLSX-NOT: "-target-feature" "+lsx"
- // NOLASX-NOT: "-target-feature" "+lasx"
--- 
-2.20.1
-
-
-From b5d3aa3ac0dcf98fbb5f8d2d9de295be991c9e8f Mon Sep 17 00:00:00 2001
-From: Zhaoxin Yang
-Date: Tue, 23 Jul 2024 12:06:59 +0800
-Subject: [PATCH 4/9] [LoongArch][CodeGen] Implement 128-bit and 256-bit vector
- shuffle. (#100054)
-
-[LoongArch][CodeGen] Implement 128-bit and 256-bit vector shuffle
-operations.
-
-In LoongArch, shuffle operations can be divided into two types:
-- Single-vector shuffle: Shuffle using only one vector, with the other
-vector being `undef` or not selected by mask. This can be expanded to
-instructions such as `vreplvei` and `vshuf4i`.
-- Two-vector shuffle: Shuffle using two vectors. This can be expanded to
-instructions like `vilv[l/h]`, `vpack[ev/od]`, `vpick[ev/od]` and the
-basic `vshuf`.
-
-In the future, more optimizations may be added, such as handling 1-bit
-vectors and processing single element patterns, etc.
-
-(cherry picked from commit 464ea880cf7710cc8675c83001d7ae020406cf42)
----
- clang/lib/Driver/ToolChains/Arch/LoongArch.cpp | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
-index 2d9c3f810a06..8b3d2837a4e5 100644
---- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
-+++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
-@@ -216,7 +216,7 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
-       // Option -msimd=lsx depends on 64-bit FPU.
-       // -m*-float and -mfpu=none/0/32 conflict with -mlsx.
-       if (llvm::find(Features, "-d") != Features.end())
--        D.Diag(diag::err_drv_loongarch_wrong_fpu_width) << /*LSX*/ 0;
-+        D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lsx);
-       // The previous option does not contain feature -lsx.
-       else if (llvm::find(Features, "-lsx") == Features.end())
-         Features.push_back("+lsx");
-@@ -224,7 +224,7 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
-       // Option -msimd=lasx depends on 64-bit FPU and LSX.
-       // -m*-float and -mfpu=none/0/32 conflict with -mlsx.
-       if (llvm::find(Features, "-d") != Features.end())
--        D.Diag(diag::err_drv_loongarch_wrong_fpu_width) << /*LASX*/ 1;
-+        D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx);
-       else if (llvm::find(Features, "-lsx") != Features.end())
-         D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination);
-       // The previous option does not contain feature -lasx.
--- 
-2.20.1
-
-
-From 17f537eeaef8db451c70fc56a921e5ff542f713b Mon Sep 17 00:00:00 2001
-From: Ami-zhang
-Date: Tue, 23 Jul 2024 14:02:04 +0800
-Subject: [PATCH 5/9] [LoongArch] Enable 128-bits vector by default (#100056)
-
-This commit enables the 128-bit vector feature by default, in order to
-be consistent with gcc.
-
-(cherry picked from commit b4ef0ba244899a64a1b1e6448eca942cfa5eda18)
----
- .../lib/Driver/ToolChains/Arch/LoongArch.cpp | 76 +++++++++++--------
- .../test/Driver/loongarch-default-features.c | 2 +-
- clang/test/Driver/loongarch-mlasx.c | 6 +-
- clang/test/Driver/loongarch-msimd.c | 4 +-
- clang/test/Driver/loongarch-msingle-float.c | 4 +-
- clang/test/Driver/loongarch-msoft-float.c | 4 +-
- clang/test/Preprocessor/init-loongarch.c | 8 +-
- 7 files changed, 60 insertions(+), 44 deletions(-)
-
-diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
-index 8b3d2837a4e5..87d7b30ef5d3 100644
---- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
-+++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
-@@ -127,6 +127,11 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
-                                            const llvm::Triple &Triple,
-                                            const ArgList &Args,
-                                            std::vector<StringRef> &Features) {
-+  // Enable the `lsx` feature on 64-bit LoongArch by default.
-+  if (Triple.isLoongArch64() &&
-+      (!Args.hasArgNoClaim(clang::driver::options::OPT_march_EQ)))
-+    Features.push_back("+lsx");
-+
-   std::string ArchName;
-   if (const Arg *A = Args.getLastArg(options::OPT_march_EQ))
-     ArchName = A->getValue();
-@@ -145,9 +150,11 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
-     } else if (A->getOption().matches(options::OPT_msingle_float)) {
-       Features.push_back("+f");
-       Features.push_back("-d");
-+      Features.push_back("-lsx");
-     } else /*Soft-float*/ {
-       Features.push_back("-f");
-       Features.push_back("-d");
-+      Features.push_back("-lsx");
-     }
-   } else if (const Arg *A = Args.getLastArg(options::OPT_mfpu_EQ)) {
-     StringRef FPU = A->getValue();
-@@ -157,9 +164,11 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
-     } else if (FPU == "32") {
-       Features.push_back("+f");
-       Features.push_back("-d");
-+      Features.push_back("-lsx");
-     } else if (FPU == "0" || FPU == "none") {
-       Features.push_back("-f");
-       Features.push_back("-d");
-+      Features.push_back("-lsx");
-     } else {
-       D.Diag(diag::err_drv_loongarch_invalid_mfpu_EQ) << FPU;
-     }
-@@ -175,6 +184,42 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
-     A->ignoreTargetSpecific();
-   if (Arg *A = Args.getLastArgNoClaim(options::OPT_mfpu_EQ))
-     A->ignoreTargetSpecific();
-+  if (Arg *A = Args.getLastArgNoClaim(options::OPT_msimd_EQ))
-+    A->ignoreTargetSpecific();
-+
-+  // Select lsx/lasx feature determined by -msimd=.
-+  // Option -msimd= precedes -m[no-]lsx and -m[no-]lasx.
-+  if (const Arg *A = Args.getLastArg(options::OPT_msimd_EQ)) {
-+    StringRef MSIMD = A->getValue();
-+    if (MSIMD == "lsx") {
-+      // Option -msimd=lsx depends on 64-bit FPU.
-+      // -m*-float and -mfpu=none/0/32 conflict with -msimd=lsx.
-+      if (llvm::find(Features, "-d") != Features.end())
-+        D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lsx);
-+      else
-+        Features.push_back("+lsx");
-+    } else if (MSIMD == "lasx") {
-+      // Option -msimd=lasx depends on 64-bit FPU and LSX.
-+      // -m*-float, -mfpu=none/0/32 and -mno-lsx conflict with -msimd=lasx.
-+      if (llvm::find(Features, "-d") != Features.end())
-+        D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx);
-+      else if (llvm::find(Features, "-lsx") != Features.end())
-+        D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination);
-+
-+      // The command options do not contain -mno-lasx.
-+      if (!Args.getLastArg(options::OPT_mno_lasx)) {
-+        Features.push_back("+lsx");
-+        Features.push_back("+lasx");
-+      }
-+    } else if (MSIMD == "none") {
-+      if (llvm::find(Features, "+lsx") != Features.end())
-+        Features.push_back("-lsx");
-+      if (llvm::find(Features, "+lasx") != Features.end())
-+        Features.push_back("-lasx");
-+    } else {
-+      D.Diag(diag::err_drv_loongarch_invalid_msimd_EQ) << MSIMD;
-+    }
-+  }
- 
-   // Select lsx feature determined by -m[no-]lsx.
-   if (const Arg *A = Args.getLastArg(options::OPT_mlsx, options::OPT_mno_lsx)) {
-@@ -198,8 +243,6 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
-     if (A->getOption().matches(options::OPT_mlasx)) {
-       if (llvm::find(Features, "-d") != Features.end())
-         D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx);
--      else if (llvm::find(Features, "-lsx") != Features.end())
--        D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination);
-       else { /*-mlasx*/
-         Features.push_back("+lsx");
-         Features.push_back("+lasx");
-@@ -207,35 +250,6 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
-     } else /*-mno-lasx*/
-       Features.push_back("-lasx");
-   }
--
--  // Select lsx/lasx feature determined by -msimd=.
--  // Option -msimd= has lower priority than -m[no-]lsx and -m[no-]lasx.
--  if (const Arg *A = Args.getLastArg(options::OPT_msimd_EQ)) {
--    StringRef MSIMD = A->getValue();
--    if (MSIMD == "lsx") {
--      // Option -msimd=lsx depends on 64-bit FPU.
--      // -m*-float and -mfpu=none/0/32 conflict with -mlsx.
--      if (llvm::find(Features, "-d") != Features.end())
--        D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lsx);
--      // The previous option does not contain feature -lsx.
--      else if (llvm::find(Features, "-lsx") == Features.end())
--        Features.push_back("+lsx");
--    } else if (MSIMD == "lasx") {
--      // Option -msimd=lasx depends on 64-bit FPU and LSX.
--      // -m*-float and -mfpu=none/0/32 conflict with -mlsx.
--      if (llvm::find(Features, "-d") != Features.end())
--        D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx);
--      else if (llvm::find(Features, "-lsx") != Features.end())
--        D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination);
--      // The previous option does not contain feature -lasx.
--      else if (llvm::find(Features, "-lasx") == Features.end()) {
--        Features.push_back("+lsx");
--        Features.push_back("+lasx");
--      }
--    } else if (MSIMD != "none") {
--      D.Diag(diag::err_drv_loongarch_invalid_msimd_EQ) << MSIMD;
--    }
--  }
- }
- 
- std::string loongarch::postProcessTargetCPUString(const std::string &CPU,
-diff --git a/clang/test/Driver/loongarch-default-features.c b/clang/test/Driver/loongarch-default-features.c
-index 3cdf3ba3d23e..90634bbcf003 100644
---- a/clang/test/Driver/loongarch-default-features.c
-+++ b/clang/test/Driver/loongarch-default-features.c
-@@ -2,7 +2,7 @@
- // RUN: %clang --target=loongarch64 -S -emit-llvm %s -o - | FileCheck %s --check-prefix=LA64
- 
- // LA32: "target-features"="+32bit"
--// LA64: "target-features"="+64bit,+d,+f,+ual"
-+// LA64: "target-features"="+64bit,+d,+f,+lsx,+ual"
- 
- int foo(void) {
-   return 3;
-diff --git a/clang/test/Driver/loongarch-mlasx.c b/clang/test/Driver/loongarch-mlasx.c
-index 0b934f125c9e..87634ff5a9a4 100644
---- a/clang/test/Driver/loongarch-mlasx.c
-+++ b/clang/test/Driver/loongarch-mlasx.c
-@@ -5,7 +5,7 @@
- // RUN: %clang --target=loongarch64 -mno-lasx -fsyntax-only %s -### 2>&1 | \
- // RUN: FileCheck %s --check-prefix=CC1-NOLASX
- // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -fsyntax-only %s -### 2>&1 | \
---// RUN: FileCheck %s --check-prefix=CC1-NOLASX
-+// RUN: FileCheck %s --check-prefix=CC1-LSX
- // RUN: %clang --target=loongarch64 -mno-lasx -mlasx -fsyntax-only %s -### 2>&1 | \
- // RUN: FileCheck %s --check-prefix=CC1-LASX
- // RUN: %clang --target=loongarch64 -mlsx -mlasx -fsyntax-only %s -### 2>&1 | \
-@@ -18,7 +18,7 @@
- // RUN: %clang --target=loongarch64 -mno-lasx -S -emit-llvm %s -o - | \
- // RUN: FileCheck %s --check-prefix=IR-NOLASX
- // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -S -emit-llvm %s -o - | \
---// RUN: FileCheck %s --check-prefix=IR-NOLASX
-+// RUN: FileCheck %s --check-prefix=IR-LSX
- // RUN: %clang --target=loongarch64 -mno-lasx -mlasx -S -emit-llvm %s -o - | \
- // RUN: FileCheck %s --check-prefix=IR-LASX
- // RUN: %clang --target=loongarch64 -mlsx -mlasx -S -emit-llvm %s -o - | \
-@@ -26,9 +26,11 @@
- // RUN: %clang --target=loongarch64 -mlasx -mlsx -S -emit-llvm %s -o - | \
- // RUN: FileCheck %s --check-prefix=IR-LASX
- 
-+// CC1-LSX: "-target-feature" "+lsx"
- // CC1-LASX: "-target-feature" "+lsx" "-target-feature" "+lasx"
- // CC1-NOLASX: "-target-feature" "-lasx"
- 
-+// IR-LSX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lsx{{(,.*)?}}"
- // IR-LASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lasx{{(,.*)?}}"
- // IR-NOLASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lasx{{(,.*)?}}"
- 
-diff --git a/clang/test/Driver/loongarch-msimd.c b/clang/test/Driver/loongarch-msimd.c
-index cd463300c874..49d298e1b2e3 100644
---- a/clang/test/Driver/loongarch-msimd.c
-+++ b/clang/test/Driver/loongarch-msimd.c
-@@ -75,9 +75,9 @@
- // RUN: FileCheck %s --check-prefixes=LSX,LASX
- 
- // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \
---// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+// RUN: FileCheck %s --check-prefixes=LSX,NOLASX
- // RUN: %clang --target=loongarch64 -mno-lasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \
---// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX
-+// RUN: FileCheck %s --check-prefixes=LSX,NOLASX
- 
- // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \
- // RUN: FileCheck %s --check-prefixes=LSX,NOLASX
-diff --git a/clang/test/Driver/loongarch-msingle-float.c b/clang/test/Driver/loongarch-msingle-float.c
-index bd9b3e8a8c01..4eb0865b53a5 100644
---- a/clang/test/Driver/loongarch-msingle-float.c
-+++ b/clang/test/Driver/loongarch-msingle-float.c
-@@ -11,10 +11,10 @@
- // WARN: warning: ignoring '-mabi=lp64s' as it conflicts with that implied by '-msingle-float' (lp64f)
- // WARN: warning: ignoring '-mfpu=64' as it conflicts with that implied by '-msingle-float' (32)
- 
---// CC1: "-target-feature" "+f"{{.*}} "-target-feature" "-d"
-+// CC1: "-target-feature" "+f"{{.*}} "-target-feature" "-d" "-target-feature" "-lsx"
- // CC1: "-target-abi" "lp64f"
- 
---// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+f,{{(.*,)?}}-d"
-+// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+f,{{(.*,)?}}-d,-lsx"
- 
- int foo(void) {
-   return 3;
-diff --git a/clang/test/Driver/loongarch-msoft-float.c b/clang/test/Driver/loongarch-msoft-float.c
-index 0e5121ac84b4..ebf27fb00e30 100644
---- a/clang/test/Driver/loongarch-msoft-float.c
-+++ b/clang/test/Driver/loongarch-msoft-float.c
-@@ -11,10 +11,10 @@
- // WARN: warning: ignoring '-mabi=lp64d' as it conflicts with that implied by '-msoft-float' (lp64s)
- // WARN: warning: ignoring '-mfpu=64' as it conflicts with that implied by '-msoft-float' (0)
- 
---// CC1: "-target-feature" "-f"{{.*}} "-target-feature" "-d"
-+// CC1: "-target-feature" "-f"{{.*}} "-target-feature" "-d" "-target-feature" "-lsx"
- // CC1: "-target-abi" "lp64s"
- 
---// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-d,{{(.*,)?}}-f{{(,.*)?}}"
-+// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-d,{{(.*,)?}}-f,-lsx"
- 
- int foo(void) {
-   return 3;
-diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c
-index 154ad82e0f8c..635d029ce9d3 100644
---- a/clang/test/Preprocessor/init-loongarch.c
-+++ b/clang/test/Preprocessor/init-loongarch.c
-@@ -814,6 +814,8 @@
- // RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s
- // RUN: %clang --target=loongarch64 -mlsx -mno-lasx -x c -E -dM %s -o - \
- // RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s
-+// RUN: %clang --target=loongarch64 -mno-lasx -x c -E -dM %s -o - \
-+// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s
- // RUN: %clang --target=loongarch64 -mno-lasx -mlsx -x c -E -dM %s -o - \
- // RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s
- // MLSX-NOT: #define __loongarch_asx
-@@ -822,12 +824,12 @@
- 
- // RUN: %clang --target=loongarch64 -mlasx -x c -E -dM %s -o - \
- // RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s
---// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -x c -E -dM %s -o - \
---// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s
- // RUN: %clang --target=loongarch64 -mlsx -mlasx -x c -E -dM %s -o - \
- // RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s
- // RUN: %clang --target=loongarch64 -mlasx -mlsx -x c -E -dM %s -o - \
- // RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s
-+// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -x c -E -dM %s -o - \
-+// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s
- // MLASX: #define __loongarch_asx 1
- // MLASX: #define __loongarch_simd_width 256
- // MLASX: #define __loongarch_sx 1
-@@ -840,8 +842,6 @@
- // RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s
- // RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -x c -E -dM %s -o - \
- // RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s
---// RUN: %clang --target=loongarch64 -mno-lasx -x c -E -dM %s -o - \
---// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s
- // MNO-LSX-NOT: #define __loongarch_asx
- // MNO-LSX-NOT: #define __loongarch_simd_width
- // MNO-LSX-NOT: #define __loongarch_sx
--- 
-2.20.1
-
-
-From 57eaecf7bdb7a7502580076b365b4f70dde1185d Mon Sep 17 00:00:00 2001
-From: Ami-zhang
-Date: Tue, 23 Jan 2024 14:24:58 +0800
-Subject: [PATCH 6/9] [LoongArch] Add definitions and feature 'frecipe' for FP
- approximation intrinsics/builtins (#78962)
-
-This PR adds definitions and the 'frecipe' feature for FP approximation
-intrinsics/builtins. In addition, it adds and complements the relevant
-testcases.
-
-(cherry picked from commit fcb8342a219ada8ec641790a4c8a9f969d7d64ee)
----
- .../clang/Basic/BuiltinsLoongArchBase.def | 5 +++
- .../clang/Basic/BuiltinsLoongArchLASX.def | 6 +++
- .../clang/Basic/BuiltinsLoongArchLSX.def | 6 +++
- clang/lib/Headers/larchintrin.h | 12 +++++
- clang/lib/Headers/lasxintrin.h | 24 ++++++++++
- clang/lib/Headers/lsxintrin.h | 24 ++++++++++
- .../LoongArch/builtin-dbl-approximate.c | 45 +++++++++++++++++++
- .../LoongArch/builtin-flt-approximate.c | 45 +++++++++++++++++++
- .../CodeGen/LoongArch/intrinsic-la64-error.c | 21 +++++++++
- .../lasx/builtin-approximate-alias.c | 37 +++++++++++++++
- .../LoongArch/lasx/builtin-approximate.c | 38 ++++++++++++++++
- .../LoongArch/lsx/builtin-approximate-alias.c | 37 +++++++++++++++
- .../LoongArch/lsx/builtin-approximate.c | 38 ++++++++++++++++
- 13 files changed, 338 insertions(+)
- create mode 100644 clang/test/CodeGen/LoongArch/builtin-dbl-approximate.c
- create mode 100644 clang/test/CodeGen/LoongArch/builtin-flt-approximate.c
- create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c
- create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c
- create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-approximate-alias.c
- create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-approximate.c
-
-diff --git a/clang/include/clang/Basic/BuiltinsLoongArchBase.def b/clang/include/clang/Basic/BuiltinsLoongArchBase.def
-index cbb239223aae..a5a07c167908 100644
---- a/clang/include/clang/Basic/BuiltinsLoongArchBase.def
-+++ b/clang/include/clang/Basic/BuiltinsLoongArchBase.def
-@@ -51,3 +51,8 @@ TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit")
- 
- TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit")
- TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit")
-+
-+TARGET_BUILTIN(__builtin_loongarch_frecipe_s, "ff", "nc", "f,frecipe")
-+TARGET_BUILTIN(__builtin_loongarch_frecipe_d, "dd", "nc", "d,frecipe")
-+TARGET_BUILTIN(__builtin_loongarch_frsqrte_s, "ff", "nc", "f,frecipe")
-+TARGET_BUILTIN(__builtin_loongarch_frsqrte_d, "dd", "nc", "d,frecipe")
-diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
-index 3de200f665b6..4cf51cc000f6 100644
---- a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
-+++ b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
-@@ -657,9 +657,15 @@ TARGET_BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc", "lasx")
- TARGET_BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc", "lasx")
- TARGET_BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc", "lasx")
- 
-+TARGET_BUILTIN(__builtin_lasx_xvfrecipe_s, "V8fV8f", "nc", "lasx,frecipe")
-+TARGET_BUILTIN(__builtin_lasx_xvfrecipe_d, "V4dV4d", "nc", "lasx,frecipe")
-+
- TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc", "lasx")
- TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc", "lasx")
- 
-+TARGET_BUILTIN(__builtin_lasx_xvfrsqrte_s, "V8fV8f", "nc", "lasx,frecipe")
-+TARGET_BUILTIN(__builtin_lasx_xvfrsqrte_d, "V4dV4d", "nc", "lasx,frecipe")
-+
- TARGET_BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc", "lasx")
- TARGET_BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc", "lasx")
- TARGET_BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc", "lasx")
-diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def
-index 8e6aec886c50..c90f4dc5458f 100644
---- a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def
-+++ b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def
-@@ -641,9 +641,15 @@ TARGET_BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc", "lsx")
- TARGET_BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc", "lsx")
- TARGET_BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc", "lsx")
- 
-+TARGET_BUILTIN(__builtin_lsx_vfrecipe_s, "V4fV4f", "nc", "lsx,frecipe")
-+TARGET_BUILTIN(__builtin_lsx_vfrecipe_d, "V2dV2d", "nc", "lsx,frecipe")
-+
- TARGET_BUILTIN(__builtin_lsx_vfrsqrt_s, "V4fV4f", "nc", "lsx")
- TARGET_BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc", "lsx")
- 
-+TARGET_BUILTIN(__builtin_lsx_vfrsqrte_s, "V4fV4f", "nc", "lsx,frecipe")
-+TARGET_BUILTIN(__builtin_lsx_vfrsqrte_d, "V2dV2d", "nc", "lsx,frecipe")
-+
- TARGET_BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc", "lsx")
- TARGET_BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", "nc", "lsx")
- 
-diff --git a/clang/lib/Headers/larchintrin.h b/clang/lib/Headers/larchintrin.h
-index 24dd29ce91ff..f4218295919a 100644
---- a/clang/lib/Headers/larchintrin.h
-+++ b/clang/lib/Headers/larchintrin.h
-@@ -228,6 +228,18 @@ extern __inline void
-   ((void)__builtin_loongarch_ldpte_d((long int)(_1), (_2)))
- #endif
- 
-+#define __frecipe_s(/*float*/ _1) \
-+  (float)__builtin_loongarch_frecipe_s((float)_1)
-+
-+#define __frecipe_d(/*double*/ _1) \
-+  (double)__builtin_loongarch_frecipe_d((double)_1)
-+
-+#define __frsqrte_s(/*float*/ _1) \
-+  (float)__builtin_loongarch_frsqrte_s((float)_1)
-+
-+#define __frsqrte_d(/*double*/ _1) \
-+  (double)__builtin_loongarch_frsqrte_d((double)_1)
-+
- #ifdef __cplusplus
- }
- #endif
-diff --git a/clang/lib/Headers/lasxintrin.h b/clang/lib/Headers/lasxintrin.h
-index 6b4d5012a24b..dafc2a2f3e6a 100644
---- a/clang/lib/Headers/lasxintrin.h
-+++ b/clang/lib/Headers/lasxintrin.h
-@@ -1726,6 +1726,18 @@ extern __inline
-   return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1);
- }
- 
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfrecipe_s(__m256 _1) {
-+  return (__m256)__builtin_lasx_xvfrecipe_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfrecipe_d(__m256d _1) {
-+  return (__m256d)__builtin_lasx_xvfrecipe_d((v4f64)_1);
-+}
-+
- extern __inline
-     __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-     __lasx_xvfrint_s(__m256 _1) {
-@@ -1750,6 +1762,18 @@ extern __inline
-   return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1);
- }
- 
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
-+    __lasx_xvfrsqrte_s(__m256 _1) {
-+  return (__m256)__builtin_lasx_xvfrsqrte_s((v8f32)_1);
-+}
-+
-+extern __inline
-+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
-+    __lasx_xvfrsqrte_d(__m256d _1) {
-+  return (__m256d)__builtin_lasx_xvfrsqrte_d((v4f64)_1);
-+}
-+
- extern __inline
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 - __lasx_xvflogb_s(__m256 _1) { -diff --git a/clang/lib/Headers/lsxintrin.h b/clang/lib/Headers/lsxintrin.h -index a29bc7757ab5..f347955ce6fb 100644 ---- a/clang/lib/Headers/lsxintrin.h -+++ b/clang/lib/Headers/lsxintrin.h -@@ -1776,6 +1776,18 @@ extern __inline - return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1); - } - -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfrecipe_s(__m128 _1) { -+ return (__m128)__builtin_lsx_vfrecipe_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfrecipe_d(__m128d _1) { -+ return (__m128d)__builtin_lsx_vfrecipe_d((v2f64)_1); -+} -+ - extern __inline - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 - __lsx_vfrint_s(__m128 _1) { -@@ -1800,6 +1812,18 @@ extern __inline - return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1); - } - -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 -+ __lsx_vfrsqrte_s(__m128 _1) { -+ return (__m128)__builtin_lsx_vfrsqrte_s((v4f32)_1); -+} -+ -+extern __inline -+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d -+ __lsx_vfrsqrte_d(__m128d _1) { -+ return (__m128d)__builtin_lsx_vfrsqrte_d((v2f64)_1); -+} -+ - extern __inline - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 - __lsx_vflogb_s(__m128 _1) { -diff --git a/clang/test/CodeGen/LoongArch/builtin-dbl-approximate.c b/clang/test/CodeGen/LoongArch/builtin-dbl-approximate.c -new file mode 100644 -index 000000000000..e5fe684346c0 ---- /dev/null -+++ b/clang/test/CodeGen/LoongArch/builtin-dbl-approximate.c -@@ -0,0 +1,45 @@ -+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 -+// RUN: %clang_cc1 -triple loongarch32 -target-feature +d -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s -+// RUN: %clang_cc1 -triple loongarch64 -target-feature +d -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s -+ -+#include -+ -+// CHECK-LABEL: @frecipe_d -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frecipe.d(double [[A:%.*]]) -+// CHECK-NEXT: ret double [[TMP0]] -+// -+double frecipe_d (double _1) -+{ -+ return __builtin_loongarch_frecipe_d (_1); -+} -+ -+// CHECK-LABEL: @frsqrte_d -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frsqrte.d(double [[A:%.*]]) -+// CHECK-NEXT: ret double [[TMP0]] -+// -+double frsqrte_d (double _1) -+{ -+ return __builtin_loongarch_frsqrte_d (_1); -+} -+ -+// CHECK-LABEL: @frecipe_d_alia -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frecipe.d(double [[A:%.*]]) -+// CHECK-NEXT: ret double [[TMP0]] -+// -+double frecipe_d_alia (double _1) -+{ -+ return __frecipe_d (_1); -+} -+ -+// CHECK-LABEL: @frsqrte_d_alia -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frsqrte.d(double [[A:%.*]]) -+// CHECK-NEXT: ret double [[TMP0]] -+// -+double frsqrte_d_alia (double _1) -+{ -+ return __frsqrte_d (_1); -+} -diff --git a/clang/test/CodeGen/LoongArch/builtin-flt-approximate.c b/clang/test/CodeGen/LoongArch/builtin-flt-approximate.c -new file mode 100644 -index 000000000000..47bb47084364 ---- /dev/null -+++ b/clang/test/CodeGen/LoongArch/builtin-flt-approximate.c -@@ -0,0 +1,45 @@ -+// NOTE: Assertions have been 
autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 -+// RUN: %clang_cc1 -triple loongarch32 -target-feature +f -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s -+// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s -+ -+#include -+ -+// CHECK-LABEL: @frecipe_s -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frecipe.s(float [[A:%.*]]) -+// CHECK-NEXT: ret float [[TMP0]] -+// -+float frecipe_s (float _1) -+{ -+ return __builtin_loongarch_frecipe_s (_1); -+} -+ -+// CHECK-LABEL: @frsqrte_s -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frsqrte.s(float [[A:%.*]]) -+// CHECK-NEXT: ret float [[TMP0]] -+// -+float frsqrte_s (float _1) -+{ -+ return __builtin_loongarch_frsqrte_s (_1); -+} -+ -+// CHECK-LABEL: @frecipe_s_alia -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frecipe.s(float [[A:%.*]]) -+// CHECK-NEXT: ret float [[TMP0]] -+// -+float frecipe_s_alia (float _1) -+{ -+ return __frecipe_s (_1); -+} -+ -+// CHECK-LABEL: @frsqrte_s_alia -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frsqrte.s(float [[A:%.*]]) -+// CHECK-NEXT: ret float [[TMP0]] -+// -+float frsqrte_s_alia (float _1) -+{ -+ return __frsqrte_s (_1); -+} -diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la64-error.c b/clang/test/CodeGen/LoongArch/intrinsic-la64-error.c -index efb3b94175cf..a3242dfd41e9 100644 ---- a/clang/test/CodeGen/LoongArch/intrinsic-la64-error.c -+++ b/clang/test/CodeGen/LoongArch/intrinsic-la64-error.c -@@ -1,7 +1,28 @@ - // RUN: %clang_cc1 -triple loongarch64 -emit-llvm -S -verify %s -o /dev/null -+// RUN: not %clang_cc1 -triple loongarch64 -DFEATURE_CHECK -emit-llvm %s -o /dev/null 2>&1 \ -+// RUN: | FileCheck %s - - #include - -+#ifdef FEATURE_CHECK -+void test_feature(unsigned long *v_ul, int *v_i, float a, double b) { -+// CHECK: error: '__builtin_loongarch_cacop_w' needs target feature 32bit -+ __builtin_loongarch_cacop_w(1, v_ul[0], 1024); -+// CHECK: error: '__builtin_loongarch_movfcsr2gr' needs target feature f -+ v_i[0] = __builtin_loongarch_movfcsr2gr(1); -+// CHECK: error: '__builtin_loongarch_movgr2fcsr' needs target feature f -+ __builtin_loongarch_movgr2fcsr(1, v_i[1]); -+// CHECK: error: '__builtin_loongarch_frecipe_s' needs target feature f,frecipe -+ float f1 = __builtin_loongarch_frecipe_s(a); -+// CHECK: error: '__builtin_loongarch_frsqrte_s' needs target feature f,frecipe -+ float f2 = __builtin_loongarch_frsqrte_s(a); -+// CHECK: error: '__builtin_loongarch_frecipe_d' needs target feature d,frecipe -+ double d1 = __builtin_loongarch_frecipe_d(b); -+// CHECK: error: '__builtin_loongarch_frsqrte_d' needs target feature d,frecipe -+ double d2 = __builtin_loongarch_frsqrte_d(b); -+} -+#endif -+ - void csrrd_d(int a) { - __builtin_loongarch_csrrd_d(16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} - __builtin_loongarch_csrrd_d(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} -diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c -new file mode 100644 -index 000000000000..b79f93940399 ---- /dev/null -+++ b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c -@@ -0,0 +1,37 @@ -+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
-+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s -+ -+#include -+ -+// CHECK-LABEL: @xvfrecipe_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void -+// -+v8f32 xvfrecipe_s(v8f32 _1) { return __lasx_xvfrecipe_s(_1); } -+// CHECK-LABEL: @xvfrecipe_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void -+// -+v4f64 xvfrecipe_d(v4f64 _1) { return __lasx_xvfrecipe_d(_1); } -+// CHECK-LABEL: @xvfrsqrte_s( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void -+// -+v8f32 xvfrsqrte_s(v8f32 _1) { return __lasx_xvfrsqrte_s(_1); } -+// CHECK-LABEL: @xvfrsqrte_d( -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void -+// -+v4f64 xvfrsqrte_d(v4f64 _1) { return __lasx_xvfrsqrte_d(_1); } -diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c -new file mode 100644 -index 000000000000..63e9ba639ea2 ---- /dev/null -+++ b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c -@@ -0,0 +1,38 @@ -+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s -+ -+typedef float v8f32 __attribute__((vector_size(32), aligned(32))); -+typedef double v4f64 __attribute__((vector_size(32), aligned(32))); -+ -+// CHECK-LABEL: @xvfrecipe_s -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float> [[_1]]) -+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void -+// -+v8f32 xvfrecipe_s(v8f32 _1) { return __builtin_lasx_xvfrecipe_s(_1); } -+// CHECK-LABEL: @xvfrecipe_d -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double> [[_1]]) -+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] -+// CHECK-NEXT: ret void -+// -+v4f64 xvfrecipe_d(v4f64 _1) { return __builtin_lasx_xvfrecipe_d(_1); } -+// CHECK-LABEL: @xvfrsqrte_s 
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float> [[_1]])
-+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
-+//
-+v8f32 xvfrsqrte_s(v8f32 _1) { return __builtin_lasx_xvfrsqrte_s(_1); }
-+// CHECK-LABEL: @xvfrsqrte_d
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double> [[_1]])
-+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
-+// CHECK-NEXT: ret void
-+//
-+v4f64 xvfrsqrte_d(v4f64 _1) { return __builtin_lasx_xvfrsqrte_d(_1); }
-diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-approximate-alias.c b/clang/test/CodeGen/LoongArch/lsx/builtin-approximate-alias.c
-new file mode 100644
-index 000000000000..f26f032c878e
---- /dev/null
-+++ b/clang/test/CodeGen/LoongArch/lsx/builtin-approximate-alias.c
-@@ -0,0 +1,37 @@
-+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s
-+
-+#include <lsxintrin.h>
-+
-+// CHECK-LABEL: @vfrecipe_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecipe.s(<4 x float> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
-+//
-+v4f32 vfrecipe_s(v4f32 _1) { return __lsx_vfrecipe_s(_1); }
-+// CHECK-LABEL: @vfrecipe_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecipe.d(<2 x double> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
-+//
-+v2f64 vfrecipe_d(v2f64 _1) { return __lsx_vfrecipe_d(_1); }
-+// CHECK-LABEL: @vfrsqrte_s(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrte.s(<4 x float> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
-+//
-+v4f32 vfrsqrte_s(v4f32 _1) { return __lsx_vfrsqrte_s(_1); }
-+// CHECK-LABEL: @vfrsqrte_d(
-+// CHECK-NEXT: entry:
-+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
-+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrte.d(<2 x double> [[TMP0]])
-+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
-+// CHECK-NEXT: ret i128 [[TMP2]]
-+//
-+v2f64 vfrsqrte_d(v2f64 _1) { return __lsx_vfrsqrte_d(_1); }
-diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-approximate.c b/clang/test/CodeGen/LoongArch/lsx/builtin-approximate.c
-new file mode 100644
-index 000000000000..39fa1663db34
---- /dev/null
-+++ b/clang/test/CodeGen/LoongArch/lsx/builtin-approximate.c
-@@ -0,0 +1,38 @@
-+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s
-+
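-+// vfrecipe/vfrsqrte compute hardware estimates of 1/x and 1/sqrt(x); the
-+// checks below assert only that each builtin lowers to the corresponding
-+// @llvm.loongarch.lsx.* intrinsic when the +frecipe feature is enabled.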
-+typedef float v4f32 __attribute__ ((vector_size(16), aligned(16))); -+typedef double v2f64 __attribute__ ((vector_size(16), aligned(16))); -+ -+// CHECK-LABEL: @vfrecipe_s -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecipe.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] -+// -+v4f32 vfrecipe_s (v4f32 _1) { return __builtin_lsx_vfrecipe_s (_1); } -+// CHECK-LABEL: @vfrecipe_d -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecipe.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] -+// -+v2f64 vfrecipe_d (v2f64 _1) { return __builtin_lsx_vfrecipe_d (_1); } -+// CHECK-LABEL: @vfrsqrte_s -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrte.s(<4 x float> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] -+// -+v4f32 vfrsqrte_s (v4f32 _1) { return __builtin_lsx_vfrsqrte_s (_1); } -+// CHECK-LABEL: @vfrsqrte_d -+// CHECK-NEXT: entry: -+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> -+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrte.d(<2 x double> [[TMP0]]) -+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 -+// CHECK-NEXT: ret i128 [[TMP2]] -+// -+v2f64 vfrsqrte_d (v2f64 _1) { return __builtin_lsx_vfrsqrte_d (_1); } --- -2.20.1 - - -From 6f9531b069971dc0f5c6b28bd6a6754c1b5fde72 Mon Sep 17 00:00:00 2001 -From: Ami-zhang -Date: Tue, 23 Jul 2024 14:03:28 +0800 -Subject: [PATCH 7/9] [LoongArch] Support -march=la64v1.0 and -march=la64v1.1 - (#100057) - -The newly added strings `la64v1.0` and `la64v1.1` in `-march` are as -described in LoongArch toolchains conventions (see [1]). - -The target-cpu/feature attributes are forwarded to compiler when -specifying particular `-march` parameter. The default cpu `loongarch64` -is returned when archname is `la64v1.0` or `la64v1.1`. - -In addition, this commit adds `la64v1.0`/`la64v1.1` to -"__loongarch_arch" and adds definition for macro "__loongarch_frecipe". - -[1]: https://github.com/loongson/la-toolchain-conventions - -(cherry picked from commit 5a1b9896ad5a7dcd25a1cc7a4d3fd44155e4b22d) ---- - clang/lib/Basic/Targets/LoongArch.cpp | 23 +++++++++++++++- - clang/lib/Basic/Targets/LoongArch.h | 2 ++ - .../lib/Driver/ToolChains/Arch/LoongArch.cpp | 10 +++++-- - clang/test/Driver/loongarch-march.c | 22 +++++++++++++++ - clang/test/Preprocessor/init-loongarch.c | 27 ++++++++++++++++--- - 5 files changed, 77 insertions(+), 7 deletions(-) - -diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp -index 913404240916..5fede3d7cdc4 100644 ---- a/clang/lib/Basic/Targets/LoongArch.cpp -+++ b/clang/lib/Basic/Targets/LoongArch.cpp -@@ -200,7 +200,24 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, - - // Define __loongarch_arch. 
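- // loongarch64 with LSX enabled is reported as "la64v1.0", additionally
- // with frecipe as "la64v1.1"; any other CPU name is reported verbatim.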
- StringRef ArchName = getCPU(); -- Builder.defineMacro("__loongarch_arch", Twine('"') + ArchName + Twine('"')); -+ if (ArchName == "loongarch64") { -+ if (HasFeatureLSX) { -+ // TODO: As more features of the V1.1 ISA are supported, a unified "v1.1" -+ // arch feature set will be used to include all sub-features belonging to -+ // the V1.1 ISA version. -+ if (HasFeatureFrecipe) -+ Builder.defineMacro("__loongarch_arch", -+ Twine('"') + "la64v1.1" + Twine('"')); -+ else -+ Builder.defineMacro("__loongarch_arch", -+ Twine('"') + "la64v1.0" + Twine('"')); -+ } else { -+ Builder.defineMacro("__loongarch_arch", -+ Twine('"') + ArchName + Twine('"')); -+ } -+ } else { -+ Builder.defineMacro("__loongarch_arch", Twine('"') + ArchName + Twine('"')); -+ } - - // Define __loongarch_tune. - StringRef TuneCPU = getTargetOpts().TuneCPU; -@@ -216,6 +233,8 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, - Builder.defineMacro("__loongarch_simd_width", "128"); - Builder.defineMacro("__loongarch_sx", Twine(1)); - } -+ if (HasFeatureFrecipe) -+ Builder.defineMacro("__loongarch_frecipe", Twine(1)); - - StringRef ABI = getABI(); - if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") -@@ -289,6 +308,8 @@ bool LoongArchTargetInfo::handleTargetFeatures( - HasFeatureLSX = true; - else if (Feature == "+lasx") - HasFeatureLASX = true; -+ else if (Feature == "+frecipe") -+ HasFeatureFrecipe = true; - } - return true; - } -diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h -index 3313102492cb..4d2965f5b3a3 100644 ---- a/clang/lib/Basic/Targets/LoongArch.h -+++ b/clang/lib/Basic/Targets/LoongArch.h -@@ -29,6 +29,7 @@ protected: - bool HasFeatureF; - bool HasFeatureLSX; - bool HasFeatureLASX; -+ bool HasFeatureFrecipe; - - public: - LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) -@@ -37,6 +38,7 @@ public: - HasFeatureF = false; - HasFeatureLSX = false; - HasFeatureLASX = false; -+ HasFeatureFrecipe = false; - LongDoubleWidth = 128; - LongDoubleAlign = 128; - LongDoubleFormat = &llvm::APFloat::IEEEquad(); -diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp -index 87d7b30ef5d3..21106c425206 100644 ---- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp -+++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp -@@ -268,8 +268,14 @@ std::string loongarch::postProcessTargetCPUString(const std::string &CPU, - std::string loongarch::getLoongArchTargetCPU(const llvm::opt::ArgList &Args, - const llvm::Triple &Triple) { - std::string CPU; -+ std::string Arch; - // If we have -march, use that. 
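- // Note that la64v1.0/la64v1.1 name generic ISA versions rather than
- // concrete cores, so both resolve to the default "loongarch64" CPU.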
-- if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) -- CPU = A->getValue(); -+ if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) { -+ Arch = A->getValue(); -+ if (Arch == "la64v1.0" || Arch == "la64v1.1") -+ CPU = llvm::LoongArch::getDefaultArch(Triple.isLoongArch64()); -+ else -+ CPU = Arch; -+ } - return postProcessTargetCPUString(CPU, Triple); - } -diff --git a/clang/test/Driver/loongarch-march.c b/clang/test/Driver/loongarch-march.c -index 9214130cd034..d06da72a755c 100644 ---- a/clang/test/Driver/loongarch-march.c -+++ b/clang/test/Driver/loongarch-march.c -@@ -2,10 +2,18 @@ - // RUN: FileCheck %s --check-prefix=CC1-LOONGARCH64 - // RUN: %clang --target=loongarch64 -march=la464 -fsyntax-only %s -### 2>&1 | \ - // RUN: FileCheck %s --check-prefix=CC1-LA464 -+// RUN: %clang --target=loongarch64 -march=la64v1.0 -fsyntax-only %s -### 2>&1 | \ -+// RUN: FileCheck %s --check-prefix=CC1-LA64V1P0 -+// RUN: %clang --target=loongarch64 -march=la64v1.1 -fsyntax-only %s -### 2>&1 | \ -+// RUN: FileCheck %s --check-prefix=CC1-LA64V1P1 - // RUN: %clang --target=loongarch64 -march=loongarch64 -S -emit-llvm %s -o - | \ - // RUN: FileCheck %s --check-prefix=IR-LOONGARCH64 - // RUN: %clang --target=loongarch64 -march=la464 -S -emit-llvm %s -o - | \ - // RUN: FileCheck %s --check-prefix=IR-LA464 -+// RUN: %clang --target=loongarch64 -march=la64v1.0 -S -emit-llvm %s -o - | \ -+// RUN: FileCheck %s --check-prefix=IR-LA64V1P0 -+// RUN: %clang --target=loongarch64 -march=la64v1.1 -S -emit-llvm %s -o - | \ -+// RUN: FileCheck %s --check-prefix=IR-LA64V1P1 - - // CC1-LOONGARCH64: "-target-cpu" "loongarch64" - // CC1-LOONGARCH64-NOT: "-target-feature" -@@ -19,8 +27,22 @@ - // CC1-LA464-NOT: "-target-feature" - // CC1-LA464: "-target-abi" "lp64d" - -+// CC1-LA64V1P0: "-target-cpu" "loongarch64" -+// CC1-LA64V1P0-NOT: "-target-feature" -+// CC1-LA64V1P0: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" -+// CC1-LA64V1P0-NOT: "-target-feature" -+// CC1-LA64V1P0: "-target-abi" "lp64d" -+ -+// CC1-LA64V1P1: "-target-cpu" "loongarch64" -+// CC1-LA64V1P1-NOT: "-target-feature" -+// CC1-LA64V1P1: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" "-target-feature" "+frecipe" -+// CC1-LA64V1P1-NOT: "-target-feature" -+// CC1-LA64V1P1: "-target-abi" "lp64d" -+ - // IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+f,+ual" - // IR-LA464: attributes #[[#]] ={{.*}}"target-cpu"="la464" {{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx,+ual" -+// IR-LA64V1P0: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+lsx,+ual" -+// IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+frecipe,+lsx,+ual" - - int foo(void) { - return 3; -diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c -index 635d029ce9d3..cfa3ddb20f10 100644 ---- a/clang/test/Preprocessor/init-loongarch.c -+++ b/clang/test/Preprocessor/init-loongarch.c -@@ -788,24 +788,43 @@ - // LA64-FPU0-LP64S-NOT: #define __loongarch_single_float - // LA64-FPU0-LP64S: #define __loongarch_soft_float 1 - --/// Check __loongarch_arch and __loongarch_tune. -+/// Check __loongarch_arch{_tune/_frecipe}. 
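-+/// e.g. `-march=la64v1.1` predefines __loongarch_arch "la64v1.1" and
-+/// __loongarch_frecipe 1, while `-march=la64v1.0` predefines
-+/// __loongarch_arch "la64v1.0" and leaves __loongarch_frecipe undefined.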
- - // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - | \ --// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s -+// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s - // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 | \ - // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s - // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la464 | \ - // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la464 -DTUNE=la464 %s - // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=loongarch64 | \ --// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s -+// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s - // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=la464 | \ --// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la464 %s -+// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=la464 %s - // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -mtune=la464 | \ - // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la464 %s - // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la464 -mtune=loongarch64 | \ - // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la464 -DTUNE=loongarch64 %s -+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 | \ -+// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s -+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang -lsx | \ -+// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s -+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +frecipe | \ -+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.1 -DTUNE=loongarch64 %s -+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx | \ -+// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s -+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 | \ -+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.1 -DTUNE=loongarch64 %s -+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -frecipe | \ -+// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s -+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lsx | \ -+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=loongarch64 -DTUNE=loongarch64 %s -+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +frecipe | \ -+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=loongarch64 -DTUNE=loongarch64 %s -+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +frecipe | \ -+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.1 
-DTUNE=loongarch64 %s - - // ARCH-TUNE: #define __loongarch_arch "[[ARCH]]" -+// FRECIPE: #define __loongarch_frecipe 1 - // ARCH-TUNE: #define __loongarch_tune "[[TUNE]]" - - // RUN: %clang --target=loongarch64 -mlsx -x c -E -dM %s -o - \ --- -2.20.1 - - -From 6094875aa6aab1e28a096294783cada0243e95d5 Mon Sep 17 00:00:00 2001 -From: Ami-zhang -Date: Tue, 23 Jul 2024 15:14:20 +0800 -Subject: [PATCH 8/9] [LoongArch] Support la664 (#100068) - -A new ProcessorModel called `la664` is defined in LoongArch.td to -support `-march/-mtune=la664`. - -(cherry picked from commit fcec298087dba0c83f6d0bbafd6cd934c42cbf82) ---- - clang/test/Driver/loongarch-march.c | 11 +++++++++++ - clang/test/Driver/loongarch-mtune.c | 5 +++++ - clang/test/Preprocessor/init-loongarch.c | 8 ++++++++ - 3 files changed, 24 insertions(+) - -diff --git a/clang/test/Driver/loongarch-march.c b/clang/test/Driver/loongarch-march.c -index d06da72a755c..2d5b315d962a 100644 ---- a/clang/test/Driver/loongarch-march.c -+++ b/clang/test/Driver/loongarch-march.c -@@ -6,6 +6,8 @@ - // RUN: FileCheck %s --check-prefix=CC1-LA64V1P0 - // RUN: %clang --target=loongarch64 -march=la64v1.1 -fsyntax-only %s -### 2>&1 | \ - // RUN: FileCheck %s --check-prefix=CC1-LA64V1P1 -+// RUN: %clang --target=loongarch64 -march=la664 -fsyntax-only %s -### 2>&1 | \ -+// RUN: FileCheck %s --check-prefix=CC1-LA664 - // RUN: %clang --target=loongarch64 -march=loongarch64 -S -emit-llvm %s -o - | \ - // RUN: FileCheck %s --check-prefix=IR-LOONGARCH64 - // RUN: %clang --target=loongarch64 -march=la464 -S -emit-llvm %s -o - | \ -@@ -14,6 +16,8 @@ - // RUN: FileCheck %s --check-prefix=IR-LA64V1P0 - // RUN: %clang --target=loongarch64 -march=la64v1.1 -S -emit-llvm %s -o - | \ - // RUN: FileCheck %s --check-prefix=IR-LA64V1P1 -+// RUN: %clang --target=loongarch64 -march=la664 -S -emit-llvm %s -o - | \ -+// RUN: FileCheck %s --check-prefix=IR-LA664 - - // CC1-LOONGARCH64: "-target-cpu" "loongarch64" - // CC1-LOONGARCH64-NOT: "-target-feature" -@@ -39,10 +43,17 @@ - // CC1-LA64V1P1-NOT: "-target-feature" - // CC1-LA64V1P1: "-target-abi" "lp64d" - -+// CC1-LA664: "-target-cpu" "la664" -+// CC1-LA664-NOT: "-target-feature" -+// CC1-LA664: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" "-target-feature" "+frecipe" -+// CC1-LA664-NOT: "-target-feature" -+// CC1-LA664: "-target-abi" "lp64d" -+ - // IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+f,+ual" - // IR-LA464: attributes #[[#]] ={{.*}}"target-cpu"="la464" {{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx,+ual" - // IR-LA64V1P0: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+lsx,+ual" - // IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+frecipe,+lsx,+ual" -+// IR-LA664: attributes #[[#]] ={{.*}}"target-cpu"="la664" {{.*}}"target-features"="+64bit,+d,+f,+frecipe,+lasx,+lsx,+ual" - - int foo(void) { - return 3; -diff --git a/clang/test/Driver/loongarch-mtune.c b/clang/test/Driver/loongarch-mtune.c -index 6f3f39e9bbd8..face12e1a1a8 100644 ---- a/clang/test/Driver/loongarch-mtune.c -+++ b/clang/test/Driver/loongarch-mtune.c -@@ -8,6 +8,11 @@ - // RUN: %clang --target=loongarch64 -mtune=la464 -S -emit-llvm %s -o - | \ - // RUN: FileCheck %s --check-prefix=IRATTR -DCPU=la464 - -+// RUN: %clang --target=loongarch64 -mtune=la664 -fsyntax-only %s -### 2>&1 | \ -+// RUN: FileCheck 
%s --check-prefix=CC1ARG -DCPU=la664 -+// RUN: %clang --target=loongarch64 -mtune=la664 -S -emit-llvm %s -o - | \ -+// RUN: FileCheck %s --check-prefix=IRATTR -DCPU=la664 -+ - // RUN: %clang --target=loongarch64 -mtune=invalidcpu -fsyntax-only %s -### 2>&1 | \ - // RUN: FileCheck %s --check-prefix=CC1ARG -DCPU=invalidcpu - // RUN: not %clang --target=loongarch64 -mtune=invalidcpu -S -emit-llvm %s -o /dev/null 2>&1 | \ -diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c -index cfa3ddb20f10..7ce3d2de8c78 100644 ---- a/clang/test/Preprocessor/init-loongarch.c -+++ b/clang/test/Preprocessor/init-loongarch.c -@@ -822,6 +822,14 @@ - // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=loongarch64 -DTUNE=loongarch64 %s - // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +frecipe | \ - // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.1 -DTUNE=loongarch64 %s -+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 | \ -+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la664 -DTUNE=la664 %s -+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=la664 | \ -+// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la664 %s -+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -mtune=la664 | \ -+// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la664 %s -+// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 -mtune=loongarch64 | \ -+// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la664 -DTUNE=loongarch64 %s - - // ARCH-TUNE: #define __loongarch_arch "[[ARCH]]" - // FRECIPE: #define __loongarch_frecipe 1 --- -2.20.1 - - -From 18f453f2dba969c1fdcbda562079113de7bbcfca Mon Sep 17 00:00:00 2001 -From: Ami-zhang -Date: Tue, 23 Jul 2024 15:20:30 +0800 -Subject: [PATCH 9/9] [LoongArch] Fix test issue of init-loongarch.c - -(cherry picked from commit d59925c39856f255f4dd4427ccc650f2c2692a24) ---- - clang/test/Preprocessor/init-loongarch.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c -index 7ce3d2de8c78..887b6d6af7e1 100644 ---- a/clang/test/Preprocessor/init-loongarch.c -+++ b/clang/test/Preprocessor/init-loongarch.c -@@ -825,7 +825,7 @@ - // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 | \ - // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la664 -DTUNE=la664 %s - // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=la664 | \ --// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la664 %s -+// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=la664 %s - // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -mtune=la664 | \ - // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la664 %s - // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 -mtune=loongarch64 | \ --- -2.20.1 - diff --git a/0021-AArch64-Delete-hip09-macro.patch b/0021-AArch64-Delete-hip09-macro.patch deleted file mode 100644 index 6e42a674e3d3b179d5e6896a4d226bd4658095c5..0000000000000000000000000000000000000000 --- a/0021-AArch64-Delete-hip09-macro.patch 
+++ /dev/null @@ -1,184 +0,0 @@ -From 42b0d16ab1ced5720e017fa9f6059c32489ab1bd Mon Sep 17 00:00:00 2001 -From: xiajingze -Date: Wed, 9 Oct 2024 17:13:49 +0800 -Subject: [PATCH] [AArch64] Delete hip09 macro - -Signed-off-by: xiajingze ---- - clang/test/CMakeLists.txt | 1 - - clang/test/Driver/aarch64-hip09.c | 1 - - .../test/Misc/target-invalid-cpu-note-hip09.c | 97 ------------------- - clang/test/Misc/target-invalid-cpu-note.c | 5 +- - clang/test/lit.site.cfg.py.in | 4 - - 5 files changed, 2 insertions(+), 106 deletions(-) - delete mode 100644 clang/test/Misc/target-invalid-cpu-note-hip09.c - -diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt -index 25e4e1f300bd..b88694deb818 100644 ---- a/clang/test/CMakeLists.txt -+++ b/clang/test/CMakeLists.txt -@@ -19,7 +19,6 @@ llvm_canonicalize_cmake_booleans( - LLVM_WITH_Z3 - PPC_LINUX_DEFAULT_IEEELONGDOUBLE - LLVM_TOOL_LLVM_DRIVER_BUILD -- LLVM_ENABLE_AARCH64_HIP09 - ) - - configure_lit_site_cfg( -diff --git a/clang/test/Driver/aarch64-hip09.c b/clang/test/Driver/aarch64-hip09.c -index 156be3f38cde..5851796a2bd6 100644 ---- a/clang/test/Driver/aarch64-hip09.c -+++ b/clang/test/Driver/aarch64-hip09.c -@@ -1,4 +1,3 @@ --// REQUIRES: enable_enable_aarch64_hip09 - // RUN: %clang -target aarch64_be -mcpu=hip09 -### -c %s 2>&1 | FileCheck -check-prefix=hip09-BE %s - // RUN: %clang -target aarch64 -mbig-endian -mcpu=hip09 -### -c %s 2>&1 | FileCheck -check-prefix=hip09-BE %s - // RUN: %clang -target aarch64_be -mbig-endian -mcpu=hip09 -### -c %s 2>&1 | FileCheck -check-prefix=hip09-BE %s -diff --git a/clang/test/Misc/target-invalid-cpu-note-hip09.c b/clang/test/Misc/target-invalid-cpu-note-hip09.c -deleted file mode 100644 -index f2561a0890fc..000000000000 ---- a/clang/test/Misc/target-invalid-cpu-note-hip09.c -+++ /dev/null -@@ -1,97 +0,0 @@ --// REQUIRES: enable_enable_aarch64_hip09 --// Use CHECK-NEXT instead of multiple CHECK-SAME to ensure we will fail if there is anything extra in the output. 
--// RUN: not %clang_cc1 -triple armv5--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix ARM --// ARM: error: unknown target CPU 'not-a-cpu' --// ARM-NEXT: note: valid target CPU values are: arm8, arm810, strongarm, strongarm110, strongarm1100, strongarm1110, arm7tdmi, arm7tdmi-s, arm710t, arm720t, arm9, arm9tdmi, arm920, arm920t, arm922t, arm940t, ep9312, arm10tdmi, arm1020t, arm9e, arm946e-s, arm966e-s, arm968e-s, arm10e, arm1020e, arm1022e, arm926ej-s, arm1136j-s, arm1136jf-s, mpcore, mpcorenovfp, arm1176jz-s, arm1176jzf-s, arm1156t2-s, arm1156t2f-s, cortex-m0, cortex-m0plus, cortex-m1, sc000, cortex-a5, cortex-a7, cortex-a8, cortex-a9, cortex-a12, cortex-a15, cortex-a17, krait, cortex-r4, cortex-r4f, cortex-r5, cortex-r7, cortex-r8, cortex-r52, sc300, cortex-m3, cortex-m4, cortex-m7, cortex-m23, cortex-m33, cortex-m35p, cortex-m55, cortex-m85, cortex-a32, cortex-a35, cortex-a53, cortex-a55, cortex-a57, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-x1, cortex-x1c, neoverse-n1, neoverse-n2, neoverse-v1, cyclone, exynos-m3, exynos-m4, exynos-m5, kryo, iwmmxt, xscale, swift{{$}} -- --// RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64 --// AARCH64: error: unknown target CPU 'not-a-cpu' --// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, hip09, grace{{$}} -- --// RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64 --// TUNE_AARCH64: error: unknown target CPU 'not-a-cpu' --// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, hip09, grace{{$}} -- --// RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86 --// X86: error: unknown target CPU 'not-a-cpu' --// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, 
goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}} -- --// RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64 --// X86_64: error: unknown target CPU 'not-a-cpu' --// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}} -- --// RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86 --// TUNE_X86: error: unknown target CPU 'not-a-cpu' --// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}} -- --// RUN: not %clang_cc1 -triple x86_64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86_64 --// TUNE_X86_64: error: unknown target CPU 'not-a-cpu' --// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, 
graniterapids-d, emeraldrapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}} -- --// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX --// NVPTX: error: unknown target CPU 'not-a-cpu' --// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151{{$}} -- --// RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600 --// R600: error: unknown target CPU 'not-a-cpu' --// R600-NEXT: note: valid target CPU values are: r600, rv630, rv635, r630, rs780, rs880, rv610, rv620, rv670, rv710, rv730, rv740, rv770, cedar, palm, cypress, hemlock, juniper, redwood, sumo, sumo2, barts, caicos, aruba, cayman, turks{{$}} -- --// RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN --// AMDGCN: error: unknown target CPU 'not-a-cpu' --// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151{{$}} -- --// RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM --// WEBASM: error: unknown target CPU 'not-a-cpu' --// WEBASM-NEXT: note: valid target CPU values are: mvp, bleeding-edge, generic{{$}} -- --// RUN: not %clang_cc1 -triple systemz--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix SYSTEMZ --// SYSTEMZ: error: unknown target CPU 'not-a-cpu' --// SYSTEMZ-NEXT: note: valid target CPU values are: arch8, z10, arch9, z196, arch10, zEC12, arch11, z13, arch12, z14, arch13, z15, arch14, z16{{$}} -- --// RUN: not %clang_cc1 -triple sparc--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix SPARC --// SPARC: error: unknown target CPU 'not-a-cpu' --// SPARC-NEXT: note: valid target CPU values are: v8, supersparc, sparclite, f934, hypersparc, sparclite86x, sparclet, tsc701, v9, ultrasparc, ultrasparc3, niagara, niagara2, niagara3, niagara4, ma2100, ma2150, ma2155, ma2450, ma2455, ma2x5x, ma2080, ma2085, ma2480, ma2485, ma2x8x, myriad2, myriad2.1, myriad2.2, myriad2.3, leon2, at697e, at697f, leon3, ut699, gr712rc, leon4, gr740{{$}} -- --// RUN: not %clang_cc1 -triple sparcv9--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix SPARCV9 --// SPARCV9: error: unknown target CPU 'not-a-cpu' --// SPARCV9-NEXT: 
note: valid target CPU values are: v9, ultrasparc, ultrasparc3, niagara, niagara2, niagara3, niagara4{{$}} -- --// RUN: not %clang_cc1 -triple powerpc--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix PPC --// PPC: error: unknown target CPU 'not-a-cpu' --// PPC-NEXT: note: valid target CPU values are: generic, 440, 450, 601, 602, 603, 603e, 603ev, 604, 604e, 620, 630, g3, 7400, g4, 7450, g4+, 750, 8548, 970, g5, a2, e500, e500mc, e5500, power3, pwr3, power4, pwr4, power5, pwr5, power5x, pwr5x, power6, pwr6, power6x, pwr6x, power7, pwr7, power8, pwr8, power9, pwr9, power10, pwr10, powerpc, ppc, ppc32, powerpc64, ppc64, powerpc64le, ppc64le, future{{$}} -- --// RUN: not %clang_cc1 -triple mips--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix MIPS --// MIPS: error: unknown target CPU 'not-a-cpu' --// MIPS-NEXT: note: valid target CPU values are: mips1, mips2, mips3, mips4, mips5, mips32, mips32r2, mips32r3, mips32r5, mips32r6, mips64, mips64r2, mips64r3, mips64r5, mips64r6, octeon, octeon+, p5600{{$}} -- --// RUN: not %clang_cc1 -triple lanai--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix LANAI --// LANAI: error: unknown target CPU 'not-a-cpu' --// LANAI-NEXT: note: valid target CPU values are: v11{{$}} -- --// RUN: not %clang_cc1 -triple hexagon--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix HEXAGON --// HEXAGON: error: unknown target CPU 'not-a-cpu' --// HEXAGON-NEXT: note: valid target CPU values are: hexagonv5, hexagonv55, hexagonv60, hexagonv62, hexagonv65, hexagonv66, hexagonv67, hexagonv67t, hexagonv68, hexagonv69, hexagonv71, hexagonv71t, hexagonv73{{$}} -- --// RUN: not %clang_cc1 -triple bpf--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix BPF --// BPF: error: unknown target CPU 'not-a-cpu' --// BPF-NEXT: note: valid target CPU values are: generic, v1, v2, v3, probe{{$}} -- --// RUN: not %clang_cc1 -triple avr--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AVR --// AVR: error: unknown target CPU 'not-a-cpu' --// AVR-NEXT: note: valid target CPU values are: avr1, at90s1200, attiny11, attiny12, attiny15, attiny28, avr2, at90s2313, at90s2323, at90s2333, at90s2343, attiny22, attiny26, at86rf401, at90s4414, at90s4433, at90s4434, at90s8515, at90c8534, at90s8535, avr25, ata5272, ata6616c, attiny13, attiny13a, attiny2313, attiny2313a, attiny24, attiny24a, attiny4313, attiny44, attiny44a, attiny84, attiny84a, attiny25, attiny45, attiny85, attiny261, attiny261a, attiny441, attiny461, attiny461a, attiny841, attiny861, attiny861a, attiny87, attiny43u, attiny48, attiny88, attiny828, avr3, at43usb355, at76c711, avr31, atmega103, at43usb320, avr35, attiny167, at90usb82, at90usb162, ata5505, ata6617c, ata664251, atmega8u2, atmega16u2, atmega32u2, attiny1634, avr4, atmega8, ata6289, atmega8a, ata6285, ata6286, ata6612c, atmega48, atmega48a, atmega48pa, atmega48pb, atmega48p, atmega88, atmega88a, atmega88p, atmega88pa, atmega88pb, atmega8515, atmega8535, atmega8hva, at90pwm1, at90pwm2, at90pwm2b, at90pwm3, at90pwm3b, at90pwm81, avr5, ata5702m322, ata5782, ata5790, ata5790n, ata5791, ata5795, ata5831, ata6613c, ata6614q, ata8210, ata8510, atmega16, atmega16a, atmega161, atmega162, atmega163, atmega164a, atmega164p, atmega164pa, atmega165, atmega165a, atmega165p, atmega165pa, atmega168, atmega168a, atmega168p, atmega168pa, atmega168pb, atmega169, atmega169a, atmega169p, atmega169pa, atmega32, atmega32a, atmega323, atmega324a, 
atmega324p, atmega324pa, atmega324pb, atmega325, atmega325a, atmega325p, atmega325pa, atmega3250, atmega3250a, atmega3250p, atmega3250pa, atmega328, atmega328p, atmega328pb, atmega329, atmega329a, atmega329p, atmega329pa, atmega3290, atmega3290a, atmega3290p, atmega3290pa, atmega406, atmega64, atmega64a, atmega640, atmega644, atmega644a, atmega644p, atmega644pa, atmega645, atmega645a, atmega645p, atmega649, atmega649a, atmega649p, atmega6450, atmega6450a, atmega6450p, atmega6490, atmega6490a, atmega6490p, atmega64rfr2, atmega644rfr2, atmega16hva, atmega16hva2, atmega16hvb, atmega16hvbrevb, atmega32hvb, atmega32hvbrevb, atmega64hve, atmega64hve2, at90can32, at90can64, at90pwm161, at90pwm216, at90pwm316, atmega32c1, atmega64c1, atmega16m1, atmega32m1, atmega64m1, atmega16u4, atmega32u4, atmega32u6, at90usb646, at90usb647, at90scr100, at94k, m3000, avr51, atmega128, atmega128a, atmega1280, atmega1281, atmega1284, atmega1284p, atmega128rfa1, atmega128rfr2, atmega1284rfr2, at90can128, at90usb1286, at90usb1287, avr6, atmega2560, atmega2561, atmega256rfr2, atmega2564rfr2, avrxmega2, atxmega16a4, atxmega16a4u, atxmega16c4, atxmega16d4, atxmega32a4, atxmega32a4u, atxmega32c3, atxmega32c4, atxmega32d3, atxmega32d4, atxmega32e5, atxmega16e5, atxmega8e5, avrxmega4, atxmega64a3, atxmega64a3u, atxmega64a4u, atxmega64b1, atxmega64b3, atxmega64c3, atxmega64d3, atxmega64d4, avrxmega5, atxmega64a1, atxmega64a1u, avrxmega6, atxmega128a3, atxmega128a3u, atxmega128b1, atxmega128b3, atxmega128c3, atxmega128d3, atxmega128d4, atxmega192a3, atxmega192a3u, atxmega192c3, atxmega192d3, atxmega256a3, atxmega256a3u, atxmega256a3b, atxmega256a3bu, atxmega256c3, atxmega256d3, atxmega384c3, atxmega384d3, avrxmega7, atxmega128a1, atxmega128a1u, atxmega128a4u, avrtiny, attiny4, attiny5, attiny9, attiny10, attiny20, attiny40, attiny102, attiny104, avrxmega3, attiny202, attiny402, attiny204, attiny404, attiny804, attiny1604, attiny406, attiny806, attiny1606, attiny807, attiny1607, attiny212, attiny412, attiny214, attiny414, attiny814, attiny1614, attiny416, attiny816, attiny1616, attiny3216, attiny417, attiny817, attiny1617, attiny3217, attiny1624, attiny1626, attiny1627, atmega808, atmega809, atmega1608, atmega1609, atmega3208, atmega3209, atmega4808, atmega4809 -- --// RUN: not %clang_cc1 -triple riscv32 -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix RISCV32 --// RISCV32: error: unknown target CPU 'not-a-cpu' --// RISCV32-NEXT: note: valid target CPU values are: generic-rv32, rocket-rv32, sifive-e20, sifive-e21, sifive-e24, sifive-e31, sifive-e34, sifive-e76, syntacore-scr1-base, syntacore-scr1-max{{$}} -- --// RUN: not %clang_cc1 -triple riscv64 -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix RISCV64 --// RISCV64: error: unknown target CPU 'not-a-cpu' --// RISCV64-NEXT: note: valid target CPU values are: generic-rv64, rocket-rv64, sifive-s21, sifive-s51, sifive-s54, sifive-s76, sifive-u54, sifive-u74, sifive-x280{{$}} -- --// RUN: not %clang_cc1 -triple riscv32 -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE-RISCV32 --// TUNE-RISCV32: error: unknown target CPU 'not-a-cpu' --// TUNE-RISCV32-NEXT: note: valid target CPU values are: generic-rv32, rocket-rv32, sifive-e20, sifive-e21, sifive-e24, sifive-e31, sifive-e34, sifive-e76, syntacore-scr1-base, syntacore-scr1-max, generic, rocket, sifive-7-series{{$}} -- --// RUN: not %clang_cc1 -triple riscv64 -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE-RISCV64 --// 
TUNE-RISCV64: error: unknown target CPU 'not-a-cpu' --// TUNE-RISCV64-NEXT: note: valid target CPU values are: generic-rv64, rocket-rv64, sifive-s21, sifive-s51, sifive-s54, sifive-s76, sifive-u54, sifive-u74, sifive-x280, generic, rocket, sifive-7-series{{$}} -diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c -index 466b262639a2..906be590e857 100644 ---- a/clang/test/Misc/target-invalid-cpu-note.c -+++ b/clang/test/Misc/target-invalid-cpu-note.c -@@ -1,4 +1,3 @@ --// UNSUPPORTED: enable_enable_aarch64_hip09 - // Use CHECK-NEXT instead of multiple CHECK-SAME to ensure we will fail if there is anything extra in the output. - // RUN: not %clang_cc1 -triple armv5--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix ARM - // ARM: error: unknown target CPU 'not-a-cpu' -@@ -6,11 +5,11 @@ - - // RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64 - // AARCH64: error: unknown target CPU 'not-a-cpu' --// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, grace{{$}} -+// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, hip09, grace{{$}} - - // RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64 - // TUNE_AARCH64: error: unknown target CPU 'not-a-cpu' --// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, grace{{$}} -+// TUNE_AARCH64-NEXT: note: 
valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, hip09, grace{{$}}
- 
- // RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86
- // X86: error: unknown target CPU 'not-a-cpu'
-diff --git a/clang/test/lit.site.cfg.py.in b/clang/test/lit.site.cfg.py.in
-index 7728be7d4d8d..89b7cafdc1d8 100644
---- a/clang/test/lit.site.cfg.py.in
-+++ b/clang/test/lit.site.cfg.py.in
-@@ -43,14 +43,10 @@ config.ppc_linux_default_ieeelongdouble = @PPC_LINUX_DEFAULT_IEEELONGDOUBLE@
- config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@
- config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@
- config.build_for_openeuler = @BUILD_FOR_OPENEULER@
---config.enable_enable_aarch64_hip09 = @LLVM_ENABLE_AARCH64_HIP09@
- 
- import lit.llvm
- lit.llvm.initialize(lit_config, config)
- 
---if config.enable_enable_aarch64_hip09:
--- config.available_features.add("enable_enable_aarch64_hip09")
---
- # Let the main config do the real work.
- lit_config.load_config(
- config, os.path.join(config.clang_src_dir, "test/lit.cfg.py"))
---
-2.43.0
-
diff --git a/0022-Driver-Pass-z-arg-and-Wl-z-arg-option-to-the-linker.patch b/0022-Driver-Pass-z-arg-and-Wl-z-arg-option-to-the-linker.patch
deleted file mode 100644
index 536f3dff8b0c4aabe71ba2fe7896df7ee8f5eace..0000000000000000000000000000000000000000
--- a/0022-Driver-Pass-z-arg-and-Wl-z-arg-option-to-the-linker.patch
+++ /dev/null
@@ -1,93 +0,0 @@
-From 822d849ad922edc6306f96efe3e936359bb4541f Mon Sep 17 00:00:00 2001
-From: xiajingze
-Date: Thu, 31 Oct 2024 16:30:51 +0800
-Subject: [PATCH] [Driver] Pass -z <arg> and -Wl,-z <arg> option to the
- linker
-
-Signed-off-by: xiajingze
----
- clang/include/clang/Driver/Options.td | 5 +++++
- clang/lib/Driver/Driver.cpp | 18 ++++++++++++++++++
- clang/test/Driver/z-args.c | 15 +++++++++++++++
- 3 files changed, 38 insertions(+)
- create mode 100644 clang/test/Driver/z-args.c
-
-diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
-index 344c8bd49da7..b40046a740a8 100644
---- a/clang/include/clang/Driver/Options.td
-+++ b/clang/include/clang/Driver/Options.td
-@@ -886,8 +886,13 @@ def Xopenmp_target : Separate<["-"], "Xopenmp-target">, Group
- def Xopenmp_target_EQ : JoinedAndSeparate<["-"], "Xopenmp-target=">, Group<CompileOnly_Group>,
- HelpText<"Pass <arg> to the target offloading toolchain identified by <triple>.">,
- MetaVarName<"<triple> <arg>">;
-+#ifdef BUILD_FOR_OPENEULER
-+def z : JoinedOrSeparate<["-"], "z">, Flags<[LinkerInput]>,
-+ HelpText<"Pass -z <arg> or -z<arg> to the linker">, MetaVarName<"<arg>">,
-+#else
- def z : Separate<["-"], "z">, Flags<[LinkerInput]>,
- HelpText<"Pass -z <arg> to the linker">, MetaVarName<"<arg>">,
-+#endif
- Group<Link_Group>;
- def offload_link : Flag<["--"], "offload-link">, Group<Link_Group>,
- HelpText<"Use the new offloading linker to perform the link job.">;
-diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
-index bd9db7714f95..28b33c8862e4 100644
---- a/clang/lib/Driver/Driver.cpp
-+++ b/clang/lib/Driver/Driver.cpp
-@@ -2597,6 +2597,16 @@ void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
- }
- }
- 
-+#ifdef BUILD_FOR_OPENEULER
-+llvm::DenseSet<StringRef> ZArgsList{
-+ "defs", "muldefs", "execstack", "noexecstack", "globalaudit", "combreloc",
-+ "nocombreloc", "global", "initfirst", "interpose", "lazy", "loadfltr",
-+ "nocopyreloc", "nodefaultlib", "nodelete", "nodlopen", "nodump", "now",
-+ "origin", "relro", "norelro", "separate-code", "noseparate-code", "common",
-+ "nocommon", "text", "notext", "textoff"
-+};
-+#endif
-+
- bool Driver::DiagnoseInputExistence(const DerivedArgList &Args, StringRef Value,
- types::ID Ty, bool TypoCorrect) const {
- if (!getCheckInputsExist())
-@@ -2673,6 +2683,14 @@ bool Driver::DiagnoseInputExistence(const DerivedArgList &Args, StringRef Value,
- if (IsCLMode() && Ty == types::TY_Object && !Value.startswith("/"))
- return true;
- 
-+#ifdef BUILD_FOR_OPENEULER
-+ if (ZArgsList.find(Value) != ZArgsList.end() ||
-+ Value.starts_with("common-page-size=") ||
-+ Value.starts_with("max-page-size=") ||
-+ Value.starts_with("stack-size="))
-+ return true;
-+#endif
-+
- Diag(clang::diag::err_drv_no_such_file) << Value;
- return false;
- }
-diff --git a/clang/test/Driver/z-args.c b/clang/test/Driver/z-args.c
-new file mode 100644
-index 000000000000..83bb2b570e69
---- /dev/null
-+++ b/clang/test/Driver/z-args.c
-@@ -0,0 +1,15 @@
-+// REQUIRES: build_for_openeuler
-+// RUN: %clang -### -znow 2>&1 | FileCheck -check-prefix=CHECK-LINKER %s
-+// CHECK-LINKER: "-z" "now"
-+
-+// RUN: %clang -### -Wl,-z now 2>&1 | FileCheck -check-prefix=CHECK-WLCOMMAZ %s
-+// CHECK-WLCOMMAZ: "-z" "now"
-+// RUN: %clang -### -Wl,-z -Wl,now 2>&1 | FileCheck \
-+// RUN: -check-prefix=CHECK-WLCOMMAZ1 %s
-+// CHECK-WLCOMMAZ1: "-z" "now"
-+// RUN: %clang -### -Wl,-z -O3 now 2>&1 | FileCheck \
-+// RUN: -check-prefix=CHECK-WLCOMMAZ2 %s
-+// CHECK-WLCOMMAZ2: "-z" "now"
-+// RUN: %clang -### -Wl,-z stack-size=1 2>&1 | FileCheck \
-+// RUN: -check-prefix=CHECK-WLCOMMAZ3 %s
-+// CHECK-WLCOMMAZ3: "-z" "stack-size=1"
-\ No newline at end of file
---
-2.43.0
-
diff --git a/0023-Handling-of-option-Wall-and-Werror-format-2-override.patch b/0023-Handling-of-option-Wall-and-Werror-format-2-override.patch
deleted file mode 100644
index 571b833f21a7fa6b2a1190ed414b6ef1ec1e0603..0000000000000000000000000000000000000000
--- a/0023-Handling-of-option-Wall-and-Werror-format-2-override.patch
+++ /dev/null
@@ -1,225 +0,0 @@
-From 001e7941bc936847b07da2fdb4b19a8adcba7718 Mon Sep 17 00:00:00 2001
-From: liyunfei
-Date: Fri, 19 Jul 2024 10:44:49 +0800
-Subject: [PATCH 1/2] Complete -fgcc-compatible option scope
-
-Complete -fgcc-compatible option scope to Langopts and Diagopts
-
-(cherry picked from commit 8881224782ade2afaab4860f3462e44b7d5c2601)
-Signed-off-by: wangqiang
----
- clang/include/clang/Basic/DiagnosticOptions.def | 4 ++++
- clang/include/clang/Basic/LangOptions.def | 4 ++++
- clang/include/clang/Driver/Options.td | 6 ++++--
- clang/lib/Driver/ToolChains/Clang.cpp | 2 ++
- clang/lib/Frontend/CompilerInvocation.cpp | 15 +++++++++++++--
- 5 files changed, 27 insertions(+), 4 deletions(-)
-
-diff --git a/clang/include/clang/Basic/DiagnosticOptions.def b/clang/include/clang/Basic/DiagnosticOptions.def
-index 6d0c1b14acc1..5253e951d403 100644
---- a/clang/include/clang/Basic/DiagnosticOptions.def
-+++ b/clang/include/clang/Basic/DiagnosticOptions.def
-@@ -99,6 +99,10 @@ VALUE_DIAGOPT(MessageLength, 32, 0)
- 
- DIAGOPT(ShowSafeBufferUsageSuggestions, 1, 0)
- 
-+#ifdef BUILD_FOR_OPENEULER
-+DIAGOPT(GccCompatible, 1, 0) /// -fgcc-compatible
-+#endif
-+
- #undef DIAGOPT
- #undef ENUM_DIAGOPT
- #undef VALUE_DIAGOPT
-diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
-index f7ec0406f33e..eb62a4951c65 100644
---- a/clang/include/clang/Basic/LangOptions.def
-+++ b/clang/include/clang/Basic/LangOptions.def
-@@ -468,6 +468,10 @@ LANGOPT(IncrementalExtensions, 1, 0, " True if we want to process statements"
- 
- BENIGN_LANGOPT(CheckNew, 1, 0, "Do not assume C++ operator new may not return NULL")
- 
-+#ifdef BUILD_FOR_OPENEULER
-+LANGOPT(GccCompatible, 1, 0, "Enable gcc compatibility for openEuler.")
-+#endif
-+
- #undef LANGOPT
- #undef COMPATIBLE_LANGOPT
- #undef BENIGN_LANGOPT
-diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
-index 71d6ed66ab96..344c8bd49da7 100644
---- a/clang/include/clang/Driver/Options.td
-+++ b/clang/include/clang/Driver/Options.td
-@@ -1810,9 +1810,11 @@ def fautotune_rank : Flag<["-"], "fautotune-rank">, Group<f_Group>,
- #endif
- 
- #ifdef BUILD_FOR_OPENEULER
--def fgcc_compatible : Flag<["-"], "fgcc-compatible">, Group<f_Group>,
-+def fgcc_compatible : Flag<["-"], "fgcc-compatible">,
-+ Flags<[CC1Option]>,
-+ MarshallingInfoFlag<DiagnosticOpts<"GccCompatible">>,
- HelpText<"Enable gcc compatibility for openEuler.">;
--def fno_gcc_compatible : Flag<["-"], "fno-gcc-compatible">, Group<f_Group>;
-+def fno_gcc_compatible : Flag<["-"], "fno-gcc-compatible">, Flags<[CC1Option]>;
- #endif
- 
- // Begin sanitizer flags. These should all be core options exposed in all driver
-diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
-index c49cb43ff19c..fac4f03d6193 100644
---- a/clang/lib/Driver/ToolChains/Clang.cpp
-+++ b/clang/lib/Driver/ToolChains/Clang.cpp
-@@ -4725,6 +4725,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
- CmdArgs.push_back("-Wno-error=varargs");
- CmdArgs.push_back("-Wno-error=unused-value");
- CmdArgs.push_back("-Wno-error=format-nonliteral");
-+
-+ CmdArgs.push_back("-fgcc-compatible");
- }
- #endif
- 
-diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
-index 1fba91bed041..d7b609ef276c 100644
---- a/clang/lib/Frontend/CompilerInvocation.cpp
-+++ b/clang/lib/Frontend/CompilerInvocation.cpp
-@@ -818,8 +818,9 @@ static void addDiagnosticArgs(ArgList &Args, OptSpecifier Group,
- std::vector<std::string> &Diagnostics) {
- for (auto *A : Args.filtered(Group)) {
- if (A->getOption().getKind() == Option::FlagClass) {
-- // The argument is a pure flag (such as OPT_Wall or OPT_Wdeprecated). Add
-- // its name (minus the "W" or "R" at the beginning) to the diagnostics.
-+ // The argument is a pure flag (such as OPT_Wall or
-+ // OPT_Wdeprecated). Add its name (minus the "W" or "R" at the
-+ // beginning) to the diagnostics.
- Diagnostics.push_back(
- std::string(A->getOption().getName().drop_front(1)));
- } else if (A->getOption().matches(GroupWithValue)) {
-@@ -829,6 +830,7 @@ static void addDiagnosticArgs(ArgList &Args, OptSpecifier Group,
- std::string(A->getOption().getName().drop_front(1).rtrim("=-")));
- } else {
- // Otherwise, add its value (for OPT_W_Joined and similar).
-+ - Diagnostics.push_back(A->getValue()); - } - } -@@ -3522,6 +3524,11 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts, - - if (!Opts.RandstructSeed.empty()) - GenerateArg(Args, OPT_frandomize_layout_seed_EQ, Opts.RandstructSeed, SA); -+ -+#ifdef BUILD_FOR_OPENEULER -+ if (Opts.GccCompatible) -+ GenerateArg(Args, OPT_fgcc_compatible, SA); -+#endif - } - - bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, -@@ -4073,6 +4080,10 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, - Diags.Report(diag::err_drv_hlsl_unsupported_target) << T.str(); - } - -+#ifdef BUILD_FOR_OPENEULER -+ Opts.GccCompatible = Args.hasArg(options::OPT_fgcc_compatible); -+#endif -+ - return Diags.getNumErrors() == NumErrorsBefore; - } - --- -2.43.0 - - -From c6f76aa5cdb02c376df17aafadf2dd7cf41fe5b1 Mon Sep 17 00:00:00 2001 -From: wangqiang -Date: Fri, 19 Jul 2024 11:01:22 +0800 -Subject: [PATCH 2/2] Handling of option `-Wall` and `-Werror=format=2` - override `-Wno` - -Fix nfs-utils build issue - -Signed-off-by: wangqiang ---- - clang/lib/Frontend/CompilerInvocation.cpp | 32 ++++++++++++++++++++++- - clang/test/Driver/test-warnning.c | 15 +++++++++++ - 2 files changed, 46 insertions(+), 1 deletion(-) - create mode 100644 clang/test/Driver/test-warnning.c - -diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp -index d7b609ef276c..cbb122cc6eeb 100644 ---- a/clang/lib/Frontend/CompilerInvocation.cpp -+++ b/clang/lib/Frontend/CompilerInvocation.cpp -@@ -817,10 +817,40 @@ static void addDiagnosticArgs(ArgList &Args, OptSpecifier Group, - OptSpecifier GroupWithValue, - std::vector &Diagnostics) { - for (auto *A : Args.filtered(Group)) { -+#ifdef BUILD_FOR_OPENEULER -+ bool GccCompatible = Args.hasFlag(options::OPT_fgcc_compatible, -+ options::OPT_fno_gcc_compatible, false); - if (A->getOption().getKind() == Option::FlagClass) { - // The argument is a pure flag (such as OPT_Wall or - // OPT_Wdeprecated). Add its name (minus the "W" or "R" at the - // beginning) to the diagnostics. -+ if (A->getOption().getName() == "Wall" && GccCompatible) { -+ // Avoid -Wall and -Werror=format=2 override -Wno-xxx -+ Diagnostics.insert( -+ Diagnostics.begin(), -+ std::string(A->getOption().getName().drop_front(1))); -+ } else { -+ Diagnostics.push_back( -+ std::string(A->getOption().getName().drop_front(1))); -+ } -+ } else if (A->getOption().matches(GroupWithValue)) { -+ // This is -Wfoo= or -Rfoo=, where foo is the name of the diagnostic -+ // group. Add only the group name to the diagnostics. -+ Diagnostics.push_back(std::string( -+ A->getOption().getName().drop_front(1).rtrim("=-"))); -+ } else { -+ // Otherwise, add its value (for OPT_W_Joined and similar). -+ if (std::string(A->getValue()) == "error=format=2" && GccCompatible) { -+ // Avoid -Werror=format=2 override -Wno-xxx -+ Diagnostics.insert(Diagnostics.begin(), A->getValue()); -+ } else { -+ Diagnostics.push_back(A->getValue()); -+ } -+ } -+#else -+ if (A->getOption().getKind() == Option::FlagClass) { -+ // The argument is a pure flag (such as OPT_Wall or OPT_Wdeprecated). Add -+ // its name (minus the "W" or "R" at the beginning) to the diagnostics. 
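The second 0023 commit is purely about ordering: the collected warning arguments are applied left to right with last-one-wins semantics, so front-inserting "all" and "error=format=2" lets a later, explicit -Wno-... opt-out keep working, as it does under gcc. A toy model of just that ordering effect (real clang resolves warning groups in DiagnosticsEngine, not with a flat scan like this):

```cpp
// Models only the ordering that the front-insertion above relies on.
#include <iostream>
#include <string>
#include <vector>

static bool formatSecurityIsError(const std::vector<std::string> &Diags) {
  bool Err = false;
  for (const std::string &D : Diags) {
    if (D == "error=format=2")
      Err = true; // broad upgrade to error
    else if (D == "no-format-security")
      Err = false; // user's explicit opt-out
  }
  return Err;
}

int main() {
  // push_back order: the opt-out is overridden (the nfs-utils failure).
  std::cout << formatSecurityIsError({"no-format-security", "error=format=2"});
  // front-inserted order: the opt-out wins, matching gcc.
  std::cout << formatSecurityIsError({"error=format=2", "no-format-security"})
            << '\n'; // prints "10"
}
```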
- Diagnostics.push_back( - std::string(A->getOption().getName().drop_front(1))); - } else if (A->getOption().matches(GroupWithValue)) { -@@ -830,9 +860,9 @@ static void addDiagnosticArgs(ArgList &Args, OptSpecifier Group, - std::string(A->getOption().getName().drop_front(1).rtrim("=-"))); - } else { - // Otherwise, add its value (for OPT_W_Joined and similar). -- - Diagnostics.push_back(A->getValue()); - } -+#endif - } - } - -diff --git a/clang/test/Driver/test-warnning.c b/clang/test/Driver/test-warnning.c -new file mode 100644 -index 000000000000..641f9e3512d5 ---- /dev/null -+++ b/clang/test/Driver/test-warnning.c -@@ -0,0 +1,15 @@ -+// REQUIRES: build_for_openeuler -+ -+// RUN: %clang -v -fgcc-compatible -Wno-format-security -Werror=format=2 -Wall %s -+// RUN: %clang -v -Wall %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR %s -+// CHECK-ERROR: warning: format string is not a string literal (potentially insecure) -+// RUN: %clang -v -Wno-format-security -Werror=format=2 -Wall %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR %s -+// CHECK-ERROR: error: format string is not a string literal (potentially insecure) -+ -+#include -+ -+int main() { -+ char *str = "llvm-project"; -+ printf(str); -+ return 0; -+} -\ No newline at end of file --- -2.43.0 - diff --git a/0024-Backport-PATCH-Clang-CodeGen-Add__builtin_bcopy.patch b/0024-Backport-PATCH-Clang-CodeGen-Add__builtin_bcopy.patch deleted file mode 100644 index 346d0daad23eec74df91a35262081b873323a911..0000000000000000000000000000000000000000 --- a/0024-Backport-PATCH-Clang-CodeGen-Add__builtin_bcopy.patch +++ /dev/null @@ -1,146 +0,0 @@ -From 25282cd2e1bbae9c68a4f0df21fef831331503f4 Mon Sep 17 00:00:00 2001 -From: Carlos Eduardo Seo -Date: Sun, 24 Sep 2023 11:58:14 -0300 -Subject: [PATCH] [Clang][CodeGen] Add __builtin_bcopy (#67130) - -Add __builtin_bcopy to the list of GNU builtins. This was causing a -series of test failures in glibc. - -Adjust the tests to reflect the changes in codegen. - -Fixes #51409. -Fixes #63065. ---- - clang/include/clang/Basic/Builtins.def | 3 ++- - clang/lib/AST/Decl.cpp | 6 ++++++ - clang/lib/CodeGen/CGBuiltin.cpp | 14 ++++++++++++++ - clang/test/Analysis/bstring.c | 3 +-- - clang/test/Analysis/security-syntax-checks.m | 4 ++-- - .../CodeGen/PowerPC/builtins-ppc-xlcompat-macros.c | 6 +++--- - 6 files changed, 28 insertions(+), 8 deletions(-) - -diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def -index 6dad8b512bd2..3f2cbcedc4b5 100644 ---- a/clang/include/clang/Basic/Builtins.def -+++ b/clang/include/clang/Basic/Builtins.def -@@ -560,7 +560,7 @@ BUILTIN(__builtin_va_copy, "vAA", "n") - BUILTIN(__builtin_stdarg_start, "vA.", "nt") - BUILTIN(__builtin_assume_aligned, "v*vC*z.", "nctE") - BUILTIN(__builtin_bcmp, "ivC*vC*z", "FnE") --BUILTIN(__builtin_bcopy, "vv*v*z", "n") -+BUILTIN(__builtin_bcopy, "vvC*v*z", "nF") - BUILTIN(__builtin_bzero, "vv*z", "nF") - BUILTIN(__builtin_free, "vv*", "nF") - BUILTIN(__builtin_malloc, "v*z", "nF") -@@ -1154,6 +1154,7 @@ LIBBUILTIN(strndup, "c*cC*z", "f", STRING_H, ALL_GNU_LANGUAGES) - LIBBUILTIN(index, "c*cC*i", "f", STRINGS_H, ALL_GNU_LANGUAGES) - LIBBUILTIN(rindex, "c*cC*i", "f", STRINGS_H, ALL_GNU_LANGUAGES) - LIBBUILTIN(bzero, "vv*z", "f", STRINGS_H, ALL_GNU_LANGUAGES) -+LIBBUILTIN(bcopy, "vvC*v*z", "f", STRINGS_H, ALL_GNU_LANGUAGES) - LIBBUILTIN(bcmp, "ivC*vC*z", "fE", STRINGS_H, ALL_GNU_LANGUAGES) - // In some systems str[n]casejmp is a macro that expands to _str[n]icmp. - // We undefine then here to avoid wrong name. 
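The 0024 backport teaches clang that bcopy(src, dst, n) is memmove(dst, src, n) with the pointer arguments swapped; the builtin signature string "vvC*v*z" above encodes exactly that (void return, const void* source first, then destination and size). A sketch of the semantics the builtin lowers to, including the overlap guarantee that motivates memmove rather than memcpy:

```cpp
// Reference-semantics model of __builtin_bcopy, not the CodeGen path.
#include <cstring>
#include <iostream>

static void my_bcopy(const void *Src, void *Dst, std::size_t N) {
  std::memmove(Dst, Src, N); // overlap-safe, like the emitted llvm.memmove
}

int main() {
  char Buf[] = "abcdef";
  my_bcopy(Buf, Buf + 2, 4); // overlapping ranges are fine
  std::cout << Buf << '\n';  // "ababcd"
}
```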
-diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp -index fbc45fb6397f..3de1e4509bc0 100644 ---- a/clang/lib/AST/Decl.cpp -+++ b/clang/lib/AST/Decl.cpp -@@ -4320,6 +4320,10 @@ unsigned FunctionDecl::getMemoryFunctionKind() const { - case Builtin::BIbzero: - return Builtin::BIbzero; - -+ case Builtin::BI__builtin_bcopy: -+ case Builtin::BIbcopy: -+ return Builtin::BIbcopy; -+ - case Builtin::BIfree: - return Builtin::BIfree; - -@@ -4351,6 +4355,8 @@ unsigned FunctionDecl::getMemoryFunctionKind() const { - return Builtin::BIstrlen; - if (FnInfo->isStr("bzero")) - return Builtin::BIbzero; -+ if (FnInfo->isStr("bcopy")) -+ return Builtin::BIbcopy; - } else if (isInStdNamespace()) { - if (FnInfo->isStr("free")) - return Builtin::BIfree; -diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp -index e512762fafaf..8f87c4d46109 100644 ---- a/clang/lib/CodeGen/CGBuiltin.cpp -+++ b/clang/lib/CodeGen/CGBuiltin.cpp -@@ -3555,6 +3555,20 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, - Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false); - return RValue::get(nullptr); - } -+ -+ case Builtin::BIbcopy: -+ case Builtin::BI__builtin_bcopy: { -+ Address Src = EmitPointerWithAlignment(E->getArg(0)); -+ Address Dest = EmitPointerWithAlignment(E->getArg(1)); -+ Value *SizeVal = EmitScalarExpr(E->getArg(2)); -+ EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(), -+ E->getArg(0)->getExprLoc(), FD, 0); -+ EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(1)->getType(), -+ E->getArg(1)->getExprLoc(), FD, 0); -+ Builder.CreateMemMove(Dest, Src, SizeVal, false); -+ return RValue::get(Dest.getPointer()); -+ } -+ - case Builtin::BImemcpy: - case Builtin::BI__builtin_memcpy: - case Builtin::BImempcpy: -diff --git a/clang/test/Analysis/bstring.c b/clang/test/Analysis/bstring.c -index a7c7bdb23683..5d86241a4ac9 100644 ---- a/clang/test/Analysis/bstring.c -+++ b/clang/test/Analysis/bstring.c -@@ -483,8 +483,7 @@ int memcmp8(char *a, size_t n) { - //===----------------------------------------------------------------------=== - - #define bcopy BUILTIN(bcopy) --// __builtin_bcopy is not defined with const in Builtins.def. 
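The Decl.cpp hunk above canonicalises both spellings to a single "memory function kind", so later checks treat bcopy and __builtin_bcopy identically. A toy version of that classification; the enum and helper are illustrative, not clang's:

```cpp
// Collapses library and __builtin_ spellings to one canonical kind.
#include <iostream>
#include <string_view>

enum class MemFnKind { None, BCopy, BZero, Free };

static MemFnKind classify(std::string_view Name) {
  if (Name == "bcopy" || Name == "__builtin_bcopy")
    return MemFnKind::BCopy;
  if (Name == "bzero" || Name == "__builtin_bzero")
    return MemFnKind::BZero;
  if (Name == "free" || Name == "__builtin_free")
    return MemFnKind::Free;
  return MemFnKind::None;
}

int main() {
  std::cout << (classify("__builtin_bcopy") == classify("bcopy")) << '\n'; // 1
}
```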
--void bcopy(/*const*/ void *s1, void *s2, size_t n); -+void bcopy(const void *s1, void *s2, size_t n); - - - void bcopy0 (void) { -diff --git a/clang/test/Analysis/security-syntax-checks.m b/clang/test/Analysis/security-syntax-checks.m -index 5b4f35055f51..59e60f685236 100644 ---- a/clang/test/Analysis/security-syntax-checks.m -+++ b/clang/test/Analysis/security-syntax-checks.m -@@ -77,9 +77,9 @@ int test_bcmp(void *a, void *b, size_t n) { - } - - // Obsolete function bcopy --void bcopy(void *, void *, size_t); -+void bcopy(const void *, void *, size_t); - --void test_bcopy(void *a, void *b, size_t n) { -+void test_bcopy(const void *a, void *b, size_t n) { - bcopy(a, b, n); // expected-warning{{The bcopy() function is obsoleted by memcpy() or memmove(}} - } - -diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-macros.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-macros.c -index cced16431926..64bd6e3ed41e 100644 ---- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-macros.c -+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-macros.c -@@ -167,15 +167,15 @@ void testalignx(const void *pointer) { - } - - // 64BIT-LABEL: @testbcopy( --// 64BIT: call void @bcopy(ptr noundef {{%.*}}, ptr noundef {{%.*}}, i64 noundef {{%.*}}) -+// 64BIT: call void @llvm.memmove.p0.p0.i64(ptr align 1 {{%.*}}, ptr align 1 {{%.*}}, i64 {{%.*}}, i1 false) - // 64BIT-NEXT: ret void - // - // 32BIT-LABEL: @testbcopy( --// 32BIT: call void @bcopy(ptr noundef {{%.*}}, ptr noundef {{%.*}}, i32 noundef {{%.*}}) -+// 32BIT: call void @llvm.memmove.p0.p0.i32(ptr align 1 {{%.*}}, ptr align 1 {{%.*}}, i32 {{%.*}}, i1 false) - // 32BIT-NEXT: ret void - // - void testbcopy(const void *src, void *dest, size_t n) { -- __bcopy(src, dest, n); -+ bcopy(src, dest, n); - } - - // 64BIT-LABEL: @testbzero( --- -Gitee diff --git a/0025-clang-Increase-the-number-of-driver-diagnostics.patch b/0025-clang-Increase-the-number-of-driver-diagnostics.patch deleted file mode 100644 index ed0e2a31e1965e5c853680ee0680872900c372a9..0000000000000000000000000000000000000000 --- a/0025-clang-Increase-the-number-of-driver-diagnostics.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 9efda5a71de8b117366152aa35a8837af0545d0f Mon Sep 17 00:00:00 2001 -From: eastb233 -Date: Mon, 28 Oct 2024 16:20:14 +0800 -Subject: [PATCH] [clang] Increase the number of driver diagnostics - -It hits the limited number of driver diagnostics, -increase `DIAG_SIZE_DRIVER`. ---- - clang/include/clang/Basic/DiagnosticIDs.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/clang/include/clang/Basic/DiagnosticIDs.h b/clang/include/clang/Basic/DiagnosticIDs.h -index bf4995175ef1..8bc45b91911c 100644 ---- a/clang/include/clang/Basic/DiagnosticIDs.h -+++ b/clang/include/clang/Basic/DiagnosticIDs.h -@@ -31,7 +31,7 @@ namespace clang { - // Size of each of the diagnostic categories. 
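The reason the 0025 patch has to budget this constant at all: diagnostic IDs occupy one flat numeric space carved into fixed-size, back-to-back category ranges, so DIAG_SIZE_DRIVER caps how many driver diagnostics can exist, and raising it shifts the start of every later range. A compilable sketch of that layout, with sizes mirroring the enum that follows; the real header derives the offsets similarly, but treat this as a model:

```cpp
#include <iostream>

enum {
  DIAG_SIZE_COMMON = 300,
  DIAG_SIZE_DRIVER = 350, // raised from 300 by the patch
  DIAG_SIZE_FRONTEND = 150,
};

enum {
  DIAG_START_COMMON = 0,
  DIAG_START_DRIVER = DIAG_START_COMMON + DIAG_SIZE_COMMON,
  DIAG_START_FRONTEND = DIAG_START_DRIVER + DIAG_SIZE_DRIVER,
};

int main() {
  int LocalID = 340; // a driver diagnostic that only fits after the bump
  std::cout << (LocalID < DIAG_SIZE_DRIVER) << ' '
            << DIAG_START_DRIVER + LocalID << '\n'; // 1 640
}
```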
- enum { - DIAG_SIZE_COMMON = 300, -- DIAG_SIZE_DRIVER = 300, -+ DIAG_SIZE_DRIVER = 350, - DIAG_SIZE_FRONTEND = 150, - DIAG_SIZE_SERIALIZATION = 120, - DIAG_SIZE_LEX = 400, --- -2.38.1.windows.1 - diff --git a/0026-Driver-add-option-fGNU-compatibility-aliased-with-fg.patch b/0026-Driver-add-option-fGNU-compatibility-aliased-with-fg.patch deleted file mode 100644 index 281c096ed457dbfc7bd32ba93abb0853ec6eb96c..0000000000000000000000000000000000000000 --- a/0026-Driver-add-option-fGNU-compatibility-aliased-with-fg.patch +++ /dev/null @@ -1,27 +0,0 @@ -From cd2d6fe2cdcb0fa047c77993c23da1fb612970be Mon Sep 17 00:00:00 2001 -From: jianghaibo -Date: Wed, 6 Nov 2024 16:04:39 +0800 -Subject: [PATCH] [Driver] add option -fGNU-compatibility aliased with - -fgcc-compatible - ---- - clang/include/clang/Driver/Options.td | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td -index 530bb53ea9b5..2faa65763234 100644 ---- a/clang/include/clang/Driver/Options.td -+++ b/clang/include/clang/Driver/Options.td -@@ -1814,6 +1814,9 @@ def fgcc_compatible : Flag<["-"], "fgcc-compatible">, - Flags<[CC1Option]>, - MarshallingInfoFlag>, - HelpText<"Enable gcc compatibility for openEuler.">; -+def : Flag["-"], "fGNU-compatibility">, -+ Flags<[CC1Option]>, Alias, -+ HelpText<"Alias for -fgcc_compatible">; - def fno_gcc_compatible : Flag<["-"], "fno-gcc-compatible">, Flags<[CC1Option]>; - #endif - --- -2.38.1.windows.1 - diff --git a/0027-Driver-fix-compile-error-for-fGNU-compatibility.patch b/0027-Driver-fix-compile-error-for-fGNU-compatibility.patch deleted file mode 100644 index 50a12fa08360ba09d535d96467b2908db160a814..0000000000000000000000000000000000000000 --- a/0027-Driver-fix-compile-error-for-fGNU-compatibility.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 82d51aca563c40c84b70c0e295a9561d3dd4092b Mon Sep 17 00:00:00 2001 -From: jianghaibo -Date: Wed, 20 Nov 2024 16:13:04 +0800 -Subject: [PATCH] [Driver] fix compile error for -fGNU-compatibility - ---- - clang/include/clang/Driver/Options.td | 2 +- - clang/test/Driver/test-generate-missing-build-notes.cpp | 1 + - 2 files changed, 2 insertions(+), 1 deletion(-) - -diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td -index ccf395dad35a..c109d7a8fcab 100644 ---- a/clang/include/clang/Driver/Options.td -+++ b/clang/include/clang/Driver/Options.td -@@ -1819,7 +1819,7 @@ def fgcc_compatible : Flag<["-"], "fgcc-compatible">, - Flags<[CC1Option]>, - MarshallingInfoFlag>, - HelpText<"Enable gcc compatibility for openEuler.">; --def : Flag["-"], "fGNU-compatibility">, -+def fGNU_compatibility : Flag<["-"], "fGNU-compatibility">, - Flags<[CC1Option]>, Alias, - HelpText<"Alias for -fgcc_compatible">; - def fno_gcc_compatible : Flag<["-"], "fno-gcc-compatible">, Flags<[CC1Option]>; -diff --git a/clang/test/Driver/test-generate-missing-build-notes.cpp b/clang/test/Driver/test-generate-missing-build-notes.cpp -index efd5251e6a1c..54ac4c66a5b0 100644 ---- a/clang/test/Driver/test-generate-missing-build-notes.cpp -+++ b/clang/test/Driver/test-generate-missing-build-notes.cpp -@@ -1,6 +1,7 @@ - // REQUIRES: build_for_openeuler - // RUN: %clang -### -fgcc-compatible -Wa,--generate-missing-build-notes=yes %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ERROR %s - // RUN: %clang -### -fgcc-compatible -Wa,--generate-missing-build-notes=no %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ERROR %s -+// RUN: %clang -### -fGNU-compatibility -Wa,--generate-missing-build-notes=no %s 
2>&1 | FileCheck -check-prefix=CHECK-NO-ERROR %s - // CHECK-NO-ERROR-NOT: --generate-missing-build-notes= - // RUN: %clang -### -Wa,--generate-missing-build-notes=yes %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR %s - // RUN: %clang -### -Wa,--generate-missing-build-notes=no %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR %s --- -2.38.1.windows.1 - diff --git a/README.en.md b/README.en.md index f6f498b318a7a9045ea28b7ba6982b0013b6ce86..7b0c0eee8a34b354c107ca49a4134025710100cb 100644 --- a/README.en.md +++ b/README.en.md @@ -1,14 +1,22 @@ -# clang +# clang-latest #### Description -clang: noun - 1. A loud, resonant, metallic sound. - 2. The strident call of a crane or goose. - 3. C-language family front-end toolkit. - -The goal of the Clang project is to create a new C, C++, Objective C -and Objective C++ front-end for the LLVM compiler. Its tools are built -as libraries and designed to be loosely-coupled and extensible. +C Language Family Front-end + +#### Software Architecture +Software architecture description + +#### Installation + +1. xxxx +2. xxxx +3. xxxx + +#### Instructions + +1. xxxx +2. xxxx +3. xxxx #### Contribution diff --git a/README.md b/README.md index e1559a4a3fd08c3852062bd6022c63f175416a53..39078379eed5fef59dc9ebd9e7c3eac357d036ad 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,23 @@ -# clang +# clang-latest #### 介绍 -clang: noun - 1. A loud, resonant, metallic sound. - 2. The strident call of a crane or goose. - 3. C-language family front-end toolkit. +C Language Family Front-end -The goal of the Clang project is to create a new C, C++, Objective C -and Objective C++ front-end for the LLVM compiler. Its tools are built -as libraries and designed to be loosely-coupled and extensible. +#### 软件架构 +软件架构说明 + + +#### 安装教程 + +1. xxxx +2. xxxx +3. xxxx + +#### 使用说明 + +1. xxxx +2. xxxx +3. xxxx #### 参与贡献 @@ -18,11 +27,11 @@ as libraries and designed to be loosely-coupled and extensible. 4. 新建 Pull Request -#### 码云特技 +#### 特技 1. 使用 Readme\_XXX.md 来支持不同的语言,例如 Readme\_en.md, Readme\_zh.md -2. 码云官方博客 [blog.gitee.com](https://blog.gitee.com) -3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解码云上的优秀开源项目 -4. [GVP](https://gitee.com/gvp) 全称是码云最有价值开源项目,是码云综合评定出的优秀开源项目 -5. 码云官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help) -6. 码云封面人物是一档用来展示码云会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) +2. Gitee 官方博客 [blog.gitee.com](https://blog.gitee.com) +3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解 Gitee 上的优秀开源项目 +4. [GVP](https://gitee.com/gvp) 全称是 Gitee 最有价值开源项目,是综合评定出的优秀开源项目 +5. Gitee 官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help) +6. 
Gitee 封面人物是一档用来展示 Gitee 会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/clang-17.0.6.src.tar.xz b/clang-18.1.8.src.tar.xz similarity index 68% rename from clang-17.0.6.src.tar.xz rename to clang-18.1.8.src.tar.xz index 35fdfaf91da3a6e31a3722e9682641a5b633e8b9..50a742b8ac1b726cc5eb57f909ea152de02b761f 100644 Binary files a/clang-17.0.6.src.tar.xz and b/clang-18.1.8.src.tar.xz differ diff --git a/clang-config.h b/clang-config.h deleted file mode 100644 index c369b4551f785da6e2c586417caa36591a9eaee2..0000000000000000000000000000000000000000 --- a/clang-config.h +++ /dev/null @@ -1,9 +0,0 @@ -#include - -#if __WORDSIZE == 32 -#include "config-32.h" -#elif __WORDSIZE == 64 -#include "config-64.h" -#else -#error "Unknown word size" -#endif diff --git a/clang-tools-extra-17.0.6.src.tar.xz b/clang-tools-extra-18.1.8.src.tar.xz similarity index 31% rename from clang-tools-extra-17.0.6.src.tar.xz rename to clang-tools-extra-18.1.8.src.tar.xz index b37bc74a67cac232ff9ce364d98fdf2adbaca55d..900acef70442984dd140a2720d204d41d887ea33 100644 Binary files a/clang-tools-extra-17.0.6.src.tar.xz and b/clang-tools-extra-18.1.8.src.tar.xz differ diff --git a/clang.spec b/clang.spec index 9e72619fa082161b406e80e4565c81501e88e442..35bd87c9179257e2bfa196dea428f52dd592bd19 100644 --- a/clang.spec +++ b/clang.spec @@ -1,28 +1,21 @@ -%undefine __cmake_in_source_build -%bcond_without sys_llvm %bcond_without check -%bcond_with classic_flang -%bcond_with toolchain_clang -%bcond_without bisheng_autotuner +%bcond_without toolchain_clang %if %{with toolchain_clang} %global toolchain clang %endif -%global maj_ver 17 -%global min_ver 0 -%global patch_ver 6 +%global maj_ver 18 +%global min_ver 1 +%global patch_ver 8 %global clang_version %{maj_ver}.%{min_ver}.%{patch_ver} -%if %{with sys_llvm} -%global pkg_name clang -%global install_prefix %{_prefix} -%global install_datadir %{_datadir} -%else -%global pkg_name clang%{maj_ver} -%global install_prefix %{_libdir}/llvm%{maj_ver} -%global install_datadir %{install_prefix}/share -%endif +%global _scl_prefix /opt/openEuler +%{?scl:%scl_package %scl} +%{!?scl:%global scl_prefix llvm-toolset-%{maj_ver}-} +%{!?scl:%global pkg_name %{name}} +%global install_prefix %{!?scl:%{_scl_prefix}/llvm-toolset-%{maj_ver}/root}%{_prefix} +%global install_datadir %{!?scl:%{_scl_prefix}/llvm-toolset-%{maj_ver}/root}%{_datadir} %global install_bindir %{install_prefix}/bin %global install_includedir %{install_prefix}/include @@ -42,10 +35,11 @@ # Disable LTO as this causes crash if gcc lto enabled. 
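The deleted clang-config.h above used the usual multilib dispatch pattern: a single installed header selects an arch-specific config by word size, so one package can serve 32- and 64-bit roots (its #include target, probably <bits/wordsize.h>, was lost in formatting). A self-contained equivalent using <climits> instead of the glibc header:

```cpp
#include <climits>
#include <iostream>

#if ULONG_MAX == 0xffffffffUL
static const int ConfigWordSize = 32; // would pull in "config-32.h"
#elif ULONG_MAX == 0xffffffffffffffffUL
static const int ConfigWordSize = 64; // would pull in "config-64.h"
#else
#error "Unknown word size"
#endif

int main() { std::cout << ConfigWordSize << '\n'; }
```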
%define _lto_cflags %{nil} -Name: %{pkg_name} +Name: %{?scl_prefix}clang Version: %{clang_version} -Release: 31 +Release: 1 Summary: A C language family front-end for LLVM + License: NCSA URL: http://llvm.org Source0: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{clang_version}/%{clang_srcdir}.tar.xz @@ -53,32 +47,6 @@ Source1: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{clang_ Patch0: fedora-PATCH-clang-Don-t-install-static-libraries.patch Patch1: 0001-Add-triples-for-X86_64-AArch64-Riscv64-openEuler-gcc.patch -Patch2: 0002-Revert-Clang-Change-the-default-DWARF-version-to-5.patch -Patch3: 0003-add-BUILD_FOR_OPENEULER-build-option-to-clang.patch -Patch4: 0004-add-gcc-compatible-in-BUILD_FOR_OPENEULER.patch -Patch5: 0005-backport-Disable-InterpreterExceptionTest-on-RISC-V.patch -Patch6: 0006-clang-LoongArch-Add-loongarch64-to-os-triple.patch -Patch7: 0007-add-more-warning-options-to-fgcc-compatible.patch -Patch8: 0008-Backport-LoongArch-Add-the-support-for-vector.patch -Patch9: 0009-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch -Patch10: 0010-Backport-clang-Avoid-Wshadow-warning-when-init-capture-named.patch -Patch11: 0011-Add-the-support-for-classic-flang.patch -Patch12: 0012-Fix-declaration-definition-mismatch-for-classic-flang.patch -Patch13: 0013-Ignored-option-Wa-generate-missing-build-notes.patch -Patch14: 0014-Update-llvm-lit-config-to-support-build_for_openeule.patch -Patch15: 0015-Backport-Defer-the-instantiation-of-explicit-specifier-until-.patch -Patch16: 0016-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch -Patch17: 0017-fix-for-missing-DENABLE_AUTOTUNER.patch -Patch18: 0018-backport-Clang-Fix-build-with-GCC-14-on-ARM-78704.patch -Patch19: 0019-AArch64-Support-HiSilicon-s-HIP09-Processor.patch -Patch20: 0020-Backport-LoongArch-fix-and-add-some-new-support.patch -Patch21: 0021-AArch64-Delete-hip09-macro.patch -Patch22: 0022-Driver-Pass-z-arg-and-Wl-z-arg-option-to-the-linker.patch -Patch23: 0023-Handling-of-option-Wall-and-Werror-format-2-override.patch -Patch24: 0024-Backport-PATCH-Clang-CodeGen-Add__builtin_bcopy.patch -Patch25: 0025-clang-Increase-the-number-of-driver-diagnostics.patch -Patch26: 0026-Driver-add-option-fGNU-compatibility-aliased-with-fg.patch -Patch27: 0027-Driver-fix-compile-error-for-fGNU-compatibility.patch # Patches for clang-tools-extra # See https://reviews.llvm.org/D120301 @@ -89,21 +57,12 @@ BuildRequires: gcc-c++ BuildRequires: cmake BuildRequires: libatomic -%if %{with sys_llvm} -BuildRequires: llvm-libs = %{version} -BuildRequires: llvm-devel = %{version} -BuildRequires: llvm-static = %{version} -BuildRequires: llvm-test = %{version} -BuildRequires: llvm-googletest = %{version} -BuildRequires: llvm-cmake-utils = %{version} -%else -BuildRequires: llvm%{maj_ver}-libs = %{version} -BuildRequires: llvm%{maj_ver}-devel = %{version} -BuildRequires: llvm%{maj_ver}-static = %{version} -BuildRequires: llvm%{maj_ver}-test = %{version} -BuildRequires: llvm%{maj_ver}-googletest = %{version} -BuildRequires: llvm%{maj_ver}-cmake-utils = %{version} -%endif +BuildRequires: %{?scl_prefix}llvm-libs = %{version} +BuildRequires: %{?scl_prefix}llvm-devel = %{version} +BuildRequires: %{?scl_prefix}llvm-static = %{version} +BuildRequires: %{?scl_prefix}llvm-test = %{version} +BuildRequires: %{?scl_prefix}llvm-googletest = %{version} +BuildRequires: %{?scl_prefix}llvm-cmake-utils = %{version} BuildRequires: libxml2-devel BuildRequires: multilib-rpm-config @@ -112,7 +71,7 @@ BuildRequires: ncurses-devel 
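The spec splits %{clang_version} into the %{maj_ver}.%{min_ver}.%{patch_ver} triple that names the SCL root and the resource directory. The same split in plain C++, purely as an illustrative helper and not anything the build runs:

```cpp
#include <iostream>
#include <sstream>
#include <string>

struct Version {
  int Maj = 0, Min = 0, Patch = 0;
};

static Version parseVersion(const std::string &S) {
  Version V;
  char Dot = 0;
  std::istringstream(S) >> V.Maj >> Dot >> V.Min >> Dot >> V.Patch;
  return V;
}

int main() {
  Version V = parseVersion("18.1.8");
  std::cout << V.Maj << ' ' << V.Min << ' ' << V.Patch << '\n'; // 18 1 8
}
```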
BuildRequires: perl-generators BuildRequires: python3-lit >= %{version} BuildRequires: python3-sphinx -BuildRequires: python3-recommonmark +BuildRequires: python3-myst-parser BuildRequires: python3-devel BuildRequires: perl(Digest::MD5) @@ -129,17 +88,18 @@ BuildRequires: perl(Sys::Hostname) %if %{with toolchain_clang} BuildRequires: clang %endif +%{?scl:Requires: %scl_runtime} -Requires: %{name}-libs%{?_isa} = %{version}-%{release} +Requires: %{pkg_name}-libs%{?_isa} = %{version}-%{release} Requires: libstdc++-devel # Require gcc libs installed during rumtime Requires: gcc Requires: gcc-c++ -Provides: clang(major) = %{maj_ver} +Provides: %{pkg_name}(major) = %{maj_ver} -Conflicts: compiler-rt < 11.0.0 +Conflicts: %{?scl_prefix}compiler-rt < 11.0.0 %description clang: noun @@ -157,35 +117,40 @@ libomp-devel to enable -fopenmp. %package libs Summary: Runtime library for clang -Requires: %{name}-resource-filesystem%{?_isa} = %{version} -Recommends: compiler-rt%{?_isa} = %{version} -Recommends: libatomic%{?_isa} -Recommends: libomp-devel%{_isa} = %{version} -Recommends: libomp%{_isa} = %{version} +Requires: %{pkg_name}-resource-filesystem = %{version} +Requires: %{?scl_prefix}llvm-libs = %{version} +Recommends: %{?scl_prefix}compiler-rt%{?_isa} = %{version} +Recommends: %{?scl_prefix}libatomic%{?_isa} +Recommends: %{?scl_prefix}libomp-devel%{_isa} = %{version} +Recommends: %{?scl_prefix}libomp%{_isa} = %{version} %description libs Runtime library for clang. %package devel Summary: Development header files for clang -Requires: %{name}-libs = %{version}-%{release} +Requires: %{pkg_name}-libs = %{version}-%{release} +Requires: %{pkg_name}%{?_isa} = %{version}-%{release} +# The clang CMake files reference tools from clang-tools-extra. +Requires: %{pkg_name}-tools-extra%{?_isa} = %{version}-%{release} +Provides: %{pkg_name}-devel(major) = %{maj_ver} %description devel Development header files for clang. %package resource-filesystem Summary: Filesystem package that owns the clang resource directory -Provides: %{name}-resource-filesystem(major) = %{maj_ver} +Provides: %{pkg_name}-resource-filesystem(major) = %{maj_ver} +BuildArch: noarch %description resource-filesystem This package owns the clang resouce directory: $libdir/clang/$version/ - %package analyzer Summary: A source code analysis framework License: NCSA and MIT BuildArch: noarch -Requires: %{name} = %{version}-%{release} +Requires: %{pkg_name} = %{version}-%{release} %description analyzer The Clang Static Analyzer consists of both a source code analysis @@ -195,7 +160,7 @@ intended to run in tandem with a build of a project or code base. %package tools-extra Summary: Extra tools for clang -Requires: %{name}-libs%{?_isa} = %{version}-%{release} +Requires: %{pkg_name}-libs%{?_isa} = %{version}-%{release} Requires: emacs-filesystem %description tools-extra @@ -203,18 +168,18 @@ A set of extra tools built using Clang's tooling API. %package tools-extra-devel Summary: Development header files for clang tools -Requires: %{name}-tools-extra = %{version}-%{release} - +Requires: %{pkg_name}-tools-extra = %{version}-%{release} + %description tools-extra-devel Development header files for clang tools. 
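The resource-filesystem package below can be noarch and own $libdir/clang/%{maj_ver}/ because clang locates that directory at run time from the driver binary's own location plus the relative CLANG_RESOURCE_DIR configured later in %build ("../lib64/clang/18" on 64-bit). A sketch of that path arithmetic, assuming the SCL-style install root this spec sets up:

```cpp
#include <filesystem>
#include <iostream>

int main() {
  namespace fs = std::filesystem;
  // Assumed driver location under the llvm-toolset-18 root.
  fs::path Driver = "/opt/openEuler/llvm-toolset-18/root/usr/bin/clang-18";
  fs::path Resource =
      (Driver.parent_path() / "../lib64/clang/18").lexically_normal();
  std::cout << Resource << '\n';
  // "/opt/openEuler/llvm-toolset-18/root/usr/lib64/clang/18"
}
```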
-%package -n git-clang-format +%package -n %{?scl_prefix}git-clang-format Summary: Integration of clang-format for git -Requires: %{name}-tools-extra = %{version}-%{release} +Requires: %{pkg_name}-tools-extra = %{version}-%{release} Requires: git Requires: python3 -%description -n git-clang-format +%description -n %{?scl_prefix}git-clang-format clang-format integration for git. %prep @@ -242,11 +207,13 @@ pathfix.py -i %{__python3} -pn \ tools/scan-build-py/libexec/* %build -%cmake -G Ninja \ +mkdir -p _build +cd _build +%cmake .. -G Ninja \ -DCLANG_DEFAULT_PIE_ON_LINUX=ON \ -DLLVM_PARALLEL_LINK_JOBS=%{max_link_jobs} \ -DLLVM_LINK_LLVM_DYLIB:BOOL=ON \ - -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DPYTHON_EXECUTABLE=%{__python3} \ -DCMAKE_SKIP_RPATH:BOOL=ON \ -DCLANG_BUILD_TOOLS:BOOL=ON \ @@ -273,31 +240,28 @@ pathfix.py -i %{__python3} -pn \ -DCLANG_BUILD_EXAMPLES:BOOL=OFF \ -DBUILD_SHARED_LIBS=OFF \ -DCLANG_REPOSITORY_STRING="%{?distro} %{version}-%{release}" \ - -DLLVM_EXTERNAL_CLANG_TOOLS_EXTRA_SOURCE_DIR=%{_vpath_srcdir}/../%{clang_tools_srcdir} \ - -DCLANG_RESOURCE_DIR=../%{_lib}/clang/%{maj_ver} \ + -DLLVM_EXTERNAL_CLANG_TOOLS_EXTRA_SOURCE_DIR=../../%{clang_tools_srcdir} \ +%if 0%{?__isa_bits} == 64 + -DCLANG_RESOURCE_DIR=../lib64/clang/%{maj_ver} \ +%else + -DCLANG_RESOURCE_DIR=../lib/clang/%{maj_ver} \ +%endif %if 0%{?__isa_bits} == 64 -DLLVM_LIBDIR_SUFFIX=64 \ %else -DLLVM_LIBDIR_SUFFIX= \ %endif -%if %{with classic_flang} - -DLLVM_ENABLE_CLASSIC_FLANG=ON \ -%endif -%if %{with bisheng_autotuner} - -DLLVM_ENABLE_AUTOTUNER=ON \ -%endif - -DBUILD_FOR_OPENEULER=ON \ %if "%{toolchain}" == "clang" -DCMAKE_C_COMPILER=clang \ -DCMAKE_CXX_COMPILER=clang++ \ %endif -DCLANG_DEFAULT_UNWINDLIB=libgcc -%cmake_build +%ninja_build %install -%cmake_install +%ninja_install -C _build mkdir -p %{buildroot}/%{_bindir} rm -vf %{buildroot}%{_datadir}/clang/clang-format-bbedit.applescript @@ -313,10 +277,15 @@ rm -vf %{buildroot}%{install_sharedir}/clang/bash-autocomplete.sh mkdir -p %{buildroot}%{install_libdir}/clang/%{maj_ver}/{bin,include,lib,share}/ +# Add a symlink in /usr/bin to clang-format-diff +ln -s %{install_datadir}/clang/clang-format-diff.py %{buildroot}%{install_bindir}/clang-format-diff +# Add clang++-{version} symlink +ln -s %{install_bindir}/clang++ %{buildroot}%{install_bindir}/clang++-%{maj_ver} + %check %if %{with check} - -LD_LIBRARY_PATH=%{buildroot}/%{install_libdir} %{__ninja} check-all -C %{__cmake_builddir} +# requires lit.py from LLVM utilities +LD_LIBRARY_PATH=%{buildroot}/%{install_libdir} %{__ninja} check-all -C ./_build/ %endif %files @@ -324,12 +293,10 @@ LD_LIBRARY_PATH=%{buildroot}/%{install_libdir} %{__ninja} check-all -C %{__cmak %{install_bindir}/clang %{install_bindir}/clang++ %{install_bindir}/clang-%{maj_ver} +%{install_bindir}/clang++-%{maj_ver} %{install_bindir}/clang-cl %{install_bindir}/clang-cpp %{install_prefix}/share/man/man1/* -%if %{with classic_flang} -%{install_bindir}/flang -%endif %files libs %{install_libdir}/*.so.* @@ -340,15 +307,16 @@ LD_LIBRARY_PATH=%{buildroot}/%{install_libdir} %{__ninja} check-all -C %{__cmak %{install_includedir}/clang/ %{install_includedir}/clang-c/ %{install_libdir}/cmake/* -%{_bindir}/clang-tblgen -%dir %{_datadir}/clang/ +%{install_bindir}/clang-tblgen +%dir %{install_datadir}/clang/ %files resource-filesystem +%dir %{install_libdir}/clang/ %dir %{install_libdir}/clang/%{maj_ver}/ +%dir %{install_libdir}/clang/%{maj_ver}/bin/ %dir %{install_libdir}/clang/%{maj_ver}/include/ %dir 
%{install_libdir}/clang/%{maj_ver}/lib/ %dir %{install_libdir}/clang/%{maj_ver}/share/ -%dir %{install_libdir}/clang/ %files analyzer %{install_libexecdir}/ccc-analyzer @@ -399,6 +367,7 @@ LD_LIBRARY_PATH=%{buildroot}/%{install_libdir} %{__ninja} check-all -C %{__cmak %{install_bindir}/pp-trace %{install_bindir}/find-all-symbols %{install_bindir}/modularize +%{install_bindir}/clang-format-diff %{install_bindir}/run-clang-tidy %{install_sharedir}/clang/clang-format.el %{install_sharedir}/clang/clang-rename.el @@ -413,10 +382,13 @@ LD_LIBRARY_PATH=%{buildroot}/%{install_libdir} %{__ninja} check-all -C %{__cmak %files tools-extra-devel %{install_includedir}/clang-tidy/ -%files -n git-clang-format +%files -n %{?scl_prefix}git-clang-format %{install_bindir}/git-clang-format %changelog +* Fri Dec 6 2024 liyunfei - 18.1.8-1 +- init for Multi-Version LLVM-18.1.8 + * Thu Nov 21 2024 eastb233 - 17.0.6-31 - Add option -fGNU-compatibility diff --git a/clang.yaml b/clang.yaml deleted file mode 100644 index c7c50f6ac87dd806f0574c6b3a4fd97b11d3e479..0000000000000000000000000000000000000000 --- a/clang.yaml +++ /dev/null @@ -1,4 +0,0 @@ -version_control: github -src_repo: llvm/llvm-project -tag_prefix: ^llvmorg- -separator: .
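Finally, the removed clang.yaml told openEuler's update tooling how to relate upstream git tags to package versions: tags are matched against tag_prefix "^llvmorg-" (a regex anchor) and the remainder is read as "."-separated version components. A sketch of that mapping in the tag-building direction; the tooling's exact behaviour is inferred from the yaml fields, not documented here:

```cpp
#include <iostream>
#include <string>

// Builds the upstream tag for a package version, per the yaml's
// tag_prefix ("^llvmorg-", minus the regex anchor) and "." separator.
static std::string tagForVersion(const std::string &Version) {
  return "llvmorg-" + Version;
}

int main() {
  std::cout << tagForVersion("18.1.8") << '\n'; // llvmorg-18.1.8
}
```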