diff --git a/0082-Sw64-Add-Sw64-target-support-for-llvm.patch b/0082-Sw64-Add-Sw64-target-support-for-llvm.patch new file mode 100755 index 0000000000000000000000000000000000000000..36e83de0328c37f36c0e9ff4f6101ab134d5105d --- /dev/null +++ b/0082-Sw64-Add-Sw64-target-support-for-llvm.patch @@ -0,0 +1,28997 @@ +From 612f0bc7ef3af395ad70920d994173970c6cf67b Mon Sep 17 00:00:00 2001 +From: xiaol +Date: Tue, 20 May 2025 17:16:19 +0800 +Subject: [PATCH 1/5] llvm + +--- + llvm/CMakeLists.txt | 3 +- + llvm/cmake/config-ix.cmake | 4 + + llvm/cmake/config.guess | 9 + + llvm/include/llvm/BinaryFormat/ELF.h | 34 + + .../llvm/BinaryFormat/ELFRelocs/Sw64.def | 44 + + .../llvm/CodeGen/MachineCombinerPattern.h | 10 + + .../llvm/ExecutionEngine/Orc/OrcABISupport.h | 40 + + llvm/include/llvm/IR/CMakeLists.txt | 1 + + llvm/include/llvm/IR/Intrinsics.td | 1 + + llvm/include/llvm/IR/IntrinsicsSw64.td | 654 +++ + llvm/include/llvm/MC/MCAsmInfo.h | 4 + + llvm/include/llvm/MC/MCExpr.h | 26 + + llvm/include/llvm/Object/ELFObjectFile.h | 5 + + llvm/include/llvm/Support/Sw64ABIFlags.h | 39 + + .../include/llvm/Support/Sw64TargetParser.def | 28 + + llvm/include/llvm/Support/Sw64TargetParser.h | 53 + + llvm/include/llvm/TargetParser/Host.h | 1 + + llvm/include/llvm/TargetParser/Triple.h | 25 +- + llvm/include/module.modulemap | 1 + + .../CodeGen/SelectionDAG/TargetLowering.cpp | 81 + + .../CodeGen/TargetLoweringObjectFileImpl.cpp | 7 + + .../Orc/EPCIndirectionUtils.cpp | 3 + + .../ExecutionEngine/Orc/IndirectionUtils.cpp | 10 + + llvm/lib/ExecutionEngine/Orc/LLJIT.cpp | 4 + + .../lib/ExecutionEngine/Orc/LazyReexports.cpp | 2 + + .../lib/ExecutionEngine/Orc/OrcABISupport.cpp | 262 ++ + .../RuntimeDyld/CMakeLists.txt | 1 + + .../RuntimeDyld/RuntimeDyldELF.cpp | 59 + + .../RuntimeDyld/RuntimeDyldELF.h | 10 + + .../Targets/RuntimeDyldELFSw64.cpp | 217 + + .../RuntimeDyld/Targets/RuntimeDyldELFSw64.h | 61 + + llvm/lib/IR/Function.cpp | 1 + + llvm/lib/MC/ELFObjectWriter.cpp | 33 + + llvm/lib/MC/MCAsmStreamer.cpp | 11 +- + llvm/lib/MC/MCELFStreamer.cpp | 10 + + llvm/lib/MC/MCExpr.cpp | 50 + + llvm/lib/MC/MCObjectFileInfo.cpp | 3 + + llvm/lib/MC/MCSectionELF.cpp | 4 + + llvm/lib/Object/ELF.cpp | 25 + + llvm/lib/Object/RelocationResolver.cpp | 27 + + llvm/lib/Support/CMakeLists.txt | 1 + + llvm/lib/Support/Sw64TargetParser.cpp | 96 + + llvm/lib/Target/Sw64/AsmParser/CMakeLists.txt | 13 + + .../Target/Sw64/AsmParser/Sw64AsmParser.cpp | 2005 ++++++++ + llvm/lib/Target/Sw64/CMakeLists.txt | 64 + + .../Target/Sw64/Disassembler/CMakeLists.txt | 11 + + .../Sw64/Disassembler/Sw64Disassembler.cpp | 390 ++ + .../Target/Sw64/InstPrinter/CMakeLists.txt | 10 + + .../Sw64/InstPrinter/Sw64InstPrinter.cpp | 148 + + .../Target/Sw64/InstPrinter/Sw64InstPrinter.h | 57 + + .../Target/Sw64/MCTargetDesc/CMakeLists.txt | 22 + + .../Sw64/MCTargetDesc/Sw64ABIFlagsSection.cpp | 31 + + .../Sw64/MCTargetDesc/Sw64ABIFlagsSection.h | 127 + + .../Target/Sw64/MCTargetDesc/Sw64ABIInfo.cpp | 29 + + .../Target/Sw64/MCTargetDesc/Sw64ABIInfo.h | 77 + + .../Sw64/MCTargetDesc/Sw64AsmBackend.cpp | 317 ++ + .../Target/Sw64/MCTargetDesc/Sw64AsmBackend.h | 96 + + .../Target/Sw64/MCTargetDesc/Sw64BaseInfo.h | 146 + + .../Sw64/MCTargetDesc/Sw64ELFObjectWriter.cpp | 463 ++ + .../Sw64/MCTargetDesc/Sw64ELFStreamer.cpp | 108 + + .../Sw64/MCTargetDesc/Sw64ELFStreamer.h | 83 + + .../Target/Sw64/MCTargetDesc/Sw64FixupKinds.h | 174 + + .../Sw64/MCTargetDesc/Sw64MCAsmInfo.cpp | 42 + + .../Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.h | 32 + + 
.../Sw64/MCTargetDesc/Sw64MCCodeEmitter.cpp | 452 ++ + .../Sw64/MCTargetDesc/Sw64MCCodeEmitter.h | 111 + + .../Target/Sw64/MCTargetDesc/Sw64MCExpr.cpp | 177 + + .../lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.h | 99 + + .../Sw64/MCTargetDesc/Sw64MCTargetDesc.cpp | 189 + + .../Sw64/MCTargetDesc/Sw64MCTargetDesc.h | 66 + + .../Sw64/MCTargetDesc/Sw64OptionRecord.cpp | 32 + + .../Sw64/MCTargetDesc/Sw64TargetStreamer.cpp | 388 ++ + llvm/lib/Target/Sw64/README.txt | 7 + + llvm/lib/Target/Sw64/Sw64.h | 56 + + llvm/lib/Target/Sw64/Sw64.td | 154 + + llvm/lib/Target/Sw64/Sw64AsmPrinter.cpp | 322 ++ + llvm/lib/Target/Sw64/Sw64BranchSelector.cpp | 81 + + llvm/lib/Target/Sw64/Sw64CallingConv.td | 72 + + llvm/lib/Target/Sw64/Sw64CombineLS.cpp | 63 + + llvm/lib/Target/Sw64/Sw64ExpandPseudo.cpp | 1176 +++++ + llvm/lib/Target/Sw64/Sw64ExpandPseudo2.cpp | 334 ++ + llvm/lib/Target/Sw64/Sw64FrameLowering.cpp | 456 ++ + llvm/lib/Target/Sw64/Sw64FrameLowering.h | 82 + + llvm/lib/Target/Sw64/Sw64IEEEConstraint.cpp | 138 + + llvm/lib/Target/Sw64/Sw64ISelDAGToDAG.cpp | 1032 ++++ + llvm/lib/Target/Sw64/Sw64ISelLowering.cpp | 4154 +++++++++++++++++ + llvm/lib/Target/Sw64/Sw64ISelLowering.h | 484 ++ + llvm/lib/Target/Sw64/Sw64InstrFormats.td | 452 ++ + llvm/lib/Target/Sw64/Sw64InstrFormatsV.td | 400 ++ + llvm/lib/Target/Sw64/Sw64InstrInfo.cpp | 1012 ++++ + llvm/lib/Target/Sw64/Sw64InstrInfo.h | 143 + + llvm/lib/Target/Sw64/Sw64InstrInfo.td | 2096 +++++++++ + llvm/lib/Target/Sw64/Sw64InstrVector.td | 1970 ++++++++ + llvm/lib/Target/Sw64/Sw64LLRP.cpp | 476 ++ + llvm/lib/Target/Sw64/Sw64MCInstLower.cpp | 282 ++ + llvm/lib/Target/Sw64/Sw64MCInstLower.h | 44 + + .../Target/Sw64/Sw64MachineFunctionInfo.cpp | 33 + + .../lib/Target/Sw64/Sw64MachineFunctionInfo.h | 69 + + llvm/lib/Target/Sw64/Sw64MacroFusion.cpp | 65 + + llvm/lib/Target/Sw64/Sw64MacroFusion.h | 28 + + llvm/lib/Target/Sw64/Sw64OptionRecord.h | 68 + + .../Target/Sw64/Sw64PreLegalizerCombiner.cpp | 96 + + llvm/lib/Target/Sw64/Sw64RegisterInfo.cpp | 300 ++ + llvm/lib/Target/Sw64/Sw64RegisterInfo.h | 81 + + llvm/lib/Target/Sw64/Sw64RegisterInfo.td | 312 ++ + llvm/lib/Target/Sw64/Sw64Relocations.h | 30 + + llvm/lib/Target/Sw64/Sw64SchedCore3.td | 233 + + llvm/lib/Target/Sw64/Sw64SchedCore3SIMD.td | 101 + + llvm/lib/Target/Sw64/Sw64SchedCore4.td | 77 + + llvm/lib/Target/Sw64/Sw64Schedule.td | 80 + + llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.cpp | 54 + + llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.h | 34 + + llvm/lib/Target/Sw64/Sw64Subtarget.cpp | 116 + + llvm/lib/Target/Sw64/Sw64Subtarget.h | 164 + + llvm/lib/Target/Sw64/Sw64TargetMachine.cpp | 194 + + llvm/lib/Target/Sw64/Sw64TargetMachine.h | 61 + + llvm/lib/Target/Sw64/Sw64TargetObjectFile.cpp | 121 + + llvm/lib/Target/Sw64/Sw64TargetObjectFile.h | 49 + + llvm/lib/Target/Sw64/Sw64TargetStreamer.h | 150 + + .../Target/Sw64/Sw64TargetTransformInfo.cpp | 804 ++++ + .../lib/Target/Sw64/Sw64TargetTransformInfo.h | 137 + + llvm/lib/Target/Sw64/Sw64VectorVarDefine.td | 319 ++ + .../lib/Target/Sw64/TargetInfo/CMakeLists.txt | 10 + + .../Target/Sw64/TargetInfo/Sw64TargetInfo.cpp | 24 + + .../Target/Sw64/TargetInfo/Sw64TargetInfo.h | 12 + + llvm/lib/TargetParser/Host.cpp | 10 + + llvm/lib/TargetParser/Triple.cpp | 22 + + .../Instrumentation/AddressSanitizer.cpp | 4 + + .../Instrumentation/MemorySanitizer.cpp | 181 + + llvm/test/ExecutionEngine/MCJIT/eh-lg-pic.ll | 2 +- + llvm/test/ExecutionEngine/MCJIT/lit.local.cfg | 2 + + .../MCJIT/remote/lit.local.cfg | 4 + + .../ExecutionEngine/OrcLazy/lit.local.cfg | 1 + + 
.../tools/llvm-reduce/file-output-type.test | 1 + + 134 files changed, 27447 insertions(+), 7 deletions(-) + create mode 100644 llvm/include/llvm/BinaryFormat/ELFRelocs/Sw64.def + create mode 100644 llvm/include/llvm/IR/IntrinsicsSw64.td + create mode 100644 llvm/include/llvm/Support/Sw64ABIFlags.h + create mode 100644 llvm/include/llvm/Support/Sw64TargetParser.def + create mode 100644 llvm/include/llvm/Support/Sw64TargetParser.h + create mode 100644 llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.cpp + create mode 100644 llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.h + create mode 100644 llvm/lib/Support/Sw64TargetParser.cpp + create mode 100644 llvm/lib/Target/Sw64/AsmParser/CMakeLists.txt + create mode 100644 llvm/lib/Target/Sw64/AsmParser/Sw64AsmParser.cpp + create mode 100644 llvm/lib/Target/Sw64/CMakeLists.txt + create mode 100644 llvm/lib/Target/Sw64/Disassembler/CMakeLists.txt + create mode 100644 llvm/lib/Target/Sw64/Disassembler/Sw64Disassembler.cpp + create mode 100644 llvm/lib/Target/Sw64/InstPrinter/CMakeLists.txt + create mode 100644 llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.cpp + create mode 100644 llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.h + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/CMakeLists.txt + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.cpp + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.h + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.cpp + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.h + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.cpp + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.h + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64BaseInfo.h + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFObjectWriter.cpp + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.cpp + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.h + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64FixupKinds.h + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.cpp + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.h + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.cpp + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.h + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.cpp + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.h + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.cpp + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.h + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64OptionRecord.cpp + create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64TargetStreamer.cpp + create mode 100644 llvm/lib/Target/Sw64/README.txt + create mode 100644 llvm/lib/Target/Sw64/Sw64.h + create mode 100644 llvm/lib/Target/Sw64/Sw64.td + create mode 100644 llvm/lib/Target/Sw64/Sw64AsmPrinter.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64BranchSelector.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64CallingConv.td + create mode 100644 llvm/lib/Target/Sw64/Sw64CombineLS.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64ExpandPseudo.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64ExpandPseudo2.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64FrameLowering.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64FrameLowering.h + create mode 100644 
llvm/lib/Target/Sw64/Sw64IEEEConstraint.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64ISelDAGToDAG.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64ISelLowering.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64ISelLowering.h + create mode 100644 llvm/lib/Target/Sw64/Sw64InstrFormats.td + create mode 100644 llvm/lib/Target/Sw64/Sw64InstrFormatsV.td + create mode 100644 llvm/lib/Target/Sw64/Sw64InstrInfo.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64InstrInfo.h + create mode 100644 llvm/lib/Target/Sw64/Sw64InstrInfo.td + create mode 100644 llvm/lib/Target/Sw64/Sw64InstrVector.td + create mode 100644 llvm/lib/Target/Sw64/Sw64LLRP.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64MCInstLower.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64MCInstLower.h + create mode 100644 llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.h + create mode 100644 llvm/lib/Target/Sw64/Sw64MacroFusion.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64MacroFusion.h + create mode 100644 llvm/lib/Target/Sw64/Sw64OptionRecord.h + create mode 100644 llvm/lib/Target/Sw64/Sw64PreLegalizerCombiner.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64RegisterInfo.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64RegisterInfo.h + create mode 100644 llvm/lib/Target/Sw64/Sw64RegisterInfo.td + create mode 100644 llvm/lib/Target/Sw64/Sw64Relocations.h + create mode 100644 llvm/lib/Target/Sw64/Sw64SchedCore3.td + create mode 100644 llvm/lib/Target/Sw64/Sw64SchedCore3SIMD.td + create mode 100644 llvm/lib/Target/Sw64/Sw64SchedCore4.td + create mode 100644 llvm/lib/Target/Sw64/Sw64Schedule.td + create mode 100644 llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.h + create mode 100644 llvm/lib/Target/Sw64/Sw64Subtarget.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64Subtarget.h + create mode 100644 llvm/lib/Target/Sw64/Sw64TargetMachine.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64TargetMachine.h + create mode 100644 llvm/lib/Target/Sw64/Sw64TargetObjectFile.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64TargetObjectFile.h + create mode 100644 llvm/lib/Target/Sw64/Sw64TargetStreamer.h + create mode 100644 llvm/lib/Target/Sw64/Sw64TargetTransformInfo.cpp + create mode 100644 llvm/lib/Target/Sw64/Sw64TargetTransformInfo.h + create mode 100644 llvm/lib/Target/Sw64/Sw64VectorVarDefine.td + create mode 100644 llvm/lib/Target/Sw64/TargetInfo/CMakeLists.txt + create mode 100644 llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.cpp + create mode 100644 llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.h + +diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt +index 79de9eb2e..e87d3091e 100644 +--- a/llvm/CMakeLists.txt ++++ b/llvm/CMakeLists.txt +@@ -441,6 +441,7 @@ set(LLVM_ALL_TARGETS + PowerPC + RISCV + Sparc ++ Sw64 + SystemZ + VE + WebAssembly +@@ -458,7 +459,7 @@ set(LLVM_ALL_EXPERIMENTAL_TARGETS + ) + + # List of targets with JIT support: +-set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM Mips SystemZ) ++set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM Mips SystemZ Sw64) + + set(LLVM_TARGETS_TO_BUILD "all" + CACHE STRING "Semicolon-separated list of targets to build, or \"all\".") +diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake +index f63c3f1a3..5d430f974 100644 +--- a/llvm/cmake/config-ix.cmake ++++ b/llvm/cmake/config-ix.cmake +@@ -512,6 +512,10 @@ elseif (LLVM_NATIVE_ARCH STREQUAL "m68k") + set(LLVM_NATIVE_ARCH M68k) + elseif (LLVM_NATIVE_ARCH MATCHES "loongarch") + 
set(LLVM_NATIVE_ARCH LoongArch) ++elseif (LLVM_NATIVE_ARCH MATCHES "sw64") ++ set(LLVM_NATIVE_ARCH Sw64) ++elseif (LLVM_NATIVE_ARCH MATCHES "sw_64") ++ set(LLVM_NATIVE_ARCH Sw64) + else () + message(FATAL_ERROR "Unknown architecture ${LLVM_NATIVE_ARCH}") + endif () +diff --git a/llvm/cmake/config.guess b/llvm/cmake/config.guess +index 71abbf939..121b90314 100644 +--- a/llvm/cmake/config.guess ++++ b/llvm/cmake/config.guess +@@ -880,6 +880,15 @@ EOF + if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + exit ;; ++ sw_64:Linux:*:* | sw_64:Linux:*:*) ++ case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in ++ SW6) UNAME_MACHINE=sw_64 ;; ++ esac ++ UNAME_MACHINE=sw_64 ++ objdump --private-headers /bin/sh | grep -q ld.so.1 ++ if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi ++ echo ${UNAME_MACHINE}-sunway-linux-gnu${LIBC} ++ exit ;; + arm*:Linux:*:*) + eval $set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ +diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h +index f5a7cdb38..bf13694cf 100644 +--- a/llvm/include/llvm/BinaryFormat/ELF.h ++++ b/llvm/include/llvm/BinaryFormat/ELF.h +@@ -320,6 +320,7 @@ enum { + EM_VE = 251, // NEC SX-Aurora VE + EM_CSKY = 252, // C-SKY 32-bit processor + EM_LOONGARCH = 258, // LoongArch ++ EM_SW64 = 0x9916, // SW64 + }; + + // Object file classes. +@@ -604,6 +605,31 @@ enum { + ODK_PAGESIZE = 11 // Page size information + }; + ++// SW64 Specific e_flags ++enum { ++ ++ EF_SW64_NOREORDER = 0x00000001, // Don't reorder instructions ++ EF_SW64_PIC = 0x00000002, // Position independent code ++ EF_SW64_CPIC = 0x00000004, // Call object with Position independent code ++ EF_SW64_ABI2 = 0x00000020, // File uses N32 ABI ++ EF_SW64_32BITMODE = 0x00000100, // Code compiled for a 64-bit machine ++ // in 32-bit mode ++ EF_SW64_FP64 = 0x00000200, // Code compiled for a 32-bit machine ++ // but uses 64-bit FP registers ++ EF_SW64_NAN2008 = 0x00000400, // Uses IEE 754-2008 NaN encoding ++ // ABI flags ++ EF_SW64_ABI_EABI64 = 0x00004000, // EABI in 64 bit mode. ++ EF_SW64_ABI = 0x0000f000, // Mask for selecting EF_SW64_ABI_ variant. ++ EF_SW64_32BIT = 0x00000001, // All addresses must be below 2GB. ++ EF_SW64_CANRELAX = 0x00000002 // All relocations needed for relaxation with ++ // code movement are present. ++}; ++ ++// ELF Relocation types for Sw64. ++enum { ++#include "ELFRelocs/Sw64.def" ++}; ++ + // Hexagon-specific e_flags + enum { + // Object processor version flags, bits[11:0] +@@ -1075,6 +1101,11 @@ enum : unsigned { + + SHT_CSKY_ATTRIBUTES = 0x70000001U, + ++ SHT_SW64_ABIFLAGS = 0x7000002a, // ABI information. ++ SHT_SW64_REGINFO = 0x70000002, // Register usage information ++ SHT_SW64_OPTIONS = 0x7000000d, // General options ++ SHT_SW64_DWARF = 0x7000001e, // DWARF debugging section. ++ + SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type. + SHT_LOUSER = 0x80000000, // Lowest type reserved for applications. + SHT_HIUSER = 0xffffffff // Highest type reserved for applications. +@@ -1180,6 +1211,9 @@ enum : unsigned { + // Section data is string data by default. + SHF_MIPS_STRING = 0x80000000, + ++ // Do not strip this section. 
++ SHF_SW64_NOSTRIP = 0x8000000, ++ + // Make code section unreadable when in execute-only mode + SHF_ARM_PURECODE = 0x20000000 + }; +diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/Sw64.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/Sw64.def +new file mode 100644 +index 000000000..0edecd02b +--- /dev/null ++++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/Sw64.def +@@ -0,0 +1,44 @@ ++#ifndef ELF_RELOC ++#error "ELF_RELOC must be defined" ++#endif ++ ++ELF_RELOC(R_SW_64_NONE, 0) ++ELF_RELOC(R_SW_64_REFLONG, 1) ++ELF_RELOC(R_SW_64_REFQUAD, 2) ++ELF_RELOC(R_SW_64_GPREL32, 3) ++ELF_RELOC(R_SW_64_LITERAL, 4) ++ELF_RELOC(R_SW_64_LITUSE, 5) ++ELF_RELOC(R_SW_64_GPDISP, 6) ++ELF_RELOC(R_SW_64_BRADDR, 7) ++ELF_RELOC(R_SW_64_HINT, 8) ++ELF_RELOC(R_SW_64_SREL16, 9) ++ELF_RELOC(R_SW_64_SREL32, 10) ++ELF_RELOC(R_SW_64_SREL64, 11) ++ELF_RELOC(R_SW_64_GPRELHIGH, 17) ++ELF_RELOC(R_SW_64_GPRELLOW, 18) ++ELF_RELOC(R_SW_64_GPREL16, 19) ++ELF_RELOC(R_SW_64_COPY, 24) ++ELF_RELOC(R_SW_64_GLOB_DAT, 25) ++ELF_RELOC(R_SW_64_JMP_SLOT, 26) ++ELF_RELOC(R_SW_64_RELATIVE, 27) ++ELF_RELOC(R_SW_64_BRSGP, 28) ++ELF_RELOC(R_SW_64_TLSGD, 29) ++ELF_RELOC(R_SW_64_TLSLDM, 30) ++ELF_RELOC(R_SW_64_DTPMOD64, 31) ++ELF_RELOC(R_SW_64_GOTDTPREL, 32) ++ELF_RELOC(R_SW_64_DTPREL64, 33) ++ELF_RELOC(R_SW_64_DTPRELHI, 34) ++ELF_RELOC(R_SW_64_DTPRELLO, 35) ++ELF_RELOC(R_SW_64_DTPREL16, 36) ++ELF_RELOC(R_SW_64_GOTTPREL, 37) ++ELF_RELOC(R_SW_64_TPREL64, 38) ++ELF_RELOC(R_SW_64_TPRELHI, 39) ++ELF_RELOC(R_SW_64_TPRELLO, 40) ++ELF_RELOC(R_SW_64_TPREL16, 41) ++ELF_RELOC(R_SW_64_NUM, 42) ++ELF_RELOC(R_SW_64_LITERAL_GOT, 43) ++ELF_RELOC(R_SW_64_TLSREL_GOT, 44) ++ELF_RELOC(R_SW_64_PC32, 48) ++ELF_RELOC(R_SW_64_EH, 49) ++ELF_RELOC(R_SW_64_DUMMY_LITERAL, 98) ++ELF_RELOC(R_SW_64_DUMMY_LITUSE, 99) +diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h +index 89eed7463..f73715f6d 100644 +--- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h ++++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h +@@ -180,6 +180,16 @@ enum class MachineCombinerPattern { + DPWSSD, + + FNMADD, ++ ++ // SW64 ++ VMULADDS_OP1, ++ VMULADDS_OP2, ++ VMULADDD_OP1, ++ VMULADDD_OP2, ++ VMULSUBS_OP1, ++ VMULSUBS_OP2, ++ VMULSUBD_OP1, ++ VMULSUBD_OP2, + }; + + } // end namespace llvm +diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h +index 5d25a3e85..e514b0a46 100644 +--- a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h ++++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h +@@ -416,6 +416,46 @@ public: + unsigned NumStubs); + }; + ++// @brief Sw64 support. ++// ++// Sw64 supports lazy JITing. ++class OrcSw64 { ++public: ++ static constexpr unsigned PointerSize = 8; ++ static constexpr unsigned TrampolineSize = 40; ++ static constexpr unsigned StubSize = 32; ++ static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31; ++ static constexpr unsigned ResolverCodeSize = 0x218; ++ ++ /// Write the resolver code into the given memory. The user is ++ /// responsible for allocating the memory and setting permissions. ++ /// ++ /// ReentryFnAddr should be the address of a function whose signature matches ++ /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr ++ /// argument of writeResolverCode will be passed as the second argument to ++ /// the function at ReentryFnAddr. 
++ static void writeResolverCode(char *ResolverWorkingMem, ++ ExecutorAddr ResolverTargetAddress, ++ ExecutorAddr ReentryFnAddr, ++ ExecutorAddr ReentryCtxAddr); ++ ++ /// Write the requested number of trampolines into the given memory, ++ /// which must be big enough to hold 1 pointer, plus NumTrampolines ++ /// trampolines. ++ static void writeTrampolines(char *TrampolineBlockWorkingMem, ++ ExecutorAddr TrampolineBlockTargetAddress, ++ ExecutorAddr ResolverFnAddr, ++ unsigned NumTrampolines); ++ /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. ++ /// Stubs will be written as if linked at StubsBlockTargetAddress, with the ++ /// Nth stub using the Nth pointer in memory starting at ++ /// PointersBlockTargetAddress. ++ static void writeIndirectStubsBlock(char *StubsBlockWorkingMem, ++ ExecutorAddr StubsBlockTargetAddress, ++ ExecutorAddr PointersBlockTargetAddress, ++ unsigned NumStubs); ++}; ++ + } // end namespace orc + } // end namespace llvm + +diff --git a/llvm/include/llvm/IR/CMakeLists.txt b/llvm/include/llvm/IR/CMakeLists.txt +index 468d66379..7a1343b14 100644 +--- a/llvm/include/llvm/IR/CMakeLists.txt ++++ b/llvm/include/llvm/IR/CMakeLists.txt +@@ -22,4 +22,5 @@ tablegen(LLVM IntrinsicsWebAssembly.h -gen-intrinsic-enums -intrinsic-prefix=was + tablegen(LLVM IntrinsicsX86.h -gen-intrinsic-enums -intrinsic-prefix=x86) + tablegen(LLVM IntrinsicsXCore.h -gen-intrinsic-enums -intrinsic-prefix=xcore) + tablegen(LLVM IntrinsicsVE.h -gen-intrinsic-enums -intrinsic-prefix=ve) ++tablegen(LLVM IntrinsicsSw64.h -gen-intrinsic-enums -intrinsic-prefix=sw64) + add_public_tablegen_target(intrinsics_gen) +diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td +index e51c04fba..20a8fa419 100644 +--- a/llvm/include/llvm/IR/Intrinsics.td ++++ b/llvm/include/llvm/IR/Intrinsics.td +@@ -2535,6 +2535,7 @@ include "llvm/IR/IntrinsicsSystemZ.td" + include "llvm/IR/IntrinsicsWebAssembly.td" + include "llvm/IR/IntrinsicsRISCV.td" + include "llvm/IR/IntrinsicsSPIRV.td" ++include "llvm/IR/IntrinsicsSw64.td" + include "llvm/IR/IntrinsicsVE.td" + include "llvm/IR/IntrinsicsDirectX.td" + include "llvm/IR/IntrinsicsLoongArch.td" +diff --git a/llvm/include/llvm/IR/IntrinsicsSw64.td b/llvm/include/llvm/IR/IntrinsicsSw64.td +new file mode 100644 +index 000000000..2e92ce525 +--- /dev/null ++++ b/llvm/include/llvm/IR/IntrinsicsSw64.td +@@ -0,0 +1,654 @@ ++//==- IntrinsicsSw64.td - Sw64 intrinsics -*- tablegen -*-==// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines all of the Sw64-specific intrinsics. ++// ++//===----------------------------------------------------------------------===// ++ ++let TargetPrefix = "sw64" in { // All intrinsics start with "llvm.sw64.". 
++def int_sw64_umulh : ClangBuiltin<"__builtin_sw_64_umulh">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; ++ ++def int_sw64_crc32b : ClangBuiltin<"__builtin_sw64_crc32b">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_crc32h : ClangBuiltin<"__builtin_sw64_crc32h">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_crc32w : ClangBuiltin<"__builtin_sw64_crc32w">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_crc32l : ClangBuiltin<"__builtin_sw64_crc32l">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_crc32cb : ClangBuiltin<"__builtin_sw64_crc32cb">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_crc32ch : ClangBuiltin<"__builtin_sw64_crc32ch">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_crc32cw : ClangBuiltin<"__builtin_sw64_crc32cw">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_crc32cl : ClangBuiltin<"__builtin_sw64_crc32cl">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++ ++def int_sw64_sbt : ClangBuiltin<"__builtin_sw64_sbt">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_cbt : ClangBuiltin<"__builtin_sw64_cbt">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++ ++def int_sw64_ctpopow : ClangBuiltin<"__builtin_sw_ctpopow">, ++ Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty], ++ [IntrNoMem]>; ++def int_sw64_ctlzow : ClangBuiltin<"__builtin_sw_ctlzow">, ++ Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty], ++ [IntrNoMem]>; ++def int_sw64_reduc_plusw : ClangBuiltin<"__builtin_sw_reduc_plusw">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_reduc_pluss : ClangBuiltin<"__builtin_sw_reduc_pluss">, ++ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_reduc_plusd : ClangBuiltin<"__builtin_sw_reduc_plusd">, ++ Intrinsic<[llvm_double_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++// SIMD Intrincs ++def int_sw64_vaddw : ClangBuiltin<"__builtin_sw_vaddw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vucaddw : ClangBuiltin<"__builtin_sw_vucaddw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vucaddwi : ClangBuiltin<"__builtin_sw_vucaddwi">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vucsubw : ClangBuiltin<"__builtin_sw_vucsubw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vucsubwi : ClangBuiltin<"__builtin_sw_vucsubwi">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vucaddh : ClangBuiltin<"__builtin_sw_vucaddh">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vucaddhi : ClangBuiltin<"__builtin_sw_vucaddhi">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vucsubh : ClangBuiltin<"__builtin_sw_vucsubh">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vucsubhi : ClangBuiltin<"__builtin_sw_vucsubhi">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vucaddb : ClangBuiltin<"__builtin_sw_vucaddb">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, 
llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vucaddbi : ClangBuiltin<"__builtin_sw_vucaddbi">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vucsubb : ClangBuiltin<"__builtin_sw_vucsubb">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vucsubbi : ClangBuiltin<"__builtin_sw_vucsubbi">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vslls : ClangBuiltin<"__builtin_sw_vslls">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vslld : ClangBuiltin<"__builtin_sw_vslld">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vsrls : ClangBuiltin<"__builtin_sw_vsrls">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vsrld : ClangBuiltin<"__builtin_sw_vsrld">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++ ++class sw64VectorIntArg ++ : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyint_ty], ++ [IntrNoMem]>; ++def int_sw64_vsll : sw64VectorIntArg; ++def int_sw64_vsrl : sw64VectorIntArg; ++def int_sw64_vsra : sw64VectorIntArg; ++def int_sw64_vrol : sw64VectorIntArg; ++ ++def int_sw64_vsllw : ClangBuiltin<"__builtin_sw_vsllw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vsllh : ClangBuiltin<"__builtin_sw_vsllh">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vsllb : ClangBuiltin<"__builtin_sw_vslln">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vslll : ClangBuiltin<"__builtin_sw_vslll">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vsrlw : ClangBuiltin<"__builtin_sw_vsrlw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vsrlh : ClangBuiltin<"__builtin_sw_vsrlh">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vsrlb : ClangBuiltin<"__builtin_sw_vsrlb">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vsrll : ClangBuiltin<"__builtin_sw_vsrll">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vsraw : ClangBuiltin<"__builtin_sw_vsraw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vsrah : ClangBuiltin<"__builtin_sw_vsrah">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vsrab : ClangBuiltin<"__builtin_sw_vsrab">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vsral : ClangBuiltin<"__builtin_sw_vsral">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++ ++def int_sw64_vrolw : ClangBuiltin<"__builtin_sw_vrolw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vrolwi : ClangBuiltin<"__builtin_sw_vrolwi">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vrolb : ClangBuiltin<"__builtin_sw_vrolb">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vrolbi : ClangBuiltin<"__builtin_sw_vrolbi">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vrolh : ClangBuiltin<"__builtin_sw_vrolh">, ++ 
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vrolhi : ClangBuiltin<"__builtin_sw_vrolhi">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vroll : ClangBuiltin<"__builtin_sw_vroll">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vrolli : ClangBuiltin<"__builtin_sw_vrolli">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++ ++def int_sw64_sllow : ClangBuiltin<"__builtin_sw_sllow">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_srlow : ClangBuiltin<"__builtin_sw_srlow">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_sllowi : ClangBuiltin<"__builtin_sw_sllowi">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_srlowi : ClangBuiltin<"__builtin_sw_srlowi">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++ ++def int_sw64_vsqrts : ClangBuiltin<"__builtin_sw_vsqrts">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vsqrtd : ClangBuiltin<"__builtin_sw_vsqrtd">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++def int_sw64_vornotw : ClangBuiltin<"__builtin_sw_vornotw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_veqvw : ClangBuiltin<"__builtin_sw_veqvw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++ ++def int_sw64_vfcmpeqs : ClangBuiltin<"__builtin_sw_vfcmpeqs">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcmples : ClangBuiltin<"__builtin_sw_vfcmples">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcmplts : ClangBuiltin<"__builtin_sw_vfcmplts">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcmpuns : ClangBuiltin<"__builtin_sw_vfcmpuns">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcmpeqd : ClangBuiltin<"__builtin_sw_vfcmpeqd">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcmpled : ClangBuiltin<"__builtin_sw_vfcmpled">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcmpltd : ClangBuiltin<"__builtin_sw_vfcmpltd">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcmpund : ClangBuiltin<"__builtin_sw_vfcmpund">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vcpyss : ClangBuiltin<"__builtin_sw_vcpyss">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vcpysns : ClangBuiltin<"__builtin_sw_vcpysns">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vcpyses : ClangBuiltin<"__builtin_sw_vcpyses">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vcpysd : ClangBuiltin<"__builtin_sw_vcpysd">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vcpysnd : ClangBuiltin<"__builtin_sw_vcpysnd">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vcpysed : ClangBuiltin<"__builtin_sw_vcpysed">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, 
llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vseleqw : ClangBuiltin<"__builtin_sw_vseleqw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vseleqwi : ClangBuiltin<"__builtin_sw_vseleqwi">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vselltw : ClangBuiltin<"__builtin_sw_vselltw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vselltwi : ClangBuiltin<"__builtin_sw_vselltwi">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vsellew : ClangBuiltin<"__builtin_sw_vsellew">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vsellewi : ClangBuiltin<"__builtin_sw_vsellewi">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vsellbcw : ClangBuiltin<"__builtin_sw_vsellbcw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vsellbcwi : ClangBuiltin<"__builtin_sw_vsellbcwi">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfseleqs : ClangBuiltin<"__builtin_sw_vfseleqs">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vfselles : ClangBuiltin<"__builtin_sw_vfselles">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vfsellts : ClangBuiltin<"__builtin_sw_vfsellts">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vfseleqd : ClangBuiltin<"__builtin_sw_vfseleqd">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfselled : ClangBuiltin<"__builtin_sw_vfselled">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfselltd : ClangBuiltin<"__builtin_sw_vfselltd">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++// Multiply-add instructions ++// FIXME ++//def int_sw64_vnmss : ClangBuiltin<"__builtin_sw_vnmss">, ++// Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], ++// [IntrNoMem]>; ++def int_sw64_vnmsd : ClangBuiltin<"__builtin_sw_vnmsd">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++// Vector Insert Intrinsic ++def int_sw64_vinsfs : ClangBuiltin<"__builtin_sw_vinsfs">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_v4f32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vinsfd : ClangBuiltin<"__builtin_sw_vinsfd">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_double_ty, llvm_v4f64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vinsw : ClangBuiltin<"__builtin_sw_vinsw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_i64_ty, llvm_v8i32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vinsb : ClangBuiltin<"__builtin_sw_vinsb">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_i64_ty, llvm_v32i8_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vinsh : ClangBuiltin<"__builtin_sw_vinsh">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_i64_ty, llvm_v16i16_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vinsl : ClangBuiltin<"__builtin_sw_vinsl">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_i64_ty, llvm_v4i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++ ++def 
int_sw64_vextfs : ClangBuiltin<"__builtin_sw_vextfs">, ++ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vextfd : ClangBuiltin<"__builtin_sw_vextfd">, ++ Intrinsic<[llvm_double_ty], [llvm_v4f64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vextw : ClangBuiltin<"__builtin_sw_vextw">, ++ Intrinsic<[llvm_i64_ty], [llvm_v8i32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vextl : ClangBuiltin<"__builtin_sw_vextl">, ++ Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vconw : ClangBuiltin<"__builtin_sw_vconw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_ptr_ty], ++ [IntrNoMem, IntrArgMemOnly]>; ++def int_sw64_vconl : ClangBuiltin<"__builtin_sw_vconl">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_ptr_ty], ++ [IntrNoMem, IntrArgMemOnly]>; ++def int_sw64_vcons : ClangBuiltin<"__builtin_sw_vcons">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_ptr_ty], ++ [IntrNoMem, IntrArgMemOnly]>; ++def int_sw64_vcond : ClangBuiltin<"__builtin_sw_vcond">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_ptr_ty], ++ [IntrNoMem, IntrArgMemOnly]>; ++ ++def int_sw64_vlogzz : ClangBuiltin<"__builtin_sw_vlogzz">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++ ++def int_sw64_vshfw : ClangBuiltin<"__builtin_sw_vshfw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vucaddh_v16hi : ClangBuiltin<"__builtin_sw_vucaddh_v16hi">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_sw64_vucaddhi_v16hi : ClangBuiltin<"__builtin_sw_vucaddhi_v16hi">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_sw64_vucsubh_v16hi : ClangBuiltin<"__builtin_sw_vucsubh_v16hi">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_sw64_vucsubhi_v16hi : ClangBuiltin<"__builtin_sw_vucsubhi_v16hi">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_sw64_vucaddb_v32qi : ClangBuiltin<"__builtin_sw_vucaddb_v32qi">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_sw64_vucaddbi_v32qi : ClangBuiltin<"__builtin_sw_vucaddbi_v32qi">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_sw64_vucsubb_v32qi : ClangBuiltin<"__builtin_sw_vucsubb_v32qi">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_sw64_vucsubbi_v32qi : ClangBuiltin<"__builtin_sw_vucsubbi_v32qi">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_sw64_vmaxb : ClangBuiltin<"__builtin_sw_vmaxb">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_sw64_vminb : ClangBuiltin<"__builtin_sw_vminb">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_sw64_vmaxh : ClangBuiltin<"__builtin_sw_vmaxh">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_sw64_vminh : ClangBuiltin<"__builtin_sw_vminh">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_sw64_vmaxw : ClangBuiltin<"__builtin_sw_vmaxw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vminw : ClangBuiltin<"__builtin_sw_vminw">, ++ 
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vmaxl : ClangBuiltin<"__builtin_sw_vmaxl">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vminl : ClangBuiltin<"__builtin_sw_vminl">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vumaxb : ClangBuiltin<"__builtin_sw_vumaxb">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_sw64_vuminb : ClangBuiltin<"__builtin_sw_vuminb">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_sw64_vumaxh : ClangBuiltin<"__builtin_sw_vumaxh">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_sw64_vuminh : ClangBuiltin<"__builtin_sw_vuminh">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_sw64_vumaxw : ClangBuiltin<"__builtin_sw_vumaxw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vuminw : ClangBuiltin<"__builtin_sw_vuminw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vumaxl : ClangBuiltin<"__builtin_sw_vumaxl">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vuminl : ClangBuiltin<"__builtin_sw_vuminl">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++def int_sw64_sraow : ClangBuiltin<"__builtin_sw_sraow">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_sraowi : ClangBuiltin<"__builtin_sw_sraowi">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++ ++def int_sw64_vcmpgew : ClangBuiltin<"__builtin_sw_vcmpgew">, ++ Intrinsic<[llvm_i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vcmpeqw : ClangBuiltin<"__builtin_sw_vcmpeqw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vcmplew : ClangBuiltin<"__builtin_sw_vcmplew">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vcmpltw : ClangBuiltin<"__builtin_sw_vcmpltw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vcmpulew : ClangBuiltin<"__builtin_sw_vcmpulew">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vcmpultw : ClangBuiltin<"__builtin_sw_vcmpultw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++ ++def int_sw64_vcmpueqb : ClangBuiltin<"__builtin_sw_vcmpueqb">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_sw64_vcmpugtb : ClangBuiltin<"__builtin_sw_vcmpugtb">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_sw64_vsumw : ClangBuiltin<"__builtin_sw_vsumw">, ++ Intrinsic<[llvm_i64_ty], [llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vsuml : ClangBuiltin<"__builtin_sw_vsuml">, ++ Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vbinvw : ClangBuiltin<"__builtin_sw_vbinvw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vwinv : ClangBuiltin<"__builtin_sw_vwinv">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vseleql : ClangBuiltin<"__builtin_sw_vseleql">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++def 
int_sw64_veqvb : ClangBuiltin<"__builtin_sw_veqvb">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_sw64_veqvh : ClangBuiltin<"__builtin_sw_veqvh">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_sw64_veqvl : ClangBuiltin<"__builtin_sw_veqvl">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vsums : ClangBuiltin<"__builtin_sw_vsums">, ++ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vsumd : ClangBuiltin<"__builtin_sw_vsumd">, ++ Intrinsic<[llvm_double_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfrecs : ClangBuiltin<"__builtin_sw_vfrecs">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vfrecd : ClangBuiltin<"__builtin_sw_vfrecd">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfris : ClangBuiltin<"__builtin_sw_vfris">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vfris_g : ClangBuiltin<"__builtin_sw_vfris_g">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vfris_p : ClangBuiltin<"__builtin_sw_vfris_p">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vfris_z : ClangBuiltin<"__builtin_sw_vfris_z">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vfris_n : ClangBuiltin<"__builtin_sw_vfris_n">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vfrid : ClangBuiltin<"__builtin_sw_vfrid">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfrid_g : ClangBuiltin<"__builtin_sw_vfrid_g">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfrid_p : ClangBuiltin<"__builtin_sw_vfrid_p">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfrid_z : ClangBuiltin<"__builtin_sw_vfrid_z">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfrid_n : ClangBuiltin<"__builtin_sw_vfrid_n">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vmaxs : ClangBuiltin<"__builtin_sw_vmaxs">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vmins : ClangBuiltin<"__builtin_sw_vmins">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vmaxd : ClangBuiltin<"__builtin_sw_vmaxd">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vmind : ClangBuiltin<"__builtin_sw_vmind">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcvtsd : ClangBuiltin<"__builtin_sw_vfcvtsd">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcvtds : ClangBuiltin<"__builtin_sw_vfcvtds">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcvtsh : ClangBuiltin<"__builtin_sw_vfcvtsh">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcvths : ClangBuiltin<"__builtin_sw_vfcvths">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcvtls : ClangBuiltin<"__builtin_sw_vfcvtls">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcvtld : ClangBuiltin<"__builtin_sw_vfcvtld">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcvtdl : 
ClangBuiltin<"__builtin_sw_vfcvtdl">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcvtdl_g : ClangBuiltin<"__builtin_sw_vfcvtdl_g">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcvtdl_p : ClangBuiltin<"__builtin_sw_vfcvtdl_p">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcvtdl_z : ClangBuiltin<"__builtin_sw_vfcvtdl_z">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcvtdl_n : ClangBuiltin<"__builtin_sw_vfcvtdl_n">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++def int_sw64_vfcvtsl : ClangBuiltin<"__builtin_sw_vfcvtsl">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++ ++def int_sw64_vshfq : ClangBuiltin<"__builtin_sw_vshfq">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i64_ty], ++ [IntrNoMem]>; ++def int_sw64_vshfqb : ClangBuiltin<"__builtin_sw_vshfqb">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++ ++def int_sw64_vsm3msw : ClangBuiltin<"__builtin_sw_vsm3msw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vsm3r : ClangBuiltin<"__builtin_sw_vsm3r">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vsm4key : ClangBuiltin<"__builtin_sw_vsm4key">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_sw64_vsm4r : ClangBuiltin<"__builtin_sw_vsm4r">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++ ++def int_sw64_vldw : ClangBuiltin<"__builtin_sw_vldw">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++def int_sw64_vldl : ClangBuiltin<"__builtin_sw_vldl">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++def int_sw64_vlds : ClangBuiltin<"__builtin_sw_vlds">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++def int_sw64_vldd : ClangBuiltin<"__builtin_sw_vldd">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_sw64_vload : Intrinsic<[llvm_anyvector_ty], [llvm_anyptr_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++def int_sw64_vloadu : Intrinsic<[llvm_anyvector_ty], [llvm_anyptr_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++def int_sw64_vload_u : Intrinsic<[llvm_anyvector_ty], [llvm_anyptr_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++def int_sw64_vloade : Intrinsic<[llvm_anyvector_ty], [llvm_anyptr_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++def int_sw64_vloadnc : Intrinsic<[llvm_anyvector_ty], [llvm_anyptr_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++def int_sw64_vstore : Intrinsic<[], [llvm_anyvector_ty, llvm_anyptr_ty], ++ [IntrWriteMem, IntrArgMemOnly]>; ++def int_sw64_vstoreu : Intrinsic<[], [llvm_anyvector_ty, llvm_anyptr_ty], ++ [IntrWriteMem, IntrArgMemOnly]>; ++def int_sw64_vstore_u : Intrinsic<[], [llvm_anyvector_ty, llvm_anyptr_ty], ++ [IntrWriteMem, IntrArgMemOnly]>; ++def int_sw64_vstoreuh : Intrinsic<[], [llvm_anyvector_ty, llvm_anyptr_ty], ++ [IntrWriteMem, IntrArgMemOnly]>; ++def int_sw64_vstoreul : Intrinsic<[], [llvm_anyvector_ty, llvm_anyptr_ty], ++ [IntrWriteMem, IntrArgMemOnly]>; ++def int_sw64_vstorenc : Intrinsic<[], [llvm_anyvector_ty, llvm_anyptr_ty], ++ [IntrWriteMem, IntrArgMemOnly]>; ++ ++def int_sw64_loadu : ClangBuiltin<"__builtin_sw_loadu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, 
llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++def int_sw64_loadu_i : ClangBuiltin<"__builtin_sw_loadu_i">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++def int_sw64_loadu_l : ClangBuiltin<"__builtin_sw_loadu_l">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++def int_sw64_loadu_f : ClangBuiltin<"__builtin_sw_loadu_f">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++def int_sw64_loadu_d : ClangBuiltin<"__builtin_sw_loadu_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++def int_sw64_load_u : ClangBuiltin<"__builtin_sw_load_u">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++} +diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h +index c28cd1211..125fdefd0 100644 +--- a/llvm/include/llvm/MC/MCAsmInfo.h ++++ b/llvm/include/llvm/MC/MCAsmInfo.h +@@ -536,6 +536,9 @@ protected: + // %hi(), and similar unary operators. + bool HasMipsExpressions = false; + ++ // If true, then the assembler supports the .set directive. ++ bool HasSw64SetDirective = false; ++ + // If true, use Motorola-style integers in Assembly (ex. $0ac). + bool UseMotorolaIntegers = false; + +@@ -881,6 +884,7 @@ public: + bool canRelaxRelocations() const { return RelaxELFRelocations; } + void setRelaxELFRelocations(bool V) { RelaxELFRelocations = V; } + bool hasMipsExpressions() const { return HasMipsExpressions; } ++ bool hasSw64SetDirective() const { return HasSw64SetDirective; } + bool needsFunctionDescriptors() const { return NeedsFunctionDescriptors; } + bool shouldUseMotorolaIntegers() const { return UseMotorolaIntegers; } + }; +diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h +index 5bc5e04f7..5d788b02a 100644 +--- a/llvm/include/llvm/MC/MCExpr.h ++++ b/llvm/include/llvm/MC/MCExpr.h +@@ -314,6 +314,32 @@ public: + VK_PPC_NOTOC, // symbol@notoc + VK_PPC_PCREL_OPT, // .reloc expr, R_PPC64_PCREL_OPT, expr + ++ VK_SW64_ELF_LITERAL, ++ VK_SW64_LITUSE_ADDR, ++ VK_SW64_LITUSE_BASE, ++ VK_SW64_LITUSE_BYTOFF, ++ VK_SW64_LITUSE_JSR, ++ VK_SW64_LITUSE_TLSGD, ++ VK_SW64_LITUSE_TLSLDM, ++ VK_SW64_LITUSE_JSRDIRECT, ++ VK_SW64_GPDISP, ++ VK_SW64_GPDISP_HI16, ++ VK_SW64_GPDISP_LO16, ++ VK_SW64_GPREL_HI16, ++ VK_SW64_GPREL_LO16, ++ VK_SW64_GPREL16, ++ VK_SW64_BRSGP, ++ VK_SW64_TLSGD, ++ VK_SW64_TLSLDM, ++ VK_SW64_GOTDTPREL16, ++ VK_SW64_DTPREL_HI16, ++ VK_SW64_DTPREL_LO16, ++ VK_SW64_DTPREL16, ++ VK_SW64_GOTTPREL16, ++ VK_SW64_TPREL_HI16, ++ VK_SW64_TPREL_LO16, ++ VK_SW64_TPREL16, ++ + VK_COFF_IMGREL32, // symbol@imgrel (image-relative) + + VK_Hexagon_LO16, +diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h +index f3016cc14..3c004b384 100644 +--- a/llvm/include/llvm/Object/ELFObjectFile.h ++++ b/llvm/include/llvm/Object/ELFObjectFile.h +@@ -1254,6 +1254,8 @@ StringRef ELFObjectFile::getFileFormatName() const { + return "elf64-ve"; + case ELF::EM_LOONGARCH: + return "elf64-loongarch"; ++ case ELF::EM_SW64: ++ return "elf64-sw_64"; + default: + return "elf64-unknown"; + } +@@ -1352,6 +1354,9 @@ template Triple::ArchType ELFObjectFile::getArch() const { + case ELF::EM_XTENSA: + return Triple::xtensa; + ++ case ELF::EM_SW64: ++ return Triple::sw_64; ++ + default: + return Triple::UnknownArch; + } +diff --git a/llvm/include/llvm/Support/Sw64ABIFlags.h b/llvm/include/llvm/Support/Sw64ABIFlags.h +new 
file mode 100644 +index 000000000..44fc9dbf1 +--- /dev/null ++++ b/llvm/include/llvm/Support/Sw64ABIFlags.h +@@ -0,0 +1,39 @@ ++//===--- Sw64ABIFlags.h - SW64 ABI flags ----------------------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the constants for the ABI flags structure contained ++// in the .Sw64.abiflags section. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_SUPPORT_Sw64ABIFLAGS_H ++#define LLVM_SUPPORT_Sw64ABIFLAGS_H ++ ++namespace llvm { ++namespace Sw64 { ++ ++// Values for the xxx_size bytes of an ABI flags structure. ++enum AFL_REG { ++ AFL_REG_NONE = 0x00, // No registers ++ AFL_REG_32 = 0x01, // 32-bit registers ++ AFL_REG_64 = 0x02, // 64-bit registers ++ AFL_REG_128 = 0x03 // 128-bit registers ++}; ++ ++// Values for the flags1 word of an ABI flags structure. ++enum AFL_FLAGS1 { AFL_FLAGS1_ODDSPREG = 1 }; ++ ++enum AFL_EXT { ++ AFL_EXT_NONE = 0, // None ++ AFL_EXT_OCTEON = 5 // Cavium Networks Octeon ++}; ++} // namespace Sw64 ++} // namespace llvm ++ ++#endif +diff --git a/llvm/include/llvm/Support/Sw64TargetParser.def b/llvm/include/llvm/Support/Sw64TargetParser.def +new file mode 100644 +index 000000000..cb598dc25 +--- /dev/null ++++ b/llvm/include/llvm/Support/Sw64TargetParser.def +@@ -0,0 +1,28 @@ ++//===- Sw64TargetParser.def - Sw64 target parsing defines ---------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file provides defines to build up the Sw64 target parser's logic. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef PROC_ALIAS ++#define PROC_ALIAS(NAME, SW64) ++#endif ++ ++#undef PROC_ALIAS ++ ++#ifndef SW64_CPU ++#define SW64_CPU(ENUM, NAME, FEATURES, DEFAULT_MARCH) ++#endif ++ ++SW64_CPU(INVALID, {"invalid"}, FK_INVALID, {""}) ++SW64_CPU(SW6B, {"sw6b"}, FK_64BIT, {"core3b"}) ++SW64_CPU(SW4D, {"sw4d"}, FK_64BIT, {"core3b"}) ++SW64_CPU(SW8A, {"sw8a"}, FK_64BIT, {"core4"}) ++ ++#undef SW64_CPU +diff --git a/llvm/include/llvm/Support/Sw64TargetParser.h b/llvm/include/llvm/Support/Sw64TargetParser.h +new file mode 100644 +index 000000000..ceb0caff4 +--- /dev/null ++++ b/llvm/include/llvm/Support/Sw64TargetParser.h +@@ -0,0 +1,53 @@ ++//===-- Sw64TargetParser - Parser for Sw64 features -------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements a target parser to recognise SW64 hardware features ++// such as FPU/CPU/ARCH and extension names. 
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef LLVM_SUPPORT_SW64TARGETPARSER_H
++#define LLVM_SUPPORT_SW64TARGETPARSER_H
++
++#include "llvm/ADT/SmallVector.h"
++#include "llvm/ADT/StringRef.h"
++#include
++
++// FIXME: This should be made into a class design, to avoid duplication.
++namespace llvm {
++class StringRef;
++
++namespace Sw64 {
++
++enum CPUKind : unsigned { CK_INVALID = 0, CK_SW6B, CK_SW4D, CK_SW8A };
++
++enum FeatureKind : unsigned {
++  FK_INVALID = 0,
++  FK_NONE = 1,
++  FK_STDEXTM = 1 << 2,
++  FK_STDEXTA = 1 << 3,
++  FK_STDEXTF = 1 << 4,
++  FK_STDEXTD = 1 << 5,
++  FK_STDEXTC = 1 << 6,
++  FK_64BIT = 1 << 7,
++};
++
++bool checkCPUKind(CPUKind Kind, bool IsSw64);
++bool checkTuneCPUKind(CPUKind Kind, bool IsSw64);
++CPUKind parseARCHKind(StringRef CPU);
++CPUKind parseTuneCPUKind(StringRef CPU, bool IsSw64);
++StringRef getMcpuFromMArch(StringRef CPU);
++void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values, bool IsSw64);
++void fillValidTuneCPUArchList(SmallVectorImpl<StringRef> &Values, bool IsSw64);
++StringRef resolveTuneCPUAlias(StringRef TuneCPU, bool IsSw64);
++CPUKind parseCPUArch(StringRef CPU);
++
++} // namespace Sw64
++} // namespace llvm
++
++#endif
+diff --git a/llvm/include/llvm/TargetParser/Host.h b/llvm/include/llvm/TargetParser/Host.h
+index af72045a8..8da1991e3 100644
+--- a/llvm/include/llvm/TargetParser/Host.h
++++ b/llvm/include/llvm/TargetParser/Host.h
+@@ -67,6 +67,7 @@ namespace sys {
+   StringRef getHostCPUNameForRISCV(StringRef ProcCpuinfoContent);
+   StringRef getHostCPUNameForSPARC(StringRef ProcCpuinfoContent);
+   StringRef getHostCPUNameForBPF();
++  StringRef getHostCPUNameForSW64(StringRef ProcCpuinfoContent);
+ 
+   /// Helper functions to extract CPU details from CPUID on x86.
+   namespace x86 {
+diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
+index 79ccd644a..7423fd417 100644
+--- a/llvm/include/llvm/TargetParser/Triple.h
++++ b/llvm/include/llvm/TargetParser/Triple.h
+@@ -17,6 +17,7 @@
+ #undef NetBSD
+ #undef mips
+ #undef sparc
++#undef sw_64
+ 
+ namespace llvm {
+ 
+@@ -106,7 +107,8 @@ public:
+     renderscript32, // 32-bit RenderScript
+     renderscript64, // 64-bit RenderScript
+     ve,             // NEC SX-Aurora Vector Engine
+-    LastArchType = ve
++    sw_64,          // Sw64: basic arch for SW
++    LastArchType = sw_64
+   };
+   enum SubArchType {
+     NoSubArch,
+@@ -153,6 +155,11 @@ public:
+ 
+     MipsSubArch_r6,
+ 
++    Sw64SubArch_4d,
++    Sw64SubArch_6a,
++    Sw64SubArch_6b,
++    Sw64SubArch_8a,
++
+     PPCSubArch_spe,
+ 
+     // SPIR-V sub-arch corresponds to its version.
+@@ -887,6 +894,21 @@ public:
+     return isMIPS32() || isMIPS64();
+   }
+ 
++  /// Tests whether the target is SW64 64-bit (little endian).
++  bool isSw64() const { return getArch() == Triple::sw_64; }
++
++  bool isSw6a() const { return getSubArch() == Triple::Sw64SubArch_6a; }
++
++  bool isSw6b() const { return getSubArch() == Triple::Sw64SubArch_6b; }
++
++  bool isSw4d() const { return getSubArch() == Triple::Sw64SubArch_4d; }
++
++  bool isSw8a() const { return getSubArch() == Triple::Sw64SubArch_8a; }
++
++  bool isSW() const {
++    return isSw64() || isSw6a() || isSw6b() || isSw4d() || isSw8a();
++  }
++
+   /// Tests whether the target is PowerPC (32- or 64-bit LE or BE).
+ bool isPPC() const { + return getArch() == Triple::ppc || getArch() == Triple::ppc64 || +@@ -1137,5 +1159,4 @@ public: + + } // End llvm namespace + +- + #endif +diff --git a/llvm/include/module.modulemap b/llvm/include/module.modulemap +index 4c2ba437e..22256bb2c 100644 +--- a/llvm/include/module.modulemap ++++ b/llvm/include/module.modulemap +@@ -100,6 +100,7 @@ module LLVM_BinaryFormat { + textual header "llvm/BinaryFormat/ELFRelocs/PowerPC.def" + textual header "llvm/BinaryFormat/ELFRelocs/RISCV.def" + textual header "llvm/BinaryFormat/ELFRelocs/Sparc.def" ++ textual header "llvm/BinaryFormat/ELFRelocs/Sw64.def" + textual header "llvm/BinaryFormat/ELFRelocs/SystemZ.def" + textual header "llvm/BinaryFormat/ELFRelocs/VE.def" + textual header "llvm/BinaryFormat/ELFRelocs/x86_64.def" +diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +index a84d35a6e..5a2fca731 100644 +--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp ++++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +@@ -408,6 +408,87 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, + auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); + NewLHS = Call.first; + NewRHS = DAG.getConstant(0, dl, RetVT); ++ if (Triple(this->getTargetMachine().getTargetTriple()).getArch() == ++ Triple::sw_64) { ++ ++ ShouldInvertCC = false; ++ switch (CCCode) { ++ ++ case llvm::ISD::SETOGT: ++ case llvm::ISD::SETUGT: ++ case llvm::ISD::SETGT: ++ // from: ++ // ldi $1,0($31) ++ // cmplt $1,$0,$0 ++ // to: ++ // ++ Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); ++ NewLHS = Call.first; ++ NewRHS = DAG.getConstant(0, dl, RetVT); ++ break; ++ case llvm::ISD::SETOGE: ++ case llvm::ISD::SETUGE: ++ case llvm::ISD::SETGE: ++ // from: ++ // ldi $1,-1($31) ++ // cmplt $1,$0,$0 ++ // to: ++ // ldi $1 0($31) ++ // complt $1,$0,$0 ++ ++ Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); ++ NewLHS = Call.first; ++ NewRHS = DAG.getConstant(1, dl, RetVT); ++ break; ++ case llvm::ISD::SETOLT: ++ case llvm::ISD::SETULT: ++ case llvm::ISD::SETLT: ++ ++ // from: ++ // cmplt $0,0,$0 ++ // to: ++ // cmplt $31,$0,$0 ++ ++ Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); ++ NewRHS = Call.first; ++ NewLHS = DAG.getConstant(0, dl, RetVT); ++ break; ++ case llvm::ISD::SETOLE: ++ case llvm::ISD::SETULE: ++ case llvm::ISD::SETLE: ++ // from: ++ // cmplt $0,-1,$0 ++ // to: ++ // cmplt $31,$0,$0 ++ Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); ++ NewRHS = Call.first; ++ NewLHS = DAG.getConstant(1, dl, RetVT); ++ break; ++ case llvm::ISD::SETUEQ: ++ case llvm::ISD::SETOEQ: ++ case llvm::ISD::SETEQ: ++ // from: ++ // cmplt $0,0,$0 ++ // to: ++ // cmplt $0,-1,$0 ++ // ++ Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); ++ NewLHS = Call.first; ++ NewRHS = DAG.getConstant(1, dl, RetVT); ++ break; ++ case llvm::ISD::SETONE: ++ ShouldInvertCC = true; ++ Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); ++ NewLHS = Call.first; ++ NewRHS = DAG.getConstant(1, dl, RetVT); ++ break; ++ case llvm::ISD::SETO: ++ ShouldInvertCC = true; ++ LLVM_FALLTHROUGH; ++ default: ++ break; ++ } ++ } + + CCCode = getCmpLibcallCC(LC1); + if (ShouldInvertCC) { +diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +index 4ffffd85e..0980e8238 100644 +--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp ++++ 
b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+@@ -290,6 +290,13 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
+     TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+                     dwarf::DW_EH_PE_sdata4;
+     break;
++  case Triple::sw_64:
++    PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
++                          dwarf::DW_EH_PE_sdata4;
++    LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
++    TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
++                    dwarf::DW_EH_PE_sdata4;
++    break;
+   default:
+     break;
+   }
+diff --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
+index 833be826f..8141c8ce7 100644
+--- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
++++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
+@@ -262,6 +262,9 @@ EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) {
+   case Triple::riscv64:
+     return CreateWithABI<OrcRiscv64>(EPC);
+ 
++  case Triple::sw_64:
++    return CreateWithABI<OrcSw64>(EPC);
++
+   case Triple::x86_64:
+     if (TT.getOS() == Triple::OSType::Win32)
+       return CreateWithABI<OrcX86_64_Win32>(EPC);
+diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+index a0d81cdf2..d8520bb96 100644
+--- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
++++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+@@ -158,6 +158,11 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES,
+       return CCMgrT::Create(ES, ErrorHandlerAddress);
+     }
+ 
++    case Triple::sw_64: {
++      typedef orc::LocalJITCompileCallbackManager<orc::OrcSw64> CCMgrT;
++      return CCMgrT::Create(ES, ErrorHandlerAddress);
++    }
++
+     case Triple::x86_64: {
+       if (T.getOS() == Triple::OSType::Win32) {
+         typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64_Win32> CCMgrT;
+@@ -224,6 +229,11 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) {
+           orc::LocalIndirectStubsManager<orc::OrcRiscv64>>();
+     };
+ 
++  case Triple::sw_64:
++    return []() {
++      return std::make_unique<orc::LocalIndirectStubsManager<orc::OrcSw64>>();
++    };
++
+   case Triple::x86_64:
+     if (T.getOS() == Triple::OSType::Win32) {
+       return [](){
+diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+index 7c7c2f000..da3a19b25 100644
+--- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
++++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+@@ -913,6 +913,10 @@ LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) {
+        S.JTMB->getTargetTriple().getArch() == Triple::ArchType::ppc64le))
+     Layer->setAutoClaimResponsibilityForObjectSymbols(true);
+ 
++  if (S.JTMB->getTargetTriple().isOSBinFormatELF() &&
++      S.JTMB->getTargetTriple().getArch() == Triple::ArchType::sw_64)
++    Layer->setAutoClaimResponsibilityForObjectSymbols(true);
++
+   // FIXME: Explicit conversion to std::unique_ptr added to silence
+   // errors from some GCC / libstdc++ bots. Remove this conversion (i.e.
+   // just return ObjLinkingLayer) once those bots are upgraded.
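The Sw64 Orc ABI support that follows (writeResolverCode, writeTrampolines and writeIndirectStubsBlock in OrcABISupport.cpp) materializes 64-bit addresses as four 16-bit chunks, each adjusted by a carry bit, because the ldih/ldi displacements are sign-extended (assuming Alpha-style semantics for those instructions). The short sketch below is an editorial illustration of that chunking, not part of the patch; the example address and the standalone main() are assumptions made only for the demonstration.

  // Mirrors the (addr >> n) + ((addr >> (n - 1)) & 1) chunking used by the
  // Sw64 resolver, trampoline and stub writers. The carry term compensates
  // for the sign-extension of each lower 16-bit displacement.
  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t Addr = 0x123456789abcdef0ULL; // arbitrary example address
    uint16_t Highest = ((Addr >> 48) + ((Addr >> 47) & 1)) & 0xFFFF;
    uint16_t Higher = ((Addr >> 32) + ((Addr >> 31) & 1)) & 0xFFFF;
    uint16_t Hi = ((Addr >> 16) + ((Addr >> 15) & 1)) & 0xFFFF;
    uint16_t Lo = Addr & 0xFFFF;
    // Reassemble the way the emitted ldih/ldi/sll sequence does: start from
    // the top chunk, then shift left by 16 and add the next sign-extended
    // chunk at each step.
    int64_t V = (int16_t)Highest;
    V = (int64_t)((uint64_t)V << 16) + (int16_t)Higher;
    V = (int64_t)((uint64_t)V << 16) + (int16_t)Hi;
    V = (int64_t)((uint64_t)V << 16) + (int16_t)Lo;
    assert((uint64_t)V == Addr && "carry-adjusted chunks round-trip");
    return 0;
  }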
+diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp +index d95a64293..0e5dc629d 100644 +--- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp ++++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp +@@ -139,6 +139,8 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES, + case Triple::riscv64: + return LocalLazyCallThroughManager::Create(ES, + ErrorHandlerAddr); ++ case Triple::sw_64: ++ return LocalLazyCallThroughManager::Create(ES, ErrorHandlerAddr); + + case Triple::x86_64: + if (T.getOS() == Triple::OSType::Win32) +diff --git a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp +index 6d5681993..07c1d14da 100644 +--- a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp ++++ b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp +@@ -915,6 +915,268 @@ void OrcMips64::writeIndirectStubsBlock(char *StubsBlockWorkingMem, + } + } + ++void OrcSw64::writeResolverCode(char *ResolverWorkingMem, ++ ExecutorAddr ResolverTargetAddress, ++ ExecutorAddr ReentryFnAddr, ++ ExecutorAddr ReentryCtxAddr) { ++ const uint32_t ResolverCode[] = { ++ // resolver_entry: ++ 0xfbdefe38, // ldi sp,-456(sp) ++ 0xac1e0000, // stl v0,0(sp) ++ 0xae1e0008, // stl a0,8(sp) ++ 0xae3e0010, // stl a1,16(sp) ++ 0xae5e0018, // stl a2,24(sp) ++ 0xae7e0020, // stl a3,32(sp) ++ 0xae9e0028, // stl a4,40(sp) ++ 0xaebe0030, // stl a5,48(sp) ++ 0xad3e0038, // stl s0,56(sp) ++ 0xad5e0040, // stl s1,64(sp) ++ 0xad7e0048, // stl s2,72(sp) ++ 0xad9e0050, // stl s3,80(sp) ++ 0xadbe0058, // stl s4,88(sp) ++ 0xadde0060, // stl s5,96(sp) ++ 0xac3e0068, // stl t0,104(sp) ++ 0xac5e0070, // stl t1,112(sp) ++ 0xac7e0078, // stl t2,120(sp) ++ 0xac9e0080, // stl t3,128(sp) ++ 0xacbe0088, // stl t4,136(sp) ++ 0xacde0090, // stl t5,144(sp) ++ 0xacfe0098, // stl t6,152(sp) ++ 0xad1e00a0, // stl t7,160(sp) ++ 0xaede00a8, // stl t8,168(sp) ++ 0xaefe00b0, // stl t9,176(sp) ++ 0xaf1e00b8, // stl t10,184(sp) ++ 0xaf3e00c0, // stl t11,192(sp) ++ 0xaf7e00c8, // stl t12,200(sp) ++ 0xadfe00d0, // stl fp,208(sp) ++ 0xaf5e00d8, // stl ra,216(sp) ++ ++ 0xbc5e00e0, // fstd $f2,224(sp) ++ 0xbc7e00e8, // fstd $f3,232(sp) ++ 0xbc9e00f0, // fstd $f4,240(sp) ++ 0xbcbe00f8, // fstd $f5,248(sp) ++ 0xbcde0100, // fstd $f6,256(sp) ++ 0xbcfe0108, // fstd $f7,264(sp) ++ 0xbd1e0110, // fstd $f8,272(sp) ++ 0xbd3e0118, // fstd $f9,280(sp) ++ 0xbd5e0120, // fstd $f10,288(sp) ++ 0xbd7e0128, // fstd $f11,296(sp) ++ 0xbd9e0130, // fstd $f12,304(sp) ++ 0xbdbe0138, // fstd $f13,312(sp) ++ 0xbdde0140, // fstd $f14,320(sp) ++ 0xbdfe0148, // fstd $f15,328(sp) ++ 0xbe1e0150, // fstd $f16,336(sp) ++ 0xbe3e0158, // fstd $f17,344(sp) ++ 0xbe5e0160, // fstd $f18,352(sp) ++ 0xbe7e0168, // fstd $f19,360(sp) ++ 0xbe9e0170, // fstd $f20,368(sp) ++ 0xbebe0178, // fstd $f21,376(sp) ++ 0xbede0180, // fstd $f22,384(sp) ++ 0xbefe0188, // fstd $f23,392(sp) ++ 0xbf1e0190, // fstd $f24,400(sp) ++ 0xbf3e0198, // fstd $f25,408(sp) ++ 0xbf5e01a0, // fstd $f26,416(sp) ++ 0xbf7e01a8, // fstd $f27,424(sp) ++ 0xbf9e01b0, // fstd $f28,432(sp) ++ 0xbfbe01b8, // fstd $f29,440(sp) ++ 0xbfde01c0, // fstd $f30,448(sp) ++ ++ // JIT re-entry ctx addr. 
++ 0x00000000, // ldih $16,ctxhighest($31) ++ 0x00000000, // ldi $16,ctxhigher($16) ++ 0x00000000, // sll $16,16,$16 ++ 0x00000000, // ldi $16,ctxhi($16) ++ 0x00000000, // sll $16,16,$16 ++ 0x00000000, // ldi $16,ctxlo($16) ++ 0x435a0751, // or ra,ra,a1 ++ 0xfa31ffe0, // ldi a1,-32(a1) ++ // JIT re-entry fn addr: ++ 0x00000000, // ldih $27,reentry($31) ++ 0x00000000, // ldi $27,reentry($27) ++ 0x00000000, // sll $27,16,$27 ++ 0x00000000, // ldi $27,reentryhi($27) ++ 0x00000000, // sll $27,16,$27 ++ 0x00000000, // ldi $27,reentrylo($27) ++ 0x075b0000, // call ra,(t12),6c ++ 0x43ff075f, // nop ++ ++ 0x9fde01c0, // fldd $f30,448(sp) ++ 0x9fbe01b8, // fldd $f29,440(sp) ++ 0x9f9e01b0, // fldd $f28,432(sp) ++ 0x9f7e01a8, // fldd $f27,424(sp) ++ 0x9f5e01a0, // fldd $f26,416(sp) ++ 0x9f3e0198, // fldd $f25,408(sp) ++ 0x9f1e0190, // fldd $f24,400(sp) ++ 0x9efe0188, // fldd $f23,392(sp) ++ 0x9ede0180, // fldd $f22,384(sp) ++ 0x9ebe0178, // fldd $f21,376(sp) ++ 0x9e9e0170, // fldd $f20,368(sp) ++ 0x9e7e0168, // fldd $f19,360(sp) ++ 0x9e5e0160, // fldd $f18,352(sp) ++ 0x9e3e0158, // fldd $f17,344(sp) ++ 0x9e1e0150, // fldd $f16,336(sp) ++ 0x9dfe0148, // fldd $f15,328(sp) ++ 0x9dde0140, // fldd $f14,320(sp) ++ 0x9dbe0138, // fldd $f13,312(sp) ++ 0x9d9e0130, // fldd $f12,304(sp) ++ 0x9d7e0128, // fldd $f11,296(sp) ++ 0x9d5e0120, // fldd $f10,288(sp) ++ 0x9d3e0118, // fldd $f9,280(sp) ++ 0x9d1e0110, // fldd $f8,272(sp) ++ 0x9cfe0108, // fldd $f7,264(sp) ++ 0x9cde0100, // fldd $f6,256(sp) ++ 0x9cbe00f8, // fldd $f5,248(sp) ++ 0x9c9e00f0, // fldd $f4,240(sp) ++ 0x9c7e00e8, // fldd $f3,232(sp) ++ 0x9c5e00e0, // fldd $f2,224(sp) ++ ++ 0x8f5e00d8, // ldl ra,216(sp) ++ 0x8dfe00d0, // ldl fp,208(sp) ++ 0x8f7e00c8, // ldl t12,200(sp) ++ 0x8f3e00c0, // ldl t11,192(sp) ++ 0x8f1e00b8, // ldl t10,184(sp) ++ 0x8efe00b0, // ldl t9,176(sp) ++ 0x8ede00a8, // ldl t8,168(sp) ++ 0x8d1e00a0, // ldl t7,160(sp) ++ 0x8cfe0098, // ldl t6,152(sp) ++ 0x8cde0090, // ldl t5,144(sp) ++ 0x8cbe0088, // ldl t4,136(sp) ++ 0x8c9e0080, // ldl t3,128(sp) ++ 0x8c7e0078, // ldl t2,120(sp) ++ 0x8c5e0070, // ldl t1,112(sp) ++ 0x8c3e0068, // ldl t0,104(sp) ++ 0x8dde0060, // ldl s5,96(sp) ++ 0x8dbe0058, // ldl s4,88(sp) ++ 0x8d9e0050, // ldl s3,80(sp) ++ 0x8d7e0048, // ldl s2,72(sp) ++ 0x8d5e0040, // ldl s1,64(sp) ++ 0x8d3e0038, // ldl s0,56(sp) ++ 0x8ebe0030, // ldl a5,48(sp) ++ 0x8e9e0028, // ldl a4,40(sp) ++ 0x8e7e0020, // ldl a3,32(sp) ++ 0x8e5e0018, // ldl a2,24(sp) ++ 0x8e3e0010, // ldl a1,16(sp) ++ 0x8e1e0008, // ldl a0,8(sp) ++ 0xfbde01c8, // ldi sp,456(sp) ++ ++ 0x4339075a, // or t11,t11,ra ++ 0x4000075b, // or v0,v0,t12 ++ 0x0ffb0000, // jmp zero,(t12),c4 ++ }; ++ const unsigned ReentryFnAddrOffset = 0x108; // JIT re-entry fn addr lui ++ const unsigned ReentryCtxAddrOffset = 0xe8; // JIT re-entry ctx addr lui ++ ++ memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); ++ ++ uint32_t ReentryCtxLDIh = ++ 0xfe1f0000 | (((ReentryCtxAddr.getValue() >> 48) + ++ ((ReentryCtxAddr.getValue() >> 47) & 1)) & ++ 0xFFFF); ++ uint32_t ReentryCtxLDI = ++ 0xfa100000 | (((ReentryCtxAddr.getValue() >> 32) + ++ ((ReentryCtxAddr.getValue() >> 31) & 1)) & ++ 0xFFFF); ++ uint32_t ReentryCtxSLL = 0x4a020910; ++ uint32_t ReentryCtxLDI2 = ++ 0xfa100000 | (((ReentryCtxAddr.getValue() >> 16) + ++ ((ReentryCtxAddr.getValue() >> 15) & 1)) & ++ 0xFFFF); ++ uint32_t ReentryCtxSLL2 = 0x4a020910; ++ uint32_t ReentryCtxLDI3 = 0xfa100000 | (ReentryCtxAddr.getValue() & 0xFFFF); ++ memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLDIh, ++ 
sizeof(ReentryCtxLDIh)); ++ memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 4, &ReentryCtxLDI, ++ sizeof(ReentryCtxLDI)); ++ memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 8, &ReentryCtxSLL, ++ sizeof(ReentryCtxSLL)); ++ memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 12, &ReentryCtxLDI2, ++ sizeof(ReentryCtxLDI2)); ++ memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 16, &ReentryCtxSLL2, ++ sizeof(ReentryCtxSLL2)); ++ memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 20, &ReentryCtxLDI3, ++ sizeof(ReentryCtxLDI3)); ++ ++ uint32_t ReentryFnLDIh = ++ 0xff7f0000 | (((ReentryFnAddr.getValue() >> 48) + ++ ((ReentryFnAddr.getValue() >> 47) & 1)) & ++ 0xFFFF); ++ uint32_t ReentryFnLDI = ++ 0xfb7b0000 | (((ReentryFnAddr.getValue() >> 32) + ++ ((ReentryFnAddr.getValue() >> 31) & 1)) & ++ 0xFFFF); ++ uint32_t ReentryFnSLL = 0x4b62091b; ++ uint32_t ReentryFnLDI2 = ++ 0xfb7b0000 | (((ReentryFnAddr.getValue() >> 16) + ++ ((ReentryFnAddr.getValue() >> 15) & 1)) & ++ 0xFFFF); ++ uint32_t ReentryFnSLL2 = 0x4b62091b; ++ uint32_t ReentryFnLDI3 = 0xfb7b0000 | (ReentryFnAddr.getValue() & 0xFFFF); ++ memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnLDIh, ++ sizeof(ReentryFnLDIh)); ++ memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 4, &ReentryFnLDI, ++ sizeof(ReentryFnLDI)); ++ memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 8, &ReentryFnSLL, ++ sizeof(ReentryFnSLL)); ++ memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 12, &ReentryFnLDI2, ++ sizeof(ReentryFnLDI2)); ++ memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 16, &ReentryFnSLL2, ++ sizeof(ReentryFnSLL2)); ++ memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 20, &ReentryFnLDI3, ++ sizeof(ReentryFnLDI3)); ++} ++ ++void OrcSw64::writeTrampolines(char *TrampolineBlockWorkingMem, ++ ExecutorAddr TrampolineBlockTargetAddress, ++ ExecutorAddr ResolverAddr, ++ unsigned NumTrampolines) { ++ ++ uint32_t *Trampolines = ++ reinterpret_cast(TrampolineBlockWorkingMem); ++ uint64_t HighestAddr = ++ (ResolverAddr.getValue() >> 48) + ((ResolverAddr.getValue() >> 47) & 1); ++ uint64_t HigherAddr = ++ (ResolverAddr.getValue() >> 32) + ((ResolverAddr.getValue() >> 31) & 1); ++ uint64_t HiAddr = ++ (ResolverAddr.getValue() >> 16) + ((ResolverAddr.getValue() >> 15) & 1); ++ ++ for (unsigned I = 0; I < NumTrampolines; ++I) { ++ Trampolines[10 * I + 0] = 0x435a0759; // or ra,ra,t11 ++ Trampolines[10 * I + 1] = 0xff7f0000 | (HighestAddr & 0xFFFF); ++ Trampolines[10 * I + 2] = 0xfb7b0000 | (HigherAddr & 0xFFFF); ++ Trampolines[10 * I + 3] = 0x4b62091b; // sll ++ Trampolines[10 * I + 4] = 0xfb7b0000 | (HiAddr & 0xFFFF); ++ Trampolines[10 * I + 5] = 0x4b62091b; // sll2 ++ Trampolines[10 * I + 6] = 0xfb7b0000 | (ResolverAddr.getValue() & 0xFFFF); ++ Trampolines[10 * I + 7] = 0x075b0000; // call ++ Trampolines[10 * I + 8] = 0x43ff075f; // nop ++ Trampolines[10 * I + 9] = 0x43ff075f; // nop ++ } ++} ++ ++void OrcSw64::writeIndirectStubsBlock(char *StubsBlockWorkingMem, ++ ExecutorAddr StubsBlockTargetAddress, ++ ExecutorAddr PointersBlockTargetAddress, ++ unsigned NumStubs) { ++ ++ // Populate the stubs page stubs and mark it executable. 
++  uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
++  uint64_t PtrAddr = PointersBlockTargetAddress.getValue();
++
++  for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 8) {
++    uint64_t HighestAddr = (PtrAddr >> 48) + ((PtrAddr >> 47) & 1);
++    uint64_t HigherAddr = (PtrAddr >> 32) + ((PtrAddr >> 31) & 1);
++    uint64_t HiAddr = (PtrAddr >> 16) + ((PtrAddr >> 15) & 1);
++    Stub[8 * I + 0] = 0xff7f0000 | (HighestAddr & 0xFFFF); // ldih
++    Stub[8 * I + 1] = 0xfb7b0000 | (HigherAddr & 0xFFFF); // ldi
++    Stub[8 * I + 2] = 0x4b62091b; // sll
++    Stub[8 * I + 3] = 0xfb7b0000 | (HiAddr & 0xFFFF); // ldi
++    Stub[8 * I + 4] = 0x4b62091b; // sll2
++    Stub[8 * I + 5] = 0x8f7b0000 | (PtrAddr & 0xFFFF); // ldl
++    Stub[8 * I + 6] = 0x0ffb0000; // jmp $31,($27),0
++    Stub[8 * I + 7] = 0x43ff075f; // nop
++  }
++}
++
+ void OrcRiscv64::writeResolverCode(char *ResolverWorkingMem,
+                                    ExecutorAddr ResolverTargetAddress,
+                                    ExecutorAddr ReentryFnAddr,
+diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
+index 1278e2f43..79c1fa6a4 100644
+--- a/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
+@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMRuntimeDyld
+   RuntimeDyldELF.cpp
+   RuntimeDyldMachO.cpp
+   Targets/RuntimeDyldELFMips.cpp
++  Targets/RuntimeDyldELFSw64.cpp
+ 
+   DEPENDS
+   intrinsics_gen
+diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+index d439b1b4e..66ed10693 100644
+--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+@@ -13,6 +13,7 @@
+ #include "RuntimeDyldELF.h"
+ #include "RuntimeDyldCheckerImpl.h"
+ #include "Targets/RuntimeDyldELFMips.h"
++#include "Targets/RuntimeDyldELFSw64.h"
+ #include "llvm/ADT/STLExtras.h"
+ #include "llvm/ADT/StringRef.h"
+ #include "llvm/BinaryFormat/ELF.h"
+@@ -241,6 +242,8 @@ llvm::RuntimeDyldELF::create(Triple::ArchType Arch,
+   case Triple::mips64:
+   case Triple::mips64el:
+     return std::make_unique<RuntimeDyldELFMips>(MemMgr, Resolver);
++  case Triple::sw_64:
++    return std::make_unique<RuntimeDyldELFSw64>(MemMgr, Resolver);
+   }
+ }
+ 
+@@ -1878,6 +1881,42 @@ RuntimeDyldELF::processRelocationRef(
+     } else {
+       processSimpleRelocation(SectionID, Offset, RelType, Value);
+     }
++  } else if (Arch == Triple::sw_64) {
++    uint32_t r_type = RelType & 0xff;
++    RelocationEntry RE(SectionID, Offset, RelType, Value.Addend);
++    LLVM_DEBUG(dbgs() << "Resolve Sw64 reloc" << TargetName << "\n");
++    if (r_type == ELF::R_SW_64_GPDISP) {
++      TargetName = "gphi";
++      StringMap<uint64_t>::iterator i = GOTSymbolOffsets.find(TargetName);
++      if (i != GOTSymbolOffsets.end())
++        RE.SymOffset = i->second;
++      else {
++        RE.SymOffset = allocateGOTEntries(1);
++        GOTSymbolOffsets[TargetName] = RE.SymOffset;
++      }
++      if (Value.SymbolName)
++        addRelocationForSymbol(RE, Value.SymbolName);
++      else
++        addRelocationForSection(RE, Value.SectionID);
++    } else if (RelType == ELF::R_SW_64_BRADDR) {
++      // This is an Sw64 branch relocation, need to use a stub function.
++ LLVM_DEBUG(dbgs() << "\t\tThis is a Sw64 branch relocation."); ++ llvm_unreachable(" Sw64 branch relocation not yet supported."); ++ } else if (r_type == ELF::R_SW_64_LITERAL) { ++ StringMap::iterator a = GOTSymbolOffsets.find(TargetName); ++ if (a != GOTSymbolOffsets.end()) ++ RE.SymOffset = a->second; ++ else { ++ RE.SymOffset = allocateGOTEntries(1); ++ GOTSymbolOffsets[TargetName] = RE.SymOffset; ++ } ++ if (Value.SymbolName) ++ addRelocationForSymbol(RE, Value.SymbolName); ++ else ++ addRelocationForSection(RE, Value.SectionID); ++ } else { ++ processSimpleRelocation(SectionID, Offset, RelType, Value); ++ } + } else { + if (Arch == Triple::x86) { + Value.Addend += support::ulittle32_t::ref(computePlaceholderAddress(SectionID, Offset)); +@@ -2221,6 +2260,7 @@ size_t RuntimeDyldELF::getGOTEntrySize() { + case Triple::aarch64_be: + case Triple::ppc64: + case Triple::ppc64le: ++ case Triple::sw_64: + case Triple::systemz: + Result = sizeof(uint64_t); + break; +@@ -2390,6 +2430,25 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, + } + GOTSymbolOffsets.clear(); + } ++ if (Arch == Triple::sw_64) { ++ // To correctly resolve Sw64 GOT relocations, we need a mapping from ++ // object's sections to GOTs. ++ for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end(); ++ SI != SE; ++SI) { ++ if (SI->relocation_begin() != SI->relocation_end()) { ++ Expected RelSecOrErr = SI->getRelocatedSection(); ++ if (!RelSecOrErr) ++ return make_error( ++ toString(RelSecOrErr.takeError())); ++ ++ section_iterator RelocatedSection = *RelSecOrErr; ++ ObjSectionToIDMap::iterator i = SectionMap.find(*RelocatedSection); ++ assert(i != SectionMap.end()); ++ SectionToGOTMap[i->second] = GOTSectionID; ++ } ++ } ++ GOTSymbolOffsets.clear(); ++ } + } + + // Look for and record the EH frame section. +diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +index dfdd98cb3..750941806 100644 +--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h ++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +@@ -15,6 +15,8 @@ + + #include "RuntimeDyldImpl.h" + #include "llvm/ADT/DenseMap.h" ++#include ++using namespace std; + + using namespace llvm; + +@@ -60,6 +62,12 @@ class RuntimeDyldELF : public RuntimeDyldImpl { + void resolveBPFRelocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int64_t Addend); + ++ void resolveSW64Relocation(const SectionEntry &Section, uint64_t Offset, ++ uint64_t Value, uint32_t Type, int32_t Addend); ++ unsigned long *_GOT; ++ unsigned long *getSW64GOT(); ++ vector _got; ++ + unsigned getMaxStubSize() const override { + if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) + return 20; // movz; movk; movk; movk; br +@@ -75,6 +83,8 @@ class RuntimeDyldELF : public RuntimeDyldImpl { + return 6; // 2-byte jmp instruction + 32-bit relative address + else if (Arch == Triple::systemz) + return 16; ++ else if (Arch == Triple::sw_64) ++ return 16; + else + return 0; + } +diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.cpp +new file mode 100644 +index 000000000..81a819abc +--- /dev/null ++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.cpp +@@ -0,0 +1,217 @@ ++//===-- RuntimeDyldELFSw64.cpp ---- ELF/Sw64 specific code. 
-----*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "RuntimeDyldELFSw64.h" ++#include "llvm/BinaryFormat/ELF.h" ++ ++#define DEBUG_TYPE "dyld" ++ ++void RuntimeDyldELFSw64::resolveRelocation(const RelocationEntry &RE, ++ uint64_t Value) { ++ const SectionEntry &Section = Sections[RE.SectionID]; ++ ++ resolveSw64Relocation(Section, RE.Offset, Value, RE.RelType, RE.Addend, ++ RE.SymOffset, RE.SectionID); ++} ++ ++uint64_t RuntimeDyldELFSw64::evaluateRelocation(const RelocationEntry &RE, ++ uint64_t Value, ++ uint64_t Addend) { ++ const SectionEntry &Section = Sections[RE.SectionID]; ++ Value = evaluateSw64Relocation(Section, RE.Offset, Value, RE.RelType, Addend, ++ RE.SymOffset, RE.SectionID); ++ return Value; ++} ++ ++void RuntimeDyldELFSw64::applyRelocation(const RelocationEntry &RE, ++ uint64_t Value) { ++ const SectionEntry &Section = Sections[RE.SectionID]; ++ applySw64Relocation(Section.getAddressWithOffset(RE.Offset), Value, ++ RE.RelType); ++ return; ++} ++ ++int64_t RuntimeDyldELFSw64::evaluateSw64Relocation( ++ const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, ++ int64_t Addend, uint64_t SymOffset, SID SectionID) { ++ ++ LLVM_DEBUG(dbgs() << "evaluateSw64Relocation, LocalAddress: 0x" ++ << format("%llx", Section.getAddressWithOffset(Offset)) ++ << " GOTAddr: 0x" ++ << format("%llx", ++ getSectionLoadAddress(SectionToGOTMap[SectionID])) ++ << " FinalAddress: 0x" ++ << format("%llx", Section.getLoadAddressWithOffset(Offset)) ++ << " Value: 0x" << format("%llx", Value) << " Type: 0x" ++ << format("%x", Type) << " Addend: 0x" ++ << format("%llx", Addend) ++ << " Offset: " << format("%llx", Offset) ++ << " SID: " << format("%d", SectionID) ++ << " SymOffset: " << format("%x", SymOffset) << "\n"); ++ ++ switch (Type) { ++ default: ++ llvm_unreachable("Not implemented relocation type!"); ++ break; ++ case ELF::R_SW_64_GPDISP: { ++ uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]); ++ uint32_t *LocalAddress = ++ reinterpret_cast(Section.getAddressWithOffset(Offset)); ++ ++ uint8_t *LocalGOTAddr = ++ getSectionAddress(SectionToGOTMap[SectionID]) + SymOffset; ++ uint64_t GOTEntry = readBytesUnaligned(LocalGOTAddr, getGOTEntrySize()); ++ ++ LLVM_DEBUG(dbgs() << "Debug gpdisp: " ++ << " GOTAddr: 0x" << format("%llx", GOTAddr) ++ << " GOTEntry: 0x" << format("%llx", GOTEntry) ++ << " LocalGOTAddr: 0x" << format("%llx", LocalGOTAddr) ++ << " LocalAddress: 0x" << format("%llx", LocalAddress) ++ << "\n"); ++ if (GOTEntry) ++ assert(GOTEntry == Value && "GOT entry has two different addresses."); ++ else ++ writeBytesUnaligned(Value, LocalGOTAddr, getGOTEntrySize()); ++ ++ return (int64_t)GOTAddr + 0x8000 - (int64_t)LocalAddress; ++ } ++ case ELF::R_SW_64_LITERAL: { ++ uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]); ++ uint32_t *LocalAddress = ++ reinterpret_cast(Section.getAddressWithOffset(Offset)); ++ ++ uint8_t *LocalGOTAddr = ++ getSectionAddress(SectionToGOTMap[SectionID]) + SymOffset; ++ uint64_t GOTEntry = readBytesUnaligned(LocalGOTAddr, getGOTEntrySize()); ++ ++ LLVM_DEBUG(dbgs() << "Debug literal: " ++ << " GOTAddr: 0x" << format("%llx", GOTAddr) ++ << " GOTEntry: 0x" << format("%llx", GOTEntry) ++ << " LocalGOTAddr: 0x" << format("%llx", LocalGOTAddr) ++ << " LocalAddress: 0x" << 
format("%llx", LocalAddress) ++ << "\n"); ++ ++ Value += Addend; ++ if (GOTEntry) ++ assert(GOTEntry == Value && "GOT entry has two different addresses."); ++ else ++ writeBytesUnaligned(Value, LocalGOTAddr, getGOTEntrySize()); ++ ++ if (SymOffset > 65536) ++ report_fatal_error(".got subsegment exceeds 64K (literal)!!\n"); ++ ++ if ((SymOffset) < 32768) ++ return (int64_t)(SymOffset - 0x8000); ++ else ++ return (int64_t)(0x8000 - SymOffset); ++ } ++ case ELF::R_SW_64_GPRELHIGH: { ++ // Get the higher 16-bits. ++ uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]); ++ uint64_t Disp = Value + Addend - (GOTAddr + 0x8000); ++ if (Disp & 0x8000) ++ return ((Disp + 0x8000) >> 16) & 0xffff; ++ else ++ return (Disp >> 16) & 0xffff; ++ } ++ case ELF::R_SW_64_GPRELLOW: { ++ uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]); ++ ++ return (Value + Addend - (GOTAddr + 0x8000)) & 0xffff; ++ } ++ case ELF::R_SW_64_REFQUAD: { ++ return Value + Addend; ++ } ++ case ELF::R_SW_64_SREL32: { ++ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); ++ return Value + Addend - FinalAddress; ++ } ++ case ELF::R_SW_64_GPREL32: { ++ uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]); ++ return Value + Addend - (GOTAddr + 0x7ff0); ++ } ++ case ELF::R_SW_64_TPRELHI: ++ case ELF::R_SW_64_TPRELLO: ++ report_fatal_error("Current Sw64 JIT does not support TPREL relocs"); ++ break; ++ case ELF::R_SW_64_LITERAL_GOT: ++ case ELF::R_SW_64_HINT: ++ case ELF::R_SW_64_LITUSE: ++ return 0; ++ } ++ return 0; ++} ++ ++void RuntimeDyldELFSw64::applySw64Relocation(uint8_t *TargetPtr, int64_t Value, ++ uint32_t Type) { ++ uint32_t Insn = readBytesUnaligned(TargetPtr, 4); ++ int64_t Disp_hi, Disp_lo; ++ ++ switch (Type) { ++ default: ++ llvm_unreachable("Unknown relocation type!"); ++ break; ++ case ELF::R_SW_64_GPDISP: { ++ uint32_t Insn1 = readBytesUnaligned(TargetPtr + 4, 4); ++ if ((Value > 2147483647LL) || (Value < -2147483648LL)) { ++ llvm::dbgs() << "gpdisp Value=" << Value << "\n"; ++ report_fatal_error(".got subsegment exceeds 2GB (gpdisp)!!\n"); ++ } ++ ++ Disp_hi = (Value + 0x8000) >> 16; ++ Disp_lo = Value & 0xffff; ++ ++ Insn = (Insn & 0xffff0000) | (Disp_hi & 0x0000ffff); ++ Insn1 = (Insn1 & 0xffff0000) | (Disp_lo & 0x0000ffff); ++ ++ writeBytesUnaligned(Insn, TargetPtr, 4); ++ writeBytesUnaligned(Insn1, TargetPtr + 4, 4); ++ break; ++ } ++ case ELF::R_SW_64_LITERAL: ++ Insn = (Insn & 0xffff0000) | (Value & 0x0000ffff); ++ writeBytesUnaligned(Insn, TargetPtr, 4); ++ break; ++ case ELF::R_SW_64_LITERAL_GOT: ++ Insn = (Insn & 0xffff0000) | (Value & 0x0000ffff); ++ writeBytesUnaligned(Insn, TargetPtr, 4); ++ break; ++ case ELF::R_SW_64_GPRELHIGH: ++ case ELF::R_SW_64_GPRELLOW: ++ Insn = (Insn & 0xffff0000) | (Value & 0x0000ffff); ++ writeBytesUnaligned(Insn, TargetPtr, 4); ++ break; ++ case ELF::R_SW_64_REFQUAD: ++ writeBytesUnaligned(Value, TargetPtr, 8); ++ break; ++ case ELF::R_SW_64_SREL32: ++ writeBytesUnaligned(Value & 0xffffffff, TargetPtr, 4); ++ break; ++ case ELF::R_SW_64_GPREL32: ++ writeBytesUnaligned(Value & 0xffffffff, TargetPtr, 4); ++ break; ++ } ++} ++ ++void RuntimeDyldELFSw64::resolveSw64Relocation(const SectionEntry &Section, ++ uint64_t Offset, uint64_t Value, ++ uint32_t Type, int64_t Addend, ++ uint64_t SymOffset, ++ SID SectionID) { ++ uint32_t r_type = Type & 0xff; ++ ++ // RelType is used to keep information for which relocation type we are ++ // applying relocation. 
++ uint32_t RelType = r_type; ++ int64_t CalculatedValue = evaluateSw64Relocation( ++ Section, Offset, Value, RelType, Addend, SymOffset, SectionID); ++ ++ applySw64Relocation(Section.getAddressWithOffset(Offset), CalculatedValue, ++ RelType); ++} +diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.h +new file mode 100644 +index 000000000..c333dc4bd +--- /dev/null ++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.h +@@ -0,0 +1,61 @@ ++//===-- RuntimeDyldELFSw64.h ---- ELF/Sw64 specific code. -------*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDELFSw64_H ++#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDELFSw64_H ++ ++#include "../RuntimeDyldELF.h" ++#include ++ ++#define DEBUG_TYPE "dyld" ++ ++namespace llvm { ++ ++class RuntimeDyldELFSw64 : public RuntimeDyldELF { ++public: ++ typedef uint64_t TargetPtrT; ++ ++ RuntimeDyldELFSw64(RuntimeDyld::MemoryManager &MM, ++ JITSymbolResolver &Resolver) ++ : RuntimeDyldELF(MM, Resolver) {} ++ ++ void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override; ++ ++protected: ++ void resolveSw64Relocation(const SectionEntry &Section, uint64_t Offset, ++ uint64_t Value, uint32_t Type, int64_t Addend, ++ uint64_t SymOffset, SID SectionID); ++ ++ uint64_t GOTOffset = 0; ++ uint64_t GPOffset_Modify = 0; ++ ++private: ++ /// A object file specific relocation resolver ++ /// \param RE The relocation to be resolved ++ /// \param Value Target symbol address to apply the relocation action ++ uint64_t evaluateRelocation(const RelocationEntry &RE, uint64_t Value, ++ uint64_t Addend); ++ ++ /// A object file specific relocation resolver ++ /// \param RE The relocation to be resolved ++ /// \param Value Target symbol address to apply the relocation action ++ void applyRelocation(const RelocationEntry &RE, uint64_t Value); ++ ++ int64_t evaluateSw64Relocation(const SectionEntry &Section, uint64_t Offset, ++ uint64_t Value, uint32_t Type, int64_t Addend, ++ uint64_t SymOffset, SID SectionID); ++ ++ void applySw64Relocation(uint8_t *TargetPtr, int64_t CalculatedValue, ++ uint32_t Type); ++}; ++} // namespace llvm ++ ++#undef DEBUG_TYPE ++ ++#endif +diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp +index 435800d9e..e05d6b455 100644 +--- a/llvm/lib/IR/Function.cpp ++++ b/llvm/lib/IR/Function.cpp +@@ -44,6 +44,7 @@ + #include "llvm/IR/IntrinsicsR600.h" + #include "llvm/IR/IntrinsicsRISCV.h" + #include "llvm/IR/IntrinsicsS390.h" ++#include "llvm/IR/IntrinsicsSw64.h" + #include "llvm/IR/IntrinsicsVE.h" + #include "llvm/IR/IntrinsicsWebAssembly.h" + #include "llvm/IR/IntrinsicsX86.h" +diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp +index 6a6befdd3..0c45a7d4f 100644 +--- a/llvm/lib/MC/ELFObjectWriter.cpp ++++ b/llvm/lib/MC/ELFObjectWriter.cpp +@@ -1318,6 +1318,8 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm, + // in a relocation with a null section which is the desired result. 
+ case MCSymbolRefExpr::VK_PPC_TOCBASE: + return false; ++ case MCSymbolRefExpr::VK_SW64_GPDISP: ++ return false; + + // These VariantKind cause the relocation to refer to something other than + // the symbol itself, like a linker generated table. Since the address of +@@ -1501,6 +1503,21 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, + SecA ? cast(SecA->getBeginSymbol()) : nullptr; + if (SectionSymbol) + SectionSymbol->setUsedInReloc(); ++ if (TargetObjectWriter->getEMachine() == ELF::EM_SW64) { ++ const MCFixupKindInfo &FKI = ++ Asm.getBackend().getFixupKindInfo((MCFixupKind)Fixup.getKind()); ++ if (strcmp(FKI.Name, "fixup_SW64_GPDISP_HI16") == 0) { ++ ++ Addend = 4; ++ const auto *RenamedSymA = ++ cast(Asm.getContext().getOrCreateSymbol(".text")); ++ ++ RenamedSymA->setUsedInReloc(); ++ ELFRelocationEntry Rec(FixupOffset, RenamedSymA, Type, Addend, SymA, C); ++ Relocations[&FixupSection].push_back(Rec); ++ return; ++ } ++ } + ELFRelocationEntry Rec(FixupOffset, SectionSymbol, Type, Addend, SymA, C); + Relocations[&FixupSection].push_back(Rec); + return; +@@ -1511,6 +1528,22 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, + if (const MCSymbolELF *R = Renames.lookup(SymA)) + RenamedSymA = R; + ++ if (TargetObjectWriter->getEMachine() == ELF::EM_SW64) { ++ const MCFixupKindInfo &FKI = ++ Asm.getBackend().getFixupKindInfo((MCFixupKind)Fixup.getKind()); ++ if (strcmp(FKI.Name, "fixup_SW64_GPDISP_HI16") == 0) { ++ Addend = 4; ++ SymA = nullptr; ++ for (auto it = Asm.symbol_begin(), ie = Asm.symbol_end(); it != ie; ++ ++it) { ++ if (it->isInSection() && &(it->getSection()) == Fragment->getParent()) { ++ RenamedSymA = cast(&*it); ++ break; ++ } ++ } ++ } ++ } ++ + if (ViaWeakRef) + RenamedSymA->setIsWeakrefUsedInReloc(); + else +diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp +index 06de70ad2..09d41f788 100644 +--- a/llvm/lib/MC/MCAsmStreamer.cpp ++++ b/llvm/lib/MC/MCAsmStreamer.cpp +@@ -681,9 +681,14 @@ void MCAsmStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + if (E->inlineAssignedExpr()) + EmitSet = false; + if (EmitSet) { +- OS << ".set "; +- Symbol->print(OS, MAI); +- OS << ", "; ++ if (MAI->hasSw64SetDirective()) { ++ Symbol->print(OS, MAI); ++ OS << " = "; ++ } else { ++ OS << ".set "; ++ Symbol->print(OS, MAI); ++ OS << ", "; ++ } + Value->print(OS, MAI); + + EmitEOL(); +diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp +index 653ff4e94..abdd002f9 100644 +--- a/llvm/lib/MC/MCELFStreamer.cpp ++++ b/llvm/lib/MC/MCELFStreamer.cpp +@@ -472,6 +472,16 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) { + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HI: + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA: + case MCSymbolRefExpr::VK_PPC_TLSLD: ++ case MCSymbolRefExpr::VK_SW64_TLSGD: ++ case MCSymbolRefExpr::VK_SW64_TLSLDM: ++ case MCSymbolRefExpr::VK_SW64_GOTDTPREL16: ++ case MCSymbolRefExpr::VK_SW64_DTPREL_HI16: ++ case MCSymbolRefExpr::VK_SW64_DTPREL_LO16: ++ case MCSymbolRefExpr::VK_SW64_DTPREL16: ++ case MCSymbolRefExpr::VK_SW64_GOTTPREL16: ++ case MCSymbolRefExpr::VK_SW64_TPREL_HI16: ++ case MCSymbolRefExpr::VK_SW64_TPREL_LO16: ++ case MCSymbolRefExpr::VK_SW64_TPREL16: + break; + } + getAssembler().registerSymbol(symRef.getSymbol()); +diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp +index c9ff1865c..4864468da 100644 +--- a/llvm/lib/MC/MCExpr.cpp ++++ b/llvm/lib/MC/MCExpr.cpp +@@ -347,6 +347,56 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { + case VK_PPC_LOCAL: 
return "local"; + case VK_PPC_NOTOC: return "notoc"; + case VK_PPC_PCREL_OPT: return "<>"; ++ case VK_SW64_ELF_LITERAL: ++ return "ELF_LITERAL"; ++ case VK_SW64_LITUSE_ADDR: ++ return "LITUSE_ADDR"; ++ case VK_SW64_LITUSE_BASE: ++ return "LITUSE_BASE"; ++ case VK_SW64_LITUSE_BYTOFF: ++ return "LITUSE_BYTOFF"; ++ case VK_SW64_LITUSE_JSR: ++ return "LITUSE_JSR"; ++ case VK_SW64_LITUSE_TLSGD: ++ return "LITUSE_TLSGD"; ++ case VK_SW64_LITUSE_TLSLDM: ++ return "LITUSE_TLSLDM"; ++ case VK_SW64_LITUSE_JSRDIRECT: ++ return "LITUSE_JSRDIRECT"; ++ case VK_SW64_GPDISP: ++ return "GPDISP"; ++ case VK_SW64_GPDISP_HI16: ++ return "GPDISP_HI16"; ++ case VK_SW64_GPDISP_LO16: ++ return "GPDISP_LO16"; ++ case VK_SW64_GPREL_HI16: ++ return "GPREL_HI16"; ++ case VK_SW64_GPREL_LO16: ++ return "GPREL_LO16"; ++ case VK_SW64_GPREL16: ++ return "GPREL16"; ++ case VK_SW64_BRSGP: ++ return "BRSGP"; ++ case VK_SW64_TLSGD: ++ return "TLSGD"; ++ case VK_SW64_TLSLDM: ++ return "TLSLDM"; ++ case VK_SW64_GOTDTPREL16: ++ return "GOTDTPREL16"; ++ case VK_SW64_DTPREL_HI16: ++ return "DTPREL_HI16"; ++ case VK_SW64_DTPREL_LO16: ++ return "DTPREL_LO16"; ++ case VK_SW64_DTPREL16: ++ return "DTPREL16"; ++ case VK_SW64_GOTTPREL16: ++ return "GOTTPREL16"; ++ case VK_SW64_TPREL_HI16: ++ return "TPREL_HI16"; ++ case VK_SW64_TPREL_LO16: ++ return "TPREL_LO16"; ++ case VK_SW64_TPREL16: ++ return "TPREL16"; + case VK_COFF_IMGREL32: return "IMGREL"; + case VK_Hexagon_LO16: return "LO16"; + case VK_Hexagon_HI16: return "HI16"; +diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp +index 0b5109e41..745ab7578 100644 +--- a/llvm/lib/MC/MCObjectFileInfo.cpp ++++ b/llvm/lib/MC/MCObjectFileInfo.cpp +@@ -365,6 +365,9 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) { + case Triple::xtensa: + FDECFIEncoding = dwarf::DW_EH_PE_sdata4; + break; ++ case Triple::sw_64: ++ FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; ++ break; + default: + FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + break; +diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp +index 666252ffc..768fa2cb6 100644 +--- a/llvm/lib/MC/MCSectionELF.cpp ++++ b/llvm/lib/MC/MCSectionELF.cpp +@@ -153,6 +153,10 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, + // Print hex value of the flag while we do not have + // any standard symbolic representation of the flag. + OS << "0x7000001e"; ++ else if (Type == ELF::SHT_SW64_DWARF) ++ // Print hex value of the flag while we do not have ++ // any standard symbolic representation of the flag. 
++ OS << "0x7000001e"; + else if (Type == ELF::SHT_LLVM_ODRTAB) + OS << "llvm_odrtab"; + else if (Type == ELF::SHT_LLVM_LINKER_OPTIONS) +diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp +index 0d1862e57..0b72a8921 100644 +--- a/llvm/lib/Object/ELF.cpp ++++ b/llvm/lib/Object/ELF.cpp +@@ -22,6 +22,7 @@ using namespace object; + + StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, + uint32_t Type) { ++ + switch (Machine) { + case ELF::EM_68K: + switch (Type) { +@@ -181,6 +182,13 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, + break; + } + break; ++ case ELF::EM_SW64: ++ switch (Type) { ++#include "llvm/BinaryFormat/ELFRelocs/Sw64.def" ++ default: ++ break; ++ } ++ break; + default: + break; + } +@@ -233,6 +241,8 @@ uint32_t llvm::object::getELFRelativeRelocationType(uint32_t Machine) { + break; + case ELF::EM_LOONGARCH: + return ELF::R_LARCH_RELATIVE; ++ case ELF::EM_SW64: ++ break; + default: + break; + } +@@ -276,6 +286,14 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { + STRINGIFY_ENUM_CASE(ELF, SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC); + STRINGIFY_ENUM_CASE(ELF, SHT_AARCH64_MEMTAG_GLOBALS_STATIC); + } ++ case ELF::EM_SW64: ++ switch (Type) { ++ STRINGIFY_ENUM_CASE(ELF, SHT_SW64_REGINFO); ++ STRINGIFY_ENUM_CASE(ELF, SHT_SW64_OPTIONS); ++ STRINGIFY_ENUM_CASE(ELF, SHT_SW64_ABIFLAGS); ++ STRINGIFY_ENUM_CASE(ELF, SHT_SW64_DWARF); ++ } ++ break; + default: + break; + } +@@ -498,6 +516,13 @@ std::string ELFFile::getDynamicTagAsString(unsigned Arch, + } + break; + ++ case ELF::EM_SW64: ++ switch (Type) { ++#define SW64_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value) ++#include "llvm/BinaryFormat/DynamicTags.def" ++#undef SW64_DYNAMIC_TAG ++ } ++ + case ELF::EM_PPC64: + switch (Type) { + #define PPC64_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value) +diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp +index 0e5036d7d..2f97afd14 100644 +--- a/llvm/lib/Object/RelocationResolver.cpp ++++ b/llvm/lib/Object/RelocationResolver.cpp +@@ -428,6 +428,31 @@ static uint64_t resolveSparc32(uint64_t Type, uint64_t Offset, uint64_t S, + return LocData; + } + ++static bool supportsSw64(uint64_t Type) { ++ switch (Type) { ++ case ELF::R_SW_64_REFLONG: ++ case ELF::R_SW_64_REFQUAD: ++ case ELF::R_SW_64_SREL32: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++static uint64_t resolveSw64(uint64_t Type, uint64_t Offset, uint64_t S, ++ uint64_t /*LocData*/, int64_t Addend) { ++ switch (Type) { ++ case ELF::R_SW_64_REFLONG: ++ case ELF::R_SW_64_REFQUAD: ++ return S + Addend; ++ case ELF::R_SW_64_SREL32: ++ return (S + Addend) & 0xFFFFFFFF; ++ default: ++ llvm_unreachable("Invalid relocation type"); ++ } ++ return 0; ++} ++ + static bool supportsHexagon(uint64_t Type) { + return Type == ELF::R_HEX_32; + } +@@ -807,6 +832,8 @@ getRelocationResolver(const ObjectFile &Obj) { + return {supportsAmdgpu, resolveAmdgpu}; + case Triple::riscv64: + return {supportsRISCV, resolveRISCV}; ++ case Triple::sw_64: ++ return {supportsSw64, resolveSw64}; + default: + if (isAMDGPU(Obj)) + return {supportsAmdgpu, resolveAmdgpu}; +diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt +index 87fe7bebf..7be623c36 100644 +--- a/llvm/lib/Support/CMakeLists.txt ++++ b/llvm/lib/Support/CMakeLists.txt +@@ -220,6 +220,7 @@ add_llvm_component_library(LLVMSupport + StringRef.cpp + SuffixTreeNode.cpp + SuffixTree.cpp ++ Sw64TargetParser.cpp + SystemUtils.cpp + 
TarWriter.cpp + ThreadPool.cpp +diff --git a/llvm/lib/Support/Sw64TargetParser.cpp b/llvm/lib/Support/Sw64TargetParser.cpp +new file mode 100644 +index 000000000..f31238c8f +--- /dev/null ++++ b/llvm/lib/Support/Sw64TargetParser.cpp +@@ -0,0 +1,96 @@ ++//===-- Sw64TargetParser - Parser for Sw64 features -------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements a target parser to recognise Sw64 hardware features ++// such as FPU/CPU/ARCH and extension names. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "llvm/Support/Sw64TargetParser.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/TargetParser/Triple.h" ++#include ++ ++namespace llvm { ++namespace Sw64 { ++ ++struct CPUInfo { ++ StringLiteral Name; ++ CPUKind Kind; ++ unsigned Features; ++ StringLiteral DefaultMarch; ++ bool is64Bit() const { return (Features & FK_64BIT); } ++}; ++ ++constexpr CPUInfo Sw64CPUInfo[] = { ++#define SW64_CPU(ENUM, NAME, FEATURES, DEFAULT_MARCH) \ ++ {NAME, CK_##ENUM, FEATURES, DEFAULT_MARCH}, ++#include "llvm/Support/Sw64TargetParser.def" ++}; ++ ++bool checkTuneCPUKind(CPUKind Kind, bool IsSw64) { ++ if (Kind == CK_INVALID) ++ return false; ++ return Sw64CPUInfo[static_cast(Kind)].is64Bit() == IsSw64; ++} ++ ++CPUKind parseARCHKind(StringRef CPU) { ++ return llvm::StringSwitch(CPU) ++#define SW64_CPU(ENUM, NAME, FEATURES, DEFAULT_MARCH) \ ++ .Case(DEFAULT_MARCH, CK_##ENUM) ++#include "llvm/Support/Sw64TargetParser.def" ++ .Default(CK_INVALID); ++} ++ ++StringRef resolveTuneCPUAlias(StringRef TuneCPU, bool IsSw64) { ++ return llvm::StringSwitch(TuneCPU) ++#define PROC_ALIAS(NAME, Sw64) .Case(NAME, StringRef(Sw64)) ++#include "llvm/Support/Sw64TargetParser.def" ++ .Default(TuneCPU); ++} ++ ++CPUKind parseTuneCPUKind(StringRef TuneCPU, bool IsSw64) { ++ TuneCPU = resolveTuneCPUAlias(TuneCPU, IsSw64); ++ ++ return llvm::StringSwitch(TuneCPU) ++#define SW64_CPU(ENUM, NAME, FEATURES, DEFAULT_MARCH) .Case(NAME, CK_##ENUM) ++#include "llvm/Support/Sw64TargetParser.def" ++ .Default(CK_INVALID); ++} ++ ++StringRef getMcpuFromMArch(StringRef CPU) { ++ CPUKind Kind = parseARCHKind(CPU); ++ return Sw64CPUInfo[static_cast(Kind)].Name; ++} ++ ++void fillValidCPUArchList(SmallVectorImpl &Values, bool IsSw64) { ++ for (const auto &C : Sw64CPUInfo) { ++ if (C.Kind != CK_INVALID && IsSw64 == C.is64Bit()) ++ Values.emplace_back(C.Name); ++ } ++} ++ ++void fillValidTuneCPUArchList(SmallVectorImpl &Values, bool IsSw64) { ++ for (const auto &C : Sw64CPUInfo) { ++ if (C.Kind != CK_INVALID && IsSw64 == C.is64Bit()) ++ Values.emplace_back(C.Name); ++ } ++ ++#define PROC_ALIAS(NAME, Sw64) Values.emplace_back(StringRef(NAME)); ++#include "llvm/Support/Sw64TargetParser.def" ++} ++ ++CPUKind parseCPUArch(StringRef CPU) { ++ return llvm::StringSwitch(CPU) ++#define SW64_CPU(ENUM, NAME, FEATURES, DEFAULT_MARCH) .Case(NAME, CK_##ENUM) ++#include "llvm/Support/Sw64TargetParser.def" ++ .Default(CK_INVALID); ++} ++ ++} // namespace Sw64 ++} // namespace llvm +diff --git a/llvm/lib/Target/Sw64/AsmParser/CMakeLists.txt b/llvm/lib/Target/Sw64/AsmParser/CMakeLists.txt +new file mode 100644 +index 000000000..90d61cd90 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/AsmParser/CMakeLists.txt +@@ -0,0 
+1,13 @@ ++add_llvm_component_library(LLVMSw64AsmParser ++ Sw64AsmParser.cpp ++ ++ LINK_COMPONENTS ++ MC ++ MCParser ++ Sw64Desc ++ Sw64Info ++ Support ++ ++ ADD_TO_COMPONENT ++ Sw64 ++ ) +diff --git a/llvm/lib/Target/Sw64/AsmParser/Sw64AsmParser.cpp b/llvm/lib/Target/Sw64/AsmParser/Sw64AsmParser.cpp +new file mode 100644 +index 000000000..e3ce6f0a6 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/AsmParser/Sw64AsmParser.cpp +@@ -0,0 +1,2005 @@ ++//===-- Sw64AsmParser.cpp - Parse Sw64 assembly to MCInst instructions ----===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "MCTargetDesc/Sw64ABIFlagsSection.h" ++#include "MCTargetDesc/Sw64ABIInfo.h" ++#include "MCTargetDesc/Sw64BaseInfo.h" ++#include "MCTargetDesc/Sw64MCExpr.h" ++#include "MCTargetDesc/Sw64MCTargetDesc.h" ++#include "Sw64TargetStreamer.h" ++#include "llvm/ADT/APFloat.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/ADT/Twine.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCInstrDesc.h" ++#include "llvm/MC/MCInstrInfo.h" ++#include "llvm/MC/MCObjectFileInfo.h" ++#include "llvm/MC/MCParser/MCAsmLexer.h" ++#include "llvm/MC/MCParser/MCAsmParser.h" ++#include "llvm/MC/MCParser/MCAsmParserExtension.h" ++#include "llvm/MC/MCParser/MCParsedAsmOperand.h" ++#include "llvm/MC/MCParser/MCTargetAsmParser.h" ++#include "llvm/MC/MCSectionELF.h" ++#include "llvm/MC/MCStreamer.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/MC/MCSymbol.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/MC/MCValue.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/Compiler.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/SMLoc.h" ++#include "llvm/Support/SourceMgr.h" ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/TargetParser/SubtargetFeature.h" ++#include "llvm/TargetParser/Triple.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "sw_64-asm-parser" ++ ++static const StringRef RelocTable[] = { ++ "literal", "lituse_addr", "lituse_jsr", "gpdisp", ++ "gprelhigh", "gprellow", "gprel", "tlsgd", ++ "tlsldm", "gotdtprel", "dtprelhi", "dtprello", ++ "gottprel", "tprelhi", "tprello", "tprel"}; ++ ++namespace llvm { ++ ++class MCInstrInfo; ++ ++} // end namespace llvm ++ ++namespace { ++ ++class Sw64AssemblerOptions { ++public: ++ Sw64AssemblerOptions(const FeatureBitset &Features_) : Features(Features_) {} ++ ++ Sw64AssemblerOptions(const Sw64AssemblerOptions *Opts) { ++ ATReg = Opts->getATRegIndex(); ++ Reorder = Opts->isReorder(); ++ Macro = Opts->isMacro(); ++ Features = Opts->getFeatures(); ++ } ++ ++ unsigned getATRegIndex() const { return ATReg; } ++ bool setATRegIndex(unsigned Reg) { ++ if (Reg > 31) ++ return false; ++ ++ ATReg = Reg; ++ return true; ++ } ++ ++ bool isReorder() const { return Reorder; } ++ void setReorder() { Reorder = true; } ++ void setNoReorder() { Reorder = false; } ++ ++ bool isMacro() const { return Macro; } ++ void setMacro() { Macro = true; } ++ void setNoMacro() 
{ Macro = false; } ++ ++ const FeatureBitset &getFeatures() const { return Features; } ++ void setFeatures(const FeatureBitset &Features_) { Features = Features_; } ++ ++ // Set of features that are either architecture features or referenced ++ // by them (e.g.: FeatureNaN2008 implied by FeatureSw6432r6). ++ // The full table can be found in Sw64GenSubtargetInfo.inc (Sw64FeatureKV[]). ++ // The reason we need this mask is explained in the selectArch function. ++ // FIXME: Ideally we would like TableGen to generate this information. ++ static const FeatureBitset AllArchRelatedMask; ++ ++private: ++ unsigned ATReg = 1; ++ bool Reorder = true; ++ bool Macro = true; ++ FeatureBitset Features; ++}; ++ ++} // end anonymous namespace ++ ++const FeatureBitset Sw64AssemblerOptions::AllArchRelatedMask = { ++ Sw64::FeatureCIX, Sw64::Featurecore3b, Sw64::Featurecore4, ++ Sw64::FeatureRelax, Sw64::FeatureEv}; ++ ++namespace { ++ ++class Sw64AsmParser : public MCTargetAsmParser { ++ Sw64TargetStreamer &getTargetStreamer() { ++ MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); ++ return static_cast(TS); ++ } ++ ++ Sw64ABIInfo ABI; ++ SmallVector, 2> AssemblerOptions; ++ MCSymbol *CurrentFn; // Pointer to the function being parsed. It may be a ++ // nullptr, which indicates that no function is currently ++ // selected. This usually happens after an '.end func' ++ // directive. ++ bool IsLittleEndian; ++ bool IsPicEnabled; ++ bool IsCpRestoreSet; ++ int CpRestoreOffset; ++ unsigned CpSaveLocation; ++ // If true, then CpSaveLocation is a register, otherwise it's an offset. ++ bool CpSaveLocationIsRegister; ++ ++ // Map of register aliases created via the .set directive. ++ StringMap RegisterSets; ++ ++#define GET_ASSEMBLER_HEADER ++#include "Sw64GenAsmMatcher.inc" ++ ++ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, ++ OperandVector &Operands, MCStreamer &Out, ++ uint64_t &ErrorInfo, ++ bool MatchingInlineAsm) override; ++ ++ // Parse a register as used in CFI directives ++ bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc, ++ SMLoc &EndLoc) override; ++ ++ OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc, ++ SMLoc &EndLoc) override; ++ ++ bool parseParenSuffix(StringRef Name, OperandVector &Operands); ++ ++ bool mnemonicIsValid(StringRef Mnemonic, unsigned VariantID); ++ ++ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, ++ SMLoc NameLoc, OperandVector &Operands) override; ++ ++ bool ParseDirective(AsmToken DirectiveID) override; ++ ++ OperandMatchResultTy ++ matchAnyRegisterNameWithoutDollar(OperandVector &Operands, ++ StringRef Identifier, SMLoc S); ++ OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands, ++ const AsmToken &Token, ++ SMLoc S); ++ OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands, ++ SMLoc S); ++ OperandMatchResultTy parseAnyRegister(OperandVector &Operands); ++ OperandMatchResultTy parseMemOperand(OperandVector &Operands); ++ OperandMatchResultTy parseMemOperands(OperandVector &Operands); ++ OperandMatchResultTy parseJmpImm(OperandVector &Operands); ++ ++ bool searchSymbolAlias(OperandVector &Operands); ++ ++ bool parseOperand(OperandVector &, StringRef Mnemonic); ++ ++ void ParsingFixupOperands(std::pair reloc); ++ ++ enum MacroExpanderResultTy { ++ MER_NotAMacro, ++ MER_Success, ++ MER_Fail, ++ }; ++ ++ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, ++ unsigned Kind) override; ++ ++ bool loadAndAddSymbolAddress(const MCExpr *SymExpr, unsigned 
DstReg,
++                               unsigned SrcReg, bool Is32BitSym, SMLoc IDLoc,
++                               MCStreamer &Out, const MCSubtargetInfo *STI);
++
++  void expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
++                     const MCSubtargetInfo *STI, bool IsLoad);
++
++  bool reportParseError(Twine ErrorMsg);
++
++  bool parseMemOffset(const MCExpr *&Res, bool isParenExpr);
++
++  bool isEvaluated(const MCExpr *Expr);
++  bool parseSetArchDirective();
++  bool parseDirectiveSet();
++
++  bool parseSetAtDirective();
++  bool parseSetNoAtDirective();
++  bool parseSetMacroDirective();
++  bool parseSetNoMacroDirective();
++  bool parseSetReorderDirective();
++  bool parseSetNoReorderDirective();
++
++  bool parseSetAssignment();
++
++  bool parseFpABIValue(Sw64ABIFlagsSection::FpABIKind &FpABI,
++                       StringRef Directive);
++
++  int matchCPURegisterName(StringRef Symbol);
++
++  int matchFPURegisterName(StringRef Name);
++
++  bool processInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
++                          const MCSubtargetInfo *STI);
++
++  // Helper function that checks whether the value of a vector index is within
++  // the boundaries of accepted values for each RegisterKind.
++  // Example: INSERT.B $w0[n], $1 => 16 > n >= 0
++  bool validateMSAIndex(int Val, int RegKind);
++
++  // Selects a new architecture by updating the FeatureBits with the necessary
++  // info, including implied dependencies.
++  // Internally, it clears all the feature bits related to *any* architecture
++  // and selects the new one using the ToggleFeature functionality of the
++  // MCSubtargetInfo object that handles implied dependencies. The reason we
++  // clear all the arch-related bits manually is that ToggleFeature only
++  // clears the features that imply the feature being cleared, not the
++  // features implied by the feature being cleared. This is easier to see
++  // with an example:
++  // --------------------------------------------------
++  // | Feature        | Implies                       |
++  // |-------------------------------------------------|
++  // | FeatureCIX     |                               |
++  // | FeatureEv      |                               |
++  // | Featurecore3b  |                               |
++  // | Featurecore4   |                               |
++  // | FeatureRelax   |                               |
++  // --------------------------------------------------
++  //
++  // Clearing AllArchRelatedMask before toggling guarantees that, after a
++  // directive switches the architecture (e.g. from core3b to core4), exactly
++  // one set of arch-related feature bits is left selected.
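++  //
++  // Illustration only (added commentary, not behaviour defined by this
++  // patch): assuming the arch names accepted by parseSetArchDirective below
++  // ("core3b", "core4") and the Featurecore3b/Featurecore4 bits listed in
++  // AllArchRelatedMask above, assembling
++  //     .arch core4
++  // reaches selectArch("core4"), which first clears every bit in
++  // AllArchRelatedMask and then toggles the "core4" feature back on, so the
++  // assembler continues matching instructions against exactly one core
++  // feature set.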
++ void selectArch(StringRef ArchFeature) { ++ MCSubtargetInfo &STI = copySTI(); ++ FeatureBitset FeatureBits = STI.getFeatureBits(); ++ FeatureBits &= ~Sw64AssemblerOptions::AllArchRelatedMask; ++ STI.setFeatureBits(FeatureBits); ++ setAvailableFeatures( ++ ComputeAvailableFeatures(STI.ToggleFeature(ArchFeature))); ++ AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); ++ } ++ ++ void setFeatureBits(uint64_t Feature, StringRef FeatureString) { ++ if (!(getSTI().getFeatureBits()[Feature])) { ++ MCSubtargetInfo &STI = copySTI(); ++ setAvailableFeatures( ++ ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); ++ AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); ++ } ++ } ++ ++ void clearFeatureBits(uint64_t Feature, StringRef FeatureString) { ++ if (getSTI().getFeatureBits()[Feature]) { ++ MCSubtargetInfo &STI = copySTI(); ++ setAvailableFeatures( ++ ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); ++ AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); ++ } ++ } ++ ++ void setModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { ++ setFeatureBits(Feature, FeatureString); ++ AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits()); ++ } ++ ++ void clearModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { ++ clearFeatureBits(Feature, FeatureString); ++ AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits()); ++ } ++ ++public: ++ MCFixupKind FixupKind; ++ ++ enum Sw64MatchResultTy { ++ Match_RequiresDifferentSrcAndDst = FIRST_TARGET_MATCH_RESULT_TY, ++ Match_RequiresDifferentOperands, ++ Match_RequiresNoZeroRegister, ++ Match_RequiresSameSrcAndDst, ++ Match_NoFCCRegisterForCurrentISA, ++ Match_NonZeroOperandForSync, ++ Match_NonZeroOperandForMTCX, ++ Match_RequiresPosSizeRange0_32, ++ Match_RequiresPosSizeRange33_64, ++ Match_RequiresPosSizeUImm6, ++#define GET_OPERAND_DIAGNOSTIC_TYPES ++#include "Sw64GenAsmMatcher.inc" ++#undef GET_OPERAND_DIAGNOSTIC_TYPES ++ }; ++ ++ Sw64AsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, ++ const MCInstrInfo &MII, const MCTargetOptions &Options) ++ : MCTargetAsmParser(Options, sti, MII), ++ ABI(Sw64ABIInfo::computeTargetABI(Triple(sti.getTargetTriple()), ++ sti.getCPU(), Options)) { ++ FixupKind = llvm::FirstTargetFixupKind; ++ ++ MCAsmParserExtension::Initialize(parser); ++ parser.addAliasForDirective(".asciiz", ".asciz"); ++ parser.addAliasForDirective(".hword", ".2byte"); ++ parser.addAliasForDirective(".word", ".4byte"); ++ parser.addAliasForDirective(".dword", ".8byte"); ++ ++ // Initialize the set of available features. ++ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); ++ ++ // Remember the initial assembler options. The user can not modify these. ++ AssemblerOptions.push_back( ++ std::make_unique(getSTI().getFeatureBits())); ++ ++ // Create an assembler options environment for the user to modify. 
++ AssemblerOptions.push_back( ++ std::make_unique(getSTI().getFeatureBits())); ++ ++ CurrentFn = nullptr; ++ ++ IsPicEnabled = getContext().getObjectFileInfo()->isPositionIndependent(); ++ ++ IsCpRestoreSet = false; ++ CpRestoreOffset = -1; ++ } ++ ++ const Sw64ABIInfo &getABI() const { return ABI; } ++ ++ const MCExpr *createTargetUnaryExpr(const MCExpr *E, ++ AsmToken::TokenKind OperatorToken, ++ MCContext &Ctx) override { ++ switch (OperatorToken) { ++ default: ++ return nullptr; ++ case AsmToken::PercentGp_Rel: ++ return Sw64MCExpr::create(Sw64MCExpr::MEK_ELF_LITERAL, E, Ctx); ++ case AsmToken::PercentDtprel_Hi: ++ return Sw64MCExpr::create(Sw64MCExpr::MEK_GPREL_HI16, E, Ctx); ++ case AsmToken::PercentDtprel_Lo: ++ return Sw64MCExpr::create(Sw64MCExpr::MEK_GPREL_LO16, E, Ctx); ++ case AsmToken::PercentGot_Hi: ++ return Sw64MCExpr::create(Sw64MCExpr::MEK_GPDISP_HI16, E, Ctx); ++ case AsmToken::PercentGot_Lo: ++ return Sw64MCExpr::create(Sw64MCExpr::MEK_GPDISP_LO16, E, Ctx); ++ ++ case AsmToken::PercentTprel_Hi: ++ return Sw64MCExpr::create(Sw64MCExpr::MEK_TPREL_HI16, E, Ctx); ++ case AsmToken::PercentTprel_Lo: ++ return Sw64MCExpr::create(Sw64MCExpr::MEK_TPREL_LO16, E, Ctx); ++ } ++ } ++}; ++ ++// Sw64Operand - Instances of this class represent a parsed Sw64 machine ++// instruction. ++class Sw64Operand : public MCParsedAsmOperand { ++public: ++ // Broad categories of register classes ++ // The exact class is finalized by the render method. ++ enum RegKind { ++ RegKind_GPR = 1, // Sw64 GPR Register ++ RegKind_FPR = 2, // Sw64 FPR Register ++ RegKind_TC = 4, // Sw64 Time counter ++ RegKind_CSR = 8, // Sw64 Control & Status Register ++ RegKind_FPCR = 16, // Sw64 Floating-point Control Register ++ // Potentially any (e.g. $1) ++ RegKind_Numeric = ++ RegKind_GPR | RegKind_FPR | RegKind_TC | RegKind_CSR | RegKind_FPCR ++ }; ++ ++private: ++ enum KindTy { ++ k_Immediate, // An immediate (possibly involving symbol references) ++ k_Memory, // Base + Offset Memory Address ++ k_Register, // A RegKind. ++ k_RegisterIndex, // A register index in one or more RegKind. ++ k_Token // A simple token ++ } Kind; ++ ++public: ++ Sw64Operand(KindTy K, Sw64AsmParser &Parser) ++ : MCParsedAsmOperand(), Kind(K), AsmParser(Parser) {} ++ ++ ~Sw64Operand() override { ++ switch (Kind) { ++ case k_Immediate: ++ break; ++ case k_Memory: ++ delete Mem.Base; ++ break; ++ case k_Register: ++ case k_RegisterIndex: ++ case k_Token: ++ break; ++ } ++ } ++ ++private: ++ // For diagnostics, and checking the assembler temporary ++ Sw64AsmParser &AsmParser; ++ ++ struct Token { ++ const char *Data; ++ unsigned Length; ++ }; ++ ++ struct RegIdxOp { ++ unsigned Index; // Index into the register class ++ RegKind Kind; // Bitfield of the kinds it could possibly be ++ struct Token Tok; // The input token this operand originated from. 
++ const MCRegisterInfo *RegInfo; ++ }; ++ ++ struct ImmOp { ++ const MCExpr *Val; ++ }; ++ ++ struct MemOp { ++ Sw64Operand *Base; ++ const MCExpr *Off; ++ }; ++ ++ struct RegListOp { ++ SmallVector *List; ++ }; ++ ++ union { ++ struct Token Tok; ++ struct RegIdxOp RegIdx; ++ struct ImmOp Imm; ++ struct MemOp Mem; ++ struct RegListOp RegList; ++ }; ++ ++ SMLoc StartLoc, EndLoc; ++ ++ // Internal constructor for register kinds ++ static std::unique_ptr CreateReg(unsigned Index, StringRef Str, ++ RegKind RegKind, ++ const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, ++ Sw64AsmParser &Parser) { ++ auto Op = std::make_unique(k_Register, Parser); ++ Op->RegIdx.Index = Index; ++ Op->RegIdx.RegInfo = RegInfo; ++ Op->RegIdx.Kind = RegKind; ++ Op->RegIdx.Tok.Data = Str.data(); ++ Op->RegIdx.Tok.Length = Str.size(); ++ Op->StartLoc = S; ++ Op->EndLoc = E; ++ return Op; ++ } ++ ++public: ++ // Coerce the register to GPR64 and return the real register for the current ++ // target. ++ unsigned getGPRReg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!"); ++ return RegIdx.Index; ++ } ++ ++ bool isV256AsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_FPR && ++ RegIdx.Index <= Sw64::F31 && RegIdx.Index >= Sw64::F0; ++ } ++ ++ void addMemOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 2 && "Invalid number of operands!"); ++ ++ Inst.addOperand(MCOperand::createReg(getMemBase()->getGPR64Reg())); ++ ++ const MCExpr *Expr = getMemOff(); ++ addExpr(Inst, Expr); ++ } ++ ++private: ++ // Coerce the register to FPR64 and return the real register for the current ++ // target. ++ unsigned getFPR64Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_FPR) && "Invalid access!"); ++ return RegIdx.Index; ++ } ++ ++public: ++ void addExpr(MCInst &Inst, const MCExpr *Expr) const { ++ // Add as immediate when possible. Null MCExpr = 0. ++ if (!Expr) ++ Inst.addOperand(MCOperand::createImm(0)); ++ else if (const MCConstantExpr *CE = dyn_cast(Expr)) ++ Inst.addOperand(MCOperand::createImm(CE->getValue())); ++ else ++ Inst.addOperand(MCOperand::createExpr(Expr)); ++ } ++ ++ void addRegOperands(MCInst &Inst, unsigned N) const { ++ if (RegIdx.Index > 32) ++ Inst.addOperand(MCOperand::createReg(getGPRReg())); ++ else ++ Inst.addOperand(MCOperand::createReg(getFPR64Reg())); ++ } ++ ++ void addImmOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ const MCExpr *Expr = getImm(); ++ addExpr(Inst, Expr); ++ } ++ ++ bool isReg() const override { ++ // As a special case until we sort out the definition of div/divu, accept ++ // $0/$zero here so that MCK_ZERO works correctly. ++ return isGPRAsmReg() || isFPRAsmReg(); ++ } ++ ++ bool isRegIdx() const { return Kind == k_Register; } // Operand.Kind ++ bool isImm() const override { return Kind == k_Immediate; } ++ ++ bool isConstantImm() const { ++ int64_t Res; ++ return isImm() && getImm()->evaluateAsAbsolute(Res); ++ } ++ ++ bool isToken() const override { ++ // Note: It's not possible to pretend that other operand kinds are tokens. ++ // The matcher emitter checks tokens first. ++ return Kind == k_Token; ++ } ++ ++ bool isMem() const override { return Kind == k_Memory; } ++ ++ StringRef getToken() const { ++ assert(Kind == k_Token && "Invalid access!"); ++ return StringRef(Tok.Data, Tok.Length); ++ } ++ ++ unsigned getReg() const override { ++ // As a special case until we sort out the definition of div/divu, accept ++ // $0/$zero here so that MCK_ZERO works correctly. 
++ if (Kind == k_Register && RegIdx.Kind & RegKind_GPR) ++ return getGPRReg(); // FIXME: GPR64 too ++ ++ if (Kind == k_Register && RegIdx.Kind & RegKind_FPR) ++ return getFPR64Reg(); // FIXME: GPR64 too ++ ++ llvm_unreachable("Invalid access!"); ++ return 0; ++ } ++ ++ const MCExpr *getImm() const { ++ assert((Kind == k_Immediate) && "Invalid access!"); ++ return Imm.Val; ++ } ++ ++ int64_t getConstantImm() const { ++ const MCExpr *Val = getImm(); ++ int64_t Value = 0; ++ (void)Val->evaluateAsAbsolute(Value); ++ return Value; ++ } ++ ++ Sw64Operand *getMemBase() const { ++ assert((Kind == k_Memory) && "Invalid access!"); ++ return Mem.Base; ++ } ++ ++ const MCExpr *getMemOff() const { ++ assert((Kind == k_Memory) && "Invalid access!"); ++ return Mem.Off; ++ } ++ ++ int64_t getConstantMemOff() const { ++ return static_cast(getMemOff())->getValue(); ++ } ++ ++ static std::unique_ptr CreateToken(StringRef Str, SMLoc S, ++ Sw64AsmParser &Parser) { ++ auto Op = std::make_unique(k_Token, Parser); ++ Op->Tok.Data = Str.data(); ++ Op->Tok.Length = Str.size(); ++ Op->StartLoc = S; ++ Op->EndLoc = S; ++ return Op; ++ } ++ ++ // Create a numeric register (e.g. $1). The exact register remains ++ // unresolved until an instruction successfully matches ++ static std::unique_ptr ++ createNumericReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, Sw64AsmParser &Parser) { ++ LLVM_DEBUG(dbgs() << "createNumericReg(" << Index + 65 << ", ...)\n"); ++ return CreateReg(Index + 65, Str, RegKind_Numeric, RegInfo, S, E, Parser); ++ } ++ ++ // Create a register that is definitely a GPR. ++ // This is typically only used for named registers such as $gp. ++ static std::unique_ptr ++ createGPRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, Sw64AsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_GPR, RegInfo, S, E, Parser); ++ } ++ ++ // Create a register that is definitely a FPR. ++ // This is typically only used for named registers such as $f0. ++ static std::unique_ptr ++ createFPRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, Sw64AsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_FPR, RegInfo, S, E, Parser); ++ } ++ ++ static std::unique_ptr ++ CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, Sw64AsmParser &Parser) { ++ auto Op = std::make_unique(k_Immediate, Parser); ++ Op->Imm.Val = Val; ++ Op->StartLoc = S; ++ Op->EndLoc = E; ++ return Op; ++ } ++ ++ static std::unique_ptr ++ CreateMem(std::unique_ptr Base, const MCExpr *Off, SMLoc S, ++ SMLoc E, Sw64AsmParser &Parser) { ++ auto Op = std::make_unique(k_Memory, Parser); ++ Op->Mem.Base = Base.release(); ++ Op->Mem.Off = Off; ++ Op->StartLoc = S; ++ Op->EndLoc = E; ++ return Op; ++ } ++ ++ bool isGPRAsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_GPR && ++ RegIdx.Index <= Sw64::R31 && RegIdx.Index >= Sw64::R0; ++ } ++ ++ bool isFPRAsmReg() const { ++ // AFPR64 is $0-$15 but we handle this in getAFGR64() ++ return isRegIdx() && RegIdx.Kind & RegKind_FPR && ++ RegIdx.Index <= Sw64::F31 && RegIdx.Index >= Sw64::F0; ++ // return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index <= 64 && ++ // RegIdx.Index >= 33; ++ } ++ ++ // Coerce the register to GPR64 and return the real register for the current ++ // target. 
++ unsigned getGPR64Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!"); ++ return RegIdx.Index; ++ } ++ ++ unsigned getFGR64Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_FPR) && "Invalid access!"); ++ return RegIdx.Index; ++ } ++ ++ void addF4RCAsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFGR64Reg())); ++ } ++ ++ void addF8RCAsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFGR64Reg())); ++ } ++ ++ bool isFGRAsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_FPR && RegIdx.Index <= 32; ++ } ++ ++ // getStartLoc - Get the location of the first token of this operand. ++ SMLoc getStartLoc() const override { return StartLoc; } ++ // getEndLoc - Get the location of the last token of this operand. ++ SMLoc getEndLoc() const override { return EndLoc; } ++ ++ void print(raw_ostream &OS) const override { ++ switch (Kind) { ++ case k_Immediate: ++ OS << "Imm<"; ++ OS << *Imm.Val; ++ OS << ">"; ++ break; ++ case k_Memory: ++ OS << "Mem<"; ++ Mem.Base->print(OS); ++ OS << ", "; ++ OS << *Mem.Off; ++ OS << ">"; ++ break; ++ case k_Register: ++ OS << "Reg<" << RegIdx.Kind << ", " ++ << StringRef(RegIdx.Tok.Data, RegIdx.Tok.Length) << ">"; ++ break; ++ case k_RegisterIndex: ++ OS << "RegIdx<" << RegIdx.Index << ":" << RegIdx.Kind << ", " ++ << StringRef(RegIdx.Tok.Data, RegIdx.Tok.Length) << ">"; ++ break; ++ case k_Token: ++ OS << getToken(); ++ break; ++ } ++ } ++ ++ bool isValidForTie(const Sw64Operand &Other) const { ++ if (Kind != Other.Kind) ++ return false; ++ ++ switch (Kind) { ++ default: ++ llvm_unreachable("Unexpected kind"); ++ return false; ++ case k_RegisterIndex: { ++ StringRef Token(RegIdx.Tok.Data, RegIdx.Tok.Length); ++ StringRef OtherToken(Other.RegIdx.Tok.Data, Other.RegIdx.Tok.Length); ++ return Token == OtherToken; ++ } ++ } ++ } ++ ++ template bool isScaledSImm() const { ++ if (isConstantImm() && ++ isShiftedInt(getConstantImm())) ++ return true; ++ // Operand can also be a symbol or symbol plus ++ // offset in case of relocations. 
++ if (Kind != k_Immediate) ++ return false; ++ MCValue Res; ++ bool Success = getImm()->evaluateAsRelocatable(Res, nullptr, nullptr); ++ return Success && isShiftedInt(Res.getConstant()); ++ } ++ ++ template ++ void addConstantSImmOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ int64_t Imm = getConstantImm() - Offset; ++ Imm = SignExtend64(Imm); ++ Imm += Offset; ++ Imm += AdjustOffset; ++ Inst.addOperand(MCOperand::createImm(Imm)); ++ } ++ ++ template ++ void addConstantUImmOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ uint64_t Imm = getConstantImm() - Offset; ++ Imm &= (1ULL << Bits) - 1; ++ Imm += Offset; ++ Imm += AdjustOffset; ++ Inst.addOperand(MCOperand::createImm(Imm)); ++ } ++ ++ template bool isConstantUImmRange() const { ++ return isConstantImm() && getConstantImm() >= Bottom && ++ getConstantImm() <= Top; ++ } ++ ++ template bool isScaledUImm() const { ++ return isConstantImm() && ++ isShiftedUInt(getConstantImm()); ++ } ++ ++ template bool isConstantSImm() const { ++ return isConstantImm() && isInt(getConstantImm() - Offset); ++ } ++ ++ template bool isConstantUImm() const { ++ return isConstantImm() && isUInt(getConstantImm() - Offset); ++ } ++ ++ // Coerce the register to SIMD and return the real register for the current ++ // target. ++ unsigned getV256Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_FPR) && "Invalid access!"); ++ // It doesn't matter which of the MSA128[BHWD] classes we use. They are all ++ // identical ++ unsigned ClassID = Sw64::V256LRegClassID; ++ // RegIdx.Index should be sub 1, or it will be error. such as: $f1 -> $f2 ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index - 1); ++ } ++ ++ void addV256AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getV256Reg())); ++ } ++ ++ bool isConstantMemOff() const { ++ return isMem() && isa(getMemOff()); ++ } ++ ++ // Allow relocation operators. ++ // FIXME: This predicate and others need to look through binary expressions ++ // and determine whether a Value is a constant or not. ++ template ++ bool isMemWithSimmOffset() const { ++ if (!isMem()) ++ return false; ++ if (!getMemBase()->isGPRAsmReg()) ++ return false; ++ if (isa(getMemOff()) || ++ (isConstantMemOff() && ++ isShiftedInt(getConstantMemOff()))) ++ return true; ++ MCValue Res; ++ bool IsReloc = getMemOff()->evaluateAsRelocatable(Res, nullptr, nullptr); ++ return IsReloc && isShiftedInt(Res.getConstant()); ++ } ++ ++ template bool isSImm() const { ++ return isConstantImm() ? isInt(getConstantImm()) : isImm(); ++ } ++ ++ template bool isUImm() const { ++ return isConstantImm() ? isUInt(getConstantImm()) : isImm(); ++ } ++ ++ template bool isAnyImm() const { ++ return isConstantImm() ? (isInt(getConstantImm()) || ++ isUInt(getConstantImm())) ++ : isImm(); ++ } ++ ++}; // class Sw64Operand ++ ++} // end anonymous namespace ++ ++namespace llvm {} // end namespace llvm ++ ++bool Sw64AsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, ++ MCStreamer &Out, ++ const MCSubtargetInfo *STI) { ++ const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); ++ Inst.setLoc(IDLoc); ++ ++ if (MCID.mayLoad() || MCID.mayStore()) { ++ // Check the offset of memory operand, if it is a symbol ++ // reference or immediate we may have to expand instructions. 
++ const MCOperandInfo &OpInfo = MCID.operands()[1]; ++ if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) || ++ (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) { ++ MCOperand &Op = Inst.getOperand(1); ++ if (Op.isImm()) { ++ const unsigned Opcode = Inst.getOpcode(); ++ switch (Opcode) { ++ default: ++ break; ++ } ++ ++ int64_t MemOffset = Op.getImm(); ++ if (MemOffset < -32768 || MemOffset > 32767) { ++ // Offset can't exceed 16bit value. ++ expandMemInst(Inst, IDLoc, Out, STI, MCID.mayLoad()); ++ return getParser().hasPendingError(); ++ } ++ } else if (Op.isExpr()) { ++ const MCExpr *Expr = Op.getExpr(); ++ if (Expr->getKind() == MCExpr::SymbolRef) { ++ const MCSymbolRefExpr *SR = ++ static_cast(Expr); ++ if (SR->getKind() == MCSymbolRefExpr::VK_None) { ++ // Expand symbol. ++ expandMemInst(Inst, IDLoc, Out, STI, MCID.mayLoad()); ++ return getParser().hasPendingError(); ++ } ++ } else if (!isEvaluated(Expr)) { ++ expandMemInst(Inst, IDLoc, Out, STI, MCID.mayLoad()); ++ return getParser().hasPendingError(); ++ } ++ } ++ } ++ } // if load/store ++ static int lockReg = -1; ++ if (Inst.getOpcode() == Sw64::STQ_C || Inst.getOpcode() == Sw64::STL_C) { ++ lockReg = Inst.getOperand(0).getReg(); ++ } ++ ++ if (Inst.getOpcode() == Sw64::RD_F) { ++ if (lockReg != Inst.getOperand(0).getReg() && lockReg != -1) { ++ Error(IDLoc, "lstX and rd_f must use the same reg!"); ++ lockReg = -1; ++ return false; ++ } ++ } ++ ++ Out.emitInstruction(Inst, *STI); ++ return true; ++} ++ ++// Can the value be represented by a unsigned N-bit value and a shift left? ++template static bool isShiftedUIntAtAnyPosition(uint64_t x) { ++ return x && isUInt(x >> llvm::countr_zero(x)); ++} ++ ++OperandMatchResultTy Sw64AsmParser::parseJmpImm(OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "parseJumpTarget\n"); ++ ++ SMLoc S = getLexer().getLoc(); ++ ++ // Registers are a valid target and have priority over symbols. ++ OperandMatchResultTy ResTy = parseAnyRegister(Operands); ++ if (ResTy != MatchOperand_NoMatch) ++ return ResTy; ++ ++ // Integers and expressions are acceptable ++ const MCExpr *Expr = nullptr; ++ if (Parser.parseExpression(Expr)) { ++ // We have no way of knowing if a symbol was consumed so we must ParseFail ++ return MatchOperand_ParseFail; ++ } ++ Operands.push_back( ++ Sw64Operand::CreateImm(Expr, S, getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++} ++ ++OperandMatchResultTy Sw64AsmParser::parseMemOperands(OperandVector &Operands) { ++ LLVM_DEBUG(dbgs() << "Parsing Memory Operand for store/load\n"); ++ SMLoc S = getParser().getTok().getLoc(); ++ SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1); ++ ++ const AsmToken &Tok = getParser().getTok(); ++ switch (Tok.getKind()) { ++ default: ++ return MatchOperand_NoMatch; ++ case AsmToken::EndOfStatement: ++ // Zero register assumed, add a memory operand with ZERO as its base. ++ // "Base" will be managed by k_Memory. 
++    auto Base = Sw64Operand::createGPRReg(
++        0, "0", getContext().getRegisterInfo(), S, E, *this);
++    Operands.push_back(
++        Sw64Operand::CreateMem(std::move(Base), nullptr, S, E, *this));
++    return MatchOperand_Success;
++  }
++
++  return MatchOperand_NoMatch;
++}
++
++void Sw64AsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
++                                  const MCSubtargetInfo *STI, bool IsLoad) {
++  // ldl $0,a($gp)   Op0 Op1 Op2
++
++  const MCSymbolRefExpr *SR;
++  MCInst TempInst;
++  unsigned ImmOffset, HiOffset, LoOffset;
++  const MCExpr *ExprOffset;
++
++  // 1st operand is either the source or destination register.
++  assert(Inst.getOperand(0).isReg() && "expected register operand kind");
++  unsigned RegOpNum = Inst.getOperand(0).getReg();
++
++  // 3rd operand is the base register.
++  assert(Inst.getOperand(2).isReg() && "expected register operand kind");
++  unsigned BaseRegNum = Inst.getOperand(2).getReg();
++  const MCOperand &OffsetOp = Inst.getOperand(1);
++
++  // 2nd operand is either an immediate or an expression.
++  if (OffsetOp.isImm()) {
++    assert(Inst.getOperand(1).isImm() && "expected immediate operand kind");
++    ImmOffset = Inst.getOperand(1).getImm();
++    LoOffset = ImmOffset & 0x0000ffff;
++    HiOffset = (ImmOffset & 0xffff0000) >> 16;
++    // If the msb of LoOffset is 1 (negative number) we must increment HiOffset.
++    if (LoOffset & 0x8000)
++      HiOffset++;
++  } else
++    ExprOffset = Inst.getOperand(1).getExpr();
++  // All instructions will have the same location.
++  TempInst.setLoc(IDLoc);
++  TempInst.setOpcode(Inst.getOpcode());
++  TempInst.addOperand(MCOperand::createReg(RegOpNum));
++  if (OffsetOp.isImm())
++    TempInst.addOperand(MCOperand::createImm(ImmOffset));
++  else {
++    if (ExprOffset->getKind() == MCExpr::SymbolRef) {
++      SR = static_cast<const MCSymbolRefExpr *>(ExprOffset);
++
++      TempInst.addOperand(MCOperand::createExpr(SR));
++    } else {
++      llvm_unreachable("Memory offset is not SymbolRef!");
++    }
++  }
++  TempInst.addOperand(MCOperand::createReg(BaseRegNum));
++  Out.emitInstruction(TempInst, *STI);
++  // Prepare TempInst for the next instruction.
++  TempInst.clear();
++}
++
++// Expand an integer division macro.
++//
++// Notably we don't have to emit a warning when encountering $rt as the $zero
++// register, or 0 as an immediate. processInstruction() has already done that.
++//
++// The destination register can only be $zero when expanding (S)DivIMacro or
++// D(S)DivMacro.
++ ++bool Sw64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, ++ OperandVector &Operands, ++ MCStreamer &Out, ++ uint64_t &ErrorInfo, ++ bool MatchingInlineAsm) { ++ MCInst Inst; ++ unsigned MatchResult = ++ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); ++ ++ switch (MatchResult) { ++ case Match_Success: ++ if (processInstruction(Inst, IDLoc, Out, STI)) ++ return true; ++ return false; ++ case Match_MissingFeature: ++ Error(IDLoc, "instruction requires a CPU feature not currently enabled"); ++ return true; ++ case Match_InvalidTiedOperand: ++ Error(IDLoc, "operand must match destination register"); ++ return true; ++ case Match_InvalidOperand: { ++ SMLoc ErrorLoc = IDLoc; ++ if (ErrorInfo != ~0ULL) { ++ if (ErrorInfo >= Operands.size()) ++ return Error(IDLoc, "too few operands for instruction"); ++ ++ ErrorLoc = Operands[ErrorInfo]->getStartLoc(); ++ if (ErrorLoc == SMLoc()) ++ ErrorLoc = IDLoc; ++ } ++ ++ return Error(ErrorLoc, "invalid operand for instruction"); ++ } ++ case Match_MnemonicFail: ++ return Error(IDLoc, "invalid instruction"); ++ } ++ llvm_unreachable("Implement any new match types added!"); ++} ++ ++int Sw64AsmParser::matchCPURegisterName(StringRef Name) { ++ int CC; ++ CC = StringSwitch(Name) ++ .Cases("v0", "r0", Sw64::R0) ++ .Cases("t0", "r1", Sw64::R1) ++ .Cases("t1", "r2", Sw64::R2) ++ .Cases("t2", "r3", Sw64::R3) ++ .Cases("t3", "r4", Sw64::R4) ++ .Cases("t4", "r5", Sw64::R5) ++ .Cases("t5", "r6", Sw64::R6) ++ .Cases("t6", "r7", Sw64::R7) ++ .Cases("t7", "r8", Sw64::R8) ++ .Cases("s0", "r9", Sw64::R9) ++ .Cases("s1", "r10", Sw64::R10) ++ .Cases("s2", "r11", Sw64::R11) ++ .Cases("s3", "r12", Sw64::R12) ++ .Cases("s4", "r13", Sw64::R13) ++ .Cases("s5", "r14", Sw64::R14) ++ .Cases("fp", "r15", Sw64::R15) ++ .Cases("a0", "r16", Sw64::R16) ++ .Cases("a1", "r17", Sw64::R17) ++ .Cases("a2", "r18", Sw64::R18) ++ .Cases("a3", "r19", Sw64::R19) ++ .Cases("a4", "r20", Sw64::R20) ++ .Cases("a5", "r21", Sw64::R21) ++ .Cases("t8", "r22", Sw64::R22) ++ .Cases("t9", "r23", Sw64::R23) ++ .Cases("t10", "r24", Sw64::R24) ++ .Cases("t11", "r25", Sw64::R25) ++ .Cases("ra", "r26", Sw64::R26) ++ .Cases("pv", "r27", Sw64::R27) ++ .Cases("at", "r28", Sw64::R28) ++ .Cases("gp", "r29", Sw64::R29) ++ .Cases("sp", "r30", Sw64::R30) ++ .Cases("zero", "r31", Sw64::R31) ++ .Default(-1); ++ ++ return CC; ++} ++ ++int Sw64AsmParser::matchFPURegisterName(StringRef Name) { ++ if (Name[0] == 'f') { ++ StringRef NumString = Name.substr(1); ++ unsigned IntVal; ++ if (NumString.getAsInteger(10, IntVal)) ++ return -1; // This is not an integer. ++ if (IntVal > 31) // Maximum index for fpu register. ++ return -1; ++ return IntVal + 1; ++ } ++ return -1; ++} ++ ++bool Sw64AsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "parseOperand\n"); ++ ++ // Check if the current operand has a custom associated parser, if so, try to ++ // custom parse the operand, or fallback to the general approach. ++ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); ++ if (ResTy == MatchOperand_Success) ++ return false; ++ // If there wasn't a custom match, try the generic matcher below. Otherwise, ++ // there was a match, but an error occurred, in which case, just return that ++ // the operand parsing failed. ++ if (ResTy == MatchOperand_ParseFail) ++ return true; ++ ++ if (parseMemOperands(Operands) == MatchOperand_Success) ++ return false; ++ ++ LLVM_DEBUG(dbgs() << ".. 
Generic Parser\n"); ++ ++ switch (getLexer().getKind()) { ++ case AsmToken::Dollar: { ++ // Parse the register. ++ SMLoc S = Parser.getTok().getLoc(); ++ ++ // Almost all registers have been parsed by custom parsers. There is only ++ // one exception to this. $zero (and it's alias $0) will reach this point ++ // for div, divu, and similar instructions because it is not an operand ++ // to the instruction definition but an explicit register. Special case ++ // this situation for now. ++ if (parseAnyRegister(Operands) != MatchOperand_NoMatch) ++ return false; ++ ++ // Maybe it is a symbol reference. ++ StringRef Identifier; ++ if (Parser.parseIdentifier(Identifier)) ++ return true; ++ ++ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); ++ MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); ++ ++ // Otherwise create a symbol reference. ++ const MCExpr *Res = ++ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); ++ ++ Operands.push_back(Sw64Operand::CreateImm(Res, S, E, *this)); ++ return false; ++ } ++ // parse jmp & ret: ($GPRC) ++ case AsmToken::LParen: { ++ return parseParenSuffix(Mnemonic, Operands); ++ } ++ case AsmToken::Minus: ++ case AsmToken::Plus: ++ case AsmToken::String: ++ case AsmToken::Integer: { ++ LLVM_DEBUG(dbgs() << ".. generic integer expression\n"); ++ const MCExpr *IdVal; ++ SMLoc S = Parser.getTok().getLoc(); // Start location of the operand. ++ if (getParser().parseExpression(IdVal)) ++ return true; ++ ++ std::string Reloc; ++ const MCExpr *Expr; ++ const char *Mnem = Mnemonic.data(); ++ AsmToken::TokenKind FirstTokenKind; ++ MCContext &Ctx = getStreamer().getContext(); ++ std::string Stxt = S.getPointer(); ++ size_t a = Stxt.find_first_of('!'); ++ size_t c = Stxt.find_first_of('\n'); ++ ++ if (a != 0 && a < c) { ++ std::string Reloc1 = Stxt.substr(a + 1, c - a - 1); ++ size_t b = Reloc1.find_last_of('!'); ++ ++ Reloc = Reloc1.substr(0, b); ++ ++ if (Reloc == "gpdisp") { ++ if (strcmp(Mnem, "ldih") == 0) ++ FirstTokenKind = AsmToken::TokenKind::PercentGot_Hi; ++ else if (strcmp(Mnem, "ldi") == 0) ++ FirstTokenKind = AsmToken::TokenKind::PercentGot_Lo; ++ ++ Expr = createTargetUnaryExpr(IdVal, FirstTokenKind, Ctx); ++ } ++ SMLoc E = ++ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); ++ ++ Operands.push_back(Sw64Operand::CreateImm(Expr, S, E, *this)); ++ return false; ++ } ++ ++ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); ++ Operands.push_back(Sw64Operand::CreateImm(IdVal, S, E, *this)); ++ return false; ++ } ++ default: { ++ LLVM_DEBUG(dbgs() << ".. 
generic expr expression\n"); ++ ++ const MCExpr *Expr; ++ SMLoc S = Parser.getTok().getLoc(); ++ if (getParser().parseExpression(Expr)) ++ return true; ++ ++ std::string Reloc; ++ AsmToken::TokenKind FirstTokenKind; ++ MCContext &Ctx = getStreamer().getContext(); ++ std::string Stxt = S.getPointer(); ++ size_t a = Stxt.find_first_of('!'); ++ size_t b = Stxt.find_first_of('\n'); ++ Reloc = Stxt.substr(a + 1, b - a - 1); ++ ++ if (a < b) { ++ if (Reloc == "literal") ++ FirstTokenKind = AsmToken::TokenKind::PercentGp_Rel; ++ else if (Reloc == "gprelhigh") ++ FirstTokenKind = AsmToken::TokenKind::PercentDtprel_Hi; ++ else if (Reloc == "gprellow") ++ FirstTokenKind = AsmToken::TokenKind::PercentDtprel_Lo; ++ else if (Reloc == "tprelhi") ++ FirstTokenKind = AsmToken::TokenKind::PercentTprel_Hi; ++ else if (Reloc == "tprello") ++ FirstTokenKind = AsmToken::TokenKind::PercentTprel_Lo; ++ ++ Expr = createTargetUnaryExpr(Expr, FirstTokenKind, Ctx); ++ } ++ ++ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); ++ ++ Operands.push_back(Sw64Operand::CreateImm(Expr, S, E, *this)); ++ return false; ++ } ++ } ++ return true; ++} ++ ++bool Sw64AsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc, ++ SMLoc &EndLoc) { ++ return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success; ++} ++ ++OperandMatchResultTy Sw64AsmParser::tryParseRegister(MCRegister &RegNo, ++ SMLoc &StartLoc, ++ SMLoc &EndLoc) { ++ SmallVector, 1> Operands; ++ OperandMatchResultTy ResTy = parseAnyRegister(Operands); ++ if (ResTy == MatchOperand_Success) { ++ assert(Operands.size() == 1); ++ Sw64Operand &Operand = static_cast(*Operands.front()); ++ StartLoc = Operand.getStartLoc(); ++ EndLoc = Operand.getEndLoc(); ++ ++ // AFAIK, we only support numeric registers and named GPR's in CFI ++ // directives. ++ // Don't worry about eating tokens before failing. Using an unrecognised ++ // register is a parse error. ++ if (Operand.isGPRAsmReg()) { ++ // Resolve to GPR32 or GPR64 appropriately. ++ RegNo = Operand.getGPRReg(); ++ } ++ ++ return (RegNo == (unsigned)-1) ? MatchOperand_NoMatch ++ : MatchOperand_Success; ++ } ++ ++ assert(Operands.size() == 0); ++ return (RegNo == (unsigned)-1) ? MatchOperand_NoMatch : MatchOperand_Success; ++} ++ ++bool Sw64AsmParser::isEvaluated(const MCExpr *Expr) { ++ switch (Expr->getKind()) { ++ case MCExpr::Constant: ++ return true; ++ case MCExpr::SymbolRef: ++ return (cast(Expr)->getKind() != MCSymbolRefExpr::VK_None); ++ case MCExpr::Binary: { ++ const MCBinaryExpr *BE = cast(Expr); ++ if (!isEvaluated(BE->getLHS())) ++ return false; ++ return isEvaluated(BE->getRHS()); ++ } ++ case MCExpr::Unary: ++ return isEvaluated(cast(Expr)->getSubExpr()); ++ case MCExpr::Target: ++ return true; ++ } ++ return false; ++} ++ ++bool Sw64AsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) { ++ SMLoc S; ++ ++ if (isParenExpr) ++ return getParser().parseParenExprOfDepth(0, Res, S); ++ return getParser().parseExpression(Res); ++} ++ ++OperandMatchResultTy Sw64AsmParser::parseMemOperand(OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "parseMemOperand\n"); ++ const MCExpr *IdVal = nullptr; ++ SMLoc S; ++ bool isParenExpr = false; ++ OperandMatchResultTy Res = MatchOperand_NoMatch; ++ // First operand is the offset. 
++ S = Parser.getTok().getLoc(); ++ ++ if (getLexer().getKind() == AsmToken::LParen) { ++ Parser.Lex(); ++ isParenExpr = true; ++ } ++ ++ if (getLexer().getKind() != AsmToken::Dollar) { ++ if (parseMemOffset(IdVal, isParenExpr)) ++ return MatchOperand_ParseFail; ++ ++ const AsmToken &Tok = Parser.getTok(); // Get the next token. ++ if (Tok.isNot(AsmToken::LParen)) { ++ Sw64Operand &Mnemonic = static_cast(*Operands[0]); ++ if (Mnemonic.getToken() == "la" || Mnemonic.getToken() == "dla") { ++ SMLoc E = ++ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); ++ Operands.push_back(Sw64Operand::CreateImm(IdVal, S, E, *this)); ++ return MatchOperand_Success; ++ } ++ if (Tok.is(AsmToken::EndOfStatement)) { ++ SMLoc E = ++ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); ++ ++ // Zero register assumed, add a memory operand with ZERO as its base. ++ // "Base" will be managed by k_Memory. ++ auto Base = Sw64Operand::createGPRReg( ++ 0, "0", getContext().getRegisterInfo(), S, E, *this); ++ Operands.push_back( ++ Sw64Operand::CreateMem(std::move(Base), IdVal, S, E, *this)); ++ return MatchOperand_Success; ++ } ++ ++ MCBinaryExpr::Opcode Opcode; ++ // GAS and LLVM treat comparison operators different. GAS will generate -1 ++ // or 0, while LLVM will generate 0 or 1. Since a comparsion operator is ++ // highly unlikely to be found in a memory offset expression, we don't ++ // handle them. ++ switch (Tok.getKind()) { ++ case AsmToken::Plus: ++ Opcode = MCBinaryExpr::Add; ++ Parser.Lex(); ++ break; ++ case AsmToken::Minus: ++ Opcode = MCBinaryExpr::Sub; ++ Parser.Lex(); ++ break; ++ case AsmToken::Star: ++ Opcode = MCBinaryExpr::Mul; ++ Parser.Lex(); ++ break; ++ case AsmToken::Pipe: ++ Opcode = MCBinaryExpr::Or; ++ Parser.Lex(); ++ break; ++ case AsmToken::Amp: ++ Opcode = MCBinaryExpr::And; ++ Parser.Lex(); ++ break; ++ case AsmToken::LessLess: ++ Opcode = MCBinaryExpr::Shl; ++ Parser.Lex(); ++ break; ++ case AsmToken::GreaterGreater: ++ Opcode = MCBinaryExpr::LShr; ++ Parser.Lex(); ++ break; ++ case AsmToken::Caret: ++ Opcode = MCBinaryExpr::Xor; ++ Parser.Lex(); ++ break; ++ case AsmToken::Slash: ++ Opcode = MCBinaryExpr::Div; ++ Parser.Lex(); ++ break; ++ case AsmToken::Percent: ++ Opcode = MCBinaryExpr::Mod; ++ Parser.Lex(); ++ break; ++ default: ++ Error(Parser.getTok().getLoc(), "'(' or expression expected"); ++ return MatchOperand_ParseFail; ++ } ++ const MCExpr *NextExpr; ++ if (getParser().parseExpression(NextExpr)) ++ return MatchOperand_ParseFail; ++ IdVal = MCBinaryExpr::create(Opcode, IdVal, NextExpr, getContext()); ++ } ++ ++ Parser.Lex(); // Eat the '(' token. ++ } ++ ++ Res = parseAnyRegister(Operands); ++ if (Res != MatchOperand_Success) ++ return Res; ++ ++ if (Parser.getTok().isNot(AsmToken::RParen)) { ++ Error(Parser.getTok().getLoc(), "')' expected"); ++ return MatchOperand_ParseFail; ++ } ++ ++ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); ++ ++ Parser.Lex(); // Eat the ')' token. ++ ++ if (!IdVal) ++ IdVal = MCConstantExpr::create(0, getContext()); ++ ++ // Replace the register operand with the memory operand. ++ std::unique_ptr op( ++ static_cast(Operands.back().release())); ++ // Remove the register from the operands. ++ // "op" will be managed by k_Memory. ++ Operands.pop_back(); ++ ++ // Add the memory operand. 
++ if (const MCBinaryExpr *BE = dyn_cast(IdVal)) { ++ int64_t Imm; ++ if (IdVal->evaluateAsAbsolute(Imm)) ++ IdVal = MCConstantExpr::create(Imm, getContext()); ++ else if (BE->getLHS()->getKind() != MCExpr::SymbolRef) ++ IdVal = MCBinaryExpr::create(BE->getOpcode(), BE->getRHS(), BE->getLHS(), ++ getContext()); ++ } ++ ++ Operands.push_back(Sw64Operand::CreateMem(std::move(op), IdVal, S, E, *this)); ++ return MatchOperand_Success; ++} ++ ++bool Sw64AsmParser::searchSymbolAlias(OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ MCSymbol *Sym = getContext().lookupSymbol(Parser.getTok().getIdentifier()); ++ if (!Sym) ++ return false; ++ ++ SMLoc S = Parser.getTok().getLoc(); ++ if (Sym->isVariable()) { ++ const MCExpr *Expr = Sym->getVariableValue(); ++ if (Expr->getKind() == MCExpr::SymbolRef) { ++ const MCSymbolRefExpr *Ref = static_cast(Expr); ++ StringRef DefSymbol = Ref->getSymbol().getName(); ++ if (DefSymbol.startswith("$")) { ++ OperandMatchResultTy ResTy = ++ matchAnyRegisterNameWithoutDollar(Operands, DefSymbol.substr(1), S); ++ if (ResTy == MatchOperand_Success) { ++ Parser.Lex(); ++ return true; ++ } ++ if (ResTy == MatchOperand_ParseFail) ++ llvm_unreachable("Should never ParseFail"); ++ } ++ } ++ } else if (Sym->isUnset()) { ++ // If symbol is unset, it might be created in the `parseSetAssignment` ++ // routine as an alias for a numeric register name. ++ // Lookup in the aliases list. ++ auto Entry = RegisterSets.find(Sym->getName()); ++ if (Entry != RegisterSets.end()) { ++ OperandMatchResultTy ResTy = ++ matchAnyRegisterWithoutDollar(Operands, Entry->getValue(), S); ++ if (ResTy == MatchOperand_Success) { ++ Parser.Lex(); ++ return true; ++ } ++ } ++ } ++ ++ return false; ++} ++ ++OperandMatchResultTy Sw64AsmParser::matchAnyRegisterNameWithoutDollar( ++ OperandVector &Operands, StringRef Identifier, SMLoc S) { ++ int Index = matchCPURegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(Sw64Operand::createGPRReg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ Index = matchFPURegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(Sw64Operand::createFPRReg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ return MatchOperand_NoMatch; ++} ++ ++OperandMatchResultTy ++Sw64AsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, ++ const AsmToken &Token, SMLoc S) { ++ if (Token.is(AsmToken::Identifier)) { ++ LLVM_DEBUG(dbgs() << ".. identifier\n"); ++ StringRef Identifier = Token.getIdentifier(); ++ OperandMatchResultTy ResTy = ++ matchAnyRegisterNameWithoutDollar(Operands, Identifier, S); ++ return ResTy; ++ } else if (Token.is(AsmToken::Integer)) { ++ LLVM_DEBUG(dbgs() << ".. 
integer\n"); ++ int64_t RegNum = Token.getIntVal(); ++ Operands.push_back(Sw64Operand::createNumericReg( ++ RegNum, Token.getString(), getContext().getRegisterInfo(), S, ++ Token.getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ LLVM_DEBUG(dbgs() << Token.getKind() << "\n"); ++ ++ return MatchOperand_NoMatch; ++} ++ ++OperandMatchResultTy ++Sw64AsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) { ++ auto Token = getLexer().peekTok(false); ++ return matchAnyRegisterWithoutDollar(Operands, Token, S); ++} ++ ++OperandMatchResultTy Sw64AsmParser::parseAnyRegister(OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "parseAnyRegister\n"); ++ ++ auto Token = Parser.getTok(); ++ ++ SMLoc S = Token.getLoc(); ++ ++ if (Token.isNot(AsmToken::Dollar)) { ++ LLVM_DEBUG(dbgs() << ".. !$ -> try sym aliasing\n"); ++ if (Token.is(AsmToken::Identifier)) { ++ if (searchSymbolAlias(Operands)) ++ return MatchOperand_Success; ++ } ++ LLVM_DEBUG(dbgs() << ".. !symalias -> NoMatch\n"); ++ return MatchOperand_NoMatch; ++ } ++ LLVM_DEBUG(dbgs() << ".. $\n"); ++ ++ OperandMatchResultTy ResTy = matchAnyRegisterWithoutDollar(Operands, S); ++ if (ResTy == MatchOperand_Success) { ++ Parser.Lex(); // $ ++ Parser.Lex(); // identifier ++ } ++ return ResTy; ++} ++ ++bool Sw64AsmParser::parseParenSuffix(StringRef Name, OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ if (getLexer().is(AsmToken::LParen)) { ++ Operands.push_back( ++ Sw64Operand::CreateToken("(", getLexer().getLoc(), *this)); ++ Parser.Lex(); ++ if (Name == "ret") { ++ Operands.push_back( ++ Sw64Operand::CreateToken("$26)", getLexer().getLoc(), *this)); ++ Parser.Lex(); // eat "$" ++ Parser.Lex(); // eat "26" ++ Parser.Lex(); // eat ")" ++ } else { ++ if (parseOperand(Operands, Name)) { ++ SMLoc Loc = getLexer().getLoc(); ++ return Error(Loc, "unexpected token in argument list"); ++ } ++ if (Parser.getTok().isNot(AsmToken::RParen)) { ++ SMLoc Loc = getLexer().getLoc(); ++ return Error(Loc, "unexpected token, expected ')'"); ++ } ++ Operands.push_back( ++ Sw64Operand::CreateToken(")", getLexer().getLoc(), *this)); ++ Parser.Lex(); ++ } ++ } ++ return false; ++} ++ ++bool Sw64AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, ++ SMLoc NameLoc, OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "ParseInstruction\n"); ++ ++ std::pair RelocOperands; ++ // We have reached first instruction, module directive are now forbidden. ++ // getTargetStreamer().forbidModuleDirective(); ++ ++ // Check if we have valid mnemonic ++ if (!mnemonicIsValid(Name, 0)) { ++ return Error(NameLoc, "unknown instruction"); ++ } ++ // First operand in MCInst is instruction mnemonic. ++ Operands.push_back(Sw64Operand::CreateToken(Name, NameLoc, *this)); ++ ++ // Read the remaining operands. ++ if (getLexer().isNot(AsmToken::EndOfStatement)) { ++ // Read the first operand. ++ if (parseOperand(Operands, Name)) { ++ SMLoc Loc = getLexer().getLoc(); ++ return Error(Loc, "unexpected token in argument list"); ++ } ++ ++ while (getLexer().is(AsmToken::Comma)) { ++ Parser.Lex(); // Eat the comma. ++ // Parse and remember the operand. 
++ if (parseOperand(Operands, Name)) { ++ SMLoc Loc = getLexer().getLoc(); ++ return Error(Loc, "unexpected token in argument list"); ++ } ++ // Parse parenthesis suffixes before we iterate ++ if (getLexer().is(AsmToken::LParen) && parseParenSuffix(Name, Operands)) ++ return true; ++ } ++ } ++ while (Parser.getTok().is(AsmToken::Exclaim)) { ++ if (false) { ++ LLVM_DEBUG(dbgs() << ".. Skip Parse " << Name << " Relocation Symbol\n"); ++ Parser.Lex(); // Eat ! ++ Parser.Lex(); // Eat reloction symbol. ++ } else { ++ LLVM_DEBUG(dbgs() << ".. Parse \"!"); ++ Parser.Lex(); // Eat ! ++ ++ if (Parser.getTok().is(AsmToken::Identifier)) { ++ // Parse Relocation Symbol ,Add Rel Kind Here ! ++ StringRef Identifier = Parser.getTok().getIdentifier(); ++ LLVM_DEBUG(dbgs() << Identifier << "\"\n"); ++ RelocOperands.first = Identifier; ++ } ++ if (Parser.getTok().is(AsmToken::Integer)) { ++ int64_t RelNum = Parser.getTok().getIntVal(); ++ LLVM_DEBUG(dbgs() << RelNum << "\"\n"); ++ RelocOperands.second = RelNum; ++ } ++ ParsingFixupOperands(RelocOperands); ++ Parser.Lex(); // Eat reloction symbol. ++ } ++ } ++ if (getLexer().isNot(AsmToken::EndOfStatement)) { ++ SMLoc Loc = getLexer().getLoc(); ++ return Error(Loc, "unexpected token in argument list"); ++ } ++ Parser.Lex(); // Consume the EndOfStatement. ++ return false; ++} ++ ++// FIXME: Given that these have the same name, these should both be ++// consistent on affecting the Parser. ++bool Sw64AsmParser::reportParseError(Twine ErrorMsg) { ++ SMLoc Loc = getLexer().getLoc(); ++ return Error(Loc, ErrorMsg); ++} ++ ++bool Sw64AsmParser::parseSetNoAtDirective() { ++ MCAsmParser &Parser = getParser(); ++ // Line should look like: ".set noat". ++ ++ // Set the $at register to $0. ++ AssemblerOptions.back()->setATRegIndex(0); ++ ++ Parser.Lex(); // Eat "noat". ++ ++ // If this is not the end of the statement, report an error. ++ if (getLexer().isNot(AsmToken::EndOfStatement)) { ++ reportParseError("unexpected token, expected end of statement"); ++ return false; ++ } ++ ++ Parser.Lex(); // Consume the EndOfStatement. ++ return false; ++} ++ ++bool Sw64AsmParser::parseSetAtDirective() { ++ // Line can be: ".set at", which sets $at to $1 ++ // or ".set at=$reg", which sets $at to $reg. ++ MCAsmParser &Parser = getParser(); ++ Parser.Lex(); // Eat "at". ++ ++ if (getLexer().is(AsmToken::EndOfStatement)) { ++ // No register was specified, so we set $at to $1. ++ AssemblerOptions.back()->setATRegIndex(1); ++ ++ Parser.Lex(); // Consume the EndOfStatement. ++ return false; ++ } ++ ++ if (getLexer().isNot(AsmToken::Equal)) { ++ reportParseError("unexpected token, expected equals sign"); ++ return false; ++ } ++ Parser.Lex(); // Eat "=". ++ ++ if (getLexer().isNot(AsmToken::Dollar)) { ++ if (getLexer().is(AsmToken::EndOfStatement)) { ++ reportParseError("no register specified"); ++ return false; ++ } else { ++ reportParseError("unexpected token, expected dollar sign '$'"); ++ return false; ++ } ++ } ++ Parser.Lex(); // Eat "$". ++ ++ // Find out what "reg" is. ++ unsigned AtRegNo; ++ const AsmToken &Reg = Parser.getTok(); ++ if (Reg.is(AsmToken::Identifier)) { ++ AtRegNo = matchCPURegisterName(Reg.getIdentifier()); ++ } else if (Reg.is(AsmToken::Integer)) { ++ AtRegNo = Reg.getIntVal(); ++ } else { ++ reportParseError("unexpected token, expected identifier or integer"); ++ return false; ++ } ++ ++ // Check if $reg is a valid register. If it is, set $at to $reg. 
++ if (!AssemblerOptions.back()->setATRegIndex(AtRegNo)) { ++ reportParseError("invalid register"); ++ return false; ++ } ++ Parser.Lex(); // Eat "reg". ++ ++ // If this is not the end of the statement, report an error. ++ if (getLexer().isNot(AsmToken::EndOfStatement)) { ++ reportParseError("unexpected token, expected end of statement"); ++ return false; ++ } ++ ++ Parser.Lex(); // Consume the EndOfStatement. ++ return false; ++} ++ ++bool Sw64AsmParser::parseSetReorderDirective() { ++ MCAsmParser &Parser = getParser(); ++ Parser.Lex(); ++ // If this is not the end of the statement, report an error. ++ if (getLexer().isNot(AsmToken::EndOfStatement)) { ++ reportParseError("unexpected token, expected end of statement"); ++ return false; ++ } ++ AssemblerOptions.back()->setReorder(); ++ Parser.Lex(); // Consume the EndOfStatement. ++ return false; ++} ++ ++bool Sw64AsmParser::parseSetNoReorderDirective() { ++ MCAsmParser &Parser = getParser(); ++ Parser.Lex(); ++ // If this is not the end of the statement, report an error. ++ if (getLexer().isNot(AsmToken::EndOfStatement)) { ++ reportParseError("unexpected token, expected end of statement"); ++ return false; ++ } ++ AssemblerOptions.back()->setNoReorder(); ++ Parser.Lex(); // Consume the EndOfStatement. ++ return false; ++} ++ ++bool Sw64AsmParser::parseSetMacroDirective() { ++ MCAsmParser &Parser = getParser(); ++ Parser.Lex(); ++ // If this is not the end of the statement, report an error. ++ if (getLexer().isNot(AsmToken::EndOfStatement)) { ++ reportParseError("unexpected token, expected end of statement"); ++ return false; ++ } ++ AssemblerOptions.back()->setMacro(); ++ Parser.Lex(); // Consume the EndOfStatement. ++ return false; ++} ++ ++bool Sw64AsmParser::parseSetNoMacroDirective() { ++ MCAsmParser &Parser = getParser(); ++ Parser.Lex(); ++ // If this is not the end of the statement, report an error. ++ if (getLexer().isNot(AsmToken::EndOfStatement)) { ++ reportParseError("unexpected token, expected end of statement"); ++ return false; ++ } ++ if (AssemblerOptions.back()->isReorder()) { ++ reportParseError("`noreorder' must be set before `nomacro'"); ++ return false; ++ } ++ AssemblerOptions.back()->setNoMacro(); ++ Parser.Lex(); // Consume the EndOfStatement. ++ return false; ++} ++ ++bool Sw64AsmParser::parseSetAssignment() { ++ StringRef Name; ++ const MCExpr *Value; ++ MCAsmParser &Parser = getParser(); ++ ++ if (Parser.parseIdentifier(Name)) ++ return reportParseError("expected identifier after .set"); ++ ++ if (getLexer().isNot(AsmToken::Comma)) ++ return reportParseError("unexpected token, expected comma"); ++ Lex(); // Eat comma ++ ++ if (getLexer().is(AsmToken::Dollar) && ++ getLexer().peekTok().is(AsmToken::Integer)) { ++ // Parse assignment of a numeric register: ++ // .set r1,$1 ++ Parser.Lex(); // Eat $. ++ RegisterSets[Name] = Parser.getTok(); ++ Parser.Lex(); // Eat identifier. 
++ getContext().getOrCreateSymbol(Name); ++ } else if (!Parser.parseExpression(Value)) { ++ // Parse assignment of an expression including ++ // symbolic registers: ++ // .set $tmp, $BB0-$BB1 ++ // .set r2, $f2 ++ MCSymbol *Sym = getContext().getOrCreateSymbol(Name); ++ Sym->setVariableValue(Value); ++ } else { ++ return reportParseError("expected valid expression after comma"); ++ } ++ ++ return false; ++} ++ ++bool Sw64AsmParser::parseSetArchDirective() { ++ MCAsmParser &Parser = getParser(); ++ ++ StringRef Arch; ++ if (Parser.parseIdentifier(Arch)) ++ return reportParseError("expected arch identifier"); ++ ++ StringRef ArchFeatureName = StringSwitch(Arch) ++ .Case("sw_64", "sw_64") ++ .Case("core3b", "core3b") ++ .Case("core4", "core4") ++ .Default(""); ++ ++ if (ArchFeatureName.empty()) ++ return reportParseError("unsupported architecture"); ++ ++ selectArch(ArchFeatureName); ++ return false; ++} ++ ++bool Sw64AsmParser::parseDirectiveSet() { ++ const AsmToken &Tok = getParser().getTok(); ++ StringRef IdVal = Tok.getString(); ++ ++ if (IdVal == "noat") ++ return parseSetNoAtDirective(); ++ if (IdVal == "at") ++ return parseSetAtDirective(); ++ if (IdVal == "arch") ++ return parseSetArchDirective(); ++ ++ if (Tok.getString() == "reorder") { ++ return parseSetReorderDirective(); ++ } ++ if (Tok.getString() == "noreorder") { ++ return parseSetNoReorderDirective(); ++ } ++ if (Tok.getString() == "macro") { ++ return parseSetMacroDirective(); ++ } ++ if (Tok.getString() == "nomacro") { ++ return parseSetNoMacroDirective(); ++ } ++ // TODO: temp write ++ if (Tok.getString() == "volatile") { ++ return parseSetNoMacroDirective(); ++ } ++ // It is just an identifier, look for an assignment. ++ return parseSetAssignment(); ++} ++ ++bool Sw64AsmParser::ParseDirective(AsmToken DirectiveID) { ++ // This returns false if this function recognizes the directive ++ // regardless of whether it is successfully handles or reports an ++ // error. Otherwise it returns true to give the generic parser a ++ // chance at recognizing it. ++ ++ MCAsmParser &Parser = getParser(); ++ StringRef IDVal = DirectiveID.getString(); ++ ++ if (IDVal == ".ent") { ++ // Ignore this directive for now. ++ Parser.Lex(); ++ return false; ++ } ++ ++ if (IDVal == ".end") { ++ // Ignore this directive for now. ++ Parser.Lex(); ++ return false; ++ } ++ ++ if (IDVal == ".frame") { ++ // Ignore this directive for now. ++ Parser.eatToEndOfStatement(); ++ return false; ++ } ++ ++ if (IDVal == ".set") { ++ parseDirectiveSet(); ++ return false; ++ } ++ ++ if (IDVal == ".mask" || IDVal == ".fmask") { ++ // Ignore this directive for now. ++ Parser.eatToEndOfStatement(); ++ return false; ++ } ++ if (IDVal == ".arch") { ++ // Ignore this directive for now. ++ parseSetArchDirective(); ++ Parser.eatToEndOfStatement(); ++ return false; ++ } ++ if (IDVal == ".word") { ++ // Ignore this directive for now. ++ Parser.eatToEndOfStatement(); ++ } ++ return true; ++} ++ ++extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSw64AsmParser() { ++ RegisterMCAsmParser X(getTheSw64Target()); ++} ++ ++#define GET_REGISTER_MATCHER ++#define GET_MATCHER_IMPLEMENTATION ++#include "Sw64GenAsmMatcher.inc" ++ ++bool Sw64AsmParser::mnemonicIsValid(StringRef Mnemonic, unsigned VariantID) { ++ // Find the appropriate table for this asm variant. 
++ const MatchEntry *Start, *End; ++ switch (VariantID) { ++ default: ++ llvm_unreachable("invalid variant!"); ++ case 0: ++ Start = std::begin(MatchTable0); ++ End = std::end(MatchTable0); ++ break; ++ } ++ // Search the table. ++ auto MnemonicRange = std::equal_range(Start, End, Mnemonic, LessOpcode()); ++ return MnemonicRange.first != MnemonicRange.second; ++} ++ ++unsigned Sw64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, ++ unsigned Kind) { ++ Sw64Operand &Op = static_cast(AsmOp); ++ int64_t ExpectedVal; ++ ++ switch (Kind) { ++ default: ++ return Match_InvalidOperand; ++ } ++ ++ if (!Op.isReg()) ++ return Match_InvalidOperand; ++ ++ if (Op.getReg() == ExpectedVal) ++ return Match_Success; ++ return Match_InvalidOperand; ++} ++ ++void Sw64AsmParser::ParsingFixupOperands(std::pair reloc) { ++ for (auto i : RelocTable) { ++ if (reloc.first.startswith(i)) ++ FixupKind = ++ StringSwitch(i) ++ .Case("literal", (MCFixupKind)Sw64::fixup_SW64_ELF_LITERAL) ++ .Case("literal_got", ++ (MCFixupKind)Sw64::fixup_SW64_ELF_LITERAL_GOT) ++ .Case("lituse_addr", (MCFixupKind)Sw64::fixup_SW64_LITUSE) ++ .Case("lituse_jsr", (MCFixupKind)Sw64::fixup_SW64_HINT) ++ .Case("gpdisp", (MCFixupKind)Sw64::fixup_SW64_GPDISP) ++ .Case("gprelhigh", (MCFixupKind)Sw64::fixup_SW64_GPDISP_HI16) ++ .Case("gprellow", (MCFixupKind)Sw64::fixup_SW64_GPDISP_LO16) ++ .Case("gprel", (MCFixupKind)Sw64::fixup_SW64_GPREL16) ++ .Case("tlsgd", (MCFixupKind)Sw64::fixup_SW64_TLSGD) ++ .Case("tlsldm", (MCFixupKind)Sw64::fixup_SW64_TLSLDM) ++ .Case("gotdtprel", (MCFixupKind)Sw64::fixup_SW64_GOTDTPREL16) ++ .Case("dtprelhi", (MCFixupKind)Sw64::fixup_SW64_DTPREL_HI16) ++ .Case("dtprello", (MCFixupKind)Sw64::fixup_SW64_DTPREL_LO16) ++ .Case("gottprel", (MCFixupKind)Sw64::fixup_SW64_GOTTPREL16) ++ .Case("tprelhi", (MCFixupKind)Sw64::fixup_SW64_TPREL_HI16) ++ .Case("tprello", (MCFixupKind)Sw64::fixup_SW64_TPREL_LO16) ++ .Case("tprel", (MCFixupKind)Sw64::fixup_SW64_TPREL16) ++ .Default(llvm::FirstTargetFixupKind); ++ } ++} +diff --git a/llvm/lib/Target/Sw64/CMakeLists.txt b/llvm/lib/Target/Sw64/CMakeLists.txt +new file mode 100644 +index 000000000..11598fbb8 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/CMakeLists.txt +@@ -0,0 +1,64 @@ ++add_llvm_component_group(Sw64) ++ ++set(LLVM_TARGET_DEFINITIONS Sw64.td) ++ ++tablegen(LLVM Sw64GenAsmMatcher.inc -gen-asm-matcher) ++tablegen(LLVM Sw64GenAsmWriter.inc -gen-asm-writer) ++tablegen(LLVM Sw64GenCallingConv.inc -gen-callingconv) ++tablegen(LLVM Sw64GenDAGISel.inc -gen-dag-isel) ++tablegen(LLVM Sw64GenDisassemblerTables.inc -gen-disassembler) ++tablegen(LLVM Sw64GenInstrInfo.inc -gen-instr-info) ++tablegen(LLVM Sw64GenRegisterInfo.inc -gen-register-info) ++tablegen(LLVM Sw64GenSubtargetInfo.inc -gen-subtarget) ++tablegen(LLVM Sw64GenMCCodeEmitter.inc -gen-emitter) ++tablegen(LLVM Sw64GenMCPseudoLowering.inc -gen-pseudo-lowering) ++ ++add_public_tablegen_target(Sw64CommonTableGen) ++ ++add_llvm_target(Sw64CodeGen ++ Sw64AsmPrinter.cpp ++ Sw64FrameLowering.cpp ++ Sw64LLRP.cpp ++ Sw64BranchSelector.cpp ++ Sw64InstrInfo.cpp ++ Sw64ISelDAGToDAG.cpp ++ Sw64ISelLowering.cpp ++ Sw64MCInstLower.cpp ++ Sw64MachineFunctionInfo.cpp ++ Sw64MacroFusion.cpp ++ Sw64RegisterInfo.cpp ++ Sw64Subtarget.cpp ++ Sw64TargetMachine.cpp ++ Sw64TargetObjectFile.cpp ++ Sw64SelectionDAGInfo.cpp ++ Sw64ExpandPseudo.cpp ++ Sw64ExpandPseudo2.cpp ++ Sw64PreLegalizerCombiner.cpp ++ Sw64CombineLS.cpp ++ Sw64IEEEConstraint.cpp ++ Sw64TargetTransformInfo.cpp ++ ++ LINK_COMPONENTS ++ Analysis ++ AsmPrinter ++ CodeGen 
++ Core ++ MC ++ SelectionDAG ++ Support ++ Target ++ TransformUtils ++ Sw64AsmPrinter ++ Sw64Desc ++ Sw64Info ++ GlobalISel ++ ++ ADD_TO_COMPONENT ++ Sw64 ++ ) ++ ++add_subdirectory(InstPrinter) ++add_subdirectory(MCTargetDesc) ++add_subdirectory(Disassembler) ++add_subdirectory(TargetInfo) ++add_subdirectory(AsmParser) +diff --git a/llvm/lib/Target/Sw64/Disassembler/CMakeLists.txt b/llvm/lib/Target/Sw64/Disassembler/CMakeLists.txt +new file mode 100644 +index 000000000..123e27b07 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Disassembler/CMakeLists.txt +@@ -0,0 +1,11 @@ ++add_llvm_component_library(LLVMSw64Disassembler ++ Sw64Disassembler.cpp ++ ++ LINK_COMPONENTS ++ MCDisassembler ++ Sw64Info ++ Support ++ ++ ADD_TO_COMPONENT ++ Sw64 ++ ) +diff --git a/llvm/lib/Target/Sw64/Disassembler/Sw64Disassembler.cpp b/llvm/lib/Target/Sw64/Disassembler/Sw64Disassembler.cpp +new file mode 100644 +index 000000000..9141e7172 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Disassembler/Sw64Disassembler.cpp +@@ -0,0 +1,390 @@ ++//===-- Sw64Disassembler.cpp - Disassembler for Sw64 --------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements the Sw64Disassembler class. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "MCTargetDesc/Sw64MCTargetDesc.h" ++#include "TargetInfo/Sw64TargetInfo.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCDecoderOps.h" ++#include "llvm/MC/MCDisassembler/MCDisassembler.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCInstrInfo.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/Endian.h" ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "Sw64-disassembler" ++ ++typedef MCDisassembler::DecodeStatus DecodeStatus; ++ ++namespace { ++class Sw64Disassembler : public MCDisassembler { ++ ++public: ++ Sw64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx) ++ : MCDisassembler(STI, Ctx) {} ++ ~Sw64Disassembler() {} ++ ++ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ++ ArrayRef Bytes, uint64_t Address, ++ raw_ostream &CStream) const override; ++}; ++} // end anonymous namespace ++ ++static MCDisassembler *createSw64Disassembler(const Target &T, ++ const MCSubtargetInfo &STI, ++ MCContext &Ctx) { ++ return new Sw64Disassembler(STI, Ctx); ++} ++ ++extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSw64Disassembler() { ++ // Register the disassembler for each target. ++ TargetRegistry::RegisterMCDisassembler(getTheSw64Target(), ++ createSw64Disassembler); ++} ++ ++static const unsigned GPRDecoderTable[] = { ++ Sw64::R0, Sw64::R1, Sw64::R2, Sw64::R3, Sw64::R4, Sw64::R5, Sw64::R6, ++ Sw64::R7, Sw64::R8, Sw64::R9, Sw64::R10, Sw64::R11, Sw64::R12, Sw64::R13, ++ Sw64::R14, Sw64::R15, Sw64::R16, Sw64::R17, Sw64::R18, Sw64::R19, Sw64::R20, ++ Sw64::R21, Sw64::R22, Sw64::R23, Sw64::R24, Sw64::R25, Sw64::R26, Sw64::R27, ++ Sw64::R28, Sw64::R29, Sw64::R30, Sw64::R31}; ++ ++// This instruction does not have a working decoder, and needs to be ++// fixed. This "fixme" function was introduced to keep the backend comiling ++// while making changes to tablegen code. 
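++// It unconditionally returns Fail, so instructions routed here are reported as
++// undecodable instead of being silently mis-decoded.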
++static DecodeStatus DecodeFIXMEInstruction(MCInst &Inst, uint64_t RegNo, ++ uint64_t Address, ++ const MCDisassembler *Decoder) { ++ return MCDisassembler::Fail; ++} ++ ++static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo, ++ uint64_t Address, ++ const MCDisassembler *Decoder) { ++ if (RegNo > std::size(GPRDecoderTable)) ++ return MCDisassembler::Fail; ++ ++ // We must define our own mapping from RegNo to register identifier. ++ // Accessing index RegNo in the register class will work in the case that ++ // registers were added in ascending order, but not in general. ++ unsigned Reg = GPRDecoderTable[RegNo]; ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static const unsigned FPRDecoderTable[] = { ++ Sw64::F0, Sw64::F1, Sw64::F2, Sw64::F3, Sw64::F4, Sw64::F5, Sw64::F6, ++ Sw64::F7, Sw64::F8, Sw64::F9, Sw64::F10, Sw64::F11, Sw64::F12, Sw64::F13, ++ Sw64::F14, Sw64::F15, Sw64::F16, Sw64::F17, Sw64::F18, Sw64::F19, Sw64::F20, ++ Sw64::F21, Sw64::F22, Sw64::F23, Sw64::F24, Sw64::F25, Sw64::F26, Sw64::F27, ++ Sw64::F28, Sw64::F29, Sw64::F30, Sw64::F31}; ++ ++static DecodeStatus DecodeF4RCRegisterClass(MCInst &Inst, uint64_t RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 32) { ++ return MCDisassembler::Fail; ++ } ++ unsigned Reg = FPRDecoderTable[RegNo]; ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeF8RCRegisterClass(MCInst &Inst, uint64_t RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 32) { ++ return MCDisassembler::Fail; ++ } ++ unsigned Reg = FPRDecoderTable[RegNo]; ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeV256LRegisterClass(MCInst &Inst, uint64_t RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 32) { ++ return MCDisassembler::Fail; ++ } ++ unsigned Reg = FPRDecoderTable[RegNo]; ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeFPRC_loRegisterClass(MCInst &Inst, uint64_t RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 32) { ++ return MCDisassembler::Fail; ++ } ++ unsigned Reg = FPRDecoderTable[RegNo]; ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeFPRCRegisterClass(MCInst &Inst, uint64_t RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 32) { ++ return MCDisassembler::Fail; ++ } ++ unsigned Reg = FPRDecoderTable[RegNo]; ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++template ++static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm, ++ int64_t Address, const void *Decoder) { ++ assert(isUInt(Imm) && "Invalid immediate"); ++ Inst.addOperand(MCOperand::createImm(Imm)); ++ return MCDisassembler::Success; ++} ++ ++template ++static DecodeStatus decodeUImmNonZeroOperand(MCInst &Inst, uint64_t Imm, ++ int64_t Address, ++ const void *Decoder) { ++ if (Imm == 0) ++ return MCDisassembler::Fail; ++ return decodeUImmOperand(Inst, Imm, Address, Decoder); ++} ++ ++template ++static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm, ++ int64_t Address, const void *Decoder) { ++ assert(isUInt(Imm) && "Invalid immediate"); ++ // Sign-extend the number in the bottom N bits of Imm ++ Inst.addOperand(MCOperand::createImm(SignExtend64(Imm))); ++ return MCDisassembler::Success; ++} ++ ++template ++static 
DecodeStatus decodeSImmNonZeroOperand(MCInst &Inst, uint64_t Imm, ++ int64_t Address, ++ const void *Decoder) { ++ if (Imm == 0) ++ return MCDisassembler::Fail; ++ return decodeSImmOperand(Inst, Imm, Address, Decoder); ++} ++ ++static DecodeStatus decodeFloatCopyInstruction(uint32_t func, MCInst &MI, ++ uint32_t Insn, uint64_t Address, ++ const void *Decoder) { ++ switch (func) { ++ default: ++ return MCDisassembler::Fail; ++ case 0x30: ++ MI.setOpcode(Sw64::CPYSS); ++ break; ++ case 0x31: ++ MI.setOpcode(Sw64::CPYSNS); ++ break; ++ case 0x32: ++ MI.setOpcode(Sw64::CPYSES); ++ break; ++ } ++ uint32_t RegOp1 = Insn << 6 >> 27; // Inst {25-21} Reg operand 1 ++ uint32_t RegOp2 = Insn << 11 >> 27; // Inst [20-16] Reg operand 2 ++ uint32_t RegOp3 = Insn & 0x1F; // Inst [4-0 ] Reg operand 3 ++ MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp3])); ++ MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp1])); ++ MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp2])); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus decodeFloatInstruction(MCInst &MI, uint32_t Insn, ++ uint64_t Address, ++ const void *Decoder) { ++ uint32_t func = (Insn & 0x1FE0) >> 5; ++ switch ((func & 0xF0) >> 4) { ++ default: ++ return MCDisassembler::Fail; ++ case 0x3: ++ return decodeFloatCopyInstruction(func, MI, Insn, Address, Decoder); ++ } ++} ++ ++static DecodeStatus decodeFloatSelectInstruction(MCInst &MI, uint32_t Insn, ++ uint64_t Address, ++ const void *Decoder) { ++ uint32_t func = (Insn & 0xFC00) >> 10; ++ switch (func) { ++ default: ++ return MCDisassembler::Fail; ++ case 0x10: ++ MI.setOpcode(Sw64::FSELEQS); ++ break; ++ case 0x11: ++ MI.setOpcode(Sw64::FSELNES); ++ break; ++ case 0x12: ++ MI.setOpcode(Sw64::FSELLTS); ++ break; ++ case 0x13: ++ MI.setOpcode(Sw64::FSELLES); ++ break; ++ case 0x14: ++ MI.setOpcode(Sw64::FSELGTS); ++ break; ++ case 0x15: ++ MI.setOpcode(Sw64::FSELGES); ++ break; ++ } ++ uint32_t RegOp1 = Insn << 6 >> 27; // Inst {25-21} Reg operand 1 ++ uint32_t RegOp2 = Insn << 11 >> 27; // Inst [20-16] Reg operand 2 ++ uint32_t RegOp3 = (Insn & 0x3E0) >> 5; // Inst [4-0 ] Reg operand 3 ++ uint32_t RegOp4 = Insn & 0x1F; // Inst [4-0 ] Reg operand 3 ++ MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp4])); ++ MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp3])); ++ MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp2])); ++ MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp1])); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus decodePostLSInstruction(MCInst &MI, uint32_t Insn, ++ uint64_t Address, ++ const void *Decoder) { ++ uint32_t func = (Insn & 0xFC00) >> 12; ++ bool isFloat = false; ++ bool isStore = false; ++ switch (func) { ++ default: ++ return MCDisassembler::Fail; ++ case 0x0: ++ MI.setOpcode(Sw64::LDBU_A); ++ break; ++ case 0x1: ++ MI.setOpcode(Sw64::LDHU_A); ++ break; ++ case 0x2: ++ MI.setOpcode(Sw64::LDW_A); ++ break; ++ case 0x3: ++ MI.setOpcode(Sw64::LDL_A); ++ break; ++ case 0x4: ++ MI.setOpcode(Sw64::LDS_A); ++ isFloat = true; ++ break; ++ case 0x5: ++ MI.setOpcode(Sw64::LDD_A); ++ isFloat = true; ++ break; ++ case 0x6: ++ MI.setOpcode(Sw64::STB_A); ++ break; ++ case 0x7: ++ MI.setOpcode(Sw64::STH_A); ++ break; ++ case 0x8: ++ MI.setOpcode(Sw64::STW_A); ++ break; ++ case 0x9: ++ MI.setOpcode(Sw64::STL_A); ++ break; ++ case 0xA: ++ MI.setOpcode(Sw64::STS_A); ++ isFloat = true; ++ isStore = true; ++ break; ++ case 0xB: ++ MI.setOpcode(Sw64::STD_A); ++ isFloat = true; ++ isStore = true; ++ break; ++ } ++ uint32_t 
RegOp1 = Insn << 6 >> 27; // Inst {25-21} Reg operand 1 ++ uint32_t RegOp2 = Insn << 11 >> 27; // Inst [20-16] Reg operand 2 ++ unsigned RegOp3 = Insn & 0xFFF; // Inst [11-0 ] Reg operand 3 ++ uint32_t RegOp4 = Insn << 11 >> 27; ++ MI.addOperand((isFloat && !isStore) ++ ? MCOperand::createReg(FPRDecoderTable[RegOp1]) ++ : MCOperand::createReg(GPRDecoderTable[RegOp1])); ++ MI.addOperand((isFloat && isStore) ++ ? MCOperand::createReg(FPRDecoderTable[RegOp4]) ++ : MCOperand::createReg(GPRDecoderTable[RegOp4])); ++ MI.addOperand(MCOperand::createReg(GPRDecoderTable[RegOp2])); ++ MI.addOperand(MCOperand::createImm(RegOp3)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus decodeBarrierInstruction(MCInst &MI, uint32_t Insn, ++ uint64_t Address, ++ const void *Decoder) { ++ uint32_t func = Insn & 0xFFFF; ++ switch (func) { ++ default: ++ return MCDisassembler::Fail; ++ case 0x00: ++ MI.setOpcode(Sw64::MB); ++ break; ++ case 0x01: ++ MI.setOpcode(Sw64::IMEMB); ++ break; ++ case 0x02: ++ MI.setOpcode(Sw64::WMEMB); ++ break; ++ } ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus decodeConlictInstruction(MCInst &MI, uint32_t Insn, ++ uint64_t Address, ++ const void *Decoder) { ++ uint32_t Opcode = Insn >> 26; ++ switch (Opcode) { ++ default: ++ return MCDisassembler::Fail; ++ case 0x06: ++ return decodeBarrierInstruction(MI, Insn, Address, Decoder); ++ case 0x18: ++ return decodeFloatInstruction(MI, Insn, Address, Decoder); ++ case 0x19: ++ return decodeFloatSelectInstruction(MI, Insn, Address, Decoder); ++ case 0x1E: ++ return decodePostLSInstruction(MI, Insn, Address, Decoder); ++ } ++} ++ ++#include "Sw64GenDisassemblerTables.inc" ++ ++DecodeStatus Sw64Disassembler::getInstruction(MCInst &Instr, uint64_t &Size, ++ ArrayRef Bytes, ++ uint64_t Address, ++ raw_ostream &CStream) const { ++ // TODO: This will need modification when supporting instruction set ++ // extensions with instructions > 32-bits (up to 176 bits wide). ++ uint32_t Insn; ++ DecodeStatus Result; ++ ++ if (Bytes.size() < 4) { ++ Size = 0; ++ return MCDisassembler::Fail; ++ } ++ Insn = support::endian::read32le(Bytes.data()); ++ LLVM_DEBUG(dbgs() << "Trying Decode Conflict Instruction :\n"); ++ Result = decodeConlictInstruction(Instr, Insn, Address, this); ++ if (Result != MCDisassembler::Fail) { ++ Size = 4; ++ return Result; ++ } ++ LLVM_DEBUG(dbgs() << "Trying Sw64 table :\n"); ++ Result = decodeInstruction(DecoderTable32, Instr, Insn, Address, this, STI); ++ Size = 4; ++ ++ return Result; ++} +diff --git a/llvm/lib/Target/Sw64/InstPrinter/CMakeLists.txt b/llvm/lib/Target/Sw64/InstPrinter/CMakeLists.txt +new file mode 100644 +index 000000000..b07b33f37 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/InstPrinter/CMakeLists.txt +@@ -0,0 +1,10 @@ ++add_llvm_component_library(LLVMSw64AsmPrinter ++ Sw64InstPrinter.cpp ++ ++ LINK_COMPONENTS ++ MC ++ Support ++ ++ ADD_TO_COMPONENT ++ Sw64 ++ ) +diff --git a/llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.cpp b/llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.cpp +new file mode 100644 +index 000000000..74ae067f9 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.cpp +@@ -0,0 +1,148 @@ ++//===-- Sw64InstPrinter.cpp - Convert Sw64 MCInst to assembly syntax ----===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
++// ++//===----------------------------------------------------------------------===// ++// ++// This class prints an Sw64 MCInst to a .s file. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "Sw64InstPrinter.h" ++#include "MCTargetDesc/Sw64BaseInfo.h" ++#include "MCTargetDesc/Sw64MCExpr.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCInstrDesc.h" ++#include "llvm/MC/MCSymbol.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "asm-printer" ++ ++#include "Sw64GenAsmWriter.inc" ++ ++void Sw64InstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { ++ OS << StringRef(getRegisterName(Reg)).lower(); ++} ++ ++void Sw64InstPrinter::printInst(const MCInst *MI, uint64_t Address, ++ StringRef Annot, const MCSubtargetInfo &STI, ++ raw_ostream &OS) { ++ printInstruction(MI, Address, OS); ++ if (!Annot.empty()) { ++ OS << "\t" << Annot; ++ } else ++ printAnnotation(OS, Annot); ++} ++ ++void Sw64InstPrinter::printInlineJT(const MCInst *MI, int opNum, ++ raw_ostream &O) { ++ report_fatal_error("can't handle InlineJT"); ++} ++ ++void Sw64InstPrinter::printInlineJT32(const MCInst *MI, int opNum, ++ raw_ostream &O) { ++ report_fatal_error("can't handle InlineJT32"); ++} ++ ++void Sw64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, ++ raw_ostream &O) { ++ ++ const MCOperand &Op = MI->getOperand(OpNo); ++ if (Op.isReg()) { ++ printRegName(O, Op.getReg()); ++ return; ++ } ++ ++ if (Op.isImm()) { ++ if (Op.getImm() > 65535) { ++ O << formatHex(Op.getImm()); ++ return; ++ } ++ O << Op.getImm(); ++ return; ++ } ++ ++ assert(Op.isExpr() && "unknown operand kind in printOperand"); ++ Op.getExpr()->print(O, &MAI, true); ++} ++ ++void Sw64InstPrinter::printMemoryArg(const MCInst *MI, unsigned OpNo, ++ raw_ostream &O) { ++ const MCOperand &Op = MI->getOperand(OpNo); ++ ++ if (Op.isExpr()) { ++ const MCExpr *Expr = Op.getExpr(); ++ if (Expr->getKind() == MCExpr::Target) { ++ const Sw64MCExpr *Sw64Expr = cast(Expr); ++ ++ switch (Sw64Expr->getKind()) { ++ default: ++ break; ++ case Sw64MCExpr::MEK_GPDISP_HI16: ++ case Sw64MCExpr::MEK_GPDISP_LO16: ++ case Sw64MCExpr::MEK_GPDISP: ++ O << "0"; ++ return; ++ } ++ } ++ } ++ printOperand(MI, OpNo, O); ++} ++ ++void Sw64InstPrinter::printMemOperand(const MCInst *MI, int opNum, ++ raw_ostream &O) { ++ // Load/Store memory operands -- imm($reg) ++ ++ if (MI->getOperand(opNum).isImm() && MI->getOperand(opNum + 1).isReg()) { ++ printOperand(MI, opNum, O); ++ O << "("; ++ printOperand(MI, opNum + 1, O); ++ O << ")"; ++ } else { ++ printOperand(MI, opNum + 1, O); ++ O << "("; ++ printOperand(MI, opNum, O); ++ O << ")"; ++ } ++} ++ ++template ++void Sw64InstPrinter::printUImm(const MCInst *MI, int opNum, raw_ostream &O) { ++ const MCOperand &MO = MI->getOperand(opNum); ++ if (MO.isImm()) { ++ uint64_t Imm = MO.getImm(); ++ Imm -= Offset; ++ Imm &= (1 << Bits) - 1; ++ Imm += Offset; ++ if (MI->getOpcode() == Sw64::VLOGZZ) ++ O << format("%x", Imm); ++ else ++ O << formatImm(Imm); ++ return; ++ } ++ ++ printOperand(MI, opNum, O); ++} ++ ++// Only for Instruction VLOG ++void Sw64InstPrinter::printHexImm(const MCInst *MI, int opNum, raw_ostream &O) { ++ const MCOperand &MO = MI->getOperand(opNum); ++ if (MO.isImm()) { ++ uint64_t Imm = MO.getImm(); ++ O << format("%x", ((Imm >> 4) 
& 0xf)) << format("%x", (Imm & 0xf)); ++ return; ++ } ++ ++ printOperand(MI, opNum, O); ++} +diff --git a/llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.h b/llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.h +new file mode 100644 +index 000000000..8d721ac01 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.h +@@ -0,0 +1,57 @@ ++//== Sw64InstPrinter.h - Convert Sw64 MCInst to assembly syntax -*- C++ -*-=// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the declaration of the Sw64InstPrinter class, ++// which is used to print Sw64 MCInst to a .s file. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_SW64_INSTPRINTER_SW64INSTPRINTER_H ++#define LLVM_LIB_TARGET_SW64_INSTPRINTER_SW64INSTPRINTER_H ++ ++#include "llvm/ADT/StringRef.h" ++#include "llvm/MC/MCInstPrinter.h" ++ ++namespace llvm { ++ ++class Sw64InstPrinter : public MCInstPrinter { ++public: ++ Sw64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, ++ const MCRegisterInfo &MRI) ++ : MCInstPrinter(MAI, MII, MRI) {} ++ ++ // Autogenerated by tblgen. ++ std::pair getMnemonic(const MCInst *MI) override; ++ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); ++ static const char *getRegisterName(MCRegister Reg); ++ ++ void printRegName(raw_ostream &OS, MCRegister Reg) const override; ++ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, ++ const MCSubtargetInfo &STI, raw_ostream &OS) override; ++ void printMemoryArg(const MCInst *MI, unsigned OpNo, raw_ostream &O); ++ ++private: ++ void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O); ++ void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O); ++ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); ++ void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum, ++ raw_ostream &O) { ++ printOperand(MI, OpNum, O); ++ } ++ void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O); ++ ++ void printHexImm(const MCInst *MI, int opNum, raw_ostream &O); ++ ++ template ++ void printUImm(const MCInst *MI, int opNum, raw_ostream &O); ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_SW64_INSTPRINTER_SW64INSTPRINTER_H +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/Sw64/MCTargetDesc/CMakeLists.txt +new file mode 100644 +index 000000000..69169bf24 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/CMakeLists.txt +@@ -0,0 +1,22 @@ ++add_llvm_component_library(LLVMSw64Desc ++ Sw64ABIInfo.cpp ++ Sw64ABIFlagsSection.cpp ++ Sw64AsmBackend.cpp ++ Sw64ELFObjectWriter.cpp ++ Sw64ELFStreamer.cpp ++ Sw64MCAsmInfo.cpp ++ Sw64MCCodeEmitter.cpp ++ Sw64MCExpr.cpp ++ Sw64MCTargetDesc.cpp ++ Sw64OptionRecord.cpp ++ Sw64TargetStreamer.cpp ++ ++ LINK_COMPONENTS ++ MC ++ Sw64Info ++ Sw64AsmPrinter ++ Support ++ ++ ADD_TO_COMPONENT ++ Sw64 ++) +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.cpp +new file mode 100644 +index 000000000..0f714e724 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.cpp +@@ -0,0 +1,31 @@ ++//===- Sw64ABIFlagsSection.cpp - Sw64 ELF ABI Flags Section ---------------===// ++// ++// The LLVM Compiler Infrastructure ++// 
++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "MCTargetDesc/Sw64ABIFlagsSection.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/MC/MCStreamer.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/Sw64ABIFlags.h" ++ ++using namespace llvm; ++ ++uint8_t Sw64ABIFlagsSection::getFpABIValue() { ++ llvm_unreachable("unexpected fp abi value"); ++} ++ ++StringRef Sw64ABIFlagsSection::getFpABIString(FpABIKind Value) { ++ llvm_unreachable("unsupported fp abi value"); ++} ++namespace llvm { ++ ++MCStreamer &operator<<(MCStreamer &OS, Sw64ABIFlagsSection &ABIFlagsSection) { ++ return OS; ++} ++ ++} // end namespace llvm +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.h +new file mode 100644 +index 000000000..058c47f58 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.h +@@ -0,0 +1,127 @@ ++//===- Sw64ABIFlagsSection.h - Sw64 ELF ABI Flags Section -------*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ABIFLAGSSECTION_H ++#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ABIFLAGSSECTION_H ++ ++#include "llvm/ADT/StringRef.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/Sw64ABIFlags.h" ++#include ++ ++namespace llvm { ++ ++class MCStreamer; ++ ++struct Sw64ABIFlagsSection { ++ // Internal representation of the fp_abi related values used in .module. ++ enum class FpABIKind { ANY, XX, S32, S64, SOFT }; ++ ++ // Version of flags structure. ++ uint16_t Version = 0; ++ // The level of the ISA: 1-5, 32, 64. ++ uint8_t ISALevel = 0; ++ // The revision of ISA: 0 for SW64 V and below, 1-n otherwise. ++ uint8_t ISARevision = 0; ++ // The size of general purpose registers. ++ Sw64::AFL_REG GPRSize = Sw64::AFL_REG_NONE; ++ // The size of co-processor 1 registers. ++ Sw64::AFL_REG CPR1Size = Sw64::AFL_REG_NONE; ++ // The size of co-processor 2 registers. ++ Sw64::AFL_REG CPR2Size = Sw64::AFL_REG_NONE; ++ // Processor-specific extension. ++ Sw64::AFL_EXT ISAExtension = Sw64::AFL_EXT_NONE; ++ // Mask of ASEs used. ++ uint32_t ASESet = 0; ++ ++ bool OddSPReg = false; ++ ++protected: ++ // The floating-point ABI. 
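++  // Defaults to ANY and is refined by setFpAbiFromPredicates() below.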
++ FpABIKind FpABI = FpABIKind::ANY; ++ ++public: ++ Sw64ABIFlagsSection() = default; ++ ++ uint16_t getVersionValue() { return (uint16_t)Version; } ++ uint8_t getISALevelValue() { return (uint8_t)ISALevel; } ++ uint8_t getISARevisionValue() { return (uint8_t)ISARevision; } ++ uint8_t getGPRSizeValue() { return (uint8_t)GPRSize; } ++ uint8_t getCPR1SizeValue(); ++ uint8_t getCPR2SizeValue() { return (uint8_t)CPR2Size; } ++ uint8_t getFpABIValue(); ++ uint32_t getISAExtensionValue() { return (uint32_t)ISAExtension; } ++ uint32_t getASESetValue() { return (uint32_t)ASESet; } ++ ++ uint32_t getFlags1Value() { ++ uint32_t Value = 0; ++ ++ if (OddSPReg) ++ Value |= (uint32_t)Sw64::AFL_FLAGS1_ODDSPREG; ++ ++ return Value; ++ } ++ ++ uint32_t getFlags2Value() { return 0; } ++ ++ FpABIKind getFpABI() { return FpABI; } ++ void setFpABI(FpABIKind Value) { ++ FpABI = Value; ++ } ++ ++ StringRef getFpABIString(FpABIKind Value); ++ ++ template ++ void setGPRSizeFromPredicates(const PredicateLibrary &P) { ++ GPRSize = P.isGP64bit() ? Sw64::AFL_REG_64 : Sw64::AFL_REG_32; ++ } ++ ++ template ++ void setCPR1SizeFromPredicates(const PredicateLibrary &P) { ++ if (P.useSoftFloat()) ++ CPR1Size = Sw64::AFL_REG_NONE; ++ else if (P.hasMSA()) ++ CPR1Size = Sw64::AFL_REG_128; ++ else ++ CPR1Size = P.isFP64bit() ? Sw64::AFL_REG_64 : Sw64::AFL_REG_32; ++ } ++ ++ template ++ void setISAExtensionFromPredicates(const PredicateLibrary &P) { ++ if (P.hasCnSw64()) ++ ISAExtension = Sw64::AFL_EXT_OCTEON; ++ else ++ ISAExtension = Sw64::AFL_EXT_NONE; ++ } ++ ++ template ++ void setFpAbiFromPredicates(const PredicateLibrary &P) { ++ FpABI = FpABIKind::ANY; ++ if (P.useSoftFloat()) ++ FpABI = FpABIKind::SOFT; ++ ++ if (P.isABI_S64()) ++ FpABI = FpABIKind::S64; ++ } ++ ++ template ++ void setAllFromPredicates(const PredicateLibrary &P) { ++ setGPRSizeFromPredicates(P); ++ setCPR1SizeFromPredicates(P); ++ setISAExtensionFromPredicates(P); ++ setFpAbiFromPredicates(P); ++ OddSPReg = P.useOddSPReg(); ++ } ++}; ++ ++MCStreamer &operator<<(MCStreamer &OS, Sw64ABIFlagsSection &ABIFlagsSection); ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ABIFLAGSSECTION_H +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.cpp +new file mode 100644 +index 000000000..ea5b1f585 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.cpp +@@ -0,0 +1,29 @@ ++//===---- Sw64ABIInfo.cpp - Information about SW64 ABI's ------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "Sw64ABIInfo.h" ++#include "Sw64RegisterInfo.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/MC/MCTargetOptions.h" ++ ++using namespace llvm; ++Sw64ABIInfo Sw64ABIInfo::computeTargetABI(const Triple &TT, StringRef CPU, ++ const MCTargetOptions &Options) { ++ if (Options.getABIName().startswith("n64")) ++ return Sw64ABIInfo::S64(); ++ ++ assert(Options.getABIName().empty() && "Unknown ABI option for SW64"); ++ ++ if (TT.isSw64()) ++ return Sw64ABIInfo::S64(); ++ else ++ assert(!TT.isSw64() && "sw_64 ABI is not appoint 64 bit."); ++ return Sw64ABIInfo::S64(); ++} +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.h +new file mode 100644 +index 000000000..ae758ca8d +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.h +@@ -0,0 +1,77 @@ ++//===---- Sw64ABIInfo.h - Information about SW64 ABI's --------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ABIINFO_H ++#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ABIINFO_H ++ ++#include "llvm/IR/CallingConv.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/TargetParser/Triple.h" ++ ++namespace llvm { ++ ++template class ArrayRef; ++class MCTargetOptions; ++class StringRef; ++class TargetRegisterClass; ++ ++class Sw64ABIInfo { ++public: ++ enum class ABI { Unknown, S64 }; ++ ++protected: ++ ABI ThisABI; ++ ++public: ++ Sw64ABIInfo(ABI ThisABI) : ThisABI(ThisABI) {} ++ ++ static Sw64ABIInfo Unknown() { return Sw64ABIInfo(ABI::Unknown); } ++ static Sw64ABIInfo S64() { return Sw64ABIInfo(ABI::S64); } ++ static Sw64ABIInfo computeTargetABI(const Triple &TT, StringRef CPU, ++ const MCTargetOptions &Options); ++ ++ bool IsKnown() const { return ThisABI != ABI::Unknown; } ++ bool IsS64() const { return ThisABI == ABI::S64; } ++ ABI GetEnumValue() const { return ThisABI; } ++ ++ /// The registers to use for byval arguments. ++ ArrayRef GetByValArgRegs() const; ++ ++ /// The registers to use for the variable argument list. ++ ArrayRef GetVarArgRegs() const; ++ ++ /// Obtain the size of the area allocated by the callee for arguments. ++ /// CallingConv::FastCall affects the value for S32. ++ unsigned GetCalleeAllocdArgSizeInBytes(CallingConv::ID CC) const; ++ ++ /// Ordering of ABI's ++ /// Sw64GenSubtargetInfo.inc will use this to resolve conflicts when given ++ /// multiple ABI options. 
++ bool operator<(const Sw64ABIInfo Other) const { ++ return ThisABI < Other.GetEnumValue(); ++ } ++ ++ unsigned GetStackPtr() const; ++ unsigned GetFramePtr() const; ++ unsigned GetBasePtr() const; ++ unsigned GetGlobalPtr() const; ++ unsigned GetNullPtr() const; ++ unsigned GetZeroReg() const; ++ unsigned GetPtrAdduOp() const; ++ unsigned GetPtrAddiuOp() const; ++ unsigned GetPtrSubuOp() const; ++ unsigned GetPtrAndOp() const; ++ unsigned GetGPRMoveOp() const; ++ inline bool ArePtrs64bit() const { return IsS64(); } ++ inline bool AreGprs64bit() const { return IsS64(); } ++ ++ unsigned GetEhDataReg(unsigned I) const; ++}; ++} // namespace llvm ++#endif +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.cpp +new file mode 100644 +index 000000000..a94a307bc +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.cpp +@@ -0,0 +1,317 @@ ++//===-- Sw64AsmBackend.cpp - Sw64 Asm Backend ----------------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements the Sw64AsmBackend class. ++// ++//===----------------------------------------------------------------------===// ++// ++ ++#include "MCTargetDesc/Sw64AsmBackend.h" ++#include "MCTargetDesc/Sw64ABIInfo.h" ++#include "MCTargetDesc/Sw64FixupKinds.h" ++#include "MCTargetDesc/Sw64MCExpr.h" ++#include "MCTargetDesc/Sw64MCTargetDesc.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/MC/MCAsmBackend.h" ++#include "llvm/MC/MCAssembler.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCDirectives.h" ++#include "llvm/MC/MCELFObjectWriter.h" ++#include "llvm/MC/MCFixupKindInfo.h" ++#include "llvm/MC/MCObjectWriter.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/MC/MCTargetOptions.h" ++#include "llvm/MC/MCValue.h" ++#include "llvm/Support/EndianStream.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/Format.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/raw_ostream.h" ++ ++using namespace llvm; ++ ++// Prepare value for the target space for it ++static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, ++ MCContext &Ctx) { ++ ++ unsigned Kind = Fixup.getKind(); ++ switch (Kind) { ++ default: ++ return 0; ++ case Sw64::fixup_SW64_32: ++ case Sw64::fixup_SW64_64: ++ case FK_Data_4: ++ case FK_Data_8: ++ case Sw64::fixup_SW64_GPREL32: ++ case Sw64::fixup_SW64_LITUSE: ++ case Sw64::fixup_SW64_GPREL_HI16: ++ case Sw64::fixup_SW64_GPREL_LO16: ++ case Sw64::fixup_SW64_GPREL16: ++ case Sw64::fixup_SW64_TLSGD: ++ case Sw64::fixup_SW64_TLSLDM: ++ case Sw64::fixup_SW64_DTPMOD64: ++ case Sw64::fixup_SW64_GOTDTPREL16: ++ case Sw64::fixup_SW64_DTPREL64: ++ case Sw64::fixup_SW64_DTPREL_HI16: ++ case Sw64::fixup_SW64_DTPREL_LO16: ++ case Sw64::fixup_SW64_DTPREL16: ++ case Sw64::fixup_SW64_GOTTPREL16: ++ case Sw64::fixup_SW64_TPREL64: ++ case Sw64::fixup_SW64_TPREL_HI16: ++ case Sw64::fixup_SW64_TPREL_LO16: ++ case Sw64::fixup_SW64_TPREL16: ++ break; ++ case Sw64::fixup_SW64_23_PCREL_S2: ++ // So far we are only using this type for branches. ++ // For branches we start 1 instruction after the branch ++ // so the displacement will be one instruction size less. ++ Value -= 4; ++ // The displacement is then divided by 4 to give us an 18 bit ++ // address range. 
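++    // Net effect, assuming Value arrives as the byte offset from the branch
++    // instruction itself: encoded = (Value - 4) >> 2, the word displacement
++    // from the instruction that follows the branch.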
++ Value >>= 2; ++ break; ++ case Sw64::fixup_SW64_BRSGP: ++ // So far we are only using this type for jumps. ++ // The displacement is then divided by 4 to give us an 28 bit ++ // address range. ++ Value >>= 2; ++ break; ++ case Sw64::fixup_SW64_ELF_LITERAL: ++ Value &= 0xffff; ++ break; ++ case Sw64::fixup_SW64_ELF_LITERAL_GOT: ++ Value = ((Value + 0x8000) >> 16) & 0xffff; ++ break; ++ } ++ return Value; ++} ++ ++std::unique_ptr ++Sw64AsmBackend::createObjectTargetWriter() const { ++ return createSw64ELFObjectWriter(TheTriple, IsS32); ++} ++ ++/// ApplyFixup - Apply the \p Value for given \p Fixup into the provided ++/// data fragment, at the offset specified by the fixup and following the ++/// fixup kind as appropriate. ++void Sw64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, ++ const MCValue &Target, ++ MutableArrayRef Data, uint64_t Value, ++ bool IsResolved, ++ const MCSubtargetInfo *STI) const { ++ MCFixupKind Kind = Fixup.getKind(); ++ MCContext &Ctx = Asm.getContext(); ++ Value = adjustFixupValue(Fixup, Value, Ctx); ++ ++ if (!Value) ++ return; // Doesn't change encoding. ++ ++ // Where do we start in the object ++ unsigned Offset = Fixup.getOffset(); ++ // Number of bytes we need to fixup ++ unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8; ++ // Used to point to big endian bytes ++ unsigned FullSize; ++ ++ switch ((unsigned)Kind) { ++ case Sw64::fixup_SW64_32: ++ FullSize = 4; ++ break; ++ case Sw64::fixup_SW64_64: ++ FullSize = 8; ++ break; ++ default: ++ FullSize = 4; ++ break; ++ } ++ ++ // Grab current value, if any, from bits. ++ uint64_t CurVal = 0; ++ ++ for (unsigned i = 0; i != NumBytes; ++i) { ++ unsigned Idx = Endian == support::little ? i : (FullSize - 1 - i); ++ CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i * 8); ++ } ++ ++ uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize)); ++ CurVal |= Value & Mask; ++ ++ // Write out the fixed up bytes back to the code/data bits. ++ for (unsigned i = 0; i != NumBytes; ++i) { ++ unsigned Idx = Endian == support::little ? 
i : (FullSize - 1 - i); ++ Data[Offset + Idx] = (uint8_t)((CurVal >> (i * 8)) & 0xff); ++ } ++} ++ ++std::optional Sw64AsmBackend::getFixupKind(StringRef Name) const { ++ return StringSwitch>(Name) ++ .Case("R_SW_64_REFLONG", (MCFixupKind)Sw64::fixup_SW64_32) ++ .Case("R_SW_64_REFQUAD", (MCFixupKind)Sw64::fixup_SW64_64) ++ .Case("R_SW_64_REFQUAD", (MCFixupKind)Sw64::fixup_SW64_CTOR) ++ .Case("R_SW_64_GPREL32", (MCFixupKind)Sw64::fixup_SW64_GPREL32) ++ .Case("R_SW_64_LITERAL", (MCFixupKind)Sw64::fixup_SW64_ELF_LITERAL) ++ .Case("R_SW_64_LITUSE", (MCFixupKind)Sw64::fixup_SW64_LITUSE) ++ .Case("R_SW_64_GPDISP", (MCFixupKind)Sw64::fixup_SW64_GPDISP) ++ .Case("R_SW_64_BRADDR", (MCFixupKind)Sw64::fixup_SW64_23_PCREL_S2) ++ .Case("R_SW_64_HINT", (MCFixupKind)Sw64::fixup_SW64_HINT) ++ .Case("R_SW_64_SREL16", (MCFixupKind)Sw64::fixup_SW64_16_PCREL) ++ .Case("R_SW_64_SREL32", (MCFixupKind)Sw64::fixup_SW64_32_PCREL) ++ .Case("R_SW_64_SREL64", (MCFixupKind)Sw64::fixup_SW64_64_PCREL) ++ .Case("R_SW_64_GPRELHIGH", (MCFixupKind)Sw64::fixup_SW64_GPREL_HI16) ++ .Case("R_SW_64_GPRELLOW", (MCFixupKind)Sw64::fixup_SW64_GPREL_LO16) ++ .Case("R_SW_64_GPREL16", (MCFixupKind)Sw64::fixup_SW64_GPREL16) ++ .Case("R_SW_64_BRSGP", (MCFixupKind)Sw64::fixup_SW64_BRSGP) ++ .Case("R_SW_64_TLSGD", (MCFixupKind)Sw64::fixup_SW64_TLSGD) ++ .Case("R_SW_64_TLSLDM", (MCFixupKind)Sw64::fixup_SW64_TLSLDM) ++ .Case("R_SW_64_DTPMOD64", (MCFixupKind)Sw64::fixup_SW64_DTPMOD64) ++ .Case("R_SW_64_GOTDTPREL", (MCFixupKind)Sw64::fixup_SW64_GOTDTPREL16) ++ .Case("R_SW_64_DTPREL64", (MCFixupKind)Sw64::fixup_SW64_DTPREL64) ++ .Case("R_SW_64_DTPRELHI", (MCFixupKind)Sw64::fixup_SW64_DTPREL_HI16) ++ .Case("R_SW_64_DTPRELLO", (MCFixupKind)Sw64::fixup_SW64_DTPREL_LO16) ++ .Case("R_SW_64_DTPREL16", (MCFixupKind)Sw64::fixup_SW64_DTPREL16) ++ .Case("R_SW_64_GOTTPREL", (MCFixupKind)Sw64::fixup_SW64_GOTTPREL16) ++ .Case("R_SW_64_TPREL64", (MCFixupKind)Sw64::fixup_SW64_TPREL64) ++ .Case("R_SW_64_TPRELHI", (MCFixupKind)Sw64::fixup_SW64_TPREL_HI16) ++ .Case("R_SW_64_TPRELLO", (MCFixupKind)Sw64::fixup_SW64_TPREL_LO16) ++ .Case("R_SW_64_TPREL16", (MCFixupKind)Sw64::fixup_SW64_TPREL16) ++ .Case("R_SW_64_LITERAL_GOT", ++ (MCFixupKind)Sw64::fixup_SW64_ELF_LITERAL_GOT) ++ .Default(MCAsmBackend::getFixupKind(Name)); ++} ++ ++const MCFixupKindInfo & ++Sw64AsmBackend::getFixupKindInfo(MCFixupKind Kind) const { ++ const static MCFixupKindInfo LittleEndianInfos[] = { ++ // This table *must* be in same the order of fixup_* kinds in ++ // Sw64FixupKinds.h. 
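++      // (The static_assert following the table checks that its size matches
++      // Sw64::NumTargetFixupKinds.)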
++ // name offset bits flags ++ {"fixup_SW64_NONE", 0, 0, 0}, ++ {"fixup_SW64_32", 0, 32, 0}, ++ {"fixup_SW64_64", 0, 64, 0}, ++ {"fixup_SW64_CTOR", 0, 64, 0}, ++ {"fixup_SW64_GPREL32", 0, 32, 0}, ++ {"fixup_SW64_ELF_LITERAL", 0, 16, 0}, ++ {"fixup_SW64_LITUSE", 0, 32, 0}, ++ {"fixup_SW64_GPDISP", 0, 16, 0}, ++ {"fixup_SW64_GPDISP_HI16", 0, 16, 0}, ++ {"fixup_SW64_GPDISP_LO16", 0, 16, 0}, ++ {"fixup_SW64_23_PCREL_S2", 0, 21, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_SW64_HINT", 0, 14, 0}, ++ {"fixup_SW64_16_PCREL", 0, 16, 0}, ++ {"fixup_SW64_32_PCREL", 0, 32, 0}, ++ {"fixup_SW64_64_PCREL", 0, 64, 0}, ++ {"fixup_SW64_GPREL_HI16", 0, 16, 0}, ++ {"fixup_SW64_GPREL_LO16", 0, 16, 0}, ++ {"fixup_SW64_GPREL16", 0, 16, 0}, ++ {"fixup_SW64_BRSGP", 0, 21, 0}, ++ {"fixup_SW64_TLSGD", 0, 16, 0}, ++ {"fixup_SW64_TLSLDM", 0, 16, 0}, ++ {"fixup_SW64_DTPMOD64", 0, 64, 0}, ++ {"fixup_SW64_GOTDTPREL16", 0, 16, 0}, ++ {"fixup_SW64_DTPREL64", 0, 64, 0}, ++ {"fixup_SW64_DTPREL_HI16", 0, 16, 0}, ++ {"fixup_SW64_DTPREL_LO16", 0, 16, 0}, ++ {"fixup_SW64_DTPREL16", 0, 16, 0}, ++ {"fixup_SW64_GOTTPREL16", 0, 16, 0}, ++ {"fixup_SW64_TPREL64", 0, 64, 0}, ++ {"fixup_SW64_TPREL_HI16", 0, 16, 0}, ++ {"fixup_SW64_TPREL_LO16", 0, 16, 0}, ++ {"fixup_SW64_TPREL16", 0, 16, 0}, ++ {"fixup_SW64_ELF_LITERAL_GOT", 0, 16, 0}, ++ {"fixup_SW64_LITERAL_BASE", 0, 16, 0}, ++ {"fixup_SW64_LITUSE_JSRDIRECT", 0, 16, 0}}; ++ ++ static_assert(std::size(LittleEndianInfos) == Sw64::NumTargetFixupKinds, ++ "Not all SW64 little endian fixup kinds added!"); ++ ++ if (Kind < FirstTargetFixupKind) ++ return MCAsmBackend::getFixupKindInfo(Kind); ++ ++ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && ++ "Invalid kind!"); ++ ++ if (Endian == support::little) ++ return LittleEndianInfos[Kind - FirstTargetFixupKind]; ++ else ++ llvm_unreachable("sw_64 is not appoint litter endian."); ++} ++ ++/// WriteNopData - Write an (optimal) nop sequence of Count bytes ++/// to the given output. If the target cannot generate such a sequence, ++/// it should return an error. ++/// ++/// \return - True on success. ++bool Sw64AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, ++ const MCSubtargetInfo *STI) const { ++ // If the count is not 4-byte aligned, we must be writing data into the text ++ // section (otherwise we have unaligned instructions, and thus have far ++ // bigger problems), so just write zeros instead. ++ OS.write_zeros(Count % 4); ++ ++ // We are properly aligned, so write NOPs as requested. ++ Count /= 4; ++ for (uint64_t i = 0; i != Count; ++i) ++ support::endian::write(OS, 0x43ff075f, support::little); ++ return true; ++} ++ ++bool Sw64AsmBackend::shouldForceRelocation(const MCAssembler &Asm, ++ const MCFixup &Fixup, ++ const MCValue &Target) { ++ const unsigned FixupKind = Fixup.getKind(); ++ switch (FixupKind) { ++ default: ++ return false; ++ // All these relocations require special processing ++ // at linking time. Delegate this work to a linker. 
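++  // The PC-relative branch fixup at the end is left for the assembler to
++  // resolve when it can, so it is not forced.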
++ case Sw64::fixup_SW64_32: ++ case Sw64::fixup_SW64_64: ++ case Sw64::fixup_SW64_CTOR: ++ case Sw64::fixup_SW64_GPREL32: ++ case Sw64::fixup_SW64_ELF_LITERAL: ++ case Sw64::fixup_SW64_LITUSE: ++ case Sw64::fixup_SW64_GPDISP: ++ case Sw64::fixup_SW64_GPDISP_HI16: ++ case Sw64::fixup_SW64_HINT: ++ case Sw64::fixup_SW64_16_PCREL: ++ case Sw64::fixup_SW64_32_PCREL: ++ case Sw64::fixup_SW64_64_PCREL: ++ case Sw64::fixup_SW64_GPREL_HI16: ++ case Sw64::fixup_SW64_GPREL_LO16: ++ case Sw64::fixup_SW64_GPREL16: ++ case Sw64::fixup_SW64_BRSGP: ++ case Sw64::fixup_SW64_TLSGD: ++ case Sw64::fixup_SW64_TLSLDM: ++ case Sw64::fixup_SW64_DTPMOD64: ++ case Sw64::fixup_SW64_GOTDTPREL16: ++ case Sw64::fixup_SW64_DTPREL64: ++ case Sw64::fixup_SW64_DTPREL_HI16: ++ case Sw64::fixup_SW64_DTPREL_LO16: ++ case Sw64::fixup_SW64_DTPREL16: ++ case Sw64::fixup_SW64_GOTTPREL16: ++ case Sw64::fixup_SW64_TPREL64: ++ case Sw64::fixup_SW64_TPREL_HI16: ++ case Sw64::fixup_SW64_TPREL_LO16: ++ case Sw64::fixup_SW64_TPREL16: ++ case Sw64::fixup_SW64_ELF_LITERAL_GOT: ++ return true; ++ case Sw64::fixup_SW64_23_PCREL_S2: ++ return false; ++ } ++} ++ ++MCAsmBackend *llvm::createSw64AsmBackend(const Target &T, ++ const MCSubtargetInfo &STI, ++ const MCRegisterInfo &MRI, ++ const MCTargetOptions &Options) { ++ Sw64ABIInfo ABI = Sw64ABIInfo::computeTargetABI(STI.getTargetTriple(), ++ STI.getCPU(), Options); ++ return new Sw64AsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(), ++ ABI.IsS64()); ++} +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.h +new file mode 100644 +index 000000000..3f8bb0cf3 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.h +@@ -0,0 +1,96 @@ ++//===-- Sw64AsmBackend.h - Sw64 Asm Backend ------------------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the Sw64AsmBackend class. ++// ++//===----------------------------------------------------------------------===// ++// ++ ++#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ASMBACKEND_H ++#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ASMBACKEND_H ++ ++#include "MCTargetDesc/Sw64FixupKinds.h" ++#include "llvm/MC/MCAsmBackend.h" ++#include "llvm/TargetParser/Triple.h" ++ ++namespace llvm { ++ ++class MCAssembler; ++struct MCFixupKindInfo; ++class MCObjectWriter; ++class MCRegisterInfo; ++class MCSymbolELF; ++class Target; ++ ++class Sw64AsmBackend : public MCAsmBackend { ++ Triple TheTriple; ++ bool IsS32; ++ ++public: ++ Sw64AsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, ++ StringRef CPU, bool S32) ++ : MCAsmBackend(support::little), TheTriple(TT), IsS32(S32) {} ++ ++ std::unique_ptr ++ createObjectTargetWriter() const override; ++ ++ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, ++ const MCValue &Target, MutableArrayRef Data, ++ uint64_t Value, bool IsResolved, ++ const MCSubtargetInfo *STI) const override; ++ ++ std::optional getFixupKind(StringRef Name) const override; ++ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; ++ ++ unsigned getNumFixupKinds() const override { ++ return Sw64::NumTargetFixupKinds; ++ } ++ ++ /// @name Target Relaxation Interfaces ++ /// @{ ++ ++ /// MayNeedRelaxation - Check whether the given instruction may need ++ /// relaxation. 
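++  /// Sw64 instructions use fixed 32-bit encodings, so this backend never
++  /// requests relaxation and always returns false.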
++ /// ++ /// \param Inst - The instruction to test. ++ bool mayNeedRelaxation(const MCInst &Inst, ++ const MCSubtargetInfo &STI) const override { ++ return false; ++ } ++ ++ /// fixupNeedsRelaxation - Target specific predicate for whether a given ++ /// fixup requires the associated instruction to be relaxed. ++ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, ++ const MCRelaxableFragment *DF, ++ const MCAsmLayout &Layout) const override { ++ // FIXME. ++ llvm_unreachable("RelaxInstruction() unimplemented"); ++ return false; ++ } ++ ++ /// RelaxInstruction - Relax the instruction in the given fragment ++ /// to the next wider instruction. ++ /// ++ /// \param Inst - The instruction to relax, which may be the same ++ /// as the output. ++ /// \param [out] Res On return, the relaxed instruction. ++ ++ /// @} ++ ++ bool writeNopData(raw_ostream &OS, uint64_t Count, ++ const MCSubtargetInfo *STI) const override; ++ ++ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, ++ const MCValue &Target) override; ++ ++}; // class Sw64AsmBackend ++ ++} // namespace llvm ++ ++#endif +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64BaseInfo.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64BaseInfo.h +new file mode 100644 +index 000000000..822d04381 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64BaseInfo.h +@@ -0,0 +1,146 @@ ++//===-- Sw64BaseInfo.h - Top level definitions for SW64 MC ------*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains small standalone helper functions and enum definitions for ++// the Sw64 target useful for the compiler back-end and the MC libraries. ++// ++//===----------------------------------------------------------------------===// ++#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64BASEINFO_H ++#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64BASEINFO_H ++ ++#include "Sw64FixupKinds.h" ++#include "Sw64MCTargetDesc.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/Support/DataTypes.h" ++#include "llvm/Support/ErrorHandling.h" ++ ++namespace llvm { ++ ++/// Sw64II - This namespace holds all of the target specific flags that ++/// instruction info tracks. ++/// ++namespace Sw64II { ++/// Target Operand Flag enum. ++enum TOF { ++ //===------------------------------------------------------------------===// ++ // Sw64 Specific MachineOperand flags. ++ MO_NO_FLAG, ++ ++ /// MO_GOT - Represents the offset into the global offset table at which ++ /// the address the relocation entry symbol resides during execution. ++ MO_LITERAL, // LITERAL ++ MO_GPDISP, ++ MO_GPDISP_HI, ++ MO_GPDISP_LO, ++ ++ /// MO_GOT_CALL - Represents the offset into the global offset table at ++ /// which the address of a call site relocation entry symbol resides ++ /// during execution. This is different from the above since this flag ++ /// can only be present in call instructions. ++ MO_GOT_CALL, ++ ++ /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol ++ /// address. ++ MO_ABS_HI, ++ MO_ABS_LO, ++ MO_ABS_HILO, ++ MO_ABS_LI, ++ ++ /// MO_GPREL - Represents the offset from the current gp value to be used ++ /// for the relocatable object file being produced. 
++ MO_GPREL_HI, ++ MO_GPREL_LO, ++ ++ /// MO_TLSGD - Represents the offset into the global offset table at which ++ // the module ID and TSL block offset reside during execution (General ++ // Dynamic TLS). ++ MO_TLSGD, ++ ++ /// MO_TLSLDM - Represents the offset into the global offset table at which ++ // the module ID and TSL block offset reside during execution (Local ++ // Dynamic TLS). ++ MO_TLSLDM, ++ MO_DTPREL_HI, ++ MO_DTPREL_LO, ++ ++ /// MO_GOTTPREL - Represents the offset from the thread pointer (Initial ++ // Exec TLS). ++ MO_GOTTPREL, ++ ++ /// MO_TPREL_HI/LO - Represents the hi and low part of the offset from ++ // the thread pointer (Local Exec TLS). ++ MO_TPREL_HI, ++ MO_TPREL_LO, ++ ++ // S32/64 Flags. ++ MO_GPOFF_HI, ++ MO_GPOFF_LO, ++ MO_GOT_DISP, ++ MO_GOT_PAGE, ++ MO_GOT_OFST, ++ ++ /// MO_HIGHER/HIGHEST - Represents the highest or higher half word of a ++ /// 64-bit symbol address. ++ MO_HIGHER, ++ MO_HIGHEST, ++ ++ /// MO_GOT_HI16/LO16, MO_CALL_HI16/LO16 - Relocations used for large GOTs. ++ MO_GOT_HI16, ++ MO_GOT_LO16, ++ MO_CALL_HI16, ++ MO_CALL_LO16, ++ ++ /// Helper operand used to generate R_SW64_JALR ++ MO_JALR, ++ ++ // LITERAL_GOT ++ MO_LITERAL_GOT, ++ ++ MO_HINT, ++ MO_LITERAL_BASE = 0x40, ++ MO_LITUSE = 0x80 // LITERAL ++}; ++ ++enum { ++ //===------------------------------------------------------------------===// ++ // Instruction encodings. These are the standard/most common forms for ++ // Sw64 instructions. ++ // ++ ++ // Pseudo - This represents an instruction that is a pseudo instruction ++ // or one that has not been implemented yet. It is illegal to code generate ++ // it, but tolerated for intermediate implementation stages. ++ Pseudo = 0, ++ ++ /// FrmR - This form is for instructions of the format R. ++ FrmR = 1, ++ /// FrmI - This form is for instructions of the format I. ++ FrmI = 2, ++ /// FrmJ - This form is for instructions of the format J. ++ FrmJ = 3, ++ /// FrmFR - This form is for instructions of the format FR. ++ FrmFR = 4, ++ /// FrmFI - This form is for instructions of the format FI. ++ FrmFI = 5, ++ /// FrmOther - This form is for instructions that have no specific format. ++ FrmOther = 6, ++ FormMask = 15, ++ /// IsCTI - Instruction is a Control Transfer Instruction. ++ IsCTI = 1 << 4, ++ /// HasForbiddenSlot - Instruction has a forbidden slot. ++ HasForbiddenSlot = 1 << 5, ++ /// IsPCRelativeLoad - A Load instruction with implicit source register ++ /// ($pc) with explicit offset and destination register ++ IsPCRelativeLoad = 1 << 6, ++ /// HasFCCRegOperand - Instruction uses an $fcc register. ++ HasFCCRegOperand = 1 << 7 ++}; ++} // namespace Sw64II ++} // namespace llvm ++#endif +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFObjectWriter.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFObjectWriter.cpp +new file mode 100644 +index 000000000..3db5c0ab4 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFObjectWriter.cpp +@@ -0,0 +1,463 @@ ++//===-- Sw64ELFObjectWriter.cpp - Sw64 ELF Writer -------------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "MCTargetDesc/Sw64FixupKinds.h" ++#include "MCTargetDesc/Sw64MCTargetDesc.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/MC/MCAsmBackend.h" ++#include "llvm/MC/MCAsmInfo.h" ++#include "llvm/MC/MCAsmLayout.h" ++#include "llvm/MC/MCAssembler.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCELFObjectWriter.h" ++#include "llvm/MC/MCFixup.h" ++#include "llvm/MC/MCObjectWriter.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/Compiler.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define DEBUG_TYPE "sw_64-elf-object-writer" ++ ++using namespace llvm; ++ ++namespace { ++ ++// Holds additional information needed by the relocation ordering algorithm. ++struct Sw64RelocationEntry { ++ const ELFRelocationEntry R; // < The relocation. ++ bool Matched = false; // < Is this relocation part of a match. ++ ++ Sw64RelocationEntry(const ELFRelocationEntry &R) : R(R) {} ++ ++ void print(raw_ostream &Out) const { ++ R.print(Out); ++ Out << ", Matched=" << Matched; ++ } ++}; ++ ++#ifndef NDEBUG ++raw_ostream &operator<<(raw_ostream &OS, const Sw64RelocationEntry &RHS) { ++ RHS.print(OS); ++ return OS; ++} ++#endif ++ ++class Sw64ELFObjectWriter : public MCELFObjectTargetWriter { ++public: ++ Sw64ELFObjectWriter(uint8_t OSABI, bool HasRelocationAddend, bool Is64); ++ ++ ~Sw64ELFObjectWriter() override = default; ++ ++ unsigned getRelocType(MCContext &Ctx, const MCValue &Target, ++ const MCFixup &Fixup, bool IsPCRel) const override; ++ bool needsRelocateWithSymbol(const MCSymbol &Sym, ++ unsigned Type) const override; ++ void sortRelocs(const MCAssembler &Asm, ++ std::vector &Relocs) override; ++}; ++ ++// The possible results of the Predicate function used by find_best. ++enum FindBestPredicateResult { ++ FindBest_NoMatch = 0, // < The current element is not a match. ++ FindBest_Match, // < The current element is a match but better ones are ++ // possible. ++ FindBest_PerfectMatch, // < The current element is an unbeatable match. ++}; ++ ++} // end anonymous namespace ++ ++// Copy elements in the range [First, Last) to d1 when the predicate is true or ++// d2 when the predicate is false. This is essentially both std::copy_if and ++// std::remove_copy_if combined into a single pass. ++template ++static std::pair copy_if_else(InputIt First, InputIt Last, ++ OutputIt1 d1, OutputIt2 d2, ++ UnaryPredicate Predicate) { ++ for (InputIt I = First; I != Last; ++I) { ++ if (Predicate(*I)) { ++ *d1 = *I; ++ d1++; ++ } else { ++ *d2 = *I; ++ d2++; ++ } ++ } ++ ++ return std::make_pair(d1, d2); ++} ++ ++// Find the best match in the range [First, Last). ++// ++// An element matches when Predicate(X) returns FindBest_Match or ++// FindBest_PerfectMatch. A value of FindBest_PerfectMatch also terminates ++// the search. BetterThan(A, B) is a comparator that returns true when A is a ++// better match than B. The return value is the position of the best match. ++// ++// This is similar to std::find_if but finds the best of multiple possible ++// matches. 
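++// In sortRelocs below it is used to pair each LITUSE relocation with the
++// LITERAL relocation it belongs to.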
++template ++static InputIt find_best(InputIt First, InputIt Last, ++ UnaryPredicate Predicate) { ++ InputIt Best = Last; ++ ++ for (InputIt I = First; I != Last; ++I) { ++ unsigned Matched = Predicate(*I); ++ if (Matched != FindBest_NoMatch) { ++ LLVM_DEBUG(dbgs() << std::distance(First, I) << " is a match ("; ++ I->print(dbgs()); dbgs() << ")\n"); ++ if (Best == Last) { ++ LLVM_DEBUG(dbgs() << ".. and it beats the last one\n"); ++ Best = I; ++ } ++ } ++ if (Matched == FindBest_PerfectMatch) { ++ LLVM_DEBUG(dbgs() << ".. and it is unbeatable\n"); ++ break; ++ } ++ } ++ ++ return Best; ++} ++ ++#ifndef NDEBUG ++// Print all the relocations. ++template ++static void dumpRelocs(const char *Prefix, const Container &Relocs) { ++ for (const auto &R : Relocs) { ++ dbgs() << Prefix; ++ R.print(dbgs()); ++ dbgs() << "\n"; ++ } ++} ++#endif ++ ++Sw64ELFObjectWriter::Sw64ELFObjectWriter(uint8_t OSABI, ++ bool HasRelocationAddend, bool Is64) ++ : MCELFObjectTargetWriter(Is64, OSABI, ELF::EM_SW64, HasRelocationAddend) {} ++ ++unsigned Sw64ELFObjectWriter::getRelocType(MCContext &Ctx, ++ const MCValue &Target, ++ const MCFixup &Fixup, ++ bool IsPCRel) const { ++ // Determine the type of the relocation. ++ unsigned Kind = (unsigned)Fixup.getKind(); ++ switch (Kind) { ++ case Sw64::fixup_SW64_NONE: ++ return ELF::R_SW_64_NONE; ++ case FK_Data_1: ++ case FK_Data_2: ++ Ctx.reportError(Fixup.getLoc(), ++ "SW64 does not support one byte relocations"); ++ return ELF::R_SW_64_NONE; ++ case FK_Data_4: ++ if (Fixup.getValue()->getKind() == MCExpr::Binary) ++ return ELF::R_SW_64_SREL32; // .cfi_startproc ++ else ++ return ELF::R_SW_64_REFLONG; // R_SW_64_32 ++ break; ++ case FK_Data_8: // .8byte ($.str) ++ if (IsPCRel) ++ return ELF::R_SW_64_SREL64; ++ else ++ return ELF::R_SW_64_REFQUAD; // R_SW_64_64 ++ break; ++ case Sw64::fixup_SW64_32: ++ return ELF::R_SW_64_REFLONG; ++ break; ++ case Sw64::fixup_SW64_64: ++ case Sw64::fixup_SW64_CTOR: ++ return ELF::R_SW_64_REFQUAD; ++ break; ++ case Sw64::fixup_SW64_GPREL32: ++ return ELF::R_SW_64_GPREL32; ++ break; ++ ++ case Sw64::fixup_SW64_ELF_LITERAL: ++ return ELF::R_SW_64_LITERAL; ++ break; ++ case Sw64::fixup_SW64_LITUSE: ++ return ELF::R_SW_64_LITUSE; ++ break; ++ case Sw64::fixup_SW64_LITERAL_BASE: ++ return ELF::R_SW_64_DUMMY_LITERAL; ++ break; ++ case Sw64::fixup_SW64_LITUSE_JSRDIRECT: ++ return ELF::R_SW_64_DUMMY_LITUSE; ++ break; ++ case Sw64::fixup_SW64_GPDISP: ++ return ELF::R_SW_64_GPDISP; ++ break; ++ case Sw64::fixup_SW64_GPDISP_HI16: ++ return ELF::R_SW_64_GPDISP; ++ break; ++ case Sw64::fixup_SW64_GPDISP_LO16: ++ return ELF::R_SW_64_GPDISP; ++ break; ++ case Sw64::fixup_SW64_23_PCREL_S2: ++ return ELF::R_SW_64_BRADDR; ++ break; ++ case Sw64::fixup_SW64_HINT: ++ return ELF::R_SW_64_HINT; ++ break; ++ case Sw64::fixup_SW64_16_PCREL: ++ return ELF::R_SW_64_SREL16; ++ break; ++ case Sw64::fixup_SW64_32_PCREL: ++ return ELF::R_SW_64_SREL32; ++ break; ++ case Sw64::fixup_SW64_64_PCREL: ++ return ELF::R_SW_64_SREL64; ++ break; ++ case Sw64::fixup_SW64_GPREL_HI16: ++ return ELF::R_SW_64_GPRELHIGH; ++ break; ++ case Sw64::fixup_SW64_GPREL_LO16: ++ return ELF::R_SW_64_GPRELLOW; ++ break; ++ case Sw64::fixup_SW64_GPREL16: ++ return ELF::R_SW_64_GPREL16; ++ break; ++ case Sw64::fixup_SW64_BRSGP: ++ return ELF::R_SW_64_BRSGP; ++ break; ++ case Sw64::fixup_SW64_TLSGD: ++ return ELF::R_SW_64_TLSGD; ++ break; ++ case Sw64::fixup_SW64_TLSLDM: ++ return ELF::R_SW_64_TLSLDM; ++ break; ++ case Sw64::fixup_SW64_DTPMOD64: ++ return ELF::R_SW_64_DTPMOD64; ++ break; ++ case 
Sw64::fixup_SW64_GOTDTPREL16: ++ return ELF::R_SW_64_GOTDTPREL; ++ break; ++ case Sw64::fixup_SW64_DTPREL64: ++ return ELF::R_SW_64_DTPREL64; ++ break; ++ case Sw64::fixup_SW64_DTPREL_HI16: ++ return ELF::R_SW_64_DTPRELHI; ++ break; ++ case Sw64::fixup_SW64_DTPREL_LO16: ++ return ELF::R_SW_64_DTPRELLO; ++ break; ++ case Sw64::fixup_SW64_DTPREL16: ++ return ELF::R_SW_64_DTPREL16; ++ break; ++ case Sw64::fixup_SW64_GOTTPREL16: ++ return ELF::R_SW_64_GOTTPREL; ++ break; ++ case Sw64::fixup_SW64_TPREL64: ++ return ELF::R_SW_64_TPREL64; ++ break; ++ case Sw64::fixup_SW64_TPREL_HI16: ++ return ELF::R_SW_64_TPRELHI; ++ break; ++ case Sw64::fixup_SW64_TPREL_LO16: ++ return ELF::R_SW_64_TPRELLO; ++ break; ++ case Sw64::fixup_SW64_TPREL16: ++ return ELF::R_SW_64_TPREL16; ++ break; ++ case Sw64::fixup_SW64_ELF_LITERAL_GOT: ++ return ELF::R_SW_64_LITERAL_GOT; ++ break; ++ } ++ llvm_unreachable("invalid fixup kind!"); ++} ++ ++// Determine whether a relocation (X) matches the one given in R. ++// ++// A relocation matches if: ++// - It's type matches that of a corresponding low part. This is provided in ++// MatchingType for efficiency. ++// - It's based on the same symbol. ++// - It's offset of greater or equal to that of the one given in R. ++// It should be noted that this rule assumes the programmer does not use ++// offsets that exceed the alignment of the symbol. The carry-bit will be ++// incorrect if this is not true. ++// ++// A matching relocation is unbeatable if: ++// - It is not already involved in a match. ++// - It's offset is exactly that of the one given in R. ++static FindBestPredicateResult isMatchingReloc(const Sw64RelocationEntry &X, ++ const ELFRelocationEntry &R, ++ unsigned MatchingType) { ++ if (X.R.Type == MatchingType && X.R.OriginalSymbol == R.OriginalSymbol) { ++ if (!X.Matched && X.R.OriginalAddend == R.OriginalAddend) ++ return FindBest_PerfectMatch; ++ } ++ return FindBest_NoMatch; ++} ++ ++// Rewrite Reloc Target And Type ++static ELFRelocationEntry RewriteTypeReloc(const ELFRelocationEntry R, ++ const MCSymbolELF *RenamedSymA) { ++ ELFRelocationEntry Entry = R; ++ switch (R.Type) { ++ default: ++ break; ++ case ELF::R_SW_64_DUMMY_LITUSE: ++ Entry.Type = ELF::R_SW_64_LITUSE; ++ Entry.Symbol = RenamedSymA; ++ Entry.Addend = 0x3; ++ break; ++ case ELF::R_SW_64_DUMMY_LITERAL: ++ Entry.Type = ELF::R_SW_64_LITERAL; ++ break; ++ case ELF::R_SW_64_GPDISP: ++ Entry.Symbol = RenamedSymA; ++ Entry.Addend = 0x4; ++ break; ++ } ++ return Entry; ++} ++ ++void Sw64ELFObjectWriter::sortRelocs(const MCAssembler &Asm, ++ std::vector &Relocs) { ++ if (Relocs.size() < 2) ++ return; ++ ++ MCContext &Ctx = Asm.getContext(); ++ std::list Sorted; ++ std::list Remainder; ++ std::list Orig; ++ const auto *RenamedSymA = cast(Ctx.getOrCreateSymbol(".text")); ++ ++ LLVM_DEBUG(dumpRelocs("R: ", Relocs)); ++ ++ // Sort relocations by the address they are applied to. ++ llvm::sort(Relocs, ++ [](const ELFRelocationEntry &A, const ELFRelocationEntry &B) { ++ return A.Offset < B.Offset; ++ }); ++ ++ // copy all reloc entry into remainder, except lituse. ++ // all lituse will be insert literal->next later. ++ copy_if_else(Relocs.begin(), Relocs.end(), std::back_inserter(Remainder), ++ std::back_inserter(Sorted), [](const ELFRelocationEntry &Reloc) { ++ return Reloc.Type == ELF::R_SW_64_DUMMY_LITUSE; ++ }); ++ ++ // Separate the movable relocations (AHL relocations using the high bits) from ++ // the immobile relocations (everything else). 
This does not preserve high/low ++ // matches that already existed in the input. ++ for (auto &R : Remainder) { ++ LLVM_DEBUG(dbgs() << "Matching: " << R << "\n"); ++ ++ auto InsertionPoint = find_best( ++ Sorted.begin(), Sorted.end(), [&R](const Sw64RelocationEntry &X) { ++ return isMatchingReloc(X, R, ELF::R_SW_64_DUMMY_LITERAL); ++ }); ++ ++ if (InsertionPoint != Sorted.end()) { ++ // if lit_use and literal correctly matched, ++ // InsertPoint is the reloc entry next to the literal ++ InsertionPoint->Matched = true; ++ InsertionPoint = std::next(InsertionPoint, 1); ++ } ++ Sorted.insert(InsertionPoint, R)->Matched = true; ++ } ++ assert(Relocs.size() == Sorted.size() && "Some relocs were not consumed"); ++ ++ // Overwrite the original vector with the sorted elements. The caller expects ++ // them in reverse order. ++ unsigned CopyTo = 0; ++ for (const auto &R : reverse(Sorted)) { ++ ELFRelocationEntry Entry = RewriteTypeReloc(R.R, RenamedSymA); ++ Relocs[CopyTo++] = Entry; ++ } ++} ++ ++bool Sw64ELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, ++ unsigned Type) const { ++ if (!isUInt<8>(Type)) ++ return needsRelocateWithSymbol(Sym, Type & 0xff) || ++ needsRelocateWithSymbol(Sym, (Type >> 8) & 0xff) || ++ needsRelocateWithSymbol(Sym, (Type >> 16) & 0xff); ++ ++ switch (Type) { ++ default: ++ errs() << Type << "\n"; ++ llvm_unreachable("Unexpected relocation"); ++ return true; ++ ++ // This relocation doesn't affect the section data. ++ case ELF::R_SW_64_NONE: ++ return false; ++ // On REL ABI's (e.g. S32), these relocations form pairs. The pairing is done ++ // by the static linker by matching the symbol and offset. ++ // We only see one relocation at a time but it's still safe to relocate with ++ // the section so long as both relocations make the same decision. ++ // ++ // Some older linkers may require the symbol for particular cases. Such cases ++ // are not supported yet but can be added as required. 
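++  //
++  // Note on the isUInt<8> check at the top of this function (the values here
++  // are illustrative only): a Type wider than 8 bits is treated as up to
++  // three packed relocation types, one per byte, roughly
++  //   Type = (T3 << 16) | (T2 << 8) | T1
++  // and the query is answered conservatively, keeping the symbol if any of
++  // the component relocations needs it.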
++ case ELF::R_SW_64_REFLONG: ++ case ELF::R_SW_64_REFQUAD: ++ case ELF::R_SW_64_GPREL32: ++ case ELF::R_SW_64_LITERAL: ++ case ELF::R_SW_64_DUMMY_LITERAL: ++ case ELF::R_SW_64_DUMMY_LITUSE: ++ case ELF::R_SW_64_LITUSE: ++ case ELF::R_SW_64_BRADDR: ++ case ELF::R_SW_64_HINT: ++ case ELF::R_SW_64_SREL16: ++ case ELF::R_SW_64_SREL32: ++ case ELF::R_SW_64_SREL64: ++ case ELF::R_SW_64_GPRELHIGH: ++ case ELF::R_SW_64_GPRELLOW: ++ case ELF::R_SW_64_GPREL16: ++ case ELF::R_SW_64_COPY: ++ case ELF::R_SW_64_GLOB_DAT: ++ case ELF::R_SW_64_JMP_SLOT: ++ case ELF::R_SW_64_RELATIVE: ++ case ELF::R_SW_64_BRSGP: ++ case ELF::R_SW_64_TLSGD: ++ case ELF::R_SW_64_TLSLDM: ++ case ELF::R_SW_64_DTPMOD64: ++ case ELF::R_SW_64_GOTDTPREL: ++ case ELF::R_SW_64_DTPREL64: ++ case ELF::R_SW_64_DTPRELHI: ++ case ELF::R_SW_64_DTPRELLO: ++ case ELF::R_SW_64_DTPREL16: ++ case ELF::R_SW_64_GOTTPREL: ++ case ELF::R_SW_64_TPREL64: ++ case ELF::R_SW_64_TPRELHI: ++ case ELF::R_SW_64_TPRELLO: ++ case ELF::R_SW_64_TPREL16: ++ case ELF::R_SW_64_NUM: ++ case ELF::R_SW_64_LITERAL_GOT: ++ case ELF::R_SW_64_PC32: ++ case ELF::R_SW_64_EH: ++ return false; ++ ++ case ELF::R_SW_64_GPDISP: ++ return true; ++ } ++} ++ ++std::unique_ptr ++llvm::createSw64ELFObjectWriter(const Triple &TT, bool IsS32) { ++ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); ++ bool IsS64 = true; ++ bool HasRelocationAddend = TT.isArch64Bit(); ++ return std::make_unique(OSABI, HasRelocationAddend, ++ IsS64); ++} +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.cpp +new file mode 100644 +index 000000000..2d5271da7 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.cpp +@@ -0,0 +1,108 @@ ++//===-------- Sw64ELFStreamer.cpp - ELF Object Output ---------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "Sw64ELFStreamer.h" ++#include "Sw64OptionRecord.h" ++#include "Sw64TargetStreamer.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/MC/MCAsmBackend.h" ++#include "llvm/MC/MCAssembler.h" ++#include "llvm/MC/MCCodeEmitter.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCDwarf.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCObjectWriter.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/Support/Casting.h" ++ ++using namespace llvm; ++ ++Sw64ELFStreamer::Sw64ELFStreamer(MCContext &Context, ++ std::unique_ptr MAB, ++ std::unique_ptr OW, ++ std::unique_ptr Emitter) ++ : MCELFStreamer(Context, std::move(MAB), std::move(OW), ++ std::move(Emitter)) { ++ RegInfoRecord = new Sw64RegInfoRecord(this, Context); ++ Sw64OptionRecords.push_back( ++ std::unique_ptr(RegInfoRecord)); ++} ++ ++void Sw64ELFStreamer::emitInstruction(const MCInst &Inst, ++ const MCSubtargetInfo &STI) { ++ MCELFStreamer::emitInstruction(Inst, STI); ++ ++ MCContext &Context = getContext(); ++ const MCRegisterInfo *MCRegInfo = Context.getRegisterInfo(); ++ ++ for (unsigned OpIndex = 0; OpIndex < Inst.getNumOperands(); ++OpIndex) { ++ const MCOperand &Op = Inst.getOperand(OpIndex); ++ ++ if (!Op.isReg()) ++ continue; ++ ++ unsigned Reg = Op.getReg(); ++ RegInfoRecord->SetPhysRegUsed(Reg, MCRegInfo); ++ } ++ ++ createPendingLabelRelocs(); ++} ++ ++void Sw64ELFStreamer::emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) { ++ Frame.Begin = getContext().createTempSymbol(); ++ MCELFStreamer::emitLabel(Frame.Begin); ++} ++ ++MCSymbol *Sw64ELFStreamer::emitCFILabel() { ++ MCSymbol *Label = getContext().createTempSymbol("cfi", true); ++ MCELFStreamer::emitLabel(Label); ++ return Label; ++} ++ ++void Sw64ELFStreamer::emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) { ++ Frame.End = getContext().createTempSymbol(); ++ MCELFStreamer::emitLabel(Frame.End); ++} ++ ++void Sw64ELFStreamer::createPendingLabelRelocs() { Labels.clear(); } ++ ++void Sw64ELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) { ++ MCELFStreamer::emitLabel(Symbol); ++ Labels.push_back(Symbol); ++} ++ ++void Sw64ELFStreamer::switchSection(MCSection *Section, ++ const MCExpr *Subsection) { ++ MCELFStreamer::switchSection(Section, Subsection); ++ Labels.clear(); ++} ++ ++void Sw64ELFStreamer::emitValueImpl(const MCExpr *Value, unsigned Size, ++ SMLoc Loc) { ++ MCELFStreamer::emitValueImpl(Value, Size, Loc); ++ Labels.clear(); ++} ++ ++void Sw64ELFStreamer::emitIntValue(uint64_t Value, unsigned Size) { ++ MCELFStreamer::emitIntValue(Value, Size); ++ Labels.clear(); ++} ++ ++void Sw64ELFStreamer::EmitSw64OptionRecords() { ++ for (const auto &I : Sw64OptionRecords) ++ I->EmitSw64OptionRecord(); ++} ++ ++MCELFStreamer *llvm::createSw64ELFStreamer( ++ MCContext &Context, std::unique_ptr MAB, ++ std::unique_ptr OW, std::unique_ptr Emitter, ++ bool RelaxAll) { ++ return new Sw64ELFStreamer(Context, std::move(MAB), std::move(OW), ++ std::move(Emitter)); ++} +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.h +new file mode 100644 +index 000000000..d391bfe71 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.h +@@ -0,0 +1,83 @@ ++//===- Sw64ELFStreamer.h - ELF Object Output --------------------*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
++// ++//===----------------------------------------------------------------------===// ++// ++// This is a custom MCELFStreamer which allows us to insert some hooks before ++// emitting data into an actual object file. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ELFSTREAMER_H ++#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ELFSTREAMER_H ++ ++#include "Sw64OptionRecord.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/MC/MCELFStreamer.h" ++#include ++ ++namespace llvm { ++ ++class MCAsmBackend; ++class MCCodeEmitter; ++class MCContext; ++class MCSubtargetInfo; ++struct MCDwarfFrameInfo; ++ ++class Sw64ELFStreamer : public MCELFStreamer { ++ SmallVector, 8> Sw64OptionRecords; ++ Sw64RegInfoRecord *RegInfoRecord; ++ SmallVector Labels; ++ ++public: ++ Sw64ELFStreamer(MCContext &Context, std::unique_ptr MAB, ++ std::unique_ptr OW, ++ std::unique_ptr Emitter); ++ ++ // Overriding this function allows us to add arbitrary behaviour before the ++ // \p Inst is actually emitted. For example, we can inspect the operands and ++ // gather sufficient information that allows us to reason about the register ++ // usage for the translation unit. ++ void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; ++ ++ // Overriding this function allows us to record all labels that should be ++ // marked as microSW64. Based on this data marking is done in ++ // EmitInstruction. ++ void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; ++ ++ // Overriding this function allows us to dismiss all labels that are ++ // candidates for marking as microSW64 when .section directive is processed. ++ void switchSection(MCSection *Section, ++ const MCExpr *Subsection = nullptr) override; ++ ++ // Overriding these functions allows us to dismiss all labels that are ++ // candidates for marking as microSW64 when .word/.long/.4byte etc ++ // directives are emitted. ++ void emitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override; ++ void emitIntValue(uint64_t Value, unsigned Size) override; ++ ++ // Overriding these functions allows us to avoid recording of these labels ++ // in EmitLabel and later marking them as microSW64. ++ void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override; ++ void emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override; ++ MCSymbol *emitCFILabel() override; ++ ++ // Emits all the option records stored up until the point it's called. ++ void EmitSw64OptionRecords(); ++ ++ // Mark labels as microSW64, if necessary for the subtarget. ++ void createPendingLabelRelocs(); ++}; ++ ++MCELFStreamer *createSw64ELFStreamer(MCContext &Context, ++ std::unique_ptr MAB, ++ std::unique_ptr OW, ++ std::unique_ptr Emitter, ++ bool RelaxAll); ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ELFSTREAMER_H +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64FixupKinds.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64FixupKinds.h +new file mode 100644 +index 000000000..ae378ac17 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64FixupKinds.h +@@ -0,0 +1,174 @@ ++//===-- Sw64FixupKinds.h - Sw64 Specific Fixup Entries ----------*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64FIXUPKINDS_H ++#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64FIXUPKINDS_H ++ ++#include "llvm/MC/MCFixup.h" ++ ++namespace llvm { ++namespace Sw64 { ++// Although most of the current fixup types reflect a unique relocation ++// one can have multiple fixup types for a given relocation and thus need ++// to be uniquely named. ++// ++// This table *must* be in the same order of ++// MCFixupKindInfo Infos[Sw64::NumTargetFixupKinds] ++// in Sw64AsmBackend.cpp. ++// ++enum Fixups { ++ // Branch fixups resulting in R_SW64_NONE. ++ fixup_SW64_NONE = FirstTargetFixupKind, ++ ++ // A 32 bit reference to a symbol. ++ // resulting in R_SW_64_REFLONG. ++ fixup_SW64_32, ++ ++ // A 64 bit reference to a symbol. ++ // resulting in - R_SW_64_REFQUAD. ++ fixup_SW64_64, ++ ++ // A 64 bit reference to a symbol. ++ // resulting in - R_SW_64_REFQUAD. ++ fixup_SW64_CTOR, ++ ++ // A 32 bit GP relative offset. This is just like REFLONG except ++ // that when the value is used the value of the gp register will be ++ // added in. ++ // resulting in - R_SW_64_GPREL32. ++ fixup_SW64_GPREL32, ++ ++ // Used for an instruction that refers to memory off the GP register ++ // resulting in - R_SW_64_LITERAL. ++ fixup_SW64_ELF_LITERAL, ++ // This reloc only appears immediately following an ELF_LITERAL reloc. ++ // It identifies a use of the literal. The symbol index is special: ++ // 1 means the literal address is in the base register of a memory ++ // format instruction; 2 means the literal address is in the byte ++ // offset register of a byte-manipulation instruction; 3 means the ++ // literal address is in the target register of a jsr instruction. ++ // This does not actually do any relocation. ++ // resulting in - R_SW_64_LITUSE. ++ fixup_SW64_LITUSE, ++ ++ // Load the gp register. This is always used for a ldih instruction ++ // which loads the upper 16 bits of the gp register. The symbol ++ // index of the GPDISP instruction is an offset in bytes to the lda ++ // instruction that loads the lower 16 bits. The value to use for ++ // the relocation is the difference between the GP value and the ++ // current location; the load will always be done against a register ++ // holding the current address. ++ // resulting in - R_SW_64_GPDISP. ++ fixup_SW64_GPDISP, ++ fixup_SW64_GPDISP_HI16, ++ fixup_SW64_GPDISP_LO16, ++ ++ // A 21 bit branch. ++ // resulting in - R_SW_64_BRADDR. ++ fixup_SW64_23_PCREL_S2, ++ // A hint for a jump to a register. ++ // resulting in - R_SW_64_HINT. ++ fixup_SW64_HINT, ++ ++ // 16 bit PC relative offset. ++ // resulting in - R_SW_64_SREL16. ++ fixup_SW64_16_PCREL, ++ ++ // 32 bit PC relative offset. ++ // resulting in - R_SW_64_SREL32. ++ fixup_SW64_32_PCREL, ++ ++ // 64 bit PC relative offset. ++ // resulting in - R_SW_64_SREL64. ++ fixup_SW64_64_PCREL, ++ ++ // The high 16 bits of the displacement from GP to the target ++ // resulting in - R_SW_64_GPRELHIGH. ++ fixup_SW64_GPREL_HI16, ++ ++ // The low 16 bits of the displacement from GP to the target ++ // resulting in - R_SW_64_GPRELLOW. ++ fixup_SW64_GPREL_LO16, ++ ++ // A 16-bit displacement from the GP to the target ++ // resulting in - R_SW_64_GPREL16. ++ fixup_SW64_GPREL16, ++ // A 21 bit branch that adjusts for gp loads ++ // resulting in - R_SW_64_BRSGP. ++ fixup_SW64_BRSGP, ++ ++ // Creates a tls_index for the symbol in the got. ++ // resulting in - R_SW_64_TLSGD. 
++ fixup_SW64_TLSGD, ++ ++ // Creates a tls_index for the (current) module in the got. ++ // resulting in - R_SW_64_TLSLDM. ++ fixup_SW64_TLSLDM, ++ ++ // A dynamic relocation for a DTP module entry. ++ // resulting in - R_SW_64_DTPMOD64. ++ fixup_SW64_DTPMOD64, ++ ++ // Creates a 64-bit offset in the got for the displacement from DTP to the ++ // target. ++ // resulting in - R_SW_64_GOTDTPREL. ++ fixup_SW64_GOTDTPREL16, ++ ++ // A dynamic relocation for a displacement from DTP to the target. ++ // resulting in - R_SW_64_DTPREL64. ++ fixup_SW64_DTPREL64, ++ ++ // The high 16 bits of the displacement from DTP to the target. ++ // resulting in - R_SW_64_DTPRELHI. ++ fixup_SW64_DTPREL_HI16, ++ // The low 16 bits of the displacement from DTP to the target. ++ // resulting in - R_SW_64_DTPRELLO. ++ fixup_SW64_DTPREL_LO16, ++ ++ // A 16-bit displacement from DTP to the target. ++ // resulting in - R_SW_64_DTPREL16 ++ fixup_SW64_DTPREL16, ++ ++ // Creates a 64-bit offset in the got for the displacement from TP to the ++ // target. ++ // resulting in - R_SW_64_GOTTPREL ++ fixup_SW64_GOTTPREL16, ++ ++ // A dynamic relocation for a displacement from TP to the target. ++ // resulting in - R_SW_64_TPREL64 ++ fixup_SW64_TPREL64, ++ ++ // The high 16 bits of the displacement from TP to the target. ++ // resulting in - R_SW_64_TPRELHI ++ fixup_SW64_TPREL_HI16, ++ ++ // The low 16 bits of the displacement from TP to the target. ++ // resulting in - R_SW_64_TPRELLO ++ fixup_SW64_TPREL_LO16, ++ ++ // A 16-bit displacement from TP to the target. ++ // resulting in - R_SW_64_TPREL16 ++ fixup_SW64_TPREL16, ++ ++ // Used for an instruction that refers to memory off the GP register ++ // together with literal, expand call range to 32 bits offset ++ // resulting in - R_SW_64_LITERAL_GOT ++ fixup_SW64_ELF_LITERAL_GOT, ++ ++ // TODO: for literal sorting reloc ++ fixup_SW64_LITERAL_BASE, ++ fixup_SW64_LITUSE_JSRDIRECT, ++ ++ // Marker ++ LastTargetFixupKind, ++ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind ++}; ++} // namespace Sw64 ++} // namespace llvm ++#endif +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.cpp +new file mode 100644 +index 000000000..bdbd6d0bd +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.cpp +@@ -0,0 +1,42 @@ ++//===-- Sw64MCAsmInfo.cpp - Sw64 asm properties -------------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the declarations of the Sw64MCAsmInfo properties. 
++//
++//===----------------------------------------------------------------------===//
++
++#include "Sw64MCAsmInfo.h"
++#include "llvm/TargetParser/Triple.h"
++
++using namespace llvm;
++
++void Sw64MCAsmInfo::anchor() {}
++
++Sw64MCAsmInfo::Sw64MCAsmInfo(const Triple &TheTriple,
++                             const MCTargetOptions &Options) {
++  IsLittleEndian = TheTriple.isLittleEndian();
++  assert(IsLittleEndian == true && "sw_64 machine is little endian!");
++
++  CodePointerSize = CalleeSaveStackSlotSize = 8;
++
++  PrivateGlobalPrefix = ".L";
++  AlignmentIsInBytes = false;
++  Data16bitsDirective = "\t.2byte\t";
++  Data32bitsDirective = "\t.4byte\t";
++  Data64bitsDirective = "\t.8byte\t";
++  WeakRefDirective = "\t.weak\t";
++  CommentString = "#";
++  // Change the assembler directive ".set LA, LB" into "LA = LB".
++  HasSw64SetDirective = true;
++  UsesELFSectionDirectiveForBSS = true;
++  SupportsDebugInformation = true;
++  ExceptionsType = ExceptionHandling::DwarfCFI;
++  DwarfRegNumForCFI = true;
++  UseIntegratedAssembler = true;
++}
+diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.h
+new file mode 100644
+index 000000000..f7809419e
+--- /dev/null
++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.h
+@@ -0,0 +1,32 @@
++//===-- Sw64MCAsmInfo.h - Sw64 Asm Info ------------------------*- C++ -*--===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This file contains the declaration of the Sw64MCAsmInfo class.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCASMINFO_H
++#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCASMINFO_H
++
++#include "llvm/MC/MCAsmInfoELF.h"
++
++namespace llvm {
++class Triple;
++
++class Sw64MCAsmInfo : public MCAsmInfoELF {
++  void anchor() override;
++
++public:
++  explicit Sw64MCAsmInfo(const Triple &TheTriple,
++                         const MCTargetOptions &Options);
++};
++
++} // namespace llvm
++
++#endif
+diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.cpp
+new file mode 100644
+index 000000000..d74229cda
+--- /dev/null
++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.cpp
+@@ -0,0 +1,452 @@
++//===-- Sw64MCCodeEmitter.cpp - Convert Sw64 Code to Machine Code ---------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This file implements the Sw64MCCodeEmitter class.
++// ++//===----------------------------------------------------------------------===// ++ ++#include "Sw64MCCodeEmitter.h" ++#include "MCTargetDesc/Sw64FixupKinds.h" ++#include "MCTargetDesc/Sw64MCExpr.h" ++#include "MCTargetDesc/Sw64MCTargetDesc.h" ++#include "llvm/ADT/APFloat.h" ++#include "llvm/ADT/APInt.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCFixup.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCInstrDesc.h" ++#include "llvm/MC/MCInstrInfo.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "mccodeemitter" ++ ++#define GET_INSTRMAP_INFO ++#include "Sw64GenInstrInfo.inc" ++#undef GET_INSTRMAP_INFO ++ ++namespace llvm { ++ ++MCCodeEmitter *createSw64MCCodeEmitterEB(const MCInstrInfo &MCII, ++ MCContext &Ctx) { ++ return new Sw64MCCodeEmitter(MCII, Ctx, false); ++} ++ ++MCCodeEmitter *createSw64MCCodeEmitterEL(const MCInstrInfo &MCII, ++ MCContext &Ctx) { ++ return new Sw64MCCodeEmitter(MCII, Ctx, true); ++} ++ ++} // end namespace llvm ++ ++MCInst Sw64MCCodeEmitter::LowerCompactBranch(MCInst TmpInst) const { ++ // > ++ // ==> > ++ ++ MCInst TI; ++ unsigned int Size = TmpInst.getNumOperands(); ++ // for test op is or not a imm ++ // as "bsr $RA,disp" will be convert to " bsr disp" will be an error ++ TI.setOpcode(TmpInst.getOpcode()); ++ if (TmpInst.getOperand(0).isImm()) ++ for (unsigned int i = 0; i < Size; i++) { ++ if (i == 0) ++ continue; ++ TI.addOperand(TmpInst.getOperand(i)); ++ } ++ else { ++ return TmpInst; ++ } ++ ++ return TI; ++} ++ ++void Sw64MCCodeEmitter::EmitByte(unsigned char C, raw_ostream &OS) const { ++ OS << (char)C; ++} ++ ++void Sw64MCCodeEmitter::EmitInstruction(uint64_t Val, unsigned Size, ++ const MCSubtargetInfo &STI, ++ raw_ostream &OS) const { ++ // Output the instruction encoding in little endian byte order. ++ // Little-endian byte ordering: ++ // sw_64: 4 | 3 | 2 | 1 ++ for (unsigned i = 0; i < Size; ++i) { ++ unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8; ++ EmitByte((Val >> Shift) & 0xff, OS); ++ } ++} ++ ++/// encodeInstruction - Emit the instruction. ++/// Size the instruction with Desc.getSize(). ++void Sw64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ // Non-pseudo instructions that get changed for direct object ++ // only based on operand values. ++ // If this list of instructions get much longer we will move ++ // the check to a function call. Until then, this is more efficient. 
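++  // The overall flow below is: copy the MCInst, handle the few special
++  // cases, ask the TableGen'erated getBinaryCodeForInstr() for the 32-bit
++  // word (recording fixups for any symbolic operands), and write it out
++  // with EmitInstruction().  For example (value made up), a word of
++  // 0x12345678 is emitted as the byte sequence 78 56 34 12, since Sw64
++  // instructions are stored little-endian.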
++  MCInst TmpInst = MI;
++
++  switch (MI.getOpcode()) {
++  // Branch instructions are encoded normally below; ALTENT is a
++  // directive-only pseudo and emits no machine code at all.
++  case Sw64::BEQ:
++  case Sw64::BGE:
++  case Sw64::BGT:
++  case Sw64::BLBC:
++  case Sw64::BLBS:
++  case Sw64::BLE:
++  case Sw64::BLT:
++  case Sw64::BNE:
++  case Sw64::BR:
++  case Sw64::BSR:
++  case Sw64::FBEQ:
++  case Sw64::FBGE:
++  case Sw64::FBGT:
++  case Sw64::FBLE:
++  case Sw64::FBLT:
++  case Sw64::FBNE:
++    break;
++  case Sw64::ALTENT:
++    return;
++  }
++
++  uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
++
++  const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode());
++
++  // Get byte count of instruction
++  unsigned Size = Desc.getSize();
++  if (!Size)
++    llvm_unreachable("Desc.getSize() returns 0");
++
++  EmitInstruction(Binary, Size, STI, OS);
++}
++
++/// getBranchTargetOpValue - Return binary encoding of the branch
++/// target operand. If the machine operand requires relocation,
++/// record the relocation and return zero.
++unsigned
++Sw64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
++                                          SmallVectorImpl<MCFixup> &Fixups,
++                                          const MCSubtargetInfo &STI) const {
++  const MCOperand &MO = MI.getOperand(OpNo);
++
++  // If the destination is an immediate, divide by 4.
++  if (MO.isImm())
++    return MO.getImm() >> 2;
++
++  assert(MO.isExpr() &&
++         "getBranchTargetOpValue expects only expressions or immediates");
++
++  const MCExpr *FixupExpression = MO.getExpr();
++
++  Fixups.push_back(MCFixup::create(0, FixupExpression,
++                                   MCFixupKind(Sw64::fixup_SW64_23_PCREL_S2)));
++  return 0;
++}
++
++/// getJumpTargetOpValue - Return binary encoding of the jump
++/// target operand. If the machine operand requires relocation,
++/// record the relocation and return zero.
++unsigned
++Sw64MCCodeEmitter::getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
++                                        SmallVectorImpl<MCFixup> &Fixups,
++                                        const MCSubtargetInfo &STI) const {
++  const MCOperand &MO = MI.getOperand(OpNo);
++  // If the destination is an immediate, divide by 4.
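++  // Branch displacements are counted in 4-byte instruction words, so a byte
++  // displacement of, say, 0x100 encodes as 0x40.  Symbolic targets fall
++  // through to the fixup below, which getRelocType() later maps to the
++  // 21-bit R_SW_64_BRADDR relocation.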
++ if (MO.isImm()) ++ return MO.getImm() >> 2; ++ ++ assert(MO.isExpr() && ++ "getJumpTargetOpValue expects only expressions or an immediate"); ++ ++ const MCExpr *FixupExpression = MO.getExpr(); ++ ++ Fixups.push_back(MCFixup::create(0, FixupExpression, ++ MCFixupKind(Sw64::fixup_SW64_23_PCREL_S2))); ++ return 0; ++} ++ ++static MCOperand createLituse(MCContext *Ctx) { ++ const MCSymbol *Sym = Ctx->getOrCreateSymbol(".text"); ++ const MCExpr *Expr = MCSymbolRefExpr::create(Sym, *Ctx); ++ ++ return MCOperand::createExpr( ++ Sw64MCExpr::create(Sw64MCExpr::MEK_LITUSE_JSR, Expr, *Ctx)); ++} ++ ++unsigned Sw64MCCodeEmitter::getExprOpValue(const MCExpr *Expr, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ ++ MCExpr::ExprKind Kind = Expr->getKind(); ++ if (Kind == MCExpr::Constant) { ++ return cast(Expr)->getValue(); ++ } ++ ++ if (Kind == MCExpr::Binary) { ++ unsigned Res = ++ getExprOpValue(cast(Expr)->getLHS(), Fixups, STI); ++ Res += getExprOpValue(cast(Expr)->getRHS(), Fixups, STI); ++ return Res; ++ } ++ ++ if (Kind == MCExpr::Target) { ++ const Sw64MCExpr *Sw64Expr = cast(Expr); ++ ++ Sw64::Fixups FixupKind = Sw64::Fixups(0); ++ switch (Sw64Expr->getKind()) { ++ default: ++ llvm_unreachable("Unknown fixup kind!"); ++ break; ++ case Sw64MCExpr::MEK_LITUSE_BASE: ++ FixupKind = Sw64::fixup_SW64_LITERAL_BASE; ++ break; ++ case Sw64MCExpr::MEK_LITUSE_JSRDIRECT: ++ FixupKind = Sw64::fixup_SW64_LITUSE_JSRDIRECT; ++ Fixups.push_back( ++ MCFixup::create(0, Sw64Expr, MCFixupKind(Sw64::fixup_SW64_HINT))); ++ break; ++ case Sw64MCExpr::MEK_ELF_LITERAL: ++ FixupKind = Sw64::fixup_SW64_ELF_LITERAL; ++ break; ++ case Sw64MCExpr::MEK_LITUSE_ADDR: ++ FixupKind = Sw64::fixup_SW64_LITUSE; ++ break; ++ case Sw64MCExpr::MEK_LITUSE_BYTOFF: ++ FixupKind = Sw64::fixup_SW64_LITUSE; ++ break; ++ case Sw64MCExpr::MEK_LITUSE_JSR: ++ FixupKind = Sw64::fixup_SW64_LITUSE; ++ break; ++ case Sw64MCExpr::MEK_LITUSE_TLSGD: ++ FixupKind = Sw64::fixup_SW64_LITUSE; ++ break; ++ case Sw64MCExpr::MEK_LITUSE_TLSLDM: ++ FixupKind = Sw64::fixup_SW64_LITUSE; ++ break; ++ case Sw64MCExpr::MEK_HINT: ++ FixupKind = Sw64::fixup_SW64_HINT; ++ break; ++ case Sw64MCExpr::MEK_GPDISP: ++ FixupKind = Sw64::fixup_SW64_GPDISP; ++ break; ++ case Sw64MCExpr::MEK_GPDISP_HI16: ++ FixupKind = Sw64::fixup_SW64_GPDISP_HI16; ++ break; ++ case Sw64MCExpr::MEK_GPDISP_LO16: ++ return 0; ++ case Sw64MCExpr::MEK_GPREL_HI16: ++ FixupKind = Sw64::fixup_SW64_GPREL_HI16; ++ break; ++ case Sw64MCExpr::MEK_GPREL_LO16: ++ FixupKind = Sw64::fixup_SW64_GPREL_LO16; ++ break; ++ case Sw64MCExpr::MEK_GPREL16: ++ FixupKind = Sw64::fixup_SW64_GPREL16; ++ break; ++ case Sw64MCExpr::MEK_BRSGP: ++ FixupKind = Sw64::fixup_SW64_BRSGP; ++ break; ++ case Sw64MCExpr::MEK_TLSGD: ++ FixupKind = Sw64::fixup_SW64_TLSGD; ++ break; ++ case Sw64MCExpr::MEK_TLSLDM: ++ FixupKind = Sw64::fixup_SW64_TLSLDM; ++ break; ++ case Sw64MCExpr::MEK_GOTDTPREL16: ++ FixupKind = Sw64::fixup_SW64_GOTDTPREL16; ++ break; ++ case Sw64MCExpr::MEK_DTPREL_HI16: ++ FixupKind = Sw64::fixup_SW64_DTPREL_HI16; ++ break; ++ case Sw64MCExpr::MEK_DTPREL_LO16: ++ FixupKind = Sw64::fixup_SW64_DTPREL_LO16; ++ break; ++ case Sw64MCExpr::MEK_DTPREL16: ++ FixupKind = Sw64::fixup_SW64_DTPREL16; ++ break; ++ case Sw64MCExpr::MEK_GOTTPREL16: ++ FixupKind = Sw64::fixup_SW64_GOTTPREL16; ++ break; ++ case Sw64MCExpr::MEK_TPREL_HI16: ++ FixupKind = Sw64::fixup_SW64_TPREL_HI16; ++ break; ++ case Sw64MCExpr::MEK_TPREL_LO16: ++ FixupKind = Sw64::fixup_SW64_TPREL_LO16; ++ break; ++ case 
Sw64MCExpr::MEK_TPREL16: ++ FixupKind = Sw64::fixup_SW64_TPREL16; ++ break; ++ case Sw64MCExpr::MEK_ELF_LITERAL_GOT: ++ FixupKind = Sw64::fixup_SW64_ELF_LITERAL_GOT; ++ break; ++ } // switch ++ ++ Fixups.push_back(MCFixup::create(0, Sw64Expr, MCFixupKind(FixupKind))); ++ return 0; ++ } ++ ++ return 0; ++} ++ ++/// getMachineOpValue - Return binary encoding of operand. If the machine ++/// operand requires relocation, record the relocation and return zero. ++unsigned ++Sw64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ if (MO.isReg()) { ++ unsigned Reg = MO.getReg(); ++ unsigned RegNo = Ctx.getRegisterInfo()->getEncodingValue(Reg); ++ return RegNo; ++ } else if (MO.isImm()) { ++ return static_cast(MO.getImm()); ++ } else if (MO.isDFPImm()) { ++ return static_cast(bit_cast(MO.getDFPImm())); ++ } ++ ++ // beq op1 op2 ++ // to ++ // beq opc op1 op2 ++ if (MCII.get(MI.getOpcode()).isBranch() && MI.getNumOperands() == 3) { ++ // for beq/bne/fbeq .... ++ return getBranchTargetOpValue(MI, 2, Fixups, STI); ++ } else if (MCII.get(MI.getOpcode()).isBranch() && MI.getNumOperands() == 2) { ++ // for br/bsr ++ return getJumpTargetOpValue(MI, 1, Fixups, STI); ++ } ++ ++ // MO must be an Expr. ++ assert(MO.isExpr()); ++ return getExprOpValue(MO.getExpr(), Fixups, STI); ++} ++ ++/// Return binary encoding of memory related operand. ++/// If the offset operand requires relocation, record the relocation. ++template ++unsigned Sw64MCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ unsigned RegBits; // Base register is encoded in bits 20-16. ++ unsigned OffBits; // offset is encoded in bits 15-0. ++ ++ if (MI.getOperand(OpNo).isImm()) { // vload ++ RegBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI) << 16; ++ OffBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI); ++ } else { // vstore ++ RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) << 16; ++ OffBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI); ++ } ++ ++ // Apply the scale factor if there is one. 
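++  // For example (operands made up): with a base-register encoding of 5 and a
++  // signed 16-bit offset of -8, the packed value returned below is
++  // (5 << 16) | 0xfff8 = 0x5fff8.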
++ // OffBits >>= ShiftAmount; ++ ++ return (OffBits & 0xFFFF) | RegBits; ++} ++ ++// FIXME: should be called getMSBEncoding ++// ++unsigned ++Sw64MCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ assert(MI.getOperand(OpNo - 1).isImm()); ++ assert(MI.getOperand(OpNo).isImm()); ++ unsigned Position = ++ getMachineOpValue(MI, MI.getOperand(OpNo - 1), Fixups, STI); ++ unsigned Size = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI); ++ ++ return Position + Size - 1; ++} ++ ++unsigned Sw64MCCodeEmitter::getUImm4AndValue(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ assert(MI.getOperand(OpNo).isImm()); ++ const MCOperand &MO = MI.getOperand(OpNo); ++ unsigned Value = MO.getImm(); ++ switch (Value) { ++ case 128: ++ return 0x0; ++ case 1: ++ return 0x1; ++ case 2: ++ return 0x2; ++ case 3: ++ return 0x3; ++ case 4: ++ return 0x4; ++ case 7: ++ return 0x5; ++ case 8: ++ return 0x6; ++ case 15: ++ return 0x7; ++ case 16: ++ return 0x8; ++ case 31: ++ return 0x9; ++ case 32: ++ return 0xa; ++ case 63: ++ return 0xb; ++ case 64: ++ return 0xc; ++ case 255: ++ return 0xd; ++ case 32768: ++ return 0xe; ++ case 65535: ++ return 0xf; ++ } ++ llvm_unreachable("Unexpected value"); ++} ++ ++unsigned ++Sw64MCCodeEmitter::getRegisterListOpValue(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ unsigned res = 0; ++ ++ // Register list operand is always first operand of instruction and it is ++ // placed before memory operand (register + imm). ++ ++ for (unsigned I = OpNo, E = MI.getNumOperands() - 2; I < E; ++I) { ++ unsigned Reg = MI.getOperand(I).getReg(); ++ unsigned RegNo = Ctx.getRegisterInfo()->getEncodingValue(Reg); ++ if (RegNo != 31) ++ res++; ++ else ++ res |= 0x10; ++ } ++ return res; ++} ++ ++unsigned ++Sw64MCCodeEmitter::getRegisterListOpValue16(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ return (MI.getNumOperands() - 4); ++} ++ ++#include "Sw64GenMCCodeEmitter.inc" +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.h +new file mode 100644 +index 000000000..56539f35c +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.h +@@ -0,0 +1,111 @@ ++//===- Sw64MCCodeEmitter.h - Convert Sw64 Code to Machine Code --*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the Sw64MCCodeEmitter class. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCCODEEMITTER_H ++#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCCODEEMITTER_H ++ ++#include "llvm/MC/MCCodeEmitter.h" ++#include ++ ++namespace llvm { ++ ++class MCContext; ++class MCExpr; ++class MCFixup; ++class MCInst; ++class MCInstrInfo; ++class MCOperand; ++class MCSubtargetInfo; ++class raw_ostream; ++ ++class Sw64MCCodeEmitter : public MCCodeEmitter { ++ const MCInstrInfo &MCII; ++ MCContext &Ctx; ++ bool IsLittleEndian; ++ ++public: ++ Sw64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, bool IsLittle) ++ : MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {} ++ Sw64MCCodeEmitter(const Sw64MCCodeEmitter &) = delete; ++ Sw64MCCodeEmitter &operator=(const Sw64MCCodeEmitter &) = delete; ++ ~Sw64MCCodeEmitter() override = default; ++ ++ void EmitByte(unsigned char C, raw_ostream &OS) const; ++ ++ void EmitInstruction(uint64_t Val, unsigned Size, const MCSubtargetInfo &STI, ++ raw_ostream &OS) const; ++ ++ void encodeInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const override; ++ ++ // getBinaryCodeForInstr - TableGen'erated function for getting the ++ // binary encoding for an instruction. ++ uint64_t getBinaryCodeForInstr(const MCInst &MI, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ // getJumpTargetOpValue - Return binary encoding of the jump ++ // target operand. If the machine operand requires relocation, ++ // record the relocation and return zero. ++ unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ // getBranchTargetOpValue - Return binary encoding of the branch ++ // target operand. If the machine operand requires relocation, ++ // record the relocation and return zero. ++ unsigned getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ // getMachineOpValue - Return binary encoding of operand. If the machin ++ // operand requires relocation, record the relocation and return zero. 
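++  // In practice this dispatches on the operand kind: registers return their
++  // hardware encoding from MCRegisterInfo, plain immediates return their
++  // value, and anything else is treated as an MCExpr, for which
++  // getExprOpValue() records the appropriate target fixup and contributes 0
++  // to the encoded word.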
++ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getMSAMemEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ template ++ unsigned getMemEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getUImm4AndValue(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getExprOpValue(const MCExpr *Expr, SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getRegisterListOpValue(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getRegisterListOpValue16(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++private: ++ MCInst LowerCompactBranch(MCInst TempInst) const; ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCCODEEMITTER_H +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.cpp +new file mode 100644 +index 000000000..49aef02c0 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.cpp +@@ -0,0 +1,177 @@ ++//===-- Sw64MCExpr.cpp - Sw64 specific MC expression classes --------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "Sw64MCExpr.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/MC/MCAsmInfo.h" ++#include "llvm/MC/MCAssembler.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCStreamer.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/MC/MCValue.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "sw64mcexpr" ++ ++const Sw64MCExpr *Sw64MCExpr::create(Sw64MCExpr::Sw64ExprKind Kind, ++ const MCExpr *Expr, MCContext &Ctx) { ++ return new (Ctx) Sw64MCExpr(Kind, Expr); ++} ++ ++void Sw64MCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { ++ int64_t AbsVal; ++ // FIXME: the end "(" need match ++ if (Expr->evaluateAsAbsolute(AbsVal)) ++ OS << AbsVal; ++ else ++ Expr->print(OS, MAI, true); ++ // OS << ')'; ++} ++ ++bool Sw64MCExpr::evaluateAsRelocatableImpl(MCValue &Res, ++ const MCAsmLayout *Layout, ++ const MCFixup *Fixup) const { ++ if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup)) ++ return false; ++ ++ if (Res.getRefKind() != MCSymbolRefExpr::VK_None) ++ return false; ++ ++ // evaluateAsAbsolute() and evaluateAsValue() require that we evaluate the ++ // %hi/%lo/etc. here. Fixup is a null pointer when either of these is the ++ // caller. ++ if (Res.isAbsolute() && Fixup == nullptr) { ++ int64_t AbsVal = Res.getConstant(); ++ switch (Kind) { ++ case MEK_None: ++ llvm_unreachable("MEK_None is invalid"); ++ case MEK_DTPREL16: ++ // MEK_DTPREL is used for marking TLS DIEExpr only ++ // and contains a regular sub-expression. 
++ return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup); ++ case MEK_ELF_LITERAL: /* !literal relocation. */ ++ case MEK_LITUSE_ADDR: /* !lituse_addr relocation. */ ++ case MEK_LITUSE_BASE: /* !lituse_base relocation. */ ++ case MEK_LITUSE_BYTOFF: /* !lituse_bytoff relocation. */ ++ case MEK_LITUSE_JSR: /* !lituse_jsr relocation. */ ++ case MEK_LITUSE_TLSGD: /* !lituse_tlsgd relocation. */ ++ case MEK_LITUSE_TLSLDM: /* !lituse_tlsldm relocation. */ ++ case MEK_LITUSE_JSRDIRECT: /* !lituse_jsrdirect relocation. */ ++ case MEK_GPDISP: /* !gpdisp relocation. */ ++ case MEK_GPDISP_HI16: ++ case MEK_GPDISP_LO16: ++ case MEK_GPREL_HI16: /* !gprelhigh relocation. */ ++ case MEK_GPREL_LO16: /* !gprellow relocation. */ ++ case MEK_GPREL16: /* !gprel relocation. */ ++ case MEK_BRSGP: /* !samegp relocation. */ ++ case MEK_TLSGD: /* !tlsgd relocation. */ ++ case MEK_TLSLDM: /* !tlsldm relocation. */ ++ case MEK_GOTDTPREL16: /* !gotdtprel relocation. */ ++ case MEK_DTPREL_HI16: /* !dtprelhi relocation. */ ++ case MEK_DTPREL_LO16: /* !dtprello relocation. */ ++ case MEK_GOTTPREL16: /* !gottprel relocation. */ ++ case MEK_TPREL_HI16: /* !tprelhi relocation. */ ++ case MEK_TPREL_LO16: /* !tprello relocation. */ ++ case MEK_TPREL16: /* !tprel relocation. */ ++ case MEK_ELF_LITERAL_GOT: /* !literal_got relocation. */ ++ return false; ++ } ++ Res = MCValue::get(AbsVal); ++ return true; ++ } ++ // We want to defer it for relocatable expressions since the constant is ++ // applied to the whole symbol value. ++ // ++ // The value of getKind() that is given to MCValue is only intended to aid ++ // debugging when inspecting MCValue objects. It shouldn't be relied upon ++ // for decision making. ++ Res = ++ MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind()); ++ ++ return true; ++} ++ ++void Sw64MCExpr::visitUsedExpr(MCStreamer &Streamer) const { ++ Streamer.visitUsedExpr(*getSubExpr()); ++} ++ ++static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { ++ switch (Expr->getKind()) { ++ case MCExpr::Target: ++ fixELFSymbolsInTLSFixupsImpl(cast(Expr)->getSubExpr(), Asm); ++ break; ++ case MCExpr::Constant: ++ break; ++ case MCExpr::Binary: { ++ const MCBinaryExpr *BE = cast(Expr); ++ fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); ++ fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); ++ break; ++ } ++ case MCExpr::SymbolRef: { ++ // We're known to be under a TLS fixup, so any symbol should be ++ // modified. There should be only one. ++ const MCSymbolRefExpr &SymRef = *cast(Expr); ++ cast(SymRef.getSymbol()).setType(ELF::STT_TLS); ++ break; ++ } ++ case MCExpr::Unary: ++ fixELFSymbolsInTLSFixupsImpl(cast(Expr)->getSubExpr(), Asm); ++ break; ++ } ++} ++ ++// for lituse relocation, we don't need to change symbol type ++// to tls. ++void Sw64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { ++ switch (getKind()) { ++ case MEK_None: ++ llvm_unreachable("MEK_None and MEK_Special are invalid"); ++ break; ++ case MEK_GPDISP: ++ case MEK_LITUSE_BASE: /* !lituse_base relocation. */ ++ case MEK_LITUSE_JSRDIRECT: /* !lituse_jsrdirect relocation. */ ++ case MEK_GPDISP_HI16: ++ case MEK_GPDISP_LO16: ++ case MEK_ELF_LITERAL: ++ case MEK_ELF_LITERAL_GOT: ++ case MEK_GPREL_HI16: ++ case MEK_GPREL_LO16: ++ case MEK_GPREL16: ++ case MEK_BRSGP: ++ // If we do have nested target-specific expressions, they will be in ++ // a consecutive chain. 
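++    // For instance (schematically), an outer !literal or !gpdisp kind may
++    // wrap an inner TLS kind; recursing here lets the inner kind run the
++    // fixELFSymbolsInTLSFixupsImpl() walk that marks the referenced
++    // MCSymbolRefExpr symbols as STT_TLS.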
++ if (const Sw64MCExpr *E = dyn_cast(getSubExpr())) ++ E->fixELFSymbolsInTLSFixups(Asm); ++ break; ++ case MEK_DTPREL16: ++ case MEK_LITUSE_ADDR: /* !lituse_addr relocation. */ ++ case MEK_LITUSE_BYTOFF: /* !lituse_bytoff relocation. */ ++ case MEK_LITUSE_JSR: /* !lituse_jsr relocation. */ ++ case MEK_LITUSE_TLSGD: /* !lituse_tlsgd relocation. */ ++ case MEK_LITUSE_TLSLDM: /* !lituse_tlsldm relocation. */ ++ case MEK_TLSGD: /* !tlsgd relocation. */ ++ case MEK_TLSLDM: /* !tlsldm relocation. */ ++ case MEK_GOTDTPREL16: /* !gotdtprel relocation. */ ++ case MEK_DTPREL_HI16: /* !dtprelhi relocation. */ ++ case MEK_DTPREL_LO16: /* !dtprello relocation. */ ++ case MEK_GOTTPREL16: /* !gottprel relocation. */ ++ case MEK_TPREL_HI16: /* !tprelhi relocation. */ ++ case MEK_TPREL_LO16: /* !tprello relocation. */ ++ case MEK_TPREL16: /* !tprel relocation. */ ++ fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); ++ break; ++ } ++} +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.h +new file mode 100644 +index 000000000..782e0bd03 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.h +@@ -0,0 +1,99 @@ ++//===- Sw64MCExpr.h - Sw64 specific MC expression classes -------*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCEXPR_H ++#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCEXPR_H ++ ++#include "llvm/MC/MCAsmLayout.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCValue.h" ++ ++namespace llvm { ++class Sw64MCExpr : public MCTargetExpr { ++public: ++ // for linker relax, add complex relocation ++ // exprkind here ++ enum Sw64ExprKind { ++ // use for relax ++ MEK_HINT = 0x100, ++ MEK_LITERAL = 0x200, ++ MEK_LITUSE = 0x400, ++ ++ // do complex relocation ++ MEK_LITUSE_BASE = MEK_LITERAL | MEK_LITUSE, ++ MEK_LITUSE_JSRDIRECT = MEK_HINT | MEK_LITUSE, ++ ++ // None ++ MEK_None = 0x000, ++ ++ // final reloc ++ MEK_ELF_LITERAL, /* !literal relocation. */ ++ MEK_ELF_LITERAL_GOT, /* !literal_got relocation */ ++ MEK_LITUSE_ADDR, /* !lituse_addr relocation. */ ++ // MEK_LITUSE_BASE, /* !lituse_base relocation. */ ++ MEK_LITUSE_BYTOFF, /* !lituse_bytoff relocation. */ ++ MEK_LITUSE_JSR, /* !lituse_jsr relocation. */ ++ MEK_LITUSE_TLSGD, /* !lituse_tlsgd relocation. */ ++ MEK_LITUSE_TLSLDM, /* !lituse_tlsldm relocation. */ ++ // MEK_LITUSE_JSRDIRECT, /* !lituse_jsrdirect relocation. */ ++ MEK_GPDISP, /* !gpdisp relocation. */ ++ MEK_GPDISP_HI16, ++ MEK_GPDISP_LO16, ++ MEK_GPREL_HI16, /* !gprelhigh relocation. */ ++ MEK_GPREL_LO16, /* !gprellow relocation. */ ++ MEK_GPREL16, /* !gprel relocation. */ ++ MEK_BRSGP, /* !samegp relocation. */ ++ MEK_TLSGD, /* !tlsgd relocation. */ ++ MEK_TLSLDM, /* !tlsldm relocation. */ ++ MEK_GOTDTPREL16, /* !gotdtprel relocation. */ ++ MEK_DTPREL_HI16, /* !dtprelhi relocation. */ ++ MEK_DTPREL_LO16, /* !dtprello relocation. */ ++ MEK_DTPREL16, /* !dtprel relocation. */ ++ MEK_GOTTPREL16, /* !gottprel relocation. */ ++ MEK_TPREL_HI16, /* !tprelhi relocation. */ ++ MEK_TPREL_LO16, /* !tprello relocation. */ ++ MEK_TPREL16, /* !tprel relocation. 
*/ ++ }; ++ ++private: ++ const Sw64ExprKind Kind; ++ const MCExpr *Expr; ++ ++ explicit Sw64MCExpr(Sw64ExprKind Kind, const MCExpr *Expr) ++ : Kind(Kind), Expr(Expr) {} ++ ++public: ++ static const Sw64MCExpr *create(Sw64ExprKind Kind, const MCExpr *Expr, ++ MCContext &Ctx); ++ ++ /// Get the kind of this expression. ++ Sw64ExprKind getKind() const { return Kind; } ++ ++ /// Get the child of this expression. ++ const MCExpr *getSubExpr() const { return Expr; } ++ ++ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; ++ bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, ++ const MCFixup *Fixup) const override; ++ void visitUsedExpr(MCStreamer &Streamer) const override; ++ ++ MCFragment *findAssociatedFragment() const override { ++ return getSubExpr()->findAssociatedFragment(); ++ } ++ ++ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; ++ ++ static bool classof(const MCExpr *E) { ++ return E->getKind() == MCExpr::Target; ++ } ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCEXPR_H +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.cpp +new file mode 100644 +index 000000000..d07dc3ff5 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.cpp +@@ -0,0 +1,189 @@ ++//===-- Sw64MCTargetDesc.cpp - Sw64 Target Descriptions -------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file provides Sw64 specific target descriptions. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "Sw64MCTargetDesc.h" ++#include "InstPrinter/Sw64InstPrinter.h" ++#include "Sw64AsmBackend.h" ++#include "Sw64ELFStreamer.h" ++#include "Sw64MCAsmInfo.h" ++#include "Sw64TargetStreamer.h" ++#include "llvm/MC/MCCodeEmitter.h" ++#include "llvm/MC/MCELFStreamer.h" ++#include "llvm/MC/MCInstrAnalysis.h" ++#include "llvm/MC/MCInstrInfo.h" ++#include "llvm/MC/MCObjectWriter.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/MC/MCSymbol.h" ++#include "llvm/MC/MachineLocation.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/FormattedStream.h" ++#include "llvm/TargetParser/Triple.h" ++ ++using namespace llvm; ++namespace llvm { ++ ++class MCInstrInfo; ++ ++} // end namespace llvm ++#define GET_INSTRINFO_MC_DESC ++#include "Sw64GenInstrInfo.inc" ++ ++#define GET_SUBTARGETINFO_MC_DESC ++#include "Sw64GenSubtargetInfo.inc" ++ ++#define GET_REGINFO_MC_DESC ++#include "Sw64GenRegisterInfo.inc" ++ ++/// Select the Sw64 CPU for the given triple and cpu name. 
++/// FIXME: Merge with the copy in Sw64Subtarget.cpp ++StringRef SW64_MC::selectSw64CPU(const Triple &TT, StringRef CPU) { ++ return CPU = "sw_64"; ++} ++ ++static MCInstrInfo *createSw64MCInstrInfo() { ++ MCInstrInfo *X = new MCInstrInfo(); ++ InitSw64MCInstrInfo(X); ++ return X; ++} ++ ++static MCRegisterInfo *createSw64MCRegisterInfo(const Triple &TT) { ++ MCRegisterInfo *X = new MCRegisterInfo(); ++ InitSw64MCRegisterInfo(X, Sw64::R26); ++ return X; ++} ++ ++static MCSubtargetInfo *createSw64MCSubtargetInfo(const Triple &TT, ++ StringRef CPU, StringRef FS) { ++ CPU = SW64_MC::selectSw64CPU(TT, CPU); ++ return createSw64MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); ++} ++ ++static MCAsmInfo *createSw64MCAsmInfo(const MCRegisterInfo &MRI, ++ const Triple &TT, ++ const MCTargetOptions &Options) { ++ MCAsmInfo *MAI = new Sw64MCAsmInfo(TT, Options); ++ ++ unsigned SP = MRI.getDwarfRegNum(Sw64::R30, true); ++ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, SP, 0); ++ MAI->addInitialFrameState(Inst); ++ ++ return MAI; ++} ++ ++static MCInstPrinter *createSw64MCInstPrinter(const Triple &T, ++ unsigned SyntaxVariant, ++ const MCAsmInfo &MAI, ++ const MCInstrInfo &MII, ++ const MCRegisterInfo &MRI) { ++ return new Sw64InstPrinter(MAI, MII, MRI); ++} ++ ++static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, ++ std::unique_ptr &&MAB, ++ std::unique_ptr &&OW, ++ std::unique_ptr &&Emitter, ++ bool RelaxAll) { ++ MCStreamer *S; ++ S = createSw64ELFStreamer(Context, std::move(MAB), std::move(OW), ++ std::move(Emitter), RelaxAll); ++ return S; ++} ++ ++static MCTargetStreamer *createSw64AsmTargetStreamer(MCStreamer &S, ++ formatted_raw_ostream &OS, ++ MCInstPrinter *InstPrint, ++ bool isVerboseAsm) { ++ return new Sw64TargetAsmStreamer(S, OS); ++} ++ ++static MCTargetStreamer *createSw64NullTargetStreamer(MCStreamer &S) { ++ return new Sw64TargetStreamer(S); ++} ++ ++static MCTargetStreamer * ++createSw64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { ++ return new Sw64TargetELFStreamer(S, STI); ++} ++ ++namespace { ++ ++class Sw64MCInstrAnalysis : public MCInstrAnalysis { ++public: ++ Sw64MCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} ++ ++ bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, ++ uint64_t &Target) const override { ++ unsigned NumOps = Inst.getNumOperands(); ++ if (NumOps == 0) ++ return false; ++ if (Inst.getOpcode() == Sw64::JSR || Inst.getOpcode() == Sw64::JSR) { ++ Target = Inst.getOperand(NumOps - 1).getImm() != 0 ++ ? Inst.getOperand(NumOps - 2).getImm() ++ : Addr + 4; ++ return true; ++ } ++ switch (Info->get(Inst.getOpcode()).operands()[NumOps - 1].OperandType) { ++ default: ++ return false; ++ case MCOI::OPERAND_PCREL: ++ Target = Addr + Inst.getOperand(NumOps - 1).getImm() * 4 + 4; ++ return true; ++ } ++ } ++}; ++} // namespace ++ ++static MCInstrAnalysis *createSw64MCInstrAnalysis(const MCInstrInfo *Info) { ++ return new Sw64MCInstrAnalysis(Info); ++} ++ ++extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSw64TargetMC() { ++ Target *T = &getTheSw64Target(); ++ ++ // Register the MC asm info. ++ RegisterMCAsmInfoFn X(*T, createSw64MCAsmInfo); ++ ++ // Register the MC instruction info. ++ TargetRegistry::RegisterMCInstrInfo(*T, createSw64MCInstrInfo); ++ ++ // Register the MC register info. ++ TargetRegistry::RegisterMCRegInfo(*T, createSw64MCRegisterInfo); ++ ++ // Register the elf streamer. 
++ TargetRegistry::RegisterELFStreamer(*T, createMCStreamer); ++ ++ // Register the asm target streamer. ++ TargetRegistry::RegisterAsmTargetStreamer(*T, createSw64AsmTargetStreamer); ++ ++ TargetRegistry::RegisterNullTargetStreamer(*T, createSw64NullTargetStreamer); ++ ++ // Register the MC subtarget info. ++ TargetRegistry::RegisterMCSubtargetInfo(*T, createSw64MCSubtargetInfo); ++ ++ // Register the MC instruction analyzer. ++ TargetRegistry::RegisterMCInstrAnalysis(*T, createSw64MCInstrAnalysis); ++ ++ // Register the MCInstPrinter. ++ TargetRegistry::RegisterMCInstPrinter(*T, createSw64MCInstPrinter); ++ ++ TargetRegistry::RegisterObjectTargetStreamer(*T, ++ createSw64ObjectTargetStreamer); ++ ++ // Register the asm backend. ++ TargetRegistry::RegisterMCAsmBackend(*T, createSw64AsmBackend); ++ ++ // Register the MC Code Emitter ++ TargetRegistry::RegisterMCCodeEmitter(*T, createSw64MCCodeEmitterEL); ++} +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.h +new file mode 100644 +index 000000000..4ab9d2fff +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.h +@@ -0,0 +1,66 @@ ++//===-- Sw64MCTargetDesc.h - Sw64 Target Descriptions -----------*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file provides Sw64 specific target descriptions. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCTARGETDESC_H ++#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCTARGETDESC_H ++ ++#include "llvm/Support/DataTypes.h" ++ ++#include ++ ++namespace llvm { ++class MCAsmBackend; ++class MCCodeEmitter; ++class MCContext; ++class MCInstrInfo; ++class MCObjectTargetWriter; ++class MCRegisterInfo; ++class MCSubtargetInfo; ++class MCTargetOptions; ++class StringRef; ++class Target; ++class Triple; ++class raw_ostream; ++class raw_pwrite_stream; ++ ++Target &getTheSw64Target(); ++ ++MCCodeEmitter *createSw64MCCodeEmitterEL(const MCInstrInfo &MCII, ++ MCContext &Ctx); ++ ++MCAsmBackend *createSw64AsmBackend(const Target &T, const MCSubtargetInfo &STI, ++ const MCRegisterInfo &MRI, ++ const MCTargetOptions &Options); ++ ++std::unique_ptr ++createSw64ELFObjectWriter(const Triple &TT, bool IsS32); ++ ++namespace SW64_MC { ++StringRef selectSw64CPU(const Triple &TT, StringRef CPU); ++} ++ ++} // namespace llvm ++ ++// Defines symbolic names for Sw64 registers. This defines a mapping from ++// register name to register number. ++#define GET_REGINFO_ENUM ++#include "Sw64GenRegisterInfo.inc" ++ ++// Defines symbolic names for the Sw64 instructions. ++#define GET_INSTRINFO_ENUM ++#include "Sw64GenInstrInfo.inc" ++ ++#define GET_SUBTARGETINFO_ENUM ++#include "Sw64GenSubtargetInfo.inc" ++ ++#endif +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64OptionRecord.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64OptionRecord.cpp +new file mode 100644 +index 000000000..07bddfbac +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64OptionRecord.cpp +@@ -0,0 +1,32 @@ ++//===- Sw64OptionRecord.cpp - Abstraction for storing information ---------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "Sw64OptionRecord.h" ++#include "Sw64ABIInfo.h" ++#include "Sw64ELFStreamer.h" ++#include "Sw64TargetStreamer.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/MC/MCAssembler.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MCSectionELF.h" ++#include ++ ++using namespace llvm; ++ ++void Sw64RegInfoRecord::EmitSw64OptionRecord() { ++ ++ // We need to distinguish between S64 and the rest because at the moment ++ // we don't emit .Sw64.options for other ELFs other than S64. ++ // Since .reginfo has the same information as .Sw64.options (ODK_REGINFO), ++ // we can use the same abstraction (Sw64RegInfoRecord class) to handle both. ++} ++ ++void Sw64RegInfoRecord::SetPhysRegUsed(unsigned Reg, ++ const MCRegisterInfo *MCRegInfo) {} +diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64TargetStreamer.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64TargetStreamer.cpp +new file mode 100644 +index 000000000..19cdbc7d0 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64TargetStreamer.cpp +@@ -0,0 +1,388 @@ ++//===-- Sw64TargetStreamer.cpp - Sw64 Target Streamer Methods -------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file provides Sw64 specific target streamer methods. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "Sw64TargetStreamer.h" ++#include "InstPrinter/Sw64InstPrinter.h" ++#include "MCTargetDesc/Sw64ABIInfo.h" ++#include "MCTargetDesc/Sw64BaseInfo.h" ++#include "MCTargetDesc/Sw64MCExpr.h" ++#include "MCTargetDesc/Sw64MCTargetDesc.h" ++#include "Sw64ELFStreamer.h" ++#include "Sw64MCExpr.h" ++#include "Sw64MCTargetDesc.h" ++#include "Sw64TargetObjectFile.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/MC/MCAssembler.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCInstrDesc.h" ++#include "llvm/MC/MCInstrInfo.h" ++#include "llvm/MC/MCSectionELF.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/FormattedStream.h" ++ ++using namespace llvm; ++namespace llvm { ++struct Sw64InstrTable { ++ MCInstrDesc Insts[4445]; ++ MCOperandInfo OperandInfo[3026]; ++ MCPhysReg ImplicitOps[130]; ++}; ++extern const Sw64InstrTable Sw64Descs; ++} // end namespace llvm ++ ++namespace { ++static cl::opt RoundSectionSizes( ++ "sw_64-round-section-sizes", cl::init(false), ++ cl::desc("Round section sizes up to the section alignment"), cl::Hidden); ++} // end anonymous namespace ++ ++Sw64TargetStreamer::Sw64TargetStreamer(MCStreamer &S) ++ : MCTargetStreamer(S), ModuleDirectiveAllowed(true) { ++ GPRInfoSet = FPRInfoSet = FrameInfoSet = false; ++} ++void Sw64TargetStreamer::emitDirectiveSetReorder() { forbidModuleDirective(); } ++void Sw64TargetStreamer::emitDirectiveSetNoReorder() {} ++void Sw64TargetStreamer::emitDirectiveSetMacro() { forbidModuleDirective(); } ++void Sw64TargetStreamer::emitDirectiveSetNoMacro() { forbidModuleDirective(); } ++void Sw64TargetStreamer::emitDirectiveSetAt() { forbidModuleDirective(); } ++void Sw64TargetStreamer::emitDirectiveSetNoAt() { forbidModuleDirective(); } 
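The directive methods above, together with their overrides further down, follow the usual LLVM split: the base Sw64TargetStreamer only tracks state, Sw64TargetAsmStreamer prints the textual directive, and Sw64TargetELFStreamer records the equivalent effect in the object file (for example, .ent becomes an STT_FUNC symbol type instead of text). A minimal standalone sketch of that shape; the types and names below are illustrative, not the real classes:

#include <iostream>
#include <string>

// Base: shared bookkeeping only, no output.
struct DirectiveSink {
  virtual ~DirectiveSink() = default;
  virtual void emitEnt(const std::string &) {}
};

// Text side: mirrors the "\t.ent\t<sym>" printing done by the asm streamer.
struct TextSink : DirectiveSink {
  void emitEnt(const std::string &Sym) override {
    std::cout << "\t.ent\t" << Sym << '\n';
  }
};

// Object side: no text, just mark the symbol as a function (STT_FUNC == 2),
// the way Sw64TargetELFStreamer::emitDirectiveEnt does below.
struct ObjectSink : DirectiveSink {
  unsigned LastSymbolType = 0;
  void emitEnt(const std::string &) override { LastSymbolType = 2; }
};

int main() {
  TextSink T;
  ObjectSink O;
  T.emitEnt("foo"); // prints the directive
  O.emitEnt("foo"); // records the symbol type instead
}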
++void Sw64TargetStreamer::emitDirectiveEnd(StringRef Name) {} ++void Sw64TargetStreamer::emitDirectiveEnt(const MCSymbol &Symbol) {} ++void Sw64TargetStreamer::emitDirectiveNaN2008() {} ++void Sw64TargetStreamer::emitDirectiveNaNLegacy() {} ++void Sw64TargetStreamer::emitDirectiveInsn() { forbidModuleDirective(); } ++void Sw64TargetStreamer::emitFrame(unsigned StackReg, unsigned StackSize, ++ unsigned ReturnReg) {} ++ ++void Sw64TargetStreamer::emitDirectiveSetCore3b() {} ++void Sw64TargetStreamer::emitDirectiveSetCore4() {} ++ ++void Sw64TargetAsmStreamer::emitDirectiveSetCore3b() { ++ OS << "\t.arch= \t core3b\n"; ++ forbidModuleDirective(); ++} ++void Sw64TargetAsmStreamer::emitDirectiveSetCore4() { ++ OS << "\t.arch= \t core4\n"; ++ forbidModuleDirective(); ++} ++ ++void Sw64TargetStreamer::emitDirectiveSetArch(StringRef Arch) { ++ forbidModuleDirective(); ++} ++ ++void Sw64TargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) {} ++ ++Sw64TargetAsmStreamer::Sw64TargetAsmStreamer(MCStreamer &S, ++ formatted_raw_ostream &OS) ++ : Sw64TargetStreamer(S), OS(OS) {} ++ ++void Sw64TargetAsmStreamer::emitDirectiveSetReorder() { ++ Sw64TargetStreamer::emitDirectiveSetReorder(); ++} ++ ++void Sw64TargetAsmStreamer::emitDirectiveSetNoReorder() { ++ forbidModuleDirective(); ++} ++ ++void Sw64TargetAsmStreamer::emitDirectiveSetMacro() { ++ Sw64TargetStreamer::emitDirectiveSetMacro(); ++} ++ ++void Sw64TargetAsmStreamer::emitDirectiveSetNoMacro() { ++ Sw64TargetStreamer::emitDirectiveSetNoMacro(); ++} ++ ++void Sw64TargetAsmStreamer::emitDirectiveSetAt() { ++ Sw64TargetStreamer::emitDirectiveSetAt(); ++} ++ ++void Sw64TargetAsmStreamer::emitDirectiveSetNoAt() { ++ Sw64TargetStreamer::emitDirectiveSetNoAt(); ++} ++ ++void Sw64TargetAsmStreamer::emitDirectiveEnd(StringRef Name) { ++ OS << "\t.end\t" << Name << '\n'; ++} ++ ++void Sw64TargetAsmStreamer::emitDirectiveEnt(const MCSymbol &Symbol) { ++ OS << "\t.ent\t" << Symbol.getName() << '\n'; ++} ++ ++void Sw64TargetAsmStreamer::emitDirectiveNaN2008() { OS << "\t.nan\t2008\n"; } ++ ++void Sw64TargetAsmStreamer::emitDirectiveNaNLegacy() { ++ OS << "\t.nan\tlegacy\n"; ++} ++ ++void Sw64TargetAsmStreamer::emitDirectiveInsn() { ++ Sw64TargetStreamer::emitDirectiveInsn(); ++ OS << "\t.insn\n"; ++} ++ ++void Sw64TargetAsmStreamer::emitFrame(unsigned StackReg, unsigned StackSize, ++ unsigned ReturnReg) { ++ OS << "\t.frame\t$" ++ << StringRef(Sw64InstPrinter::getRegisterName(StackReg)).lower() << "," ++ << StackSize << ",$" ++ << StringRef(Sw64InstPrinter::getRegisterName(ReturnReg)).lower() << '\n'; ++} ++ ++void Sw64TargetAsmStreamer::emitDirectiveSetArch(StringRef Arch) { ++ OS << "\t.set arch=" << Arch << "\n"; ++ Sw64TargetStreamer::emitDirectiveSetArch(Arch); ++} ++ ++// This part is for ELF object output. ++Sw64TargetELFStreamer::Sw64TargetELFStreamer(MCStreamer &S, ++ const MCSubtargetInfo &STI) ++ : Sw64TargetStreamer(S), STI(STI) { ++ MCAssembler &MCA = getStreamer().getAssembler(); ++ ++ // It's possible that MCObjectFileInfo isn't fully initialized at this point ++ // due to an initialization order problem where LLVMTargetMachine creates the ++ // target streamer before TargetLoweringObjectFile calls ++ // InitializeMCObjectFileInfo. There doesn't seem to be a single place that ++ // covers all cases so this statement covers most cases and direct object ++ // emission must call setPic() once MCObjectFileInfo has been initialized. The ++ // cases we don't handle here are covered by Sw64AsmPrinter. 
++ Pic = MCA.getContext().getObjectFileInfo()->isPositionIndependent(); ++ ++ // Set the header flags that we can in the constructor. ++ // FIXME: This is a fairly terrible hack. We set the rest ++ // of these in the destructor. The problem here is two-fold: ++ // ++ // a: Some of the eflags can be set/reset by directives. ++ // b: There aren't any usage paths that initialize the ABI ++ // pointer until after we initialize either an assembler ++ // or the target machine. ++ // We can fix this by making the target streamer construct ++ // the ABI, but this is fraught with wide ranging dependency ++ // issues as well. ++ unsigned EFlags = MCA.getELFHeaderEFlags(); ++ ++ // FIXME: Fix a dependency issue by instantiating the ABI object to some ++ // default based off the triple. The triple doesn't describe the target ++ // fully, but any external user of the API that uses the MCTargetStreamer ++ // would otherwise crash on assertion failure. ++ ++ ABI = Sw64ABIInfo(Sw64ABIInfo::S64()); ++ ++ MCA.setELFHeaderEFlags(EFlags); ++} ++ ++void Sw64TargetELFStreamer::emitLabel(MCSymbol *S) { ++ auto *Symbol = cast(S); ++ getStreamer().getAssembler().registerSymbol(*Symbol); ++ uint8_t Type = Symbol->getType(); ++ if (Type != ELF::STT_FUNC) ++ return; ++} ++ ++void Sw64TargetELFStreamer::finish() { ++ MCAssembler &MCA = getStreamer().getAssembler(); ++ const MCObjectFileInfo &OFI = *MCA.getContext().getObjectFileInfo(); ++ ++ // .bss, .text and .data are always at least 16-byte aligned. ++ MCSection &TextSection = *OFI.getTextSection(); ++ MCA.registerSection(TextSection); ++ MCSection &DataSection = *OFI.getDataSection(); ++ MCA.registerSection(DataSection); ++ MCSection &BSSSection = *OFI.getBSSSection(); ++ MCA.registerSection(BSSSection); ++ ++ TextSection.ensureMinAlignment(Align(16)); ++ DataSection.ensureMinAlignment(Align(16)); ++ BSSSection.ensureMinAlignment(Align(16)); ++ ++ if (RoundSectionSizes) { ++ // Make sections sizes a multiple of the alignment. This is useful for ++ // verifying the output of IAS against the output of other assemblers but ++ // it's not necessary to produce a correct object and increases section ++ // size. ++ MCStreamer &OS = getStreamer(); ++ for (MCSection &S : MCA) { ++ MCSectionELF &Section = static_cast(S); ++ ++ Align Alignment = Section.getAlign(); ++ OS.switchSection(&Section); ++ if (Section.useCodeAlign()) ++ OS.emitCodeAlignment(Alignment, &STI, Alignment.value()); ++ else ++ OS.emitValueToAlignment(Alignment, 0, 1, Alignment.value()); ++ } ++ } ++ ++ // Update e_header flags. See the FIXME and comment above in ++ // the constructor for a full rundown on this. ++ unsigned EFlags = MCA.getELFHeaderEFlags(); ++ ++ if (Pic) ++ EFlags |= ELF::EF_SW64_PIC | ELF::EF_SW64_CPIC; ++ ++ MCA.setELFHeaderEFlags(EFlags); ++ ++ // Emit all the option records. ++ // At the moment we are only emitting .Sw64.options (ODK_REGINFO) and ++ // .reginfo. 
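The RoundSectionSizes path above pads every registered section so that its size becomes a multiple of its alignment, which the comment notes is only useful for comparing IAS output against other assemblers. The padding target is the usual round-up-to-alignment computation; a small self-contained sketch, with example values chosen for illustration:

#include <cassert>
#include <cstdint>

// Round Size up to the next multiple of Align (Align must be a power of two).
static uint64_t roundUpToAlignment(uint64_t Size, uint64_t Align) {
  return (Size + Align - 1) & ~(Align - 1);
}

int main() {
  assert(roundUpToAlignment(0, 16) == 0);
  assert(roundUpToAlignment(37, 16) == 48); // a 37-byte section is padded to 48
  assert(roundUpToAlignment(64, 16) == 64); // already aligned: unchanged
  return 0;
}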
++ Sw64ELFStreamer &MEF = static_cast(Streamer); ++ MEF.EmitSw64OptionRecords(); ++} ++ ++MCELFStreamer &Sw64TargetELFStreamer::getStreamer() { ++ return static_cast(Streamer); ++} ++ ++void Sw64TargetELFStreamer::emitDirectiveSetNoReorder() { ++ MCAssembler &MCA = getStreamer().getAssembler(); ++ unsigned Flags = MCA.getELFHeaderEFlags(); ++ Flags |= ELF::EF_SW64_NOREORDER; ++ MCA.setELFHeaderEFlags(Flags); ++ forbidModuleDirective(); ++} ++ ++void Sw64TargetELFStreamer::emitDirectiveEnt(const MCSymbol &Symbol) { ++ GPRInfoSet = FPRInfoSet = FrameInfoSet = false; ++ ++ // .ent also acts like an implicit '.type symbol, STT_FUNC' ++ static_cast(Symbol).setType(ELF::STT_FUNC); ++} ++ ++void Sw64TargetELFStreamer::emitDirectiveNaN2008() { ++ MCAssembler &MCA = getStreamer().getAssembler(); ++ unsigned Flags = MCA.getELFHeaderEFlags(); ++ Flags |= ELF::EF_SW64_NAN2008; ++ MCA.setELFHeaderEFlags(Flags); ++} ++ ++void Sw64TargetELFStreamer::emitDirectiveNaNLegacy() { ++ MCAssembler &MCA = getStreamer().getAssembler(); ++ unsigned Flags = MCA.getELFHeaderEFlags(); ++ Flags &= ~ELF::EF_SW64_NAN2008; ++ MCA.setELFHeaderEFlags(Flags); ++} ++ ++void Sw64TargetELFStreamer::emitDirectiveInsn() { ++ Sw64TargetStreamer::emitDirectiveInsn(); ++ Sw64ELFStreamer &MEF = static_cast(Streamer); ++ MEF.createPendingLabelRelocs(); ++} ++ ++void Sw64TargetELFStreamer::emitFrame(unsigned StackReg, unsigned StackSize, ++ unsigned ReturnReg_) { ++ MCContext &Context = getStreamer().getAssembler().getContext(); ++ const MCRegisterInfo *RegInfo = Context.getRegisterInfo(); ++ ++ FrameInfoSet = true; ++ FrameReg = RegInfo->getEncodingValue(StackReg); ++ FrameOffset = StackSize; ++ ReturnReg = RegInfo->getEncodingValue(ReturnReg_); ++} ++ ++static const char *getRelType(const MCExpr *Expr, const MCSubtargetInfo &STI) { ++ const Sw64MCExpr *Sw64Expr = cast(Expr); ++ static int curgpdist = 0; ++ switch (Sw64Expr->getKind()) { ++ default: ++ return ""; ++ case Sw64MCExpr::MEK_GPDISP_HI16: ++ case Sw64MCExpr::MEK_GPDISP_LO16: ++ case Sw64MCExpr::MEK_GPDISP: { ++ std::string a = ++ std::string("!gpdisp!") + std::to_string((curgpdist) / 2 + 1); ++ curgpdist++; ++ return strdup(a.c_str()); ++ } ++ case Sw64MCExpr::MEK_ELF_LITERAL: ++ return "!literal"; ++ case Sw64MCExpr::MEK_LITUSE_ADDR: /* !lituse_addr relocation. */ ++ return "!lituse_addr"; ++ case Sw64MCExpr::MEK_LITUSE_BASE: /* !lituse_base relocation. */ ++ return "!literal"; ++ case Sw64MCExpr::MEK_LITUSE_BYTOFF: /* !lituse_bytoff relocation. */ ++ return "!lituse_bytoff"; ++ case Sw64MCExpr::MEK_LITUSE_JSR: /* !lituse_jsr relocation. */ ++ return "!lituse_jsr"; ++ case Sw64MCExpr::MEK_LITUSE_TLSGD: /* !lituse_tlsgd relocation. */ ++ return "!lituse_tlsgd"; ++ case Sw64MCExpr::MEK_LITUSE_TLSLDM: /* !lituse_tlsldm relocation. */ ++ return "!lituse_tlsldm"; ++ // case Sw64MCExpr::MEK_LITUSE_JSRDIRECT: /* !lituse_jsrdirect relocation. ++ // */ ++ // return "!lituse_jsrdirect"; ++ case Sw64MCExpr::MEK_GPREL_HI16: /* !gprelhigh relocation. */ ++ return "!gprelhigh"; ++ case Sw64MCExpr::MEK_GPREL_LO16: /* !gprellow relocation. */ ++ return "!gprellow"; ++ case Sw64MCExpr::MEK_GPREL16: /* !gprel relocation. */ ++ return "!gprel"; ++ case Sw64MCExpr::MEK_BRSGP: /* !samegp relocation. */ ++ return "!samegp"; ++ case Sw64MCExpr::MEK_TLSGD: /* !tlsgd relocation. */ ++ return "!tlsgd"; ++ case Sw64MCExpr::MEK_TLSLDM: /* !tlsldm relocation. */ ++ return "!tlsldm"; ++ case Sw64MCExpr::MEK_GOTDTPREL16: /* !gotdtprel relocation. 
*/ ++ return "!gotdtprel"; ++ case Sw64MCExpr::MEK_DTPREL_HI16: /* !dtprelhi relocation. */ ++ return "!dtprelhi"; ++ case Sw64MCExpr::MEK_DTPREL_LO16: /* !dtprello relocation. */ ++ return "!dtprello"; ++ case Sw64MCExpr::MEK_DTPREL16: /* !dtprel relocation. */ ++ return "!dtprel"; ++ case Sw64MCExpr::MEK_GOTTPREL16: /* !gottprel relocation. */ ++ return "!gottprel"; ++ case Sw64MCExpr::MEK_TPREL_HI16: /* !tprelhi relocation. */ ++ return "!tprelhi"; ++ case Sw64MCExpr::MEK_TPREL_LO16: /* !tprello relocation. */ ++ return "!tprello"; ++ case Sw64MCExpr::MEK_TPREL16: /* !tprel relocation. */ ++ return "!tprel"; ++ case Sw64MCExpr::MEK_ELF_LITERAL_GOT: /* !literal_got relocation. */ ++ return "!literal_got"; ++ } ++} ++ ++static void printRelocInst(MCInstPrinter &InstPrinter, const MCInst &Inst, ++ raw_ostream &OS, const MCSubtargetInfo &STI, ++ uint64_t Address) { ++ MCOperand Op = Inst.getOperand(1); ++ if (Op.isExpr()) { ++ const MCExpr *Expr = Op.getExpr(); ++ if (Expr->getKind() == MCExpr::Target) { ++ const char *RelName = getRelType(Expr, STI); ++ InstPrinter.printInst(&Inst, Address, RelName, STI, OS); ++ return; ++ } ++ } ++ InstPrinter.printInst(&Inst, Address, "", STI, OS); ++} ++ ++void Sw64TargetStreamer::prettyPrintAsm(MCInstPrinter &InstPrinter, ++ uint64_t Address, const MCInst &Inst, ++ const MCSubtargetInfo &STI, ++ raw_ostream &OS) { ++ const MCInstrDesc &MCID = ++ Sw64Descs.Insts[Sw64::INSTRUCTION_LIST_END - 1 - Inst.getOpcode()]; ++ // while moving mayload flags for ldi/ldih ++ // adding opcode determine here ++ if (MCID.mayLoad() || MCID.mayStore() || Inst.getOpcode() == Sw64::LDAH || ++ Inst.getOpcode() == Sw64::LDA) { ++ printRelocInst(InstPrinter, Inst, OS, STI, Address); ++ return; ++ } ++ InstPrinter.printInst(&Inst, Address, "", STI, OS); ++} +diff --git a/llvm/lib/Target/Sw64/README.txt b/llvm/lib/Target/Sw64/README.txt +new file mode 100644 +index 000000000..b69205b49 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/README.txt +@@ -0,0 +1,7 @@ ++To-do ++----- ++ ++* Instruction encodings ++* Tailcalls ++* Investigate loop alignment ++* Add builtins +diff --git a/llvm/lib/Target/Sw64/Sw64.h b/llvm/lib/Target/Sw64/Sw64.h +new file mode 100644 +index 000000000..1d2d3f05b +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64.h +@@ -0,0 +1,56 @@ ++//===-- Sw64.h - Top-level interface for Sw64 representation --*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the entry points for global functions defined in the LLVM ++// Sw64 back-end. 
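Back in getRelType above, the MEK_GPDISP case derives its !gpdisp!N sequence number from a running counter: the ldih/ldi halves of one GP reload reach getRelType as consecutive calls, so dividing the counter by two before incrementing gives both halves the same N. A standalone sketch of just that numbering (the loop and output are illustrative):

#include <cstdio>

int main() {
  int CurGpdist = 0; // mirrors the static counter in getRelType
  for (int Call = 0; Call < 4; ++Call) {
    int N = CurGpdist++ / 2 + 1;    // two consecutive calls share the same N
    std::printf("!gpdisp!%d\n", N); // prints !gpdisp!1, !gpdisp!1, !gpdisp!2, !gpdisp!2
  }
  return 0;
}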
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_SW64_SW64_H ++#define LLVM_LIB_TARGET_SW64_SW64_H ++ ++#include "MCTargetDesc/Sw64MCTargetDesc.h" ++#include "llvm/CodeGen/AsmPrinter.h" ++#include "llvm/Target/TargetMachine.h" ++ ++namespace llvm { ++namespace Sw64 { ++// These describe LDAx ++static const int IMM_LOW = -32768; ++static const int IMM_HIGH = 32767; ++static const int IMM_MULT = 65536; ++} // namespace Sw64 ++ ++class FunctionPass; ++class ModulePass; ++class TargetMachine; ++class Sw64TargetMachine; ++class formatted_raw_ostream; ++ ++FunctionPass *createSw64ISelDag(Sw64TargetMachine &TM, ++ CodeGenOpt::Level OptLevel); ++ ++FunctionPass *createSw64LLRPPass(Sw64TargetMachine &tm); ++FunctionPass *createSw64BranchSelectionPass(); ++FunctionPass *createSw64BranchSelection(); ++FunctionPass *createSw64PreLegalizeCombiner(); // for fmad ++FunctionPass *createSw64ExpandPseudoPass(); ++FunctionPass *createSw64ExpandPseudo2Pass(); ++FunctionPass *createSw64CombineLSPass(); ++FunctionPass *createSw64IEEEConstraintPass(); ++ ++bool LowerSw64MachineOperandToMCOperand(const MachineOperand &MO, ++ MCOperand &MCOp, const AsmPrinter &AP); ++ ++void initializeSw64BranchSelectionPass(PassRegistry &); ++void initializeSw64PreLegalizerCombinerPass(PassRegistry &); // for fmad ++void initializeSw64DAGToDAGISelPass(PassRegistry &); ++} // namespace llvm ++ ++#endif +diff --git a/llvm/lib/Target/Sw64/Sw64.td b/llvm/lib/Target/Sw64/Sw64.td +new file mode 100644 +index 000000000..f72250992 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64.td +@@ -0,0 +1,154 @@ ++//===- Sw64.td - Describe the Sw64 Target Machine --------*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// ++//===----------------------------------------------------------------------===// ++ ++// Get the target-independent interfaces which we are implementing... 
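The IMM_LOW / IMM_HIGH / IMM_MULT constants in Sw64.h above describe the reach of the LDAx pair: a displacement is split into a sign-extended low 16 bits (LDA) plus a high part scaled by 65536 (LDAH), chosen so that Hi * IMM_MULT + Lo reproduces the original value even when the low half wraps negative. A self-contained sketch of that split; the helper name is illustrative:

#include <cassert>
#include <cstdint>

// Split Value so that Hi * 65536 + Lo == Value, with Lo in [-32768, 32767].
static void splitLdahLda(int64_t Value, int64_t &Hi, int64_t &Lo) {
  Lo = static_cast<int16_t>(Value & 0xFFFF); // sign-extended low half (LDA)
  Hi = (Value - Lo) >> 16;                   // compensated high half (LDAH)
}

int main() {
  int64_t Hi, Lo;
  splitLdahLda(0x12348000, Hi, Lo);
  assert(Lo == -32768 && Hi == 0x1235); // low half wraps, high half compensates
  assert(Hi * 65536 + Lo == 0x12348000);
  return 0;
}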
++// ++include "llvm/Target/Target.td" ++ ++//Sw64 is little endian ++ ++//===----------------------------------------------------------------------===// ++// Subtarget Features ++//===----------------------------------------------------------------------===// ++ ++def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true", ++ "Enable CIX extensions">; ++ ++// argument, type, value, help text ++ ++def Featurecore3b : SubtargetFeature<"core3b", "Sw64ArchVersion", "core3b", ++ "Enable core4b Feature">; ++ ++def Featurecore4 : SubtargetFeature<"core4", "Sw64ArchVersion", "core4", ++ "Enable core4 Feature">; ++ ++def FeatureRelax : SubtargetFeature<"relax", "relax", "true", ++ "Enable relax ld attribute">; ++ ++def FeatureEv : SubtargetFeature<"swEv", "Ev", "true", ++ "Enable Sw6a Feature test">; ++foreach i = {1-14, 22-25} in ++ def FeatureReserve#i : SubtargetFeature<"reserve-r"#i, "ReserveRegister["#i#"]", "true", ++ "Reserve "#i#", making it unavailable " ++ "as a GPR">; ++ ++def FeatureOptMul : SubtargetFeature<"swOptMul", "Sw64OptMul", "true", ++ "Enable Sw6b optimize mul">; ++ ++def Featureintarith : SubtargetFeature<"swIntArith", "Sw64EnableIntAri", "true", ++ "Enable core4 integer arithmetic instructions">; ++def Featureintshift : SubtargetFeature<"swIntShift", "Sw64EnableIntShift", "true", ++ "Enable core4 integer shift instructions">; ++def Featurebyteinst : SubtargetFeature<"swByteInst", "Sw64EnableByteInst", "true", ++ "Enable core4 byte manipulation instructions">; ++def Featurefloatarith : SubtargetFeature<"swFloatArith", "Sw64EnableFloatAri", "true", ++ "Enable core4 float arithmetic instructions">; ++def Featurefloatround : SubtargetFeature<"swFloatRound", "Sw64EnableFloatRound", "true", ++ "Enable core4 float round instructions">; ++def Featurepostinc : SubtargetFeature<"swPostInc", "Sw64EnablePostInc", "true", ++ "Enable core4 post-inc load and store instructions">; ++def Featurecrcinst : SubtargetFeature<"swCrcInst", "Sw64EnableCrcInst", "true", ++ "Enable core4 crc32 instructions">; ++ ++def FeatureSIMD : SubtargetFeature<"simd", "HasSIMD", "true", ++ "Sw64 SIMD Instruction">; ++ ++//*********************** ++// Subtarget Support test ++//*********************** ++def HasMieee : Predicate<"MF->getSubtarget().hasMieee()">, ++ AssemblerPredicate<(all_of FeatureCIX)>; ++ ++def HasCore3b : Predicate<"Subtarget->hasCore3b()">, ++ AssemblerPredicate<(all_of Featurecore3b)>; ++ ++def HasCore4 : Predicate<"Subtarget->hasCore4()">, ++ AssemblerPredicate<(all_of Featurecore4)>; ++ ++def enRelax : Predicate<"Subtarget->enRelax()">, ++ AssemblerPredicate<(all_of FeatureRelax)>; ++ ++def HasEv : Predicate<"Subtarget->hasEv()">, ++ AssemblerPredicate<(all_of FeatureEv)>; ++ ++ ++//===----------------------------------------------------------------------===// ++// Register File Description ++//===----------------------------------------------------------------------===// ++ ++include "Sw64RegisterInfo.td" ++ ++//===----------------------------------------------------------------------===// ++// Calling Convention Description ++//===----------------------------------------------------------------------===// ++ ++include "Sw64CallingConv.td" ++ ++//===----------------------------------------------------------------------===// ++// Base Schedule Description ++//===----------------------------------------------------------------------===// ++ ++include "Sw64Schedule.td" ++ ++//===----------------------------------------------------------------------===// ++// Instruction Descriptions 
++//===----------------------------------------------------------------------===// ++ ++include "Sw64InstrInfo.td" ++ ++ ++//===----------------------------------------------------------------------===// ++// MicroArchitechural Schedule Descriptions ++//===----------------------------------------------------------------------===// ++ ++include "Sw64SchedCore3.td" ++include "Sw64SchedCore4.td" ++include "Sw64SchedCore3SIMD.td" ++ ++def Sw64InstrInfo : InstrInfo { ++} ++ ++//===----------------------------------------------------------------------===// ++// Sw64 Processor Definitions ++//===----------------------------------------------------------------------===// ++ ++//*********************** ++// Sw processor test ++//*********************** ++ ++class Proc Features> ++ : ProcessorModel; ++ ++def : Proc<"sw_64", []>; ++def : Proc<"sw6a", [Featurecore3b]>; ++def : Proc<"sw6b", [Featurecore3b]>; ++def : Proc<"sw4d", [Featurecore3b]>; ++def : Proc<"sw8a", [Featurecore3b, Featurecore4]>; ++ ++//===----------------------------------------------------------------------===// ++// The Sw64 Target ++//===----------------------------------------------------------------------===// ++def Sw64AsmWriter : AsmWriter { ++ string AsmWriterClassName = "InstPrinter"; ++ bit isMCAsmWriter = 1; ++} ++ ++def Sw64AsmParser : AsmParser { ++ let ShouldEmitMatchRegisterName = 0; ++} ++ ++def Sw64 : Target { ++ // Pull in Instruction Info: ++ let InstructionSet = Sw64InstrInfo; ++ let AssemblyWriters = [Sw64AsmWriter]; ++ let AssemblyParsers = [Sw64AsmParser]; ++} +diff --git a/llvm/lib/Target/Sw64/Sw64AsmPrinter.cpp b/llvm/lib/Target/Sw64/Sw64AsmPrinter.cpp +new file mode 100644 +index 000000000..642e474e6 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64AsmPrinter.cpp +@@ -0,0 +1,322 @@ ++//===-- Sw64AsmPrinter.cpp - Sw64 LLVM assembly writer ------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains a printer that converts from our internal representation ++// of machine-dependent LLVM code to the XAS-format Sw64 assembly language. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "InstPrinter/Sw64InstPrinter.h" ++#include "MCTargetDesc/Sw64BaseInfo.h" ++#include "Sw64.h" ++#include "Sw64InstrInfo.h" ++#include "Sw64MCInstLower.h" ++#include "Sw64Subtarget.h" ++#include "Sw64TargetMachine.h" ++#include "Sw64TargetStreamer.h" ++#include "llvm/ADT/SmallString.h" ++#include "llvm/ADT/StringExtras.h" ++#include "llvm/CodeGen/AsmPrinter.h" ++#include "llvm/CodeGen/MachineConstantPool.h" ++#include "llvm/CodeGen/MachineFunctionPass.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineJumpTableInfo.h" ++#include "llvm/CodeGen/MachineModuleInfo.h" ++#include "llvm/IR/BasicBlock.h" ++#include "llvm/IR/Constants.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/IR/DebugInfo.h" ++#include "llvm/IR/DerivedTypes.h" ++#include "llvm/IR/Mangler.h" ++#include "llvm/IR/Module.h" ++#include "llvm/MC/MCAsmInfo.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCInstBuilder.h" ++#include "llvm/MC/MCStreamer.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/Target/TargetLoweringObjectFile.h" ++#include ++#include ++using namespace llvm; ++ ++#define DEBUG_TYPE "asm-printer" ++ ++namespace { ++class Sw64AsmPrinter : public AsmPrinter { ++ Sw64MCInstLower MCInstLowering; ++ Sw64TargetStreamer &getTargetStreamer(); ++ /// InConstantPool - Maintain state when emitting a sequence of constant ++ /// pool entries so we can properly mark them as data regions. ++ bool InConstantPool = false; ++ ++public: ++ explicit Sw64AsmPrinter(TargetMachine &TM, ++ std::unique_ptr Streamer) ++ : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(*this) {} ++ ++ StringRef getPassName() const override { return "Sw64 Assembly Printer"; } ++ ++ void printOp(const MachineOperand &MO, raw_ostream &O); ++ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); ++ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, ++ const char *ExtraCode, raw_ostream &O) override; ++ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, ++ const char *ExtraCode, raw_ostream &O) override; ++ ++ void emitFunctionEntryLabel() override; ++ void emitInstruction(const MachineInstr *MI) override; ++ void emitFunctionBodyStart() override; ++ void emitFunctionBodyEnd() override; ++ void emitStartOfAsmFile(Module &M) override; ++ bool isBlockOnlyReachableByFallthrough( ++ const MachineBasicBlock *MBB) const override; ++ bool runOnMachineFunction(MachineFunction &MF) override; ++ ++ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, ++ const MachineInstr *MI); ++ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const { ++ return LowerSw64MachineOperandToMCOperand(MO, MCOp, *this); ++ } ++ //===------------------------------------------------------------------===// ++ // XRay implementation ++ //===------------------------------------------------------------------===// ++public: ++ // XRay-specific lowering for Sw64. 
++ void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); ++ void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); ++ void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); ++ ++private: ++ void emitSled(const MachineInstr &MI, SledKind Kind); ++}; ++} // end of anonymous namespace ++ ++bool Sw64AsmPrinter::runOnMachineFunction(MachineFunction &MF) { ++ ++ // Initialize TargetLoweringObjectFile. ++ AsmPrinter::runOnMachineFunction(MF); ++ emitXRayTable(); ++ return true; ++} ++ ++bool Sw64AsmPrinter::isBlockOnlyReachableByFallthrough( ++ const MachineBasicBlock *MBB) const { ++ // The predecessor has to be immediately before this block. ++ const MachineBasicBlock *Pred = *MBB->pred_begin(); ++ ++ // If the predecessor is a switch statement, assume a jump table ++ // implementation, so it is not a fall through. ++ if (const BasicBlock *bb = Pred->getBasicBlock()) ++ if (isa(bb->getTerminator())) ++ return false; ++ ++ // If this is a landing pad, it isn't a fall through. If it has no preds, ++ // then nothing falls through to it. ++ if (MBB->isEHPad() || MBB->pred_empty()) ++ return false; ++ ++ // If there isn't exactly one predecessor, it can't be a fall through. ++ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; ++ ++PI2; ++ ++ if (PI2 != MBB->pred_end()) ++ return false; ++ ++ // The predecessor has to be immediately before this block. ++ if (!Pred->isLayoutSuccessor(MBB)) ++ return false; ++ ++ // If the block is completely empty, then it definitely does fall through. ++ if (Pred->empty()) ++ return true; ++ ++ // Otherwise, check the last instruction. ++ // Check if the last terminator is an unconditional branch. ++ MachineBasicBlock::const_iterator I = Pred->end(); ++ while (I != Pred->begin() && !(--I)->isTerminator()) ++ ; ++ return false; ++ // return !I->isBarrier(); ++ // ; ++} ++ ++Sw64TargetStreamer &Sw64AsmPrinter::getTargetStreamer() { ++ return static_cast(*OutStreamer->getTargetStreamer()); ++} ++ ++//===----------------------------------------------------------------------===// ++// Frame and Set directives ++//===----------------------------------------------------------------------===// ++/// EmitFunctionBodyStart - Targets can override this to emit stuff before ++/// the first basic block in the function. ++void Sw64AsmPrinter::emitFunctionBodyStart() { ++ MCInstLowering.Initialize(&MF->getContext()); ++} ++ ++/// EmitFunctionBodyEnd - Targets can override this to emit stuff after ++/// the last basic block in the function. ++void Sw64AsmPrinter::emitFunctionBodyEnd() { ++ // Emit function end directives ++ Sw64TargetStreamer &TS = getTargetStreamer(); ++ ++ // There are instruction for this macros, but they must ++ // always be at the function end, and we can't emit and ++ // break with BB logic. ++ TS.emitDirectiveSetAt(); ++ TS.emitDirectiveSetMacro(); ++ TS.emitDirectiveSetReorder(); ++ ++ TS.emitDirectiveEnd(CurrentFnSym->getName()); ++ // Make sure to terminate any constant pools that were at the end ++ // of the function. 
++ if (!InConstantPool) ++ return; ++ InConstantPool = false; ++ OutStreamer->emitDataRegion(MCDR_DataRegionEnd); ++} ++ ++void Sw64AsmPrinter::emitFunctionEntryLabel() { ++ Sw64TargetStreamer &TS = getTargetStreamer(); ++ ++ TS.emitDirectiveEnt(*CurrentFnSym); ++ OutStreamer->emitLabel(CurrentFnSym); ++} ++ ++void Sw64AsmPrinter::printOperand(const MachineInstr *MI, int opNum, ++ raw_ostream &O) { ++ const MachineOperand &MO = MI->getOperand(opNum); ++ ++ if (MO.isReg()) { ++ assert(Register::isPhysicalRegister(MO.getReg()) && "Not physreg??"); ++ O << Sw64InstPrinter::getRegisterName(MO.getReg()); ++ } else if (MO.isImm()) { ++ O << MO.getImm(); ++ } else { ++ printOp(MO, O); ++ } ++} ++void Sw64AsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { ++ switch (MO.getType()) { ++ case MachineOperand::MO_Register: ++ O << Sw64InstPrinter::getRegisterName(MO.getReg()); ++ return; ++ ++ case MachineOperand::MO_Immediate: ++ assert(0 && "printOp() does not handle immediate values"); ++ return; ++ ++ case MachineOperand::MO_MachineBasicBlock: ++ MO.getMBB()->getSymbol()->print(O, MAI); ++ return; ++ ++ case MachineOperand::MO_ConstantPoolIndex: ++ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" ++ << MO.getIndex(); ++ return; ++ ++ case MachineOperand::MO_ExternalSymbol: ++ O << MO.getSymbolName(); ++ return; ++ ++ case MachineOperand::MO_GlobalAddress: ++ getSymbol(MO.getGlobal())->print(O, MAI); ++ return; ++ case MachineOperand::MO_JumpTableIndex: ++ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' ++ << MO.getIndex(); ++ return; ++ ++ default: ++ O << ""; ++ return; ++ } ++} ++ ++/// PrintAsmOperand - Print out an operand for an inline asm expression. ++/// ++bool Sw64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, ++ const char *ExtraCode, raw_ostream &O) { ++ // Print the operand if there is no operand modifier. ++ if (!ExtraCode || !ExtraCode[0]) { ++ printOperand(MI, OpNo, O); ++ return false; ++ } ++ if (ExtraCode && ExtraCode[0]) ++ if (ExtraCode[1] != 0) ++ return true; ++ ++ switch (ExtraCode[0]) { ++ default: ++ return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O); ++ case 'r': ++ printOperand(MI, OpNo, O); ++ return false; ++ } ++ // Otherwise fallback on the default implementation. ++ return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O); ++} ++ ++void Sw64AsmPrinter::emitStartOfAsmFile(Module &M) { ++ if (OutStreamer->hasRawTextSupport()) { ++ OutStreamer->emitRawText(StringRef("\t.set noreorder")); ++ OutStreamer->emitRawText(StringRef("\t.set volatile")); ++ OutStreamer->emitRawText(StringRef("\t.set noat")); ++ OutStreamer->emitRawText(StringRef("\t.set nomacro")); ++ } ++} ++ ++bool Sw64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, ++ unsigned OpNum, ++ const char *ExtraCode, ++ raw_ostream &O) { ++ assert(OpNum + 1 < MI->getNumOperands() && "Insufficient operands"); ++ ++ const MachineOperand &BaseMO = MI->getOperand(OpNum); ++ ++ assert(BaseMO.isReg() && ++ "Unexpected base pointer for inline asm memory operand."); ++ ++ if (ExtraCode && ExtraCode[0]) { ++ return true; // Unknown modifier. 
++ } ++ ++ O << "0(" << Sw64InstPrinter::getRegisterName(BaseMO.getReg()) << ")"; ++ ++ return false; ++} ++ ++#include "Sw64GenMCPseudoLowering.inc" ++ ++void Sw64AsmPrinter::emitInstruction(const MachineInstr *MI) { ++ if (MI->isDebugValue()) ++ return; ++ SmallString<128> Str; ++ raw_svector_ostream O(Str); ++ ++ if (emitPseudoExpansionLowering(*OutStreamer, MI)) ++ return; ++ ++ if (MI->getOpcode() == Sw64::STQ_C || MI->getOpcode() == Sw64::STL_C) ++ OutStreamer->emitCodeAlignment(Align(8), &getSubtargetInfo()); ++ ++ MCInst TmpInst; ++ MCInstLowering.Lower(MI, TmpInst); ++ ++ EmitToStreamer(*OutStreamer, TmpInst); ++} ++ ++// Force static initialization. ++extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSw64AsmPrinter() { ++ RegisterAsmPrinter X(getTheSw64Target()); ++} +diff --git a/llvm/lib/Target/Sw64/Sw64BranchSelector.cpp b/llvm/lib/Target/Sw64/Sw64BranchSelector.cpp +new file mode 100644 +index 000000000..cd1c3c4c3 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64BranchSelector.cpp +@@ -0,0 +1,81 @@ ++//===-- Sw64BranchSelector.cpp - Convert Pseudo branchs ----------*- C++ -*-=// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// Replace Pseudo COND_BRANCH_* with their appropriate real branch ++// Simplified version of the PPC Branch Selector ++// ++//===----------------------------------------------------------------------===// ++ ++#include "Sw64.h" ++#include "Sw64InstrInfo.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/ADT/Statistic.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineFunctionPass.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineModuleInfo.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/IR/DebugLoc.h" ++#include "llvm/MC/MCAsmInfo.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Target/TargetMachine.h" ++#include ++#include ++#include ++#include ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "sw_64-branch-expansion" ++ ++namespace { ++class Sw64BranchSelection : public MachineFunctionPass { ++public: ++ static char ID; ++ ++ Sw64BranchSelection() : MachineFunctionPass(ID) { ++ initializeSw64BranchSelectionPass(*PassRegistry::getPassRegistry()); ++ } ++ ++ StringRef getPassName() const override { ++ return "Sw64 Branch Expansion Pass"; ++ } ++ ++ bool runOnMachineFunction(MachineFunction &F) override; ++ ++ MachineFunctionProperties getRequiredProperties() const override { ++ return MachineFunctionProperties().set( ++ MachineFunctionProperties::Property::NoVRegs); ++ } ++}; ++} // end of anonymous namespace ++ ++char Sw64BranchSelection::ID = 0; ++ ++INITIALIZE_PASS(Sw64BranchSelection, DEBUG_TYPE, ++ "Expand out of range branch instructions and fix forbidden" ++ " slot hazards", ++ false, false) ++ ++/// Returns a pass that clears pipeline hazards. 
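PrintAsmMemoryOperand above prints an "m"-constrained inline-assembly operand in zero-offset base-register form, 0($reg), and rejects any constraint modifier. A minimal, hypothetical user-side example of what that serves; the ldl mnemonic is assumed here to be the 64-bit load, and the constraint handling is the point:

// Compiled for the sw_64 target, the %1 operand below is printed by
// PrintAsmMemoryOperand as "0($<base register>)".
long loadViaInlineAsm(long *P) {
  long V;
  asm("ldl %0, %1" : "=r"(V) : "m"(*P));
  return V;
}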
++FunctionPass *llvm::createSw64BranchSelection() { ++ return new Sw64BranchSelection(); ++} ++ ++bool Sw64BranchSelection::runOnMachineFunction(MachineFunction &F) { ++ ++ return true; ++} +diff --git a/llvm/lib/Target/Sw64/Sw64CallingConv.td b/llvm/lib/Target/Sw64/Sw64CallingConv.td +new file mode 100644 +index 000000000..7b0275c8c +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64CallingConv.td +@@ -0,0 +1,72 @@ ++//===- Sw64CallingConv.td - Calling Conventions for Sw64 -*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// This describes the calling conventions for Sw64 architecture. ++//===----------------------------------------------------------------------===// ++ ++/// CCIfSubtarget - Match if the current subtarget has a feature F. ++class CCIfSubtarget ++ : CCIf" ++ "(State.getMachineFunction().getSubtarget()).", ++ F), A>; ++ ++//===----------------------------------------------------------------------===// ++// Sw64 Return Value Calling Convention ++//===----------------------------------------------------------------------===// ++def RetCC_Sw64 : CallingConv<[ ++ // i64 is returned in register R0 ++ // R1 is an llvm extension, I don't know what gcc does ++ CCIfType<[i64], CCAssignToReg<[R0, R1, R2, R3]>>, ++ ++ // f32 / f64 are returned in F0/F1 ++ CCIfType<[f32, f64], CCAssignToReg<[F0, F1]>>, ++ ++ CCIfType<[v32i8, v16i16, v8i32, v4i64, v4f32, v4f64], ++ CCAssignToReg<[V0, V1]>>, ++ ++ CCIfSubtarget<"hasSIMD()", ++ CCIfType<[v32i8, v16i16, v8i32, v4i64, v4f32, v4f64], ++ CCAssignToReg<[F0, F1]>>> ++]>; ++ ++// In soft-mode, register R16+R17, instead of R0+R1, is used to return a long ++// double value. ++def RetCC_F128Soft_Sw64 : CallingConv<[ ++ CCIfType<[i64], CCAssignToReg<[R16, R17]>> ++]>; ++ ++ ++//===----------------------------------------------------------------------===// ++// Sw64 Argument Calling Conventions ++//===----------------------------------------------------------------------===// ++def CC_Sw64 : CallingConv<[ ++ // The first 6 arguments are passed in registers, whether integer or ++ // floating-point ++ ++ CCIfType<[i64], CCAssignToRegWithShadow<[R16, R17, R18, R19, R20, R21], ++ [F16, F17, F18, F19, F20, F21]>>, ++ ++ CCIfType<[f32, f64], CCAssignToRegWithShadow<[F16, F17, F18, F19, F20, F21], ++ [R16, R17, R18, R19, R20, R21]>>, ++ ++ CCIfType<[v32i8, v16i16, v8i32, v4i64, v4f64, v4f32], ++ CCAssignToRegWithShadow<[V16, V17, V18, V19, V20, V21], ++ [R16, R17, R18, R19, R20, R21]>>, ++ ++ // Stack slots are 8 bytes in size and 8-byte aligned. 
++ CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>>, ++ ++ CCIfSubtarget<"hasSIMD()", ++ CCIfType<[v32i8, v16i16, v8i32, v4i64, v4f32, v4f64], ++ CCAssignToReg<[F16, F17, F18, F19, F20, F21]>>> ++]>; ++ ++// CalleeSavedRegs ++def CSR_I64 : CalleeSavedRegs<(add (sequence "R%u", 9, 14), R15, R26)>; ++ ++def CSR_F64 : CalleeSavedRegs<(add CSR_I64, (sequence "F%u", 2, 9))>; +diff --git a/llvm/lib/Target/Sw64/Sw64CombineLS.cpp b/llvm/lib/Target/Sw64/Sw64CombineLS.cpp +new file mode 100644 +index 000000000..fbf63b69f +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64CombineLS.cpp +@@ -0,0 +1,63 @@ ++#include "MCTargetDesc/Sw64BaseInfo.h" ++#include "Sw64.h" ++#include "Sw64FrameLowering.h" ++#include "Sw64Subtarget.h" ++#include "llvm/ADT/SetOperations.h" ++#include "llvm/ADT/Statistic.h" ++#include "llvm/CodeGen/MachineFunctionPass.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/TargetInstrInfo.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Target/TargetMachine.h" ++ ++#define DEBUG_TYPE "sw_64-combineLS" ++ ++using namespace llvm; ++ ++namespace llvm { ++ ++struct Sw64CombineLS : public MachineFunctionPass { ++ /// Target machine description which we query for reg. names, data ++ /// layout, etc. ++ static char ID; ++ Sw64CombineLS() : MachineFunctionPass(ID) {} ++ ++ StringRef getPassName() const { return "Sw64 Combine Load Store insn"; } ++ ++ bool runOnMachineFunction(MachineFunction &F) { ++ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++ ++FI) { ++ MachineBasicBlock &MBB = *FI; ++ MachineBasicBlock::iterator MBBI = MBB.begin(); ++ MachineBasicBlock::iterator NMBBI = std::next(MBBI); ++ NMBBI++; ++ for (; NMBBI != MBB.end(); MBBI++, NMBBI++) { ++ ++ MachineInstr &MI = *MBBI, &NMI = *NMBBI; ++ DebugLoc DL = MI.getDebugLoc(); ++ const MCInstrDesc &MCID = NMI.getDesc(); ++ ++ if (MI.getOpcode() == Sw64::LDA && ++ (MCID.mayLoad() || MCID.mayStore())) { ++ LLVM_DEBUG(dbgs() << "combining Load/Store instr\n"; MI.dump(); ++ dbgs() << "\n"; NMI.dump(); dbgs() << "\n"); ++ ++ if (MI.getOperand(0).getReg() == NMI.getOperand(2).getReg() && ++ NMI.getOperand(2).getReg() != Sw64::R30) { ++ BuildMI(MBB, MBBI, DL, MCID) ++ .add(NMI.getOperand(0)) ++ .add(MI.getOperand(1)) ++ .add(MI.getOperand(0)); ++ NMI.eraseFromParent(); ++ MI.eraseFromParent(); ++ } ++ } ++ } ++ } ++ return true; ++ } ++}; ++char Sw64CombineLS::ID = 0; ++} // end namespace llvm ++ ++FunctionPass *llvm::createSw64CombineLSPass() { return new Sw64CombineLS(); } +diff --git a/llvm/lib/Target/Sw64/Sw64ExpandPseudo.cpp b/llvm/lib/Target/Sw64/Sw64ExpandPseudo.cpp +new file mode 100644 +index 000000000..aa62eb029 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64ExpandPseudo.cpp +@@ -0,0 +1,1176 @@ ++//===-- Sw64ExpandPseudoInsts.cpp - Expand pseudo instructions ------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains a pass that expands pseudo instructions into target ++// instructions to allow proper scheduling, if-conversion, and other late ++// optimizations. This pass should be run after register allocation but before ++// the post-regalloc scheduling pass. ++// ++// This is currently only used for expanding atomic pseudos after register ++// allocation. 
We do this to avoid the fast register allocator introducing ++// spills between ll and sc. These stores cause some other implementations to ++// abort the atomic RMW sequence. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "MCTargetDesc/Sw64BaseInfo.h" ++#include "Sw64.h" ++#include "Sw64InstrInfo.h" ++#include "Sw64Subtarget.h" ++#include "llvm/CodeGen/LivePhysRegs.h" ++#include "llvm/CodeGen/MachineFunctionPass.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/Support/Debug.h" ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "sw_64-pseudo" ++namespace llvm { ++extern const MCInstrDesc Sw64Insts[]; ++} ++ ++namespace { ++class Sw64ExpandPseudo : public MachineFunctionPass { ++public: ++ static char ID; ++ Sw64ExpandPseudo() : MachineFunctionPass(ID) {} ++ ++ const Sw64InstrInfo *TII; ++ const Sw64Subtarget *STI; ++ ++ bool runOnMachineFunction(MachineFunction &Fn) override; ++ ++ MachineFunctionProperties getRequiredProperties() const override { ++ return MachineFunctionProperties().set( ++ MachineFunctionProperties::Property::NoVRegs); ++ } ++ ++ StringRef getPassName() const override { ++ return "Sw64 pseudo instruction expansion pass"; ++ } ++ ++private: ++ bool expandAtomicCmpSwap(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI, ++ unsigned Size); ++ bool expandAtomicCmpSwapSubword(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ ++ bool expandAtomicBinOp(MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI, unsigned Size); ++ bool expandAtomicBinOpSubword(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ bool expandCurGpdisp(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI); ++ ++ bool expandLoadAddress(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ ++ bool expandLoadCPAddress(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ ++ bool expandLdihInstPair(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI, ++ unsigned FlagsHi, unsigned SecondOpcode, ++ unsigned FlagsLo = Sw64II::MO_GPREL_LO, ++ unsigned srcReg = Sw64::R29); ++ ++ bool expandLoadGotAddress(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ ++ bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NMBB); ++ ++ bool expandMBB(MachineBasicBlock &MBB); ++ bool expandIntReduceSum(MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ bool expandFPReduceSum(MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++}; ++char Sw64ExpandPseudo::ID = 0; ++} // namespace ++ ++bool Sw64ExpandPseudo::expandAtomicCmpSwapSubword( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ MachineFunction *MF = BB.getParent(); ++ DebugLoc DL = I->getDebugLoc(); ++ ++ unsigned LL, SC, BEQ; ++ unsigned BIC, BIS; ++ unsigned EXTL, INSL, MASKL; ++ unsigned mask; ++ BIS = Sw64::BISr; ++ BIC = Sw64::BICi; ++ BEQ = Sw64::BEQ; ++ LL = Sw64 ::LDQ_L; ++ SC = Sw64::STQ_C; ++ Register Dest = I->getOperand(0).getReg(); ++ Register Ptr = I->getOperand(1).getReg(); ++ Register 
OldVal = I->getOperand(2).getReg(); ++ Register NewVal = I->getOperand(3).getReg(); ++ // add ++ Register Reg_bic = I->getOperand(4).getReg(); ++ Register Reg_ins = I->getOperand(5).getReg(); ++ Register LockVal = I->getOperand(6).getReg(); ++ Register Reg_cmp = I->getOperand(7).getReg(); ++ Register Reg_mas = I->getOperand(8).getReg(); ++ switch (I->getOpcode()) { ++ case Sw64::ATOMIC_CMP_SWAP_I8_POSTRA: ++ mask = 1; ++ EXTL = Sw64::EXTLBr; ++ INSL = Sw64::INSLBr; ++ MASKL = Sw64::MASKLBr; ++ break; ++ case Sw64::ATOMIC_CMP_SWAP_I16_POSTRA: ++ mask = 3; ++ EXTL = Sw64::EXTLHr; ++ INSL = Sw64::INSLHr; ++ MASKL = Sw64::MASKLHr; ++ break; ++ default: ++ llvm_unreachable("Unknown pseudo atomic!"); ++ } ++ ++ // if (STI->hasCore4() && STI->enableCasInst()) { ++ // BuildMI(BB, I, DL, TII->get(BIC), Reg_bic).addReg(Ptr).addImm(7); ++ // BuildMI(BB, I, DL, TII->get(INSL), NewVal).addReg(NewVal).addReg(Ptr); ++ // BuildMI(BB, I, DL, TII->get(INSL), OldVal).addReg(OldVal).addReg(Ptr); ++ // BuildMI(BB, I, DL, TII->get(Sw64::LDL), ++ // LockVal).addImm(0).addReg(Reg_bic); BuildMI(BB, I, DL, TII->get(EXTL), ++ // Dest).addReg(LockVal).addReg(Ptr); BuildMI(BB, I, DL, TII->get(MASKL), ++ // Reg_mas).addReg(LockVal).addReg(Ptr); BuildMI(BB, I, DL, ++ // TII->get(Sw64::BISr), NewVal).addReg(Reg_mas).addReg(NewVal); ++ // BuildMI(BB, I, DL, TII->get(Sw64::BISr), ++ // OldVal).addReg(Reg_mas).addReg(OldVal); BuildMI(BB, I, DL, ++ // TII->get(Sw64::LDA), Ptr).addImm(0).addReg(Reg_bic); BuildMI(BB, I, DL, ++ // TII->get(Sw64::CASL)).addReg(NewVal).addReg(OldVal).addReg(Ptr); ++ // I->eraseFromParent(); // The instruction is gone now. ++ // return true; ++ // } ++ ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, loopMBB); ++ MF->insert(It, exitMBB); ++ ++ exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ BB.addSuccessor(loopMBB, BranchProbability::getOne()); ++ loopMBB->addSuccessor(loopMBB); ++ loopMBB->addSuccessor(exitMBB); ++ loopMBB->normalizeSuccProbs(); ++ ++ // memb ++ BuildMI(loopMBB, DL, TII->get(Sw64::MB)); ++ ++ // bic ++ BuildMI(loopMBB, DL, TII->get(BIC), Reg_bic).addReg(Ptr).addImm(7); ++ ++ // inslh ++ BuildMI(loopMBB, DL, TII->get(INSL), Reg_ins).addReg(NewVal).addReg(Ptr); ++ ++ // lldl ++ BuildMI(loopMBB, DL, TII->get(LL), LockVal).addImm(0).addReg(Reg_bic); ++ ++ // extlh ++ BuildMI(loopMBB, DL, TII->get(EXTL), Dest).addReg(LockVal).addReg(Ptr); ++ ++ // cmpeq ++ // zapnot ++ BuildMI(loopMBB, DL, TII->get(Sw64::ZAPNOTi), OldVal) ++ .addReg(OldVal) ++ .addImm(mask); ++ BuildMI(loopMBB, DL, TII->get(Sw64::ZAPNOTi), Dest).addReg(Dest).addImm(mask); ++ BuildMI(loopMBB, DL, TII->get(Sw64::CMPEQr), Reg_cmp) ++ .addReg(OldVal) ++ .addReg(Dest); ++ ++ if (STI->hasCore4()) ++ // beq ++ BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(Reg_cmp).addMBB(exitMBB); ++ else ++ // wr_f ++ BuildMI(loopMBB, DL, TII->get(Sw64::WR_F)).addReg(Reg_cmp); ++ ++ // masklh ++ BuildMI(loopMBB, DL, TII->get(MASKL), Reg_mas).addReg(LockVal).addReg(Ptr); ++ ++ // bis ++ BuildMI(loopMBB, DL, TII->get(BIS), Reg_ins).addReg(Reg_mas).addReg(Reg_ins); ++ ++ // lstw ++ BuildMI(loopMBB, DL, TII->get(SC)).addReg(Reg_ins).addImm(0).addReg(Reg_bic); ++ ++ if (!STI->hasCore4()) ++ // rd_f ++ BuildMI(loopMBB, DL, TII->get(Sw64::RD_F)).addReg(Reg_ins); ++ ++ 
// beq ++ BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(Reg_cmp).addMBB(exitMBB); ++ ++ // beq ++ BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(Reg_ins).addMBB(loopMBB); ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); // The instruction is gone now. ++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *loopMBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ return true; ++} ++ ++bool Sw64ExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI, ++ unsigned Size) { ++ MachineFunction *MF = BB.getParent(); ++ DebugLoc DL = I->getDebugLoc(); ++ unsigned LL, SC; ++ unsigned BEQ = Sw64::BEQ; ++ ++ if (Size == 4) { ++ LL = Sw64 ::LDL_L; ++ SC = Sw64::STL_C; ++ } else { ++ LL = Sw64::LDQ_L; ++ SC = Sw64::STQ_C; ++ } ++ ++ Register Dest = I->getOperand(0).getReg(); ++ Register Ptr = I->getOperand(1).getReg(); ++ Register OldVal = I->getOperand(2).getReg(); ++ Register NewVal = I->getOperand(3).getReg(); ++ Register Scratch = I->getOperand(4).getReg(); ++ // add ++ Register Reg_cmp = I->getOperand(5).getReg(); ++ ++ // insert new blocks after the current block ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, loop1MBB); ++ MF->insert(It, exitMBB); ++ ++ // Transfer the remainder of BB and its successor edges to exitMBB. ++ exitMBB->splice(exitMBB->begin(), &BB, ++ std::next(MachineBasicBlock::iterator(I)), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ // thisMBB: ++ // ... ++ // fallthrough --> loop1MBB ++ BB.addSuccessor(loop1MBB, BranchProbability::getOne()); ++ ++ loop1MBB->addSuccessor(loop1MBB); ++ loop1MBB->addSuccessor(exitMBB); ++ loop1MBB->normalizeSuccProbs(); ++ ++ // memb ++ BuildMI(loop1MBB, DL, TII->get(Sw64::MB)); ++ ++ // ldi ++ BuildMI(loop1MBB, DL, TII->get(Sw64::LDA), Ptr).addImm(0).addReg(Ptr); ++ ++ // lldw ++ BuildMI(loop1MBB, DL, TII->get(LL), Dest).addImm(0).addReg(Ptr); ++ ++ // zapnot ++ if (Size == 4) { ++ BuildMI(loop1MBB, DL, TII->get(Sw64::ZAPNOTi), OldVal) ++ .addReg(OldVal) ++ .addImm(15); ++ BuildMI(loop1MBB, DL, TII->get(Sw64::ZAPNOTi), Dest) ++ .addReg(Dest) ++ .addImm(15); ++ } ++ ++ // cmpeq ++ BuildMI(loop1MBB, DL, TII->get(Sw64::CMPEQr)) ++ .addReg(Reg_cmp) ++ .addReg(OldVal) ++ .addReg(Dest); ++ ++ if (STI->hasCore4()) ++ // beq ++ BuildMI(loop1MBB, DL, TII->get(BEQ)).addReg(Reg_cmp).addMBB(exitMBB); ++ else ++ // wr_f ++ BuildMI(loop1MBB, DL, TII->get(Sw64::WR_F)).addReg(Reg_cmp); ++ ++ // mov ++ BuildMI(loop1MBB, DL, TII->get(Sw64::BISr), Scratch) ++ .addReg(NewVal) ++ .addReg(NewVal); ++ ++ // lstw ++ BuildMI(loop1MBB, DL, TII->get(SC)).addReg(Scratch).addImm(0).addReg(Ptr); ++ ++ if (!STI->hasCore4()) ++ // rd_f ++ BuildMI(loop1MBB, DL, TII->get(Sw64::RD_F)).addReg(Scratch); ++ ++ // beq ++ BuildMI(loop1MBB, DL, TII->get(BEQ)).addReg(Reg_cmp).addMBB(exitMBB); ++ ++ BuildMI(loop1MBB, DL, TII->get(BEQ)).addReg(Scratch).addMBB(loop1MBB); ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); // The instruction is gone now. 
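The expansion that ends above is the machine-level shape of a strong compare-and-swap: load-locked the current value, compare it with the expected value, branch to the exit block on mismatch, otherwise store-conditionally the new value and loop if the reservation was lost (on core4 the failure test is a plain beq after cmpeq, on older cores it goes through the wr_f/rd_f lock-flag pair). For orientation only, the same operation at the C++ level, matching the Size == 4 path:

#include <atomic>
#include <cstdint>

// What the 32-bit compare-and-swap pseudo ultimately implements: store Desired
// and return true if *Mem still held Expected, otherwise load the current
// value back into Expected and return false.
bool cmpSwap32(std::atomic<int32_t> &Mem, int32_t &Expected, int32_t Desired) {
  return Mem.compare_exchange_strong(Expected, Desired);
}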
++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *loop1MBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ ++ return true; ++} ++ ++bool Sw64ExpandPseudo::expandAtomicBinOpSubword( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ MachineFunction *MF = BB.getParent(); ++ DebugLoc DL = I->getDebugLoc(); ++ unsigned LL, SC, ZERO, BEQ; ++ unsigned EXTL, INSL, MASKL; ++ ++ unsigned WR_F, RD_F, LDA, BIS, BIC; ++ WR_F = Sw64::WR_F; ++ RD_F = Sw64::RD_F; ++ LDA = Sw64::LDA; ++ BIS = Sw64::BISr; ++ BIC = Sw64::BICi; ++ LL = Sw64::LDQ_L; ++ SC = Sw64::STQ_C; ++ ZERO = Sw64::R31; ++ BEQ = Sw64::BEQ; ++ ++ Register OldVal = I->getOperand(0).getReg(); ++ Register Ptr = I->getOperand(1).getReg(); ++ Register Incr = I->getOperand(2).getReg(); ++ Register StoreVal = I->getOperand(3).getReg(); ++ // add ++ Register LockVal = I->getOperand(4).getReg(); ++ Register Reg_bic = I->getOperand(5).getReg(); ++ Register cmpres = I->getOperand(6).getReg(); ++ ++ unsigned Opcode = 0; ++ switch (I->getOpcode()) { ++ case Sw64::ATOMIC_LOAD_ADD_I8_POSTRA: ++ Opcode = Sw64::ADDLr; ++ EXTL = Sw64::EXTLBr; ++ INSL = Sw64::INSLBr; ++ MASKL = Sw64::MASKLBr; ++ break; ++ case Sw64::ATOMIC_LOAD_SUB_I8_POSTRA: ++ Opcode = Sw64::SUBLr; ++ EXTL = Sw64::EXTLBr; ++ INSL = Sw64::INSLBr; ++ MASKL = Sw64::MASKLBr; ++ break; ++ case Sw64::ATOMIC_LOAD_AND_I8_POSTRA: ++ Opcode = Sw64::ANDr; ++ EXTL = Sw64::EXTLBr; ++ INSL = Sw64::INSLBr; ++ MASKL = Sw64::MASKLBr; ++ break; ++ case Sw64::ATOMIC_LOAD_OR_I8_POSTRA: ++ Opcode = Sw64::BISr; ++ EXTL = Sw64::EXTLBr; ++ INSL = Sw64::INSLBr; ++ MASKL = Sw64::MASKLBr; ++ break; ++ case Sw64::ATOMIC_LOAD_XOR_I8_POSTRA: ++ Opcode = Sw64::XORr; ++ EXTL = Sw64::EXTLBr; ++ INSL = Sw64::INSLBr; ++ MASKL = Sw64::MASKLBr; ++ break; ++ case Sw64::ATOMIC_SWAP_I8_POSTRA: ++ EXTL = Sw64::EXTLBr; ++ INSL = Sw64::INSLBr; ++ MASKL = Sw64::MASKLBr; ++ break; ++ case Sw64::ATOMIC_LOAD_ADD_I16_POSTRA: ++ Opcode = Sw64::ADDQr; ++ EXTL = Sw64::EXTLHr; ++ INSL = Sw64::INSLHr; ++ MASKL = Sw64::MASKLHr; ++ break; ++ case Sw64::ATOMIC_LOAD_SUB_I16_POSTRA: ++ Opcode = Sw64::SUBQr; ++ EXTL = Sw64::EXTLHr; ++ INSL = Sw64::INSLHr; ++ MASKL = Sw64::MASKLHr; ++ break; ++ case Sw64::ATOMIC_LOAD_AND_I16_POSTRA: ++ Opcode = Sw64::ANDr; ++ EXTL = Sw64::EXTLHr; ++ INSL = Sw64::INSLHr; ++ MASKL = Sw64::MASKLHr; ++ break; ++ case Sw64::ATOMIC_LOAD_OR_I16_POSTRA: ++ Opcode = Sw64::BISr; ++ EXTL = Sw64::EXTLHr; ++ INSL = Sw64::INSLHr; ++ MASKL = Sw64::MASKLHr; ++ break; ++ case Sw64::ATOMIC_LOAD_XOR_I16_POSTRA: ++ Opcode = Sw64::XORr; ++ EXTL = Sw64::EXTLHr; ++ INSL = Sw64::INSLHr; ++ MASKL = Sw64::MASKLHr; ++ break; ++ case Sw64::ATOMIC_SWAP_I16_POSTRA: ++ EXTL = Sw64::EXTLHr; ++ INSL = Sw64::INSLHr; ++ MASKL = Sw64::MASKLHr; ++ break; ++ case Sw64::ATOMIC_LOAD_UMAX_I8_POSTRA: ++ case Sw64::ATOMIC_LOAD_MAX_I8_POSTRA: ++ case Sw64::ATOMIC_LOAD_UMIN_I8_POSTRA: ++ case Sw64::ATOMIC_LOAD_MIN_I8_POSTRA: ++ case Sw64::ATOMIC_LOAD_NAND_I8_POSTRA: ++ EXTL = Sw64::EXTLBr; ++ INSL = Sw64::INSLBr; ++ MASKL = Sw64::MASKLBr; ++ break; ++ case Sw64::ATOMIC_LOAD_UMAX_I16_POSTRA: ++ case Sw64::ATOMIC_LOAD_MAX_I16_POSTRA: ++ case Sw64::ATOMIC_LOAD_UMIN_I16_POSTRA: ++ case Sw64::ATOMIC_LOAD_MIN_I16_POSTRA: ++ case Sw64::ATOMIC_LOAD_NAND_I16_POSTRA: ++ EXTL = Sw64::EXTLHr; ++ INSL = Sw64::INSLHr; ++ MASKL = Sw64::MASKLHr; ++ break; ++ default: ++ llvm_unreachable("Unknown pseudo atomic!"); ++ } ++ ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *loopMBB = 
MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, loopMBB); ++ MF->insert(It, exitMBB); ++ ++ exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ BB.addSuccessor(loopMBB, BranchProbability::getOne()); ++ loopMBB->addSuccessor(loopMBB); ++ loopMBB->addSuccessor(exitMBB); ++ loopMBB->normalizeSuccProbs(); ++ ++ // memb ++ BuildMI(loopMBB, DL, TII->get(Sw64::MB)); ++ ++ // bic ++ BuildMI(loopMBB, DL, TII->get(BIC), Reg_bic).addReg(Ptr).addImm(7); ++ ++ // lldl ++ BuildMI(loopMBB, DL, TII->get(LL), LockVal).addImm(0).addReg(Reg_bic); ++ ++ // ldi ++ BuildMI(loopMBB, DL, TII->get(LDA), StoreVal).addImm(1).addReg(ZERO); ++ ++ if (!STI->hasCore4()) ++ // wr_f ++ BuildMI(loopMBB, DL, TII->get(WR_F)).addReg(StoreVal); ++ ++ // extlh ++ BuildMI(loopMBB, DL, TII->get(EXTL), OldVal).addReg(LockVal).addReg(Ptr); ++ ++ // if (getTargetMachine().getOptLevel() == CodeGenOpt::None) ++ // BuildMI(BB, DL, TII->get(EXTL)).addReg(OldVal, ++ // RegState::EarlyClobber).addReg(LockVal).addReg(Ptr); ++ // else ++ BuildMI(loopMBB, DL, TII->get(EXTL), OldVal).addReg(LockVal).addReg(Ptr); ++ ++ // BinOpcode ++ // Use a tmp reg since the src and dst reg of ORNOT op shall not be the same ++ // one for unknown reason. ++ switch (I->getOpcode()) { ++ case Sw64::ATOMIC_LOAD_UMAX_I8_POSTRA: ++ // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 ++ // seleq cmpres, OldVal, Incr, StoreVal -- StoreVal = cmpres == 0 ? OldVal : ++ // Incr ++ BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) ++ .addReg(cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ break; ++ case Sw64::ATOMIC_LOAD_MAX_I8_POSTRA: ++ // cmplt OldVal, Incr, cmpres ++ // seleq cmpres, OldVal, Incr, StoreVal ++ BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) ++ .addReg(cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ break; ++ case Sw64::ATOMIC_LOAD_UMIN_I8_POSTRA: ++ // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 ++ // seleq cmpres, Incr, OldVal, StoreVal -- StoreVal = cmpres == 0 ? Incr : ++ // OldVal ++ BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) ++ .addReg(cmpres) ++ .addReg(Incr) ++ .addReg(OldVal); ++ break; ++ case Sw64::ATOMIC_LOAD_MIN_I8_POSTRA: ++ // cmplt OldVal, Incr, cmpres ++ // seleq cmpres, Incr, OldVal, StoreVal ++ BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) ++ .addReg(cmpres) ++ .addReg(Incr) ++ .addReg(OldVal); ++ break; ++ case Sw64::ATOMIC_LOAD_NAND_I8_POSTRA: ++ // and OldVal, Incr, andres ++ // ornot andres, 0, StoreVal ++ BuildMI(loopMBB, DL, TII->get(Sw64::ANDr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::ORNOTr), StoreVal) ++ .addReg(Sw64::R31) ++ .addReg(cmpres); ++ break; ++ case Sw64::ATOMIC_LOAD_UMAX_I16_POSTRA: ++ // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 ++ // seleq cmpres, OldVal, Incr, StoreVal -- StoreVal = cmpres == 0 ? 
OldVal : ++ // Incr ++ BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) ++ .addReg(cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ break; ++ case Sw64::ATOMIC_LOAD_MAX_I16_POSTRA: ++ // cmplt OldVal, Incr, cmpres ++ // seleq cmpres, OldVal, Incr, StoreVal ++ BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) ++ .addReg(cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ break; ++ case Sw64::ATOMIC_LOAD_UMIN_I16_POSTRA: ++ // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 ++ // seleq cmpres, Incr, OldVal, StoreVal -- StoreVal = cmpres == 0 ? Incr : ++ // OldVal ++ BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) ++ .addReg(cmpres) ++ .addReg(Incr) ++ .addReg(OldVal); ++ break; ++ case Sw64::ATOMIC_LOAD_MIN_I16_POSTRA: ++ // cmplt OldVal, Incr, cmpres ++ // seleq cmpres, Incr, OldVal, StoreVal ++ BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) ++ .addReg(cmpres) ++ .addReg(Incr) ++ .addReg(OldVal); ++ break; ++ case Sw64::ATOMIC_LOAD_NAND_I16_POSTRA: ++ // and OldVal, Incr, andres ++ // ornot andres, 0, StoreVal ++ BuildMI(loopMBB, DL, TII->get(Sw64::ANDr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::ORNOTr), StoreVal) ++ .addReg(Sw64::R31) ++ .addReg(cmpres); ++ break; ++ default: ++ if (Opcode) { ++ // if (getTargetMachine().getOptLevel() == CodeGenOpt::None) ++ // BuildMI(BB, DL, TII->get(Opcode)).addReg(StoreVal, ++ // RegState::EarlyClobber) ++ // .addReg(OldVal) ++ // .addReg(Incr); ++ // else ++ BuildMI(loopMBB, DL, TII->get(Opcode), StoreVal) ++ .addReg(OldVal) ++ .addReg(Incr); ++ } else { ++ BuildMI(loopMBB, DL, TII->get(Sw64::BISr), StoreVal) ++ .addReg(Incr) ++ .addReg(Incr); ++ } ++ } ++ ++ // inslh ++ BuildMI(loopMBB, DL, TII->get(INSL), StoreVal).addReg(StoreVal).addReg(Ptr); ++ ++ // masklh ++ BuildMI(loopMBB, DL, TII->get(MASKL), LockVal).addReg(LockVal).addReg(Ptr); ++ ++ // bis ++ BuildMI(loopMBB, DL, TII->get(BIS), LockVal).addReg(LockVal).addReg(StoreVal); ++ ++ // lstl ++ BuildMI(loopMBB, DL, TII->get(SC)).addReg(LockVal).addImm(0).addReg(Reg_bic); ++ ++ if (!STI->hasCore4()) ++ // rd_f ++ BuildMI(loopMBB, DL, TII->get(RD_F)).addReg(LockVal); ++ ++ // beq ++ BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(LockVal).addMBB(loopMBB); ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); // The instruction is gone now. 
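++ // The subword RMW has been lowered to an LL/SC loop on the containing
++ // aligned quadword, using extl/insl/maskl to update only the byte or
++ // halfword lane; the new blocks still need their live-in sets rebuilt.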
++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *loopMBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ ++ return true; ++} ++ ++bool Sw64ExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI, ++ unsigned Size) { ++ MachineFunction *MF = BB.getParent(); ++ DebugLoc DL = I->getDebugLoc(); ++ unsigned LL, SC; ++ unsigned LDA = Sw64::LDA; ++ unsigned ZERO = Sw64::R31; ++ unsigned BEQ = Sw64::BEQ; ++ ++ if (Size == 4) { ++ LL = Sw64::LDL_L; ++ SC = Sw64::STL_C; ++ } else { ++ LL = Sw64::LDQ_L; ++ SC = Sw64::STQ_C; ++ } ++ ++ Register OldVal = I->getOperand(0).getReg(); ++ Register Ptr = I->getOperand(1).getReg(); ++ Register Incr = I->getOperand(2).getReg(); ++ Register StoreVal = I->getOperand(3).getReg(); ++ Register Scratch1 = I->getOperand(4).getReg(); ++ Register cmpres = I->getOperand(5).getReg(); ++ ++ unsigned Opcode = 0; ++ switch (I->getOpcode()) { ++ case Sw64::ATOMIC_LOAD_ADD_I32_POSTRA: ++ Opcode = Sw64::ADDLr; ++ break; ++ case Sw64::ATOMIC_LOAD_SUB_I32_POSTRA: ++ Opcode = Sw64::SUBLr; ++ break; ++ case Sw64::ATOMIC_LOAD_AND_I32_POSTRA: ++ Opcode = Sw64::ANDr; ++ break; ++ case Sw64::ATOMIC_LOAD_OR_I32_POSTRA: ++ Opcode = Sw64::BISr; ++ break; ++ case Sw64::ATOMIC_LOAD_XOR_I32_POSTRA: ++ Opcode = Sw64::XORr; ++ break; ++ case Sw64::ATOMIC_SWAP_I32_POSTRA: ++ break; ++ case Sw64::ATOMIC_LOAD_ADD_I64_POSTRA: ++ Opcode = Sw64::ADDQr; ++ break; ++ case Sw64::ATOMIC_LOAD_SUB_I64_POSTRA: ++ Opcode = Sw64::SUBQr; ++ break; ++ case Sw64::ATOMIC_LOAD_AND_I64_POSTRA: ++ Opcode = Sw64::ANDr; ++ break; ++ case Sw64::ATOMIC_LOAD_OR_I64_POSTRA: ++ Opcode = Sw64::BISr; ++ break; ++ case Sw64::ATOMIC_LOAD_XOR_I64_POSTRA: ++ Opcode = Sw64::XORr; ++ break; ++ case Sw64::ATOMIC_SWAP_I64_POSTRA: ++ break; ++ case Sw64::ATOMIC_LOAD_UMAX_I32_POSTRA: ++ case Sw64::ATOMIC_LOAD_MAX_I32_POSTRA: ++ case Sw64::ATOMIC_LOAD_UMIN_I32_POSTRA: ++ case Sw64::ATOMIC_LOAD_MIN_I32_POSTRA: ++ case Sw64::ATOMIC_LOAD_NAND_I32_POSTRA: ++ ++ case Sw64::ATOMIC_LOAD_UMAX_I64_POSTRA: ++ case Sw64::ATOMIC_LOAD_MAX_I64_POSTRA: ++ case Sw64::ATOMIC_LOAD_UMIN_I64_POSTRA: ++ case Sw64::ATOMIC_LOAD_MIN_I64_POSTRA: ++ case Sw64::ATOMIC_LOAD_NAND_I64_POSTRA: ++ break; ++ default: ++ llvm_unreachable("Unknown pseudo atomic!"); ++ } ++ ++ // insert new blocks after the current block ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, loopMBB); ++ MF->insert(It, exitMBB); ++ ++ // Transfer the remainder of BB and its successor edges to exitMBB. 
++ exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ BB.addSuccessor(loopMBB, BranchProbability::getOne()); ++ loopMBB->addSuccessor(loopMBB); ++ loopMBB->addSuccessor(exitMBB); ++ loopMBB->normalizeSuccProbs(); ++ ++ // memb ++ BuildMI(loopMBB, DL, TII->get(Sw64::MB)); ++ ++ // ldi ++ BuildMI(loopMBB, DL, TII->get(Sw64::LDA), Ptr).addImm(0).addReg(Ptr); ++ ++ // lldw ++ // if (getTargetMachine().getOptLevel() == CodeGenOpt::None) ++ // BuildMI(BB, DL, TII->get(LL)).addReg(OldVal, ++ // RegState::EarlyClobber).addImm(0).addReg(Ptr); ++ // else ++ BuildMI(loopMBB, DL, TII->get(LL), OldVal).addImm(0).addReg(Ptr); ++ ++ // ldi ++ BuildMI(loopMBB, DL, TII->get(LDA), Scratch1).addImm(1).addReg(ZERO); ++ ++ if (!STI->hasCore4()) ++ // wr_f ++ BuildMI(loopMBB, DL, TII->get(Sw64::WR_F)).addReg(Scratch1); ++ ++ // BinOpcode ++ ++ // Use a tmp reg since the src and dst reg of ORNOT op shall not be the same ++ // one for unknown reason. ++ switch (I->getOpcode()) { ++ case Sw64::ATOMIC_LOAD_UMAX_I64_POSTRA: ++ // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 ++ // seleq cmpres, OldVal, Incr, StoreVal -- StoreVal = cmpres == 0 ? OldVal : ++ // Incr ++ BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) ++ .addReg(cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ break; ++ case Sw64::ATOMIC_LOAD_MAX_I64_POSTRA: ++ // cmplt OldVal, Incr, cmpres ++ // seleq cmpres, OldVal, Incr, StoreVal ++ BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) ++ .addReg(cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ break; ++ case Sw64::ATOMIC_LOAD_UMIN_I64_POSTRA: ++ // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 ++ // seleq cmpres, Incr, OldVal, StoreVal -- StoreVal = cmpres == 0 ? Incr : ++ // OldVal ++ BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) ++ .addReg(cmpres) ++ .addReg(Incr) ++ .addReg(OldVal); ++ break; ++ case Sw64::ATOMIC_LOAD_MIN_I64_POSTRA: ++ // cmplt OldVal, Incr, cmpres ++ // seleq cmpres, Incr, OldVal, StoreVal ++ BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) ++ .addReg(cmpres) ++ .addReg(Incr) ++ .addReg(OldVal); ++ break; ++ case Sw64::ATOMIC_LOAD_NAND_I64_POSTRA: ++ // and OldVal, Incr, cmpres ++ // ornot cmpres, 0, StoreVal ++ BuildMI(loopMBB, DL, TII->get(Sw64::ANDr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::ORNOTr), StoreVal) ++ .addReg(Sw64::R31) ++ .addReg(cmpres); ++ break; ++ case Sw64::ATOMIC_LOAD_UMAX_I32_POSTRA: ++ // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 ++ // seleq cmpres, OldVal, Incr, StoreVal -- StoreVal = cmpres == 0 ? 
OldVal : ++ // Incr ++ BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) ++ .addReg(cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ break; ++ case Sw64::ATOMIC_LOAD_MAX_I32_POSTRA: ++ // cmplt OldVal, Incr, cmpres ++ // seleq cmpres, OldVal, Incr, StoreVal ++ BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) ++ .addReg(cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ break; ++ case Sw64::ATOMIC_LOAD_UMIN_I32_POSTRA: ++ // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 ++ // seleq cmpres, Incr, OldVal, StoreVal -- StoreVal = cmpres == 0 ? Incr : ++ // OldVal ++ BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) ++ .addReg(cmpres) ++ .addReg(Incr) ++ .addReg(OldVal); ++ break; ++ case Sw64::ATOMIC_LOAD_MIN_I32_POSTRA: ++ // cmplt OldVal, Incr, cmpres ++ // seleq cmpres, Incr, OldVal, StoreVal ++ BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) ++ .addReg(cmpres) ++ .addReg(Incr) ++ .addReg(OldVal); ++ break; ++ case Sw64::ATOMIC_LOAD_NAND_I32_POSTRA: ++ // and OldVal, Incr, cmpres ++ // ornot cmpres, 0, StoreVal ++ BuildMI(loopMBB, DL, TII->get(Sw64::ANDr), cmpres) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Sw64::ORNOTr), StoreVal) ++ .addReg(Sw64::R31) ++ .addReg(cmpres); ++ break; ++ default: ++ if (Opcode) { ++ // if (getTargetMachine().getOptLevel() == CodeGenOpt::None) ++ // BuildMI(BB, DL, TII->get(BinOpcode)).addReg(StoreVal, ++ // RegState::EarlyClobber).addReg(OldVal).addReg(Incr); ++ // else ++ BuildMI(loopMBB, DL, TII->get(Opcode), StoreVal) ++ .addReg(OldVal) ++ .addReg(Incr); ++ } else { ++ BuildMI(loopMBB, DL, TII->get(Sw64::BISr), StoreVal) ++ .addReg(Incr) ++ .addReg(Incr); ++ } ++ } ++ ++ // lstw ++ BuildMI(loopMBB, DL, TII->get(SC)).addReg(StoreVal).addImm(0).addReg(Ptr); ++ ++ if (!STI->hasCore4()) ++ // rd_f ++ BuildMI(loopMBB, DL, TII->get(Sw64::RD_F)).addReg(StoreVal); ++ ++ // beq ++ BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(StoreVal).addMBB(loopMBB); ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); // The instruction is gone now. 
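++ // For word and quadword RMW ops the store-conditional (or rd_f on core3)
++ // leaves its status in StoreVal, so the beq above retries until it succeeds.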
++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *loopMBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ ++ return true; ++} ++ ++bool Sw64ExpandPseudo::expandMI(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NMBB) { ++ ++ bool Modified = false; ++ ++ switch (MBBI->getOpcode()) { ++ case Sw64::ATOMIC_CMP_SWAP_I32_POSTRA: ++ return expandAtomicCmpSwap(MBB, MBBI, NMBB, 4); ++ case Sw64::ATOMIC_CMP_SWAP_I64_POSTRA: ++ return expandAtomicCmpSwap(MBB, MBBI, NMBB, 8); ++ ++ case Sw64::ATOMIC_CMP_SWAP_I8_POSTRA: ++ case Sw64::ATOMIC_CMP_SWAP_I16_POSTRA: ++ return expandAtomicCmpSwapSubword(MBB, MBBI, NMBB); ++ ++ case Sw64::ATOMIC_SWAP_I8_POSTRA: ++ case Sw64::ATOMIC_SWAP_I16_POSTRA: ++ case Sw64::ATOMIC_LOAD_ADD_I8_POSTRA: ++ case Sw64::ATOMIC_LOAD_ADD_I16_POSTRA: ++ case Sw64::ATOMIC_LOAD_SUB_I8_POSTRA: ++ case Sw64::ATOMIC_LOAD_SUB_I16_POSTRA: ++ case Sw64::ATOMIC_LOAD_AND_I8_POSTRA: ++ case Sw64::ATOMIC_LOAD_AND_I16_POSTRA: ++ case Sw64::ATOMIC_LOAD_OR_I8_POSTRA: ++ case Sw64::ATOMIC_LOAD_OR_I16_POSTRA: ++ case Sw64::ATOMIC_LOAD_XOR_I8_POSTRA: ++ case Sw64::ATOMIC_LOAD_XOR_I16_POSTRA: ++ ++ case Sw64::ATOMIC_LOAD_UMAX_I16_POSTRA: ++ case Sw64::ATOMIC_LOAD_MAX_I16_POSTRA: ++ case Sw64::ATOMIC_LOAD_UMIN_I16_POSTRA: ++ case Sw64::ATOMIC_LOAD_MIN_I16_POSTRA: ++ case Sw64::ATOMIC_LOAD_NAND_I16_POSTRA: ++ case Sw64::ATOMIC_LOAD_UMAX_I8_POSTRA: ++ case Sw64::ATOMIC_LOAD_MAX_I8_POSTRA: ++ case Sw64::ATOMIC_LOAD_UMIN_I8_POSTRA: ++ case Sw64::ATOMIC_LOAD_MIN_I8_POSTRA: ++ case Sw64::ATOMIC_LOAD_NAND_I8_POSTRA: ++ return expandAtomicBinOpSubword(MBB, MBBI, NMBB); ++ ++ case Sw64::ATOMIC_LOAD_ADD_I32_POSTRA: ++ case Sw64::ATOMIC_LOAD_SUB_I32_POSTRA: ++ case Sw64::ATOMIC_LOAD_AND_I32_POSTRA: ++ case Sw64::ATOMIC_LOAD_OR_I32_POSTRA: ++ case Sw64::ATOMIC_LOAD_XOR_I32_POSTRA: ++ case Sw64::ATOMIC_SWAP_I32_POSTRA: ++ case Sw64::ATOMIC_LOAD_UMAX_I32_POSTRA: ++ case Sw64::ATOMIC_LOAD_MAX_I32_POSTRA: ++ case Sw64::ATOMIC_LOAD_UMIN_I32_POSTRA: ++ case Sw64::ATOMIC_LOAD_MIN_I32_POSTRA: ++ case Sw64::ATOMIC_LOAD_NAND_I32_POSTRA: ++ return expandAtomicBinOp(MBB, MBBI, NMBB, 4); ++ ++ case Sw64::ATOMIC_LOAD_ADD_I64_POSTRA: ++ case Sw64::ATOMIC_LOAD_SUB_I64_POSTRA: ++ case Sw64::ATOMIC_LOAD_AND_I64_POSTRA: ++ case Sw64::ATOMIC_LOAD_OR_I64_POSTRA: ++ case Sw64::ATOMIC_LOAD_XOR_I64_POSTRA: ++ case Sw64::ATOMIC_SWAP_I64_POSTRA: ++ case Sw64::ATOMIC_LOAD_UMAX_I64_POSTRA: ++ case Sw64::ATOMIC_LOAD_MAX_I64_POSTRA: ++ case Sw64::ATOMIC_LOAD_UMIN_I64_POSTRA: ++ case Sw64::ATOMIC_LOAD_MIN_I64_POSTRA: ++ case Sw64::ATOMIC_LOAD_NAND_I64_POSTRA: ++ return expandAtomicBinOp(MBB, MBBI, NMBB, 8); ++ case Sw64::MOVProgPCGp: ++ case Sw64::MOVaddrPCGp: ++ return expandCurGpdisp(MBB, MBBI); ++ case Sw64::LOADlitSym: ++ case Sw64::LOADlit: ++ return expandLoadGotAddress(MBB, MBBI, NMBB); ++ case Sw64::LOADconstant: ++ return expandLoadCPAddress(MBB, MBBI, NMBB); ++ case Sw64::MOVaddrCP: ++ case Sw64::MOVaddrBA: ++ case Sw64::MOVaddrGP: ++ case Sw64::MOVaddrEXT: ++ case Sw64::MOVaddrJT: ++ return expandLoadAddress(MBB, MBBI, NMBB); ++ default: ++ return Modified; ++ } ++} ++ ++bool Sw64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) { ++ bool Modified = false; ++ ++ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); ++ while (MBBI != E) { ++ MachineBasicBlock::iterator NMBBI = std::next(MBBI); ++ Modified |= expandMI(MBB, MBBI, NMBBI); ++ MBBI = NMBBI; ++ } ++ ++ return Modified; ++} ++ ++bool Sw64ExpandPseudo::expandCurGpdisp(MachineBasicBlock &MBB, ++ 
MachineBasicBlock::iterator MBBI) { ++ ++ MachineInstr &MI = *MBBI; ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ MachineOperand addr = MI.getOperand(0); ++ MachineOperand dstReg = MI.getOperand(2); ++ ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), Sw64::R29) ++ .addGlobalAddress(addr.getGlobal(), 0, Sw64II::MO_GPDISP_HI) ++ .add(dstReg); ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDA), Sw64::R29) ++ .addGlobalAddress(addr.getGlobal(), 0, Sw64II::MO_GPDISP_LO) ++ .addReg(Sw64::R29); ++ ++ MI.eraseFromParent(); ++ return true; ++} ++ ++bool Sw64ExpandPseudo::expandLoadCPAddress( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI) { ++ return expandLdihInstPair(MBB, MBBI, NextMBBI, Sw64II::MO_GPREL_HI, ++ Sw64::LDL); ++} ++ ++bool Sw64ExpandPseudo::expandLoadGotAddress( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI) { ++ LLVM_DEBUG(dbgs() << "expand Loadlit LoadlitSym" << *MBBI); ++ MachineInstr &MI = *MBBI; ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned DestReg = MI.getOperand(0).getReg(); ++ const MachineOperand &Symbol = MI.getOperand(1); ++ ++ MachineFunction *MF = MBB.getParent(); ++ switch (MF->getTarget().getCodeModel()) { ++ default: ++ report_fatal_error("Unsupported code model for lowering"); ++ case CodeModel::Small: { ++ if (Symbol.isSymbol()) ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) ++ .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL) ++ .addReg(Sw64::R29); ++ else ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) ++ .addDisp(Symbol, 0, Sw64II::MO_LITERAL) ++ .addReg(Sw64::R29); ++ break; ++ } ++ ++ case CodeModel::Medium: { ++ if (Symbol.isSymbol()) { ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), DestReg) ++ .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL_GOT) ++ .addReg(Sw64::R29); ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) ++ .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL) ++ .addReg(DestReg); ++ } else { ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), DestReg) ++ .addDisp(Symbol, 0, Sw64II::MO_LITERAL_GOT) ++ .addReg(Sw64::R29); ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) ++ .addDisp(Symbol, 0, Sw64II::MO_LITERAL) ++ .addReg(DestReg); ++ } ++ break; ++ } ++ } ++ MI.eraseFromParent(); ++ return true; ++} ++ ++bool Sw64ExpandPseudo::expandLoadAddress( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI) { ++ return expandLdihInstPair(MBB, MBBI, NextMBBI, Sw64II::MO_GPREL_HI, ++ Sw64::LDA); ++} ++ ++bool Sw64ExpandPseudo::expandLdihInstPair(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI, ++ unsigned FlagsHi, ++ unsigned SecondOpcode, ++ unsigned FlagsLo, unsigned srcReg) { ++ MachineInstr &MI = *MBBI; ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned DestReg = MI.getOperand(0).getReg(); ++ const MachineOperand &Symbol = MI.getOperand(1); ++ ++ MachineInstrBuilder MIB = ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), DestReg) ++ .add(Symbol) ++ .addReg(srcReg); ++ MachineInstrBuilder MIB1 = ++ BuildMI(MBB, MBBI, DL, TII->get(SecondOpcode), DestReg) ++ .add(Symbol) ++ .addReg(DestReg); ++ ++ MachineInstr *tmpInst = MIB.getInstr(); ++ MachineInstr *tmpInst1 = MIB1.getInstr(); ++ ++ MachineOperand &SymbolHi = tmpInst->getOperand(1); ++ MachineOperand &SymbolLo = tmpInst1->getOperand(1); ++ ++ SymbolHi.addTargetFlag(FlagsHi); ++ SymbolLo.addTargetFlag(FlagsLo); ++ ++ MI.eraseFromParent(); 
++ return true; ++} ++ ++bool Sw64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) { ++ STI = &static_cast(MF.getSubtarget()); ++ TII = STI->getInstrInfo(); ++ ++ bool Modified = false; ++ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; ++ ++MFI) ++ Modified |= expandMBB(*MFI); ++ ++ if (Modified) ++ MF.RenumberBlocks(); ++ ++ return Modified; ++} ++ ++/// createSw64ExpandPseudoPass - returns an instance of the pseudo instruction ++/// expansion pass. ++FunctionPass *llvm::createSw64ExpandPseudoPass() { ++ return new Sw64ExpandPseudo(); ++} +diff --git a/llvm/lib/Target/Sw64/Sw64ExpandPseudo2.cpp b/llvm/lib/Target/Sw64/Sw64ExpandPseudo2.cpp +new file mode 100644 +index 000000000..550c2f520 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64ExpandPseudo2.cpp +@@ -0,0 +1,334 @@ ++//===-- Sw64ExpandPseudoInsts.cpp - Expand pseudo instructions ------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains a pass that expands pseudo instructions into target ++// instructions to allow proper scheduling, if-conversion, and other late ++// optimizations. This pass should be run after register allocation but before ++// the post-regalloc scheduling pass. ++// ++// This is currently only used for expanding atomic pseudos after register ++// allocation. We do this to avoid the fast register allocator introducing ++// spills between ll and sc. These stores cause some other implementations to ++// abort the atomic RMW sequence. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "MCTargetDesc/Sw64BaseInfo.h" ++#include "Sw64.h" ++#include "Sw64InstrInfo.h" ++#include "Sw64Subtarget.h" ++#include "llvm/CodeGen/LivePhysRegs.h" ++#include "llvm/CodeGen/MachineFunctionPass.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/Support/Debug.h" ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "sw_64-pseudo2" ++namespace llvm { ++extern const MCInstrDesc Sw64Insts[]; ++} ++ ++static cl::opt ++ ExpandPre("expand-presched", ++ cl::desc("Expand pseudo Inst before PostRA schedule"), ++ cl::init(true), cl::Hidden); ++ ++namespace { ++class Sw64ExpandPseudo2 : public MachineFunctionPass { ++public: ++ static char ID; ++ Sw64ExpandPseudo2() : MachineFunctionPass(ID) {} ++ ++ const Sw64InstrInfo *TII; ++ const Sw64Subtarget *STI; ++ ++ bool runOnMachineFunction(MachineFunction &Fn) override; ++ ++ MachineFunctionProperties getRequiredProperties() const override { ++ return MachineFunctionProperties().set( ++ MachineFunctionProperties::Property::NoVRegs); ++ } ++ ++ StringRef getPassName() const override { ++ return "Sw64 pseudo instruction expansion pass2"; ++ } ++ ++private: ++ bool expandPseudoCall(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ ++ bool expandLoadAddress(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ ++ bool expandLoadCPAddress(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ ++ bool expandLdihInstPair(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI, ++ unsigned FlagsHi, unsigned SecondOpcode, ++ unsigned FlagsLo = Sw64II::MO_GPREL_LO, ++ 
unsigned srcReg = Sw64::R29); ++ ++ bool expandLoadGotAddress(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ ++ bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NMBB); ++ ++ bool expandMBB(MachineBasicBlock &MBB); ++}; ++char Sw64ExpandPseudo2::ID = 0; ++} // namespace ++ ++bool Sw64ExpandPseudo2::expandMI(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NMBB) { ++ bool Modified = false; ++ ++ if (ExpandPre) { ++ switch (MBBI->getOpcode()) { ++ case Sw64::LOADlitSym: ++ case Sw64::LOADlit: ++ return expandLoadGotAddress(MBB, MBBI, NMBB); ++ case Sw64::LOADconstant: ++ return expandLoadCPAddress(MBB, MBBI, NMBB); ++ case Sw64::MOVaddrCP: ++ case Sw64::MOVaddrBA: ++ case Sw64::MOVaddrGP: ++ case Sw64::MOVaddrEXT: ++ case Sw64::MOVaddrJT: ++ return expandLoadAddress(MBB, MBBI, NMBB); ++ case Sw64::PseudoCall: ++ return expandPseudoCall(MBB, MBBI, NMBB); ++ default: ++ return Modified; ++ } ++ } else { ++ switch (MBBI->getOpcode()) { ++ case Sw64::PseudoCall: ++ return expandPseudoCall(MBB, MBBI, NMBB); ++ default: ++ return Modified; ++ } ++ } ++} ++ ++bool Sw64ExpandPseudo2::expandMBB(MachineBasicBlock &MBB) { ++ bool Modified = false; ++ ++ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); ++ while (MBBI != E) { ++ MachineBasicBlock::iterator NMBBI = std::next(MBBI); ++ Modified |= expandMI(MBB, MBBI, NMBBI); ++ MBBI = NMBBI; ++ } ++ ++ return Modified; ++} ++ ++bool Sw64ExpandPseudo2::expandLoadCPAddress( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI) { ++ return expandLdihInstPair(MBB, MBBI, NextMBBI, Sw64II::MO_GPREL_HI, ++ Sw64::LDL); ++} ++ ++bool Sw64ExpandPseudo2::expandLoadAddress( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI) { ++ return expandLdihInstPair(MBB, MBBI, NextMBBI, Sw64II::MO_GPREL_HI, ++ Sw64::LDA); ++} ++ ++bool Sw64ExpandPseudo2::expandLdihInstPair( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI, unsigned FlagsHi, ++ unsigned SecondOpcode, unsigned FlagsLo, unsigned srcReg) { ++ MachineInstr &MI = *MBBI; ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned DestReg = MI.getOperand(0).getReg(); ++ const MachineOperand &Symbol = MI.getOperand(1); ++ ++ MachineInstrBuilder MIB = ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), DestReg) ++ .add(Symbol) ++ .addReg(srcReg); ++ MachineInstrBuilder MIB1 = ++ BuildMI(MBB, MBBI, DL, TII->get(SecondOpcode), DestReg) ++ .add(Symbol) ++ .addReg(DestReg); ++ ++ MachineInstr *tmpInst = MIB.getInstr(); ++ MachineInstr *tmpInst1 = MIB1.getInstr(); ++ ++ MachineOperand &SymbolHi = tmpInst->getOperand(1); ++ MachineOperand &SymbolLo = tmpInst1->getOperand(1); ++ ++ SymbolHi.addTargetFlag(FlagsHi); ++ SymbolLo.addTargetFlag(FlagsLo); ++ ++ MI.eraseFromParent(); ++ return true; ++} ++ ++// while expanding call, we can choose adding lituse ++// for linker relax or not. 
Adding flags for sortRelocs ++bool Sw64ExpandPseudo2::expandPseudoCall( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI) { ++ LLVM_DEBUG(dbgs() << "expand PseudoCall" << *MBBI); ++ ++ MachineFunction *MF = MBB.getParent(); ++ const auto &STI = MF->getSubtarget(); ++ const Sw64FrameLowering *SFL = STI.getFrameLowering(); ++ ++ MachineInstr &MI = *MBBI; ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Lflags = 0; // load flags ++ unsigned Cflags = 0; // Call flags ++ ++ MachineOperand Symbol = MI.getOperand(0); ++ switch (MF->getTarget().getCodeModel()) { ++ default: ++ report_fatal_error("Unsupported code model for lowering"); ++ case CodeModel::Small: { ++ if (Symbol.isGlobal()) { ++ int64_t Offs = Symbol.getOffset(); ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), Sw64::R27) ++ .addGlobalAddress(Symbol.getGlobal(), Offs, ++ Lflags | Sw64II::MO_LITERAL | ++ Sw64II::MO_LITERAL_BASE) ++ .addReg(Sw64::R29); ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::JSR), Sw64::R26) ++ .addReg(Sw64::R27) ++ .addGlobalAddress(Symbol.getGlobal(), 0, ++ Cflags | Sw64II::MO_HINT | Sw64II::MO_LITUSE); ++ } else if (Symbol.isSymbol()) { ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), Sw64::R27) ++ .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL) ++ .addReg(Sw64::R29); ++ const Sw64TargetLowering *STL = STI.getTargetLowering(); ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::JSR), Sw64::R26) ++ .addReg(Sw64::R27) ++ .addExternalSymbol(Symbol.getSymbolName()); ++ } ++ break; ++ } ++ ++ case CodeModel::Medium: { ++ if (Symbol.isGlobal()) { ++ int64_t Offs = Symbol.getOffset(); ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), Sw64::R27) ++ .addGlobalAddress(Symbol.getGlobal(), Offs, Sw64II::MO_LITERAL_GOT) ++ .addReg(Sw64::R29); ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), Sw64::R27) ++ .addGlobalAddress(Symbol.getGlobal(), Offs, ++ Lflags | Sw64II::MO_LITERAL | ++ Sw64II::MO_LITERAL_BASE) ++ .addReg(Sw64::R27); ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::JSR), Sw64::R26) ++ .addReg(Sw64::R27) ++ .addGlobalAddress(Symbol.getGlobal(), 0, ++ Cflags | Sw64II::MO_HINT | Sw64II::MO_LITUSE); ++ } else if (Symbol.isSymbol()) { ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), Sw64::R27) ++ .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL_GOT) ++ .addReg(Sw64::R29); ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), Sw64::R27) ++ .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL) ++ .addReg(Sw64::R27); ++ const Sw64TargetLowering *STL = STI.getTargetLowering(); ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::JSR), Sw64::R26) ++ .addReg(Sw64::R27) ++ .addExternalSymbol(Symbol.getSymbolName()); ++ } ++ break; ++ } ++ } ++ ++ MI.eraseFromParent(); ++ return true; ++} ++ ++bool Sw64ExpandPseudo2::expandLoadGotAddress( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI) { ++ LLVM_DEBUG(dbgs() << "expand Loadlit LoadlitSym" << *MBBI); ++ MachineInstr &MI = *MBBI; ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned DestReg = MI.getOperand(0).getReg(); ++ const MachineOperand &Symbol = MI.getOperand(1); ++ ++ MachineFunction *MF = MBB.getParent(); ++ switch (MF->getTarget().getCodeModel()) { ++ default: ++ report_fatal_error("Unsupported code model for lowering"); ++ case CodeModel::Small: { ++ if (Symbol.isSymbol()) ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) ++ .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL) ++ .addReg(Sw64::R29); ++ else ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) 
++ .addDisp(Symbol, 0, Sw64II::MO_LITERAL) ++ .addReg(Sw64::R29); ++ break; ++ } ++ ++ case CodeModel::Medium: { ++ if (Symbol.isSymbol()) { ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), DestReg) ++ .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL_GOT) ++ .addReg(Sw64::R29); ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) ++ .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL) ++ .addReg(DestReg); ++ } else { ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), DestReg) ++ .addDisp(Symbol, 0, Sw64II::MO_LITERAL_GOT) ++ .addReg(Sw64::R29); ++ BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) ++ .addDisp(Symbol, 0, Sw64II::MO_LITERAL) ++ .addReg(DestReg); ++ } ++ break; ++ } ++ } ++ MI.eraseFromParent(); ++ return true; ++} ++ ++bool Sw64ExpandPseudo2::runOnMachineFunction(MachineFunction &MF) { ++ STI = &static_cast(MF.getSubtarget()); ++ TII = STI->getInstrInfo(); ++ ++ bool Modified = false; ++ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; ++ ++MFI) ++ Modified |= expandMBB(*MFI); ++ ++ if (Modified) ++ MF.RenumberBlocks(); ++ ++ return Modified; ++} ++ ++/// createSw64ExpandPseudoPass - returns an instance of the pseudo instruction ++/// expansion pass. ++FunctionPass *llvm::createSw64ExpandPseudo2Pass() { ++ return new Sw64ExpandPseudo2(); ++} +diff --git a/llvm/lib/Target/Sw64/Sw64FrameLowering.cpp b/llvm/lib/Target/Sw64/Sw64FrameLowering.cpp +new file mode 100644 +index 000000000..9030d8ba9 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64FrameLowering.cpp +@@ -0,0 +1,456 @@ ++//=====- Sw64FrameLowering.cpp - Sw64 Frame Information ------*- C++ -*-====// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the Sw64 implementation of TargetFrameLowering class. ++// ++//===----------------------------------------------------------------------===// ++#include "Sw64FrameLowering.h" ++#include "MCTargetDesc/Sw64BaseInfo.h" ++#include "Sw64.h" ++#include "Sw64InstrInfo.h" ++#include "Sw64MachineFunctionInfo.h" ++#include "Sw64Subtarget.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineModuleInfo.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/RegisterScavenging.h" ++#include "llvm/CodeGen/TargetLowering.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/IR/Function.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Target/TargetOptions.h" ++#include // std::sort ++ ++using namespace llvm; ++ ++cl::opt Sw64PG("pg", cl::desc("Support the pg"), cl::init(false)); ++ ++static long getUpper16(long l) { ++ long y = l / Sw64::IMM_MULT; ++ if (l % Sw64::IMM_MULT > Sw64::IMM_HIGH) ++ ++y; ++ else if (l % Sw64::IMM_MULT < Sw64::IMM_LOW) ++ --y; ++ return y; ++} ++ ++static long getLower16(long l) { ++ long h = getUpper16(l); ++ return l - h * Sw64::IMM_MULT; ++} ++ ++// hasFP - Return true if the specified function should have a dedicated frame ++// pointer register. This is true if the function has variable sized allocas or ++// if frame pointer elimination is disabled. 
++// ++bool Sw64FrameLowering::hasFP(const MachineFunction &MF) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const TargetRegisterInfo *TRI = STI.getRegisterInfo(); ++ ++ return MF.getTarget().Options.DisableFramePointerElim(MF) || ++ MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || ++ TRI->hasStackRealignment(MF); ++} ++ ++// hasReservedCallFrame - Under normal circumstances, when a frame pointer is ++// not required, we reserve argument space for call sites in the function ++// immediately on entry to the current function. This eliminates the need for ++// add/sub sp brackets around call sites. Returns true if the call frame is ++// included as part of the stack frame. ++bool Sw64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { ++ return !MF.getFrameInfo().hasVarSizedObjects(); ++} ++ ++bool Sw64FrameLowering::isLeafProc(MachineFunction &MF) const { ++ MachineRegisterInfo &MRI = MF.getRegInfo(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ ++ return !MRI.isPhysRegUsed(Sw64::R29); ++} ++ ++bool Sw64FrameLowering::hasBP(const MachineFunction &MF) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const TargetRegisterInfo *TRI = STI.getRegisterInfo(); ++ ++ return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF); ++} ++ ++void Sw64FrameLowering::emitPrologue(MachineFunction &MF, ++ MachineBasicBlock &MBB) const { ++ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); ++ ++ MachineBasicBlock::iterator MBBI = MBB.begin(); // Prolog goes in entry BB ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ ++ const Sw64InstrInfo &TII = *MF.getSubtarget().getInstrInfo(); ++ const Sw64RegisterInfo &RegInfo = *static_cast( ++ MF.getSubtarget().getRegisterInfo()); ++ // Debug location must be unknown since the first debug location is used ++ // to determine the end of the prologue. ++ DebugLoc dl; ++ ++ // First, compute final stack size. ++ uint64_t StackSize = MFI.getStackSize(); ++ ++ MachineModuleInfo &MMI = MF.getMMI(); ++ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); ++ ++ MBB.addLiveIn(Sw64::R27); ++ int curgpdist = STI.getCurgpdist(); ++ // Handle GOT offset ++ // Now sw_64 won't emit this unless it is necessary. ++ // While it is also useful for DebugInfo test. ++ if (!isLeafProc(MF)) { ++ BuildMI(MBB, MBBI, dl, TII.get(Sw64::MOVProgPCGp)) ++ .addGlobalAddress(&(MF.getFunction())) ++ .addImm(++curgpdist) ++ .addReg(Sw64::R27); ++ ++ BuildMI(MBB, MBBI, dl, TII.get(Sw64::ALTENT)) ++ .addGlobalAddress(&(MF.getFunction())); ++ } ++ ++ // No need to allocate space on the stack. ++ if (StackSize == 0 && !MFI.adjustsStack()) ++ return; ++ ++ if (Sw64Mieee) { ++ if (!Sw64DeleteNop) ++ BuildMI(MBB, MBBI, dl, TII.get(Sw64::NOP)); ++ } ++ if (Sw64PG) { ++ BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDL), Sw64::R28) ++ .addExternalSymbol("_mcount") ++ .addReg(Sw64::R29); ++ if (Sw64Mieee) { ++ if (!Sw64DeleteNop) ++ BuildMI(MBB, MBBI, dl, TII.get(Sw64::NOP)); ++ BuildMI(MBB, MBBI, dl, TII.get(Sw64::JSR)) ++ .addReg(Sw64::R28) ++ .addReg(Sw64::R28) ++ .addExternalSymbol("_mcount"); ++ if (!Sw64DeleteNop) ++ BuildMI(MBB, MBBI, dl, TII.get(Sw64::NOP)); ++ } else ++ BuildMI(MBB, MBBI, dl, TII.get(Sw64::JSR)) ++ .addReg(Sw64::R28) ++ .addReg(Sw64::R28) ++ .addExternalSymbol("_mcount"); ++ } ++ ++ unsigned Align = getStackAlignment(); ++ StackSize = (StackSize + Align - 1) / Align * Align; ++ ++ // Update frame info to pretend that this is part of the stack... 
++ MFI.setStackSize(StackSize); ++ ++ // adjust stack pointer: r30 -= numbytes ++ int AdjustStackSize = -StackSize; ++ if (AdjustStackSize >= Sw64::IMM_LOW) { ++ BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDA), Sw64::R30) ++ .addImm(AdjustStackSize) ++ .addReg(Sw64::R30); ++ } else if (getUpper16(AdjustStackSize) >= Sw64::IMM_LOW) { ++ BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDAH), Sw64::R30) ++ .addImm(getUpper16(AdjustStackSize)) ++ .addReg(Sw64::R30); ++ BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDA), Sw64::R30) ++ .addImm(getLower16(AdjustStackSize)) ++ .addReg(Sw64::R30); ++ } else { ++ report_fatal_error("Too big a stack frame at " + Twine(-AdjustStackSize)); ++ } ++ ++ // emit ".cfi_def_cfa_offset StackSize" ++ unsigned CFIIndex = MF.addFrameInst( ++ MCCFIInstruction::cfiDefCfaOffset(nullptr, -AdjustStackSize)); ++ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) ++ .addCFIIndex(CFIIndex); ++ ++ std::vector &CSI = MFI.getCalleeSavedInfo(); ++ ++ if (!CSI.empty()) { ++ // Find the instruction past the last instruction that saves a ++ // callee-saved register to the stack. ++ for (unsigned i = 0; i < CSI.size(); ++i) ++ ++MBBI; ++ ++ // Iterate over list of callee-saved registers and emit .cfi_offset ++ // directives. ++ for (std::vector::const_iterator I = CSI.begin(), ++ E = CSI.end(); ++ I != E; ++I) { ++ int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()); ++ unsigned Reg = I->getReg(); ++ unsigned DReg = MRI->getDwarfRegNum(Reg, true); ++ unsigned CFIIndex = MF.addFrameInst( ++ MCCFIInstruction::createOffset(nullptr, DReg, Offset)); ++ ++ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) ++ .addCFIIndex(CFIIndex); ++ } ++ } ++ ++ // if framepointer enabled, set it to point to the stack pointer. ++ // Now if we need to, save the old FP and set the new ++ if (hasFP(MF)) { ++ // This must be the last instr in the prolog ++ BuildMI(MBB, MBBI, dl, TII.get(Sw64::BISr), Sw64::R15) ++ .addReg(Sw64::R30) ++ .addReg(Sw64::R30); ++ ++ // emit ".cfi_def_cfa_register $fp" ++ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( ++ nullptr, MRI->getDwarfRegNum(Sw64::R15, true))); ++ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) ++ .addCFIIndex(CFIIndex); ++ ++ if (RegInfo.hasStackRealignment(MF)) { ++ // ldi -MaxAlign ++ // and -MaxAlign for sp ++ Register VR = MF.getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); ++ ++ assert((Log2(MFI.getMaxAlign()) < 16) && ++ "Function's alignment size requirement is not supported."); ++ int64_t MaxAlign = -(int64_t)MFI.getMaxAlign().value(); ++ BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDA), VR) ++ .addImm(MaxAlign) ++ .addReg(Sw64::R31); ++ BuildMI(MBB, MBBI, dl, TII.get(Sw64::ANDr), Sw64::R30) ++ .addReg(Sw64::R30) ++ .addReg(VR); ++ ++ if (hasBP(MF)) ++ // mov $sp, $14 ++ BuildMI(MBB, MBBI, dl, TII.get(Sw64::BISr), Sw64::R14) ++ .addReg(Sw64::R30) ++ .addReg(Sw64::R30); ++ } ++ } ++} ++ ++void Sw64FrameLowering::emitEpilogue(MachineFunction &MF, ++ MachineBasicBlock &MBB) const { ++ ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); ++ const Sw64InstrInfo &TII = *MF.getSubtarget().getInstrInfo(); ++ DebugLoc dl = MBBI->getDebugLoc(); ++ ++ assert((MBBI->getOpcode() == Sw64::PseudoRet) && ++ "Can only insert epilog into returning blocks"); ++ ++ // Get the number of bytes allocated from the FrameInfo... 
++ uint64_t StackSize = MFI.getStackSize(); ++ // now if we need to, restore the old FP ++ if (hasFP(MF)) { ++ // Find the first instruction that restores a callee-saved register. ++ MachineBasicBlock::iterator I = MBBI; ++ for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i) { ++ --I; ++ } ++ ++ // copy the FP into the SP (discards allocas) ++ BuildMI(MBB, I, dl, TII.get(Sw64::BISr), Sw64::R30) ++ .addReg(Sw64::R15) ++ .addReg(Sw64::R15); ++ } ++ ++ if (StackSize != 0) { ++ if (StackSize <= Sw64::IMM_HIGH) { ++ BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDA), Sw64::R30) ++ .addImm(StackSize) ++ .addReg(Sw64::R30); ++ } else if (getUpper16(StackSize) <= Sw64::IMM_HIGH) { ++ BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDAH), Sw64::R30) ++ .addImm(getUpper16(StackSize)) ++ .addReg(Sw64::R30); ++ BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDA), Sw64::R30) ++ .addImm(getLower16(StackSize)) ++ .addReg(Sw64::R30); ++ } else { ++ report_fatal_error("Too big a stack frame at " + Twine(StackSize)); ++ } ++ } ++} ++ ++StackOffset ++Sw64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, ++ Register &FrameReg) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ ++ if (MFI.isFixedObjectIndex(FI)) ++ FrameReg = hasFP(MF) ? Sw64::R15 : Sw64::R30; ++ else ++ FrameReg = hasBP(MF) ? Sw64::R14 : Sw64::R30; ++ ++ return StackOffset::getFixed(MFI.getObjectOffset(FI) + MFI.getStackSize() - ++ getOffsetOfLocalArea() + ++ MFI.getOffsetAdjustment()); ++} ++ ++// TODO: must be rewrite. ++bool Sw64FrameLowering::spillCalleeSavedRegisters( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ++ ArrayRef CSI, const TargetRegisterInfo *TRI) const { ++ if (CSI.empty()) ++ return true; ++ ++ const TargetInstrInfo &TII = *STI.getInstrInfo(); ++ ++ DebugLoc DL; ++ if (MI != MBB.end() && !MI->isDebugInstr()) ++ DL = MI->getDebugLoc(); ++ for (unsigned i = 0, e = CSI.size(); i != e; ++i) { ++ unsigned Reg = CSI[i].getReg(); ++ MBB.addLiveIn(Reg); ++ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); ++ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RC, TRI, ++ Register()); ++ } ++ return true; ++} ++ ++bool Sw64FrameLowering::restoreCalleeSavedRegisters( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ++ MutableArrayRef CSI, const TargetRegisterInfo *TRI) const { ++ MachineFunction *MF = MBB.getParent(); ++ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); ++ bool AtStart = MI == MBB.begin(); ++ MachineBasicBlock::iterator BeforeI = MI; ++ if (!AtStart) ++ --BeforeI; ++ for (unsigned i = 0, e = CSI.size(); i != e; ++i) { ++ unsigned Reg = CSI[i].getReg(); ++ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); ++ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI, ++ Register()); ++ assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); ++ // Insert in reverse order. loadRegFromStackSlot can insert multiple ++ // instructions. 
++ if (AtStart) ++ MI = MBB.begin(); ++ else { ++ MI = BeforeI; ++ ++MI; ++ } ++ } ++ return true; ++} ++ ++// This function eliminates ADJCALLSTACKDOWN, ++// ADJCALLSTACKUP pseudo instructions ++MachineBasicBlock::iterator Sw64FrameLowering::eliminateCallFramePseudoInstr( ++ MachineFunction &MF, MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const { ++ ++ const Sw64InstrInfo &TII = *MF.getSubtarget().getInstrInfo(); ++ ++ if (!hasReservedCallFrame(MF)) { ++ // Turn the adjcallstackdown instruction into 'ldi sp,-sp' and the ++ // adjcallstackup instruction into 'ldi sp,sp' ++ MachineInstr &Old = *I; ++ // FIXME: temporary modify the old value is: Old.getOperand(0).getImm(); ++ uint64_t Amount = Old.getOperand(0).getImm(); ++ if (Amount != 0) { ++ // We need to keep the stack aligned properly. To do this, we round the ++ // amount of space needed for the outgoing arguments up to the next ++ // alignment boundary. ++ unsigned Align = getStackAlignment(); ++ Amount = (Amount + Align - 1) / Align * Align; ++ ++ MachineInstr *New; ++ if (Old.getOpcode() == Sw64::ADJUSTSTACKDOWN) { ++ New = BuildMI(MF, Old.getDebugLoc(), TII.get(Sw64::LDA), Sw64::R30) ++ .addImm(-Amount) ++ .addReg(Sw64::R30); ++ } else { ++ assert(Old.getOpcode() == Sw64::ADJUSTSTACKUP); ++ New = BuildMI(MF, Old.getDebugLoc(), TII.get(Sw64::LDA), Sw64::R30) ++ .addImm(Amount) ++ .addReg(Sw64::R30); ++ } ++ // Replace the pseudo instruction with a new instruction... ++ MBB.insert(I, New); ++ } ++ } ++ ++ return MBB.erase(I); ++} ++ ++/// Mark \p Reg and all registers aliasing it in the bitset. ++static void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs, ++ unsigned Reg) { ++ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); ++ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) ++ SavedRegs.set(*AI); ++} ++ ++// TODO: must be rewrite. ++void Sw64FrameLowering::determineCalleeSaves(MachineFunction &MF, ++ BitVector &SavedRegs, ++ RegScavenger *RS) const { ++ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); ++ // Mark $fp as used if function has dedicated frame pointer. ++ if (hasFP(MF)) ++ setAliasRegs(MF, SavedRegs, Sw64::R15); ++ if (hasBP(MF)) ++ setAliasRegs(MF, SavedRegs, Sw64::R14); ++ ++ // Set scavenging frame index if necessary. ++ uint64_t MaxSPOffset = estimateStackSize(MF); ++ ++ // If there is a variable sized object on the stack, the estimation cannot ++ // account for it. ++ if (isIntN(16, MaxSPOffset) && !MF.getFrameInfo().hasVarSizedObjects()) ++ return; ++} ++ ++// Estimate the size of the stack, including the incoming arguments. We need to ++// account for register spills, local objects, reserved call frame and incoming ++// arguments. This is required to determine the largest possible positive offset ++// from $sp so that it can be determined if an emergency spill slot for stack ++// addresses is required. ++uint64_t Sw64FrameLowering::estimateStackSize(const MachineFunction &MF) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); ++ ++ int64_t Size = 0; ++ ++ // Iterate over fixed sized objects which are incoming arguments. ++ for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) ++ if (MFI.getObjectOffset(I) > 0) ++ Size += MFI.getObjectSize(I); ++ ++ // Conservatively assume all callee-saved registers will be saved. 
++ for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) { ++ unsigned RegSize = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R)); ++ Size = alignTo(Size + RegSize, RegSize); ++ } ++ ++ // Get the size of the rest of the frame objects and any possible reserved ++ // call frame, accounting for alignment. ++ return Size + MFI.estimateStackSize(MF); ++} ++ ++void Sw64FrameLowering::processFunctionBeforeFrameFinalized( ++ MachineFunction &MF, RegScavenger *RS) const { ++ const Sw64RegisterInfo *RegInfo = ++ MF.getSubtarget().getRegisterInfo(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const TargetRegisterClass *RC = &Sw64::GPRCRegClass; ++ if (!isInt<16>(MFI.estimateStackSize(MF))) { ++ int RegScavFI = MFI.CreateStackObject(RegInfo->getSpillSize(*RC), ++ RegInfo->getSpillAlign(*RC), false); ++ RS->addScavengingFrameIndex(RegScavFI); ++ } ++ assert(RS && "requiresRegisterScavenging failed"); ++} +diff --git a/llvm/lib/Target/Sw64/Sw64FrameLowering.h b/llvm/lib/Target/Sw64/Sw64FrameLowering.h +new file mode 100644 +index 000000000..ef0613b44 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64FrameLowering.h +@@ -0,0 +1,82 @@ ++//===-- Sw64FrameLowering.h - Frame info for Sw64 Target ------*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains Sw64 frame information that doesn't fit anywhere else ++// cleanly... ++// ++//===----------------------------------------------------------------------===// ++#ifndef LLVM_LIB_TARGET_SW64_SW64FRAMELOWERING_H ++#define LLVM_LIB_TARGET_SW64_SW64FRAMELOWERING_H ++ ++#include "llvm/CodeGen/TargetFrameLowering.h" ++#include "llvm/Target/TargetMachine.h" ++ ++namespace llvm { ++class Sw64Subtarget; ++ ++class Sw64FrameLowering : public TargetFrameLowering { ++ ++protected: ++ const Sw64Subtarget &STI; ++ ++public: ++ explicit Sw64FrameLowering(const Sw64Subtarget &sti) ++ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(32), 0), ++ STI(sti) { ++ // Do nothing ++ } ++ ++ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into ++ /// the function. 
++ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; ++ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; ++ ++ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, ++ Register &FrameReg) const override; ++ ++ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI, ++ ArrayRef<CalleeSavedInfo> CSI, ++ const TargetRegisterInfo *TRI) const override; ++ ++ bool ++ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI, ++ MutableArrayRef<CalleeSavedInfo> CSI, ++ const TargetRegisterInfo *TRI) const override; ++ ++ MachineBasicBlock::iterator ++ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const override; ++ ++ bool hasFP(const MachineFunction &MF) const override; ++ bool hasReservedCallFrame(const MachineFunction &MF) const override; ++ ++ bool hasBP(const MachineFunction &MF) const; ++ ++private: ++ void emitMieee(MachineFunction &MF) const; ++ ++ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, ++ RegScavenger *RS = nullptr) const override; ++ ++ void processFunctionBeforeFrameFinalized( ++ MachineFunction &MF, RegScavenger *RS = nullptr) const override; ++ ++ //! Stack slot size (4 bytes) ++ static int stackSlotSize() { return 4; } ++ ++ // Returns true if MF is a leaf procedure. ++ bool isLeafProc(MachineFunction &MF) const; ++ ++protected: ++ uint64_t estimateStackSize(const MachineFunction &MF) const; ++}; ++} // namespace llvm ++#endif +diff --git a/llvm/lib/Target/Sw64/Sw64IEEEConstraint.cpp b/llvm/lib/Target/Sw64/Sw64IEEEConstraint.cpp +new file mode 100644 +index 000000000..6689f7c25 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64IEEEConstraint.cpp +@@ -0,0 +1,138 @@ ++#include "MCTargetDesc/Sw64BaseInfo.h" ++#include "Sw64.h" ++#include "Sw64FrameLowering.h" ++#include "Sw64Subtarget.h" ++#include "llvm/ADT/SetOperations.h" ++#include "llvm/ADT/Statistic.h" ++#include "llvm/CodeGen/MachineFunctionPass.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/TargetInstrInfo.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Target/TargetMachine.h" ++ ++#define DEBUG_TYPE "sw_64-ieee-contrain" ++ ++using namespace llvm; ++ ++namespace llvm { ++ ++struct Sw64IEEEConstraint : public MachineFunctionPass { ++ /// Target machine description which we query for reg. names, data ++ /// layout, etc.
++ static char ID; ++ Sw64IEEEConstraint() : MachineFunctionPass(ID) {} ++ ++ StringRef getPassName() const { return "Sw64 Add IEEE Contrain"; } ++ ++ bool runOnMachineFunction(MachineFunction &F); ++}; ++char Sw64IEEEConstraint::ID = 0; ++} // end namespace llvm ++ ++static bool isNeedIEEEConstraint(unsigned opcode) { ++ switch (opcode) { ++ case Sw64::ADDS: ++ case Sw64::SUBS: ++ case Sw64::MULS: ++ case Sw64::DIVS: ++ case Sw64::FMAS: ++ case Sw64::FMSS: ++ case Sw64::FNMAS: ++ case Sw64::FNMSS: ++ case Sw64::ADDD: ++ case Sw64::SUBD: ++ case Sw64::MULD: ++ case Sw64::DIVD: ++ case Sw64::FMAD: ++ case Sw64::FMSD: ++ case Sw64::FNMAD: ++ case Sw64::FNMSD: ++ case Sw64::CVTQS: ++ case Sw64::CVTQT: ++ case Sw64::CVTTQ: ++ case Sw64::CVTTS: ++ case Sw64::CVTST: ++ case Sw64::FCVTWL: ++ case Sw64::FCVTLW: ++ case Sw64::VADDS: ++ case Sw64::VADDD: ++ case Sw64::VSUBS: ++ case Sw64::VSUBD: ++ case Sw64::VMULS: ++ case Sw64::VMULD: ++ case Sw64::VDIVS: ++ case Sw64::VDIVD: ++ case Sw64::VSQRTS: ++ case Sw64::VSQRTD: ++ case Sw64::SQRTSS: ++ case Sw64::SQRTSD: ++ case Sw64::CMPTEQ: ++ case Sw64::CMPTLE: ++ case Sw64::CMPTLT: ++ case Sw64::CMPTUN: ++ case Sw64::VFCMPEQ: ++ case Sw64::VFCMPLE: ++ case Sw64::VFCMPLT: ++ case Sw64::VFCMPUN: ++ case Sw64::VMAS: ++ case Sw64::VMAD: ++ case Sw64::VMSS: ++ case Sw64::VMSD: ++ case Sw64::VNMAS: ++ case Sw64::VNMAD: ++ case Sw64::VNMSS: ++ case Sw64::VNMSD: ++ case Sw64::FSELEQS: ++ case Sw64::FSELNES: ++ case Sw64::FSELLTS: ++ case Sw64::FSELLES: ++ case Sw64::FSELGTS: ++ case Sw64::FSELGES: ++ case Sw64::FSELEQD: ++ case Sw64::FSELNED: ++ case Sw64::FSELLTD: ++ case Sw64::FSELLED: ++ case Sw64::FSELGTD: ++ case Sw64::FSELGED: ++ case Sw64::FCTTDL_G: ++ case Sw64::FCTTDL_P: ++ case Sw64::FCTTDL_N: ++ case Sw64::FCTTDL: ++ return true; ++ } ++ return false; ++} ++ ++bool Sw64IEEEConstraint::runOnMachineFunction(MachineFunction &F) { ++ const Sw64Subtarget &ST = F.getSubtarget(); ++ if (ST.hasCore4()) ++ return false; ++ ++ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { ++ MachineBasicBlock &MBB = *FI; ++ MachineBasicBlock::iterator MBBI = MBB.begin(); ++ MachineBasicBlock::iterator NMBBI = std::next(MBBI); ++ NMBBI++; ++ for (; MBBI != MBB.end(); MBBI++) { ++ if (isNeedIEEEConstraint(MBBI->getOpcode())) { ++ MachineOperand &MO = MBBI->getOperand(0); ++ if (MO.isEarlyClobber()) { ++ LLVM_DEBUG(dbgs() << "getting is EarlyClobber Flag" ++ << MO.isEarlyClobber() << "\n"; ++ MBBI->dump()); ++ continue; ++ } ++ ++ MO.setIsEarlyClobber(); ++ LLVM_DEBUG(dbgs() << "setting is EarlyClobber Flag" ++ << MBBI->getOperand(0).isEarlyClobber() << "\n"; ++ MBBI->dump()); ++ } ++ } ++ } ++ return true; ++} ++ ++FunctionPass *llvm::createSw64IEEEConstraintPass() { ++ return new Sw64IEEEConstraint(); ++} +diff --git a/llvm/lib/Target/Sw64/Sw64ISelDAGToDAG.cpp b/llvm/lib/Target/Sw64/Sw64ISelDAGToDAG.cpp +new file mode 100644 +index 000000000..b911086b8 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64ISelDAGToDAG.cpp +@@ -0,0 +1,1032 @@ ++//===-- Sw64ISelDAGToDAG.cpp - Sw64 pattern matching inst selector ------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines a pattern matching instruction selector for Sw64, ++// converting from a legalized dag to a Sw64 dag. 
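++// It also matches Sw64 addressing modes (frame indices and reg+imm offsets)
++// as well as indexed loads and stores.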
++// ++//===----------------------------------------------------------------------===// ++#define DEBUG_TYPE "sw_64-isel" ++#define PASS_NAME "Sw64 DAG->DAG Pattern Instruction Selection" ++//#include "Sw64MathExtras.h" ++ ++#include "MCTargetDesc/Sw64BaseInfo.h" ++#include "Sw64.h" ++#include "Sw64MachineFunctionInfo.h" ++#include "Sw64Subtarget.h" ++#include "Sw64TargetMachine.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/SelectionDAG.h" ++#include "llvm/CodeGen/SelectionDAGISel.h" ++#include "llvm/IR/Constants.h" ++#include "llvm/IR/DerivedTypes.h" ++#include "llvm/IR/GlobalValue.h" ++#include "llvm/IR/Intrinsics.h" ++#include "llvm/IR/LLVMContext.h" ++#include "llvm/Support/Compiler.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/Target/TargetOptions.h" ++#include ++using namespace llvm; ++ ++namespace { ++ ++//===--------------------------------------------------------------------===// ++/// Sw64DAGToDAGISel - Sw64 specific code to select Sw64 machine ++/// instructions for SelectionDAG operations. ++class Sw64DAGToDAGISel : public SelectionDAGISel { ++ const Sw64Subtarget *Subtarget; ++ ++ static const int64_t IMM_LOW = -32768; ++ static const int64_t IMM_HIGH = 32767; ++ static const int64_t IMM_MULT = 65536; ++ static const int64_t IMM_FULLHIGH = IMM_HIGH + IMM_HIGH * IMM_MULT; ++ static const int64_t IMM_FULLLOW = IMM_LOW + IMM_LOW * IMM_MULT; ++ ++ static int64_t get_ldah16(int64_t x) { ++ int64_t y = x / IMM_MULT; ++ if (x % IMM_MULT > IMM_HIGH) ++ ++y; ++ if (x % IMM_MULT < IMM_LOW) ++ --y; ++ return y; ++ } ++ ++ static int64_t get_lda16(int64_t x) { return x - get_ldah16(x) * IMM_MULT; } ++ ++ /// get_zapImm - Return a zap mask if X is a valid immediate for a zapnot ++ /// instruction (if not, return 0). Note that this code accepts partial ++ /// zap masks. For example (and LHS, 1) is a valid zap, as long we know ++ /// that the bits 1-7 of LHS are already zero. If LHS is non-null, we are ++ /// in checking mode. If LHS is null, we assume that the mask has already ++ /// been validated before. ++ uint64_t get_zapImm(SDValue LHS, uint64_t Constant) const { ++ uint64_t BitsToCheck = 0; ++ unsigned Result = 0; ++ for (unsigned i = 0; i != 8; ++i) { ++ if (((Constant >> 8 * i) & 0xFF) == 0) { ++ // nothing to do. ++ } else { ++ Result |= 1 << i; ++ if (((Constant >> 8 * i) & 0xFF) == 0xFF) { ++ // If the entire byte is set, zapnot the byte. ++ } else if (LHS.getNode() == 0) { ++ // Otherwise, if the mask was previously validated, we know its okay ++ // to zapnot this entire byte even though all the bits aren't set. ++ } else { ++ // Otherwise we don't know that the it's okay to zapnot this entire ++ // byte. Only do this iff we can prove that the missing bits are ++ // already null, so the bytezap doesn't need to really null them. ++ BitsToCheck |= ~Constant & (0xFFULL << 8 * i); ++ } ++ } ++ } ++ ++ // If there are missing bits in a byte (for example, X & 0xEF00), check to ++ // see if the missing bits (0x1000) are already known zero if not, the zap ++ // isn't okay to do, as it won't clear all the required bits. 
++    if (BitsToCheck && !CurDAG->MaskedValueIsZero(
++                           LHS, APInt(LHS.getValueSizeInBits(), BitsToCheck)))
++      return 0;
++
++    return Result;
++  }
++
++  static uint64_t get_zapImm(uint64_t x) {
++    unsigned build = 0;
++    for (int i = 0; i != 8; ++i) {
++      if ((x & 0x00FF) == 0x00FF)
++        build |= 1 << i;
++      else if ((x & 0x00FF) != 0)
++        return 0;
++      x >>= 8;
++    }
++    return build;
++  }
++
++  static uint64_t getNearPower2(uint64_t x) {
++    if (!x)
++      return 0;
++    unsigned at = __builtin_clzll(x);
++    uint64_t complow = 1ULL << (63 - at);
++    uint64_t comphigh = complow << 1;
++    if (x - complow <= comphigh - x)
++      return complow;
++    else
++      return comphigh;
++  }
++
++  static bool chkRemNearPower2(uint64_t x, uint64_t r, bool swap) {
++    uint64_t y = getNearPower2(x);
++    if (swap)
++      return (y - x) == r;
++    else
++      return (x - y) == r;
++  }
++
++public:
++  static char ID;
++
++  Sw64DAGToDAGISel() = delete;
++
++  explicit Sw64DAGToDAGISel(Sw64TargetMachine &TM, CodeGenOpt::Level OptLevel)
++      : SelectionDAGISel(ID, TM, OptLevel), Subtarget(nullptr) {}
++
++  bool runOnMachineFunction(MachineFunction &MF) override {
++    Subtarget = &MF.getSubtarget<Sw64Subtarget>();
++    return SelectionDAGISel::runOnMachineFunction(MF);
++  }
++  /// getI64Imm - Return a target constant with the specified value, of type
++  /// i64.
++  inline SDValue getI64Imm(int64_t Imm, const SDLoc &dl) {
++    return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
++  }
++
++  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
++    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
++  }
++
++  static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm);
++  // Select - Convert the specified operand from a target-independent to a
++  // target-specific node if it hasn't already been changed.
++  void Select(SDNode *N) override;
++  StringRef getPassName() const override {
++    return "Sw64 DAG->DAG Pattern Instruction Selection";
++  }
++
++  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
++  /// inline asm expressions.
++  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
++                                    std::vector<SDValue> &OutOps) override;
++
++  template <MVT::SimpleValueType VT>
++  bool SelectAddSubImm(SDValue N, SDValue &Imm) {
++    return SelectAddSubImm(N, VT, Imm);
++  }
++
++  bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const;
++  bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset,
++                                  unsigned OffsetBits,
++                                  unsigned ShiftAmount) const;
++  bool selectAddrRegImm9(SDValue Addr, SDValue &Base, SDValue &Offset) const;
++  bool selectAddrRegImm16(SDValue Addr, SDValue &Base, SDValue &Offset) const;
++
++  /// abs64 - absolute value of a 64-bit int. Not all environments support
++  /// "abs" on whatever their name for the 64-bit int type is. The absolute
++  /// value of the largest negative number is undefined, as with "abs".
++  inline int64_t abs64(int64_t x) { return (x < 0) ? -x : x; }
++
++// Include the pieces autogenerated from the target description.
++#include "Sw64GenDAGISel.inc"
++
++private:
++  /// getTargetMachine - Return a reference to the TargetMachine, casted
++  /// to the target-specific type.
++  const Sw64TargetMachine &getTargetMachine() {
++    return static_cast<const Sw64TargetMachine &>(TM);
++  }
++
++  bool SelectAddSubImm(SDValue N, MVT VT, SDValue &Imm);
++  bool SelectComplexImm(SDValue N, SDValue &Imm);
++
++#if 0
++  /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
++  /// to the target-specific type.
++ const Sw64InstrInfo *getInstrInfo() { ++ return getTargetMachine().getInstrInfo(); ++ } ++#endif ++ SDNode *getGlobalBaseReg(); ++ SDNode *getGlobalRetAddr(); ++ void SelectCALL(SDNode *Op); ++ bool tryIndexedLoad(SDNode *N); ++ bool tryIndexedStore(SDNode *N); ++ bool selectSExti32(SDValue N, SDValue &Val); ++ bool selectZExti32(SDValue N, SDValue &Val); ++ ++ /// Select constant vector splats. ++ bool selectVSplat(SDNode *N, APInt &Imm, unsigned MinSizeInBits) const; ++ /// Select constant vector splats whose value fits in a given integer. ++ bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, ++ unsigned ImmBitSize) const; ++ /// Select constant vector splats whose value fits in a uimm8. ++ bool selectVSplatUimm8(SDValue N, SDValue &Imm) const; ++ ++ bool selectVSplatSimm8(SDValue N, SDValue &Imm) const; ++ bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; ++ ++ bool selectIntAddrSImm16(SDValue Addr, SDValue &Base, SDValue &Offset) const; ++ ++ bool selectIntAddrSImm12(SDValue Addr, SDValue &Base, SDValue &Offset) const; ++ ++ bool SelectAddrFI(SDValue Addr, SDValue &Base); ++}; ++} // end anonymous namespace ++char Sw64DAGToDAGISel::ID = 0; ++ ++INITIALIZE_PASS(Sw64DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false) ++ ++/// getGlobalBaseReg - Output the instructions required to put the ++/// GOT address into a register. ++/// ++SDNode *Sw64DAGToDAGISel::getGlobalBaseReg() { ++ unsigned GlobalBaseReg = Subtarget->getInstrInfo()->getGlobalBaseReg(MF); ++ // unsigned GlobalBaseReg = ++ // MF->getInfo()->getGlobalBaseReg(*MF); ++ return CurDAG ++ ->getRegister(GlobalBaseReg, ++ getTargetLowering()->getPointerTy(CurDAG->getDataLayout())) ++ .getNode(); ++} ++ ++/// getGlobalRetAddr - Grab the return address. ++/// ++SDNode *Sw64DAGToDAGISel::getGlobalRetAddr() { ++ unsigned GlobalRetAddr = Subtarget->getInstrInfo()->getGlobalRetAddr(MF); ++ // unsigned GlobalRetAddr = ++ // MF->getInfo()->getGlobalRetAddr(*MF); ++ return CurDAG ++ ->getRegister(GlobalRetAddr, ++ getTargetLowering()->getPointerTy(CurDAG->getDataLayout())) ++ .getNode(); ++} ++ ++bool Sw64DAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) { ++ if (auto FIN = dyn_cast(Addr)) { ++ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); ++ return true; ++ } ++ ++ return false; ++} ++ ++// Select - Convert the specified operand from a target-independent to a ++// target-specific node if it hasn't already been changed. ++void Sw64DAGToDAGISel::Select(SDNode *N) { ++ ++ // Dump information about the Node being selected ++ LLVM_DEBUG(errs() << "Selecting: "; N->dump(CurDAG); errs() << "\n"); ++ ++ // If we have a custom node, we already have selected! ++ if (N->isMachineOpcode()) { ++ LLVM_DEBUG(errs() << "== "; N->dump(CurDAG); errs() << "\n"); ++ return; ++ } ++ SDLoc dl(N); ++ switch (N->getOpcode()) { ++ default: ++ break; ++ case ISD::LOAD: ++ if (tryIndexedLoad(N)) ++ return; ++ // Other cases are autogenerated. ++ break; ++ case ISD::STORE: ++ if (tryIndexedStore(N)) ++ return; ++ // Other cases are autogenerated. ++ break; ++ case Sw64ISD::CALL: ++ SelectCALL(N); ++ if (N->use_empty()) // Don't delete EntryToken, etc. 
++    CurDAG->RemoveDeadNode(N);
++    return;
++  case ISD::FrameIndex: {
++    assert(N->getValueType(0) == MVT::i64);
++    int FI = cast<FrameIndexSDNode>(N)->getIndex();
++    SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
++    if (N->hasOneUse()) {
++      N->setDebugLoc((*(N->use_begin()))->getDebugLoc());
++      CurDAG->SelectNodeTo(N, Sw64::LDA, MVT::i64, TFI,
++                           CurDAG->getTargetConstant(0, dl, MVT::i64));
++      return;
++    }
++    ReplaceNode(
++        N, CurDAG->getMachineNode(Sw64::LDA, dl, MVT::i64, TFI,
++                                  CurDAG->getTargetConstant(0, dl, MVT::i64)));
++    return;
++  }
++  case ISD::GLOBAL_OFFSET_TABLE:
++    ReplaceNode(N, getGlobalBaseReg());
++    return;
++  case Sw64ISD::GlobalRetAddr:
++    ReplaceNode(N, getGlobalRetAddr());
++    return;
++
++  case Sw64ISD::DivCall: {
++    SDValue Chain = CurDAG->getEntryNode();
++    SDValue N0 = N->getOperand(0);
++    SDValue N1 = N->getOperand(1);
++    SDValue N2 = N->getOperand(2);
++    Chain = CurDAG->getCopyToReg(Chain, dl, Sw64::R24, N1, SDValue(0, 0));
++    Chain = CurDAG->getCopyToReg(Chain, dl, Sw64::R25, N2, Chain.getValue(1));
++    Chain = CurDAG->getCopyToReg(Chain, dl, Sw64::R27, N0, Chain.getValue(1));
++    SDNode *CNode = CurDAG->getMachineNode(Sw64::PseudoCallDiv, dl, MVT::Other,
++                                           MVT::Glue, Chain, Chain.getValue(1));
++    Chain = CurDAG->getCopyFromReg(Chain, dl, Sw64::R27, MVT::i64,
++                                   SDValue(CNode, 1));
++    ReplaceNode(N,
++                CurDAG->getMachineNode(Sw64::BISr, dl, MVT::i64, Chain, Chain));
++    return;
++  }
++
++  case ISD::READCYCLECOUNTER: {
++    SDValue Chain = N->getOperand(0);
++    ReplaceNode(
++        N, CurDAG->getMachineNode(Sw64::RPCC, dl, MVT::i64, MVT::Other, Chain));
++    return;
++  }
++
++  case ISD::Constant: {
++    auto ConstNode = cast<ConstantSDNode>(N);
++    if (ConstNode->isZero()) {
++      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
++                                              Sw64::R31, MVT::i64);
++      ReplaceUses(SDValue(N, 0), Result);
++      return;
++    }
++    uint64_t uval = cast<ConstantSDNode>(N)->getZExtValue();
++    int64_t Imm = ConstNode->getSExtValue();
++    int64_t val = Imm;
++    int32_t val32 = (int32_t)val;
++    if (val <= IMM_HIGH + IMM_HIGH * IMM_MULT &&
++        val >= IMM_LOW + IMM_LOW * IMM_MULT)
++      break; //(LDAH (LDA))
++    if ((uval >> 32) == 0 && // empty upper bits
++        val32 <= IMM_HIGH + IMM_HIGH * IMM_MULT)
++      // val32 >= IMM_LOW + IMM_LOW * IMM_MULT) //always true
++      break; //(zext (LDAH (LDA)))
++    // Else use the constant pool
++
++    ConstantInt *C =
++        ConstantInt::get(Type::getInt64Ty(*CurDAG->getContext()), uval);
++    SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64);
++    SDNode *Load =
++        CurDAG->getMachineNode(Sw64::LOADconstant, dl, MVT::i64, CPI);
++    ReplaceNode(N, Load);
++
++    return;
++  }
++  case ISD::TargetConstantFP:
++  case ISD::ConstantFP: {
++    ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
++    bool isDouble = N->getValueType(0) == MVT::f64;
++    EVT T = isDouble ? MVT::f64 : MVT::f32;
++    if (CN->getValueAPF().isPosZero()) {
++      ReplaceNode(
++          N, CurDAG->getMachineNode(isDouble ? Sw64::CPYSD : Sw64::CPYSS, dl, T,
++                                    CurDAG->getRegister(Sw64::F31, T),
++                                    CurDAG->getRegister(Sw64::F31, T)));
++      return;
++    } else if (CN->getValueAPF().isNegZero()) {
++      ReplaceNode(
++          N, CurDAG->getMachineNode(isDouble ?
Sw64::CPYSND : Sw64::CPYSNS, dl, ++ T, CurDAG->getRegister(Sw64::F31, T), ++ CurDAG->getRegister(Sw64::F31, T))); ++ return; ++ } else { ++ report_fatal_error("Unhandled FP constant type"); ++ } ++ break; ++ } ++ ++ case ISD::SETCC: ++ if (N->getSimpleValueType(0).SimpleTy == MVT::v4i64) ++ break; ++ if (N->getOperand(0).getNode()->getValueType(0).isFloatingPoint()) { ++ ISD::CondCode CC = cast(N->getOperand(2))->get(); ++ ++ unsigned Opc = Sw64::WTF; ++ bool rev = false; ++ bool inv = false; ++ bool ordonly = false; ++ if (Sw64Mieee) { ++ switch (CC) { ++ default: ++ LLVM_DEBUG(N->dump(CurDAG)); ++ llvm_unreachable("Unknown FP comparison!"); ++ case ISD::SETEQ: ++ case ISD::SETOEQ: ++ case ISD::SETUEQ: ++ Opc = Sw64::CMPTEQ; ++ break; ++ case ISD::SETLT: ++ case ISD::SETOLT: ++ case ISD::SETULT: ++ Opc = Sw64::CMPTLT; ++ break; ++ case ISD::SETLE: ++ case ISD::SETOLE: ++ case ISD::SETULE: ++ Opc = Sw64::CMPTLE; ++ break; ++ case ISD::SETGT: ++ case ISD::SETOGT: ++ case ISD::SETUGT: ++ Opc = Sw64::CMPTLT; ++ rev = true; ++ break; ++ case ISD::SETGE: ++ case ISD::SETOGE: ++ case ISD::SETUGE: ++ Opc = Sw64::CMPTLE; ++ rev = true; ++ break; ++ case ISD::SETNE: ++ case ISD::SETONE: ++ case ISD::SETUNE: ++ Opc = Sw64::CMPTEQ; ++ inv = true; ++ break; ++ case ISD::SETO: ++ Opc = Sw64::CMPTUN; ++ inv = true; ++ ordonly = true; ++ break; ++ case ISD::SETUO: ++ Opc = Sw64::CMPTUN; ++ ordonly = true; ++ break; ++ }; ++ ++ /* ++ unordered: ++ FCMPUN $f1, $f2, $f3 ++ FCMPxx $f1, $f2, $f3 ++ FSELNE $f3, $f3, $f4, $f4 ++ ++ ordered: ++ FCMPUN $f1, $f2, $f3 ++ FCMPxx $f1, $f2, $f3 ++ FSELEQ $f3, $f4, $f31, $f4 ++ ++ SETO/SETUO: ++ FCMPxx $f1, $f2, $f3 ++ */ ++ bool ordered = true; ++ switch (CC) { ++ case ISD::SETUEQ: ++ case ISD::SETULT: ++ case ISD::SETULE: ++ case ISD::SETUNE: ++ case ISD::SETUGT: ++ case ISD::SETUGE: ++ ordered = false; ++ break; ++ default: ++ break; ++ } ++ SDValue opr0 = N->getOperand(rev ? 1 : 0); ++ SDValue opr1 = N->getOperand(rev ? 0 : 1); ++ SDNode *cmpu = ++ CurDAG->getMachineNode(Sw64::CMPTUN, dl, MVT::f64, opr0, opr1); ++ SDNode *cmp = CurDAG->getMachineNode(Opc, dl, MVT::f64, opr0, opr1); ++ if (inv) ++ cmp = CurDAG->getMachineNode( ++ Sw64::CMPTEQ, dl, MVT::f64, SDValue(cmp, 0), ++ CurDAG->getRegister(Sw64::F31, MVT::f64)); ++ ++ SDNode *sel = NULL; ++ if (ordonly) ++ sel = cmp; ++ else if (ordered) ++ sel = CurDAG->getMachineNode(Sw64::FSELEQD, dl, MVT::f64, ++ CurDAG->getRegister(Sw64::F31, MVT::f64), ++ SDValue(cmp, 0), SDValue(cmpu, 0)); ++ else ++ sel = CurDAG->getMachineNode(Sw64::FSELNED, dl, MVT::f64, ++ SDValue(cmp, 0), SDValue(cmpu, 0), ++ SDValue(cmpu, 0)); ++ ++ MVT VT = N->getSimpleValueType(0).SimpleTy == MVT::v4i64 ? 
MVT::v4i64 ++ : MVT::i64; ++ SDNode *LD = ++ CurDAG->getMachineNode(Sw64::FTOIT, dl, VT, SDValue(sel, 0)); ++ ++ ReplaceNode(N, CurDAG->getMachineNode( ++ Sw64::CMPULTr, dl, VT, ++ CurDAG->getRegister(Sw64::R31, VT), SDValue(LD, 0))); ++ return; ++ } else { ++ switch (CC) { ++ default: ++ LLVM_DEBUG(N->dump(CurDAG)); ++ llvm_unreachable("Unknown FP comparison!"); ++ case ISD::SETEQ: ++ case ISD::SETOEQ: ++ case ISD::SETUEQ: ++ Opc = Sw64::CMPTEQ; ++ break; ++ case ISD::SETLT: ++ case ISD::SETOLT: ++ case ISD::SETULT: ++ Opc = Sw64::CMPTLT; ++ break; ++ case ISD::SETLE: ++ case ISD::SETOLE: ++ case ISD::SETULE: ++ Opc = Sw64::CMPTLE; ++ break; ++ case ISD::SETGT: ++ case ISD::SETOGT: ++ case ISD::SETUGT: ++ Opc = Sw64::CMPTLT; ++ rev = true; ++ break; ++ case ISD::SETGE: ++ case ISD::SETOGE: ++ case ISD::SETUGE: ++ Opc = Sw64::CMPTLE; ++ rev = true; ++ break; ++ case ISD::SETNE: ++ case ISD::SETONE: ++ case ISD::SETUNE: ++ Opc = Sw64::CMPTEQ; ++ inv = true; ++ break; ++ case ISD::SETO: ++ Opc = Sw64::CMPTUN; ++ inv = true; ++ break; ++ case ISD::SETUO: ++ Opc = Sw64::CMPTUN; ++ break; ++ }; ++ SDValue tmp1 = N->getOperand(rev ? 1 : 0); ++ SDValue tmp2 = N->getOperand(rev ? 0 : 1); ++ SDNode *cmp = CurDAG->getMachineNode(Opc, dl, MVT::f64, tmp1, tmp2); ++ if (inv) ++ cmp = CurDAG->getMachineNode( ++ Sw64::CMPTEQ, dl, MVT::f64, SDValue(cmp, 0), ++ CurDAG->getRegister(Sw64::F31, MVT::f64)); ++ switch (CC) { ++ case ISD::SETUEQ: ++ case ISD::SETULT: ++ case ISD::SETULE: ++ case ISD::SETUNE: ++ case ISD::SETUGT: ++ case ISD::SETUGE: { ++ SDNode *cmp2 = ++ CurDAG->getMachineNode(Sw64::CMPTUN, dl, MVT::f64, tmp1, tmp2); ++ cmp = CurDAG->getMachineNode(Sw64::ADDD, dl, MVT::f64, ++ SDValue(cmp2, 0), SDValue(cmp, 0)); ++ break; ++ } ++ default: ++ break; ++ } ++ SDNode *LD = ++ CurDAG->getMachineNode(Sw64::FTOIT, dl, MVT::i64, SDValue(cmp, 0)); ++ ++ ReplaceNode( ++ N, CurDAG->getMachineNode(Sw64::CMPULTr, dl, MVT::i64, ++ CurDAG->getRegister(Sw64::R31, MVT::i64), ++ SDValue(LD, 0))); ++ return; ++ } ++ } ++ break; ++ case ISD::AND: { ++ ConstantSDNode *SC = NULL; ++ ConstantSDNode *MC = NULL; ++ if (N->getOperand(0).getOpcode() == ISD::SRL && ++ (MC = dyn_cast(N->getOperand(1))) && ++ (SC = dyn_cast(N->getOperand(0).getOperand(1)))) { ++ uint64_t sval = SC->getZExtValue(); ++ uint64_t mval = MC->getZExtValue(); ++ // If the result is a zap, let the autogened stuff handle it. 
++      if (get_zapImm(N->getOperand(0), mval))
++        break;
++      // given mask X, and shift S, we want to see if there is any zap in the
++      // mask if we play around with the bottom S bits
++      uint64_t dontcare = (~0ULL) >> (64 - sval);
++      uint64_t mask = mval << sval;
++
++      if (get_zapImm(mask | dontcare))
++        mask = mask | dontcare;
++
++      if (get_zapImm(mask)) {
++        SDValue Z =
++            SDValue(CurDAG->getMachineNode(Sw64::ZAPNOTi, dl, MVT::i64,
++                                           N->getOperand(0).getOperand(0),
++                                           getI64Imm(get_zapImm(mask), dl)),
++                    0);
++        ReplaceNode(N, CurDAG->getMachineNode(Sw64::SRLi, dl, MVT::i64, Z,
++                                              getI64Imm(sval, dl)));
++        return;
++      }
++    }
++    break;
++  }
++  case ISD::BUILD_VECTOR: {
++
++    BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N);
++    APInt SplatValue, SplatUndef;
++    unsigned SplatBitSize;
++    bool HasAnyUndefs;
++    EVT ViaVecTy;
++
++    if (!Subtarget->hasSIMD() || !BVN->getValueType(0).is256BitVector())
++      return;
++
++    if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
++                              HasAnyUndefs, 8, false))
++      break;
++  }
++  }
++  // Select the default instruction
++  SelectCode(N);
++}
++
++void Sw64DAGToDAGISel::SelectCALL(SDNode *N) {
++  // TODO: add flag stuff to prevent nondeterministic breakage!
++
++  SDValue Chain = N->getOperand(0);
++  SDValue Addr = N->getOperand(1);
++  SDValue InFlag = N->getOperand(N->getNumOperands() - 1);
++  SDLoc dl(N);
++  if (Addr.getOpcode() == Sw64ISD::GPRelLo) {
++    SDValue GOT = SDValue(getGlobalBaseReg(), 0);
++    Chain = CurDAG->getCopyToReg(Chain, dl, Sw64::R29, GOT, InFlag);
++    InFlag = Chain.getValue(1);
++    Chain = SDValue(CurDAG->getMachineNode(Sw64::BSR, dl, MVT::Other, MVT::Glue,
++                                           Addr.getOperand(0), Chain, InFlag),
++                    0);
++  } else {
++    Chain = CurDAG->getCopyToReg(Chain, dl, Sw64::R27, Addr, InFlag);
++    InFlag = Chain.getValue(1);
++    SDValue Ops[] = {Chain, CurDAG->getRegister(Sw64::R27, MVT::i64),
++                     N->getOperand(2), InFlag};
++    Chain = SDValue(
++        CurDAG->getMachineNode(Sw64::JSR, dl, MVT::Other, MVT::Glue, Ops), 0);
++  }
++  InFlag = Chain.getValue(1);
++
++  ReplaceUses(SDValue(N, 0), Chain);
++  ReplaceUses(SDValue(N, 1), InFlag);
++}
++
++/// Match frameindex
++bool Sw64DAGToDAGISel::selectAddrFrameIndex(SDValue Addr, SDValue &Base,
++                                            SDValue &Offset) const {
++  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
++    EVT ValTy = Addr.getValueType();
++
++    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
++    Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), ValTy);
++    return true;
++  }
++  return false;
++}
++
++/// Match frameindex+offset and frameindex|offset
++bool Sw64DAGToDAGISel::selectAddrFrameIndexOffset(
++    SDValue Addr, SDValue &Base, SDValue &Offset, unsigned OffsetBits,
++    unsigned ShiftAmount = 0) const {
++  if (CurDAG->isBaseWithConstantOffset(Addr)) {
++    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
++    if (isIntN(OffsetBits + ShiftAmount, CN->getSExtValue())) {
++      EVT ValTy = Addr.getValueType();
++
++      // If the first operand is a FI, get the TargetFI Node
++      if (FrameIndexSDNode *FIN =
++              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
++        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
++      else {
++        Base = Addr.getOperand(0);
++        // If base is a FI, additional offset calculation is done in
++        // eliminateFrameIndex, otherwise we need to check the alignment
++        const Align Alignment(1ULL << ShiftAmount);
++        if (!isAligned(Alignment, CN->getZExtValue()))
++          return false;
++      }
++
++      Offset =
++          CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), ValTy);
++      return true;
++    }
++  }
++  return false;
++}
++
++bool
Sw64DAGToDAGISel::selectAddrRegImm9(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 9)) ++ return true; ++ ++ return false; ++} ++ ++bool Sw64DAGToDAGISel::selectAddrRegImm16(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 16)) ++ return true; ++ ++ return false; ++} ++ ++bool Sw64DAGToDAGISel::SelectInlineAsmMemoryOperand( ++ const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) { ++ SDValue Base, Offset; ++ ++ switch (ConstraintID) { ++ default: ++ llvm_unreachable("Unexpected asm memory constraint"); ++ case InlineAsm::Constraint_i: ++ case InlineAsm::Constraint_m: ++ case InlineAsm::Constraint_Q: ++ // We need to make sure that this one operand does not end up in XZR, thus ++ // require the address to be in a PointerRegClass register. ++ const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); ++ const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF); ++ SDLoc dl(Op); ++ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64); ++ SDValue NewOp = ++ SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, ++ Op.getValueType(), Op, RC), ++ 0); ++ OutOps.push_back(NewOp); ++ return false; ++ } ++ return true; ++} ++ ++bool Sw64DAGToDAGISel::tryIndexedLoad(SDNode *N) { ++ LoadSDNode *LD = cast(N); ++ ISD::MemIndexedMode AM = LD->getAddressingMode(); ++ if (AM != ISD::POST_INC) ++ return false; ++ SDLoc dl(N); ++ MVT VT = LD->getMemoryVT().getSimpleVT(); ++ bool isFloat = false; ++ unsigned Opcode = 0; ++ switch (VT.SimpleTy) { ++ case MVT::i8: ++ Opcode = Sw64::LDBU_A; ++ break; ++ case MVT::i16: ++ Opcode = Sw64::LDHU_A; ++ break; ++ case MVT::i32: ++ Opcode = Sw64::LDW_A; ++ break; ++ case MVT::i64: ++ Opcode = Sw64::LDL_A; ++ break; ++ case MVT::f32: ++ Opcode = Sw64::LDS_A; ++ isFloat = true; ++ break; ++ case MVT::f64: ++ Opcode = Sw64::LDD_A; ++ isFloat = true; ++ break; ++ default: ++ return false; ++ } ++ SDValue Offset = LD->getOffset(); ++ int64_t Inc = cast(Offset.getNode())->getSExtValue(); ++ ReplaceNode( ++ N, CurDAG->getMachineNode(Opcode, SDLoc(N), isFloat ? 
VT : MVT::i64, ++ MVT::i64, MVT::Other, LD->getBasePtr(), ++ CurDAG->getTargetConstant(Inc, dl, MVT::i64), ++ LD->getChain())); ++ return true; ++} ++ ++bool Sw64DAGToDAGISel::tryIndexedStore(SDNode *N) { ++ StoreSDNode *ST = cast(N); ++ ISD::MemIndexedMode AM = ST->getAddressingMode(); ++ if (AM != ISD::POST_INC) ++ return false; ++ SDLoc dl(N); ++ MVT VT = ST->getMemoryVT().getSimpleVT(); ++ unsigned Opcode = 0; ++ switch (VT.SimpleTy) { ++ case MVT::i8: ++ Opcode = Sw64::STB_A; ++ break; ++ case MVT::i16: ++ Opcode = Sw64::STH_A; ++ break; ++ case MVT::i32: ++ Opcode = Sw64::STW_A; ++ break; ++ case MVT::i64: ++ Opcode = Sw64::STL_A; ++ break; ++ case MVT::f32: ++ Opcode = Sw64::STS_A; ++ break; ++ case MVT::f64: ++ Opcode = Sw64::STD_A; ++ break; ++ default: ++ return false; ++ } ++ MachineMemOperand *MemOp = ST->getMemOperand(); ++ SDValue From[2] = {SDValue(ST, 0), SDValue(ST, 1)}; ++ SDValue To[2]; ++ int64_t Inc = cast(ST->getOffset().getNode())->getSExtValue(); ++ SDValue Ops[] = {ST->getValue(), ST->getBasePtr(), ++ CurDAG->getTargetConstant(Inc, dl, MVT::i64), ++ ST->getChain()}; ++ MachineSDNode *S = ++ CurDAG->getMachineNode(Opcode, dl, MVT::i64, MVT::Other, Ops); ++ CurDAG->setNodeMemRefs(S, {MemOp}); ++ To[0] = SDValue(S, 0); ++ To[1] = SDValue(S, 1); ++ ReplaceUses(From, To, 2); ++ CurDAG->RemoveDeadNode(ST); ++ return true; ++} ++ ++/// ComplexPattern used on Sw64InstrInfo ++/// Used on Sw64 Load/Store instructions ++bool Sw64DAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ Base = Addr; ++ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Addr.getValueType()); ++ return true; ++} ++ ++// Select constant vector splats. ++// ++// Returns true and sets Imm if: ++// * MSA is enabled ++// * N is a ISD::BUILD_VECTOR representing a constant splat ++bool Sw64DAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm, ++ unsigned MinSizeInBits) const { ++ // if (!Subtarget->hasMSA()) ++ // return false; ++ ++ BuildVectorSDNode *Node = dyn_cast(N); ++ ++ if (!Node) ++ return false; ++ ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ ++ if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, ++ MinSizeInBits, false)) ++ return false; ++ ++ Imm = SplatValue; ++ ++ return true; ++} ++ ++bool Sw64DAGToDAGISel::selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, ++ unsigned ImmBitSize) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ ++ if ((Signed && ImmValue.isSignedIntN(ImmBitSize)) || ++ (!Signed && ImmValue.isIntN(ImmBitSize))) { ++ Imm = CurDAG->getTargetConstant(ImmValue, SDLoc(N), EltTy); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++// Select constant vector splats. 
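The splat selectors that follow first reduce a constant BUILD_VECTOR to its single splat value (via selectVSplatCommon above) and then check that the value fits the instruction's immediate field, signed or unsigned 8 bits here. A minimal standalone sketch of that width check, using plain integers instead of APInt and hypothetical helper names:

#include <cassert>
#include <cstdint>

// Does v fit in a signed/unsigned immediate of the given width (bits < 64)?
static bool fitsSImm(int64_t v, unsigned bits) {
  int64_t lo = -(int64_t(1) << (bits - 1));
  int64_t hi = (int64_t(1) << (bits - 1)) - 1;
  return v >= lo && v <= hi;
}
static bool fitsUImm(uint64_t v, unsigned bits) {
  return v < (uint64_t(1) << bits);
}

int main() {
  assert(fitsSImm(-5, 8));                    // splat of -5: usable as simm8
  assert(!fitsUImm(uint64_t(-5), 8));         // ...but not as uimm8
  assert(fitsUImm(200, 8) && !fitsSImm(200, 8));
}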
++bool Sw64DAGToDAGISel::selectVSplatSimm8(SDValue N, SDValue &Imm) const {
++  return selectVSplatCommon(N, Imm, true, 8);
++}
++
++bool Sw64DAGToDAGISel::selectVSplatUimm8(SDValue N, SDValue &Imm) const {
++  return selectVSplatCommon(N, Imm, false, 8);
++}
++
++bool Sw64DAGToDAGISel::selectIntAddrSImm16(SDValue Addr, SDValue &Base,
++                                           SDValue &Offset) const {
++  if (selectAddrFrameIndex(Addr, Base, Offset))
++    return true;
++
++  if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 2))
++    return true;
++
++  return selectAddrDefault(Addr, Base, Offset);
++}
++
++bool Sw64DAGToDAGISel::selectIntAddrSImm12(SDValue Addr, SDValue &Base,
++                                           SDValue &Offset) const {
++  if (selectAddrFrameIndex(Addr, Base, Offset))
++    return true;
++
++  if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 3))
++    return true;
++
++  return selectAddrDefault(Addr, Base, Offset);
++}
++
++bool Sw64DAGToDAGISel::SelectAddSubImm(SDValue N, MVT VT, SDValue &Imm) {
++  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
++    const int64_t ImmVal = CNode->getSExtValue();
++    SDLoc DL(N);
++
++    switch (VT.SimpleTy) {
++    case MVT::i8:
++      // Can always select i8s, no shift, mask the immediate value to
++      // deal with sign-extended value from lowering.
++      if (!isUInt<8>(ImmVal))
++        return false;
++      Imm = CurDAG->getTargetConstant(ImmVal & 0xFF, DL, MVT::i64);
++      return true;
++    case MVT::i16:
++      // i16 values get sign-extended to 32-bits during lowering.
++      Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i64);
++      return true;
++      break;
++    case MVT::i32:
++    case MVT::i64:
++      return false;
++      break;
++    default:
++      break;
++    }
++  }
++
++  return false;
++}
++
++bool Sw64DAGToDAGISel::SelectComplexImm(SDValue N, SDValue &Imm) {
++  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
++    const int64_t ImmVal = CNode->getSExtValue();
++    SDLoc DL(N);
++    if (!isUInt<5>(ImmVal))
++      return false;
++    Imm = CurDAG->getTargetConstant(ImmVal & 0x1F, DL, MVT::i64);
++    return true;
++  }
++  return false;
++}
++
++/// createSw64ISelDag - This pass converts a legalized DAG into a
++/// Sw64-specific DAG, ready for instruction scheduling.
++///
++FunctionPass *llvm::createSw64ISelDag(Sw64TargetMachine &TM,
++                                      CodeGenOpt::Level OptLevel) {
++  return new Sw64DAGToDAGISel(TM, OptLevel);
++}
++
++bool Sw64DAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) {
++  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
++      cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
++    Val = N.getOperand(0);
++    return true;
++  }
++  MVT VT = N.getSimpleValueType();
++  if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) {
++    Val = N;
++    return true;
++  }
++
++  return false;
++}
++
++bool Sw64DAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
++  if (N.getOpcode() == ISD::AND) {
++    auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
++    if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) {
++      Val = N.getOperand(0);
++      return true;
++    }
++  }
++  MVT VT = N.getSimpleValueType();
++  APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32);
++  if (CurDAG->MaskedValueIsZero(N, Mask)) {
++    Val = N;
++    return true;
++  }
++
++  return false;
++}
+diff --git a/llvm/lib/Target/Sw64/Sw64ISelLowering.cpp b/llvm/lib/Target/Sw64/Sw64ISelLowering.cpp
+new file mode 100644
+index 000000000..4ef148ac0
+--- /dev/null
++++ b/llvm/lib/Target/Sw64/Sw64ISelLowering.cpp
+@@ -0,0 +1,4154 @@
++//===-- Sw64ISelLowering.cpp - Sw64 DAG Lowering Implementation ---------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements the Sw64TargetLowering class. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "Sw64ISelLowering.h" ++#include "MCTargetDesc/Sw64BaseInfo.h" ++#include "Sw64.h" ++#include "Sw64MachineFunctionInfo.h" ++#include "Sw64Subtarget.h" ++#include "Sw64TargetMachine.h" ++#include "Sw64TargetObjectFile.h" ++#include "llvm/CodeGen/CallingConvLower.h" ++#include "llvm/CodeGen/FastISel.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineJumpTableInfo.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/SelectionDAGISel.h" ++#include "llvm/CodeGen/ValueTypes.h" ++#include "llvm/IR/CallingConv.h" ++#include "llvm/IR/Constants.h" ++#include "llvm/IR/DerivedTypes.h" ++#include "llvm/IR/Function.h" ++#include "llvm/IR/GlobalAlias.h" ++#include "llvm/IR/GlobalVariable.h" ++#include "llvm/IR/IRBuilder.h" ++#include "llvm/IR/Intrinsics.h" ++#include "llvm/IR/IntrinsicsSw64.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/KnownBits.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "sw_64-lower" ++ ++/// AddLiveIn - This helper function adds the specified physical register to the ++/// MachineFunction as a live in value. It also creates a corresponding virtual ++/// register for it. ++static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, ++ const TargetRegisterClass *RC) { ++ assert(RC->contains(PReg) && "Not the correct regclass!"); ++ Register VReg = MF.getRegInfo().createVirtualRegister(RC); ++ MF.getRegInfo().addLiveIn(PReg, VReg); ++ return VReg; ++} ++ ++const char *Sw64TargetLowering::getTargetNodeName(unsigned Opcode) const { ++ switch ((Sw64ISD::NodeType)Opcode) { ++ default: ++ return 0; ++ case Sw64ISD::CVTQT_: ++ return "Sw64::CVTQT_"; ++ case Sw64ISD::CVTQS_: ++ return "Sw64::CVTQS_"; ++ case Sw64ISD::CVTTQ_: ++ return "Sw64::CVTTQ_"; ++ case Sw64ISD::CVTST_: ++ return "Sw64::CVTST_"; ++ case Sw64ISD::CVTTS_: ++ return "Sw64::CVTTS_"; ++ case Sw64ISD::JmpLink: ++ return "Sw64::JmpLink"; ++ case Sw64ISD::Ret: ++ return "Sw64::Ret"; ++ case Sw64ISD::TPRelLo: ++ return "Sw64::TPRelLo"; ++ case Sw64ISD::TPRelHi: ++ return "Sw64::TPRelHi"; ++ case Sw64ISD::SysCall: ++ return "Sw64::SysCall"; ++ case Sw64ISD::LDAWC: ++ return "Sw64::Sw64_LDAWC"; ++ ++ case Sw64ISD::TLSGD: ++ return "Sw64::TLSGD"; ++ case Sw64ISD::DTPRelLo: ++ return "Sw64::DTPRelLo"; ++ case Sw64ISD::DTPRelHi: ++ return "Sw64::DTPRelHi"; ++ case Sw64ISD::TLSLDM: ++ return "Sw64::TLSLDM"; ++ case Sw64ISD::RelGottp: ++ return "Sw64::RelGottp"; ++ case Sw64ISD::GPRelHi: ++ return "Sw64::GPRelHi"; ++ case Sw64ISD::GPRelLo: ++ return "Sw64::GPRelLo"; ++ case Sw64ISD::RelLit: ++ return "Sw64::RelLit"; ++ case Sw64ISD::GlobalRetAddr: ++ return "Sw64::GlobalRetAddr"; ++ case Sw64ISD::CALL: ++ return "Sw64::CALL"; ++ case Sw64ISD::DivCall: ++ return "Sw64::DivCall"; ++ case Sw64ISD::RET_FLAG: ++ return "Sw64::RET_FLAG"; ++ case Sw64ISD::COND_BRANCH_I: ++ return "Sw64::COND_BRANCH_I"; ++ case Sw64ISD::COND_BRANCH_F: ++ return "Sw64::COND_BRANCH_F"; ++ case Sw64ISD::MEMBARRIER: ++ return "Sw64ISD::MEMBARRIER"; ++ ++ case Sw64ISD::GPRel: ++ return "Sw64ISD::GPRel"; ++ case Sw64ISD::TPRel: ++ return "Sw64ISD::TPRel"; ++ case 
Sw64ISD::DTPRel: ++ return "Sw64ISD::DTPRel"; ++ case Sw64ISD::LDIH: ++ return "Sw64ISD::LDIH"; ++ case Sw64ISD::LDI: ++ return "Sw64ISD::LDI"; ++ ++ case Sw64ISD::Z_S_FILLCS: ++ return "Sw64ISD::Z_S_FILLCS"; ++ case Sw64ISD::Z_S_FILLDE: ++ return "Sw64ISD::Z_S_FILLDE"; ++ case Sw64ISD::Z_FILLDE: ++ return "Sw64ISD::Z_FILLDE"; ++ case Sw64ISD::Z_FILLDE_E: ++ return "Sw64ISD::Z_FILLDE_E"; ++ case Sw64ISD::Z_FILLCS: ++ return "Sw64ISD::Z_FILLCS"; ++ case Sw64ISD::Z_FILLCS_E: ++ return "Sw64ISD::Z_FILLCS_E"; ++ case Sw64ISD::Z_E_FILLCS: ++ return "Sw64ISD::Z_E_FILLCS"; ++ case Sw64ISD::Z_E_FILLDE: ++ return "Sw64ISD::Z_E_FILLDE"; ++ case Sw64ISD::Z_FLUSHD: ++ return "Sw64ISD::Z_FLUSHD"; ++ ++ case Sw64ISD::FRECS: ++ return "Sw64ISD::FRECS"; ++ case Sw64ISD::FRECD: ++ return "Sw64ISD::FRECD"; ++ case Sw64ISD::SBT: ++ return "Sw64ISD::SBT"; ++ case Sw64ISD::REVBH: ++ return "Sw64ISD::REVBH"; ++ case Sw64ISD::REVBW: ++ return "Sw64ISD::REVBW"; ++ ++ case Sw64ISD::ROLW: ++ return "Sw64ISD::ROLW"; ++ case Sw64ISD::CRC32B: ++ return "Sw64ISD::CRC32B"; ++ case Sw64ISD::CRC32H: ++ return "Sw64ISD::CRC32H"; ++ case Sw64ISD::CRC32W: ++ return "Sw64ISD::CRC32W"; ++ case Sw64ISD::CRC32L: ++ return "Sw64ISD::CRC32L"; ++ case Sw64ISD::CRC32CB: ++ return "Sw64ISD::CRC32CB"; ++ case Sw64ISD::CRC32CH: ++ return "Sw64ISD::CRC32CH"; ++ case Sw64ISD::CRC32CW: ++ return "Sw64ISD::CRC32CW"; ++ case Sw64ISD::CRC32CL: ++ return "Sw64ISD::CRC32CL"; ++ ++ case Sw64ISD::VLDWE: ++ return "Sw64ISD::VLDWE"; ++ case Sw64ISD::VLDSE: ++ return "Sw64ISD::VLDSE"; ++ case Sw64ISD::VLDDE: ++ return "Sw64ISD::VLDDE"; ++ ++ case Sw64ISD::VNOR: ++ return "Sw64ISD::VNOR"; ++ case Sw64ISD::VEQV: ++ return "Sw64ISD::VEQV"; ++ case Sw64ISD::VORNOT: ++ return "Sw64ISD::VORNOT"; ++ case Sw64ISD::VSHF: ++ return "Sw64ISD::VSHF"; ++ case Sw64ISD::SHF: ++ return "Sw64ISD::SHF"; ++ case Sw64ISD::ILVEV: ++ return "Sw64ISD::ILVEV"; ++ case Sw64ISD::ILVOD: ++ return "Sw64ISD::ILVOD"; ++ case Sw64ISD::ILVL: ++ return "Sw64ISD::ILVL"; ++ case Sw64ISD::ILVR: ++ return "Sw64ISD::ILVR"; ++ case Sw64ISD::PCKEV: ++ return "Sw64ISD::PCKEV"; ++ case Sw64ISD::PCKOD: ++ return "Sw64ISD::PCKOD"; ++ case Sw64ISD::VMAX: ++ return "Sw64ISD::VMAX"; ++ case Sw64ISD::VMIN: ++ return "Sw64ISD::VMIN"; ++ case Sw64ISD::VUMAX: ++ return "Sw64ISD::VUMAX"; ++ case Sw64ISD::VUMIN: ++ return "Sw64ISD::VUMIN"; ++ case Sw64ISD::VFREC: ++ return "Sw64ISD::VFREC"; ++ case Sw64ISD::VFCMPEQ: ++ return "Sw64ISD::VFCMPEQ"; ++ case Sw64ISD::VFCMPLE: ++ return "Sw64ISD::VFCMPLE"; ++ case Sw64ISD::VFCMPLT: ++ return "Sw64ISD::VFCMPLT"; ++ case Sw64ISD::VFCMPUN: ++ return "Sw64ISD::VFCMPUN"; ++ case Sw64ISD::VFCVTSD: ++ return "Sw64ISD::VFCVTSD"; ++ case Sw64ISD::VFCVTDS: ++ return "Sw64ISD::VFCVTDS"; ++ case Sw64ISD::VFCVTLS: ++ return "Sw64ISD::VFCVTLS"; ++ case Sw64ISD::VFCVTLD: ++ return "Sw64ISD::VFCVTLD"; ++ case Sw64ISD::VFCVTSH: ++ return "Sw64ISD::VFCVTSH"; ++ case Sw64ISD::VFCVTHS: ++ return "Sw64ISD::VFCVTHS"; ++ case Sw64ISD::VFCVTDL: ++ return "Sw64ISD::VFCVTDL"; ++ case Sw64ISD::VFCVTDLG: ++ return "Sw64ISD::VFCVTDLG"; ++ case Sw64ISD::VFCVTDLP: ++ return "Sw64ISD::VFCVTDLP"; ++ case Sw64ISD::VFCVTDLZ: ++ return "Sw64ISD::VFCVTDLZ"; ++ case Sw64ISD::VFCVTDLN: ++ return "Sw64ISD::VFCVTDLN"; ++ case Sw64ISD::VFRIS: ++ return "Sw64ISD::VFRIS"; ++ case Sw64ISD::VFRISG: ++ return "Sw64ISD::VFRISG"; ++ case Sw64ISD::VFRISP: ++ return "Sw64ISD::VFRISP"; ++ case Sw64ISD::VFRISZ: ++ return "Sw64ISD::VFRISZ"; ++ case Sw64ISD::VFRISN: ++ return "Sw64ISD::VFRISN"; ++ case 
Sw64ISD::VFRID: ++ return "Sw64ISD::VFRID"; ++ case Sw64ISD::VFRIDG: ++ return "Sw64ISD::VFRIDG"; ++ case Sw64ISD::VFRIDP: ++ return "Sw64ISD::VFRIDP"; ++ case Sw64ISD::VFRIDZ: ++ return "Sw64ISD::VFRIDZ"; ++ case Sw64ISD::VFRIDN: ++ return "Sw64ISD::VFRIDN"; ++ case Sw64ISD::VMAXF: ++ return "Sw64ISD::VMAXF"; ++ case Sw64ISD::VMINF: ++ return "Sw64ISD::VMINF"; ++ // case Sw64ISD::VINSECTLH: return "Sw64ISD::VINSECTLH"; ++ // case Sw64ISD::VINSECTLW: return "Sw64ISD::VINSECTLW"; ++ // case Sw64ISD::VINSECTLL: return "Sw64ISD::VINSECTLL"; ++ // case Sw64ISD::VINSECTLB: return "Sw64ISD::VINSECTLB"; ++ // case Sw64ISD::VSHFQB: return "Sw64ISD::VSHFQB"; ++ // case Sw64ISD::VSHFQ: return "Sw64ISD::VSHFQ"; ++ case Sw64ISD::VCPYB: ++ return "Sw64ISD::VCPYB"; ++ case Sw64ISD::VCPYH: ++ return "Sw64ISD::VCPYH"; ++ ++ case Sw64ISD::VCON_W: ++ return "Sw64ISD::VCON_W"; ++ case Sw64ISD::VCON_S: ++ return "Sw64ISD::VCON_S"; ++ case Sw64ISD::VCON_D: ++ return "Sw64ISD::VCON_D"; ++ ++ case Sw64ISD::INSVE: ++ return "Sw64ISD::INSVE"; ++ case Sw64ISD::VCOPYF: ++ return "Sw64ISD::VCOPYF"; ++ case Sw64ISD::V8SLL: ++ return "Sw64ISD::V8SLL"; ++ case Sw64ISD::V8SLLi: ++ return "Sw64ISD::V8SLLi"; ++ case Sw64ISD::V8SRL: ++ return "Sw64ISD::V8SRL"; ++ case Sw64ISD::V8SRLi: ++ return "Sw64ISD::V8SRLi"; ++ case Sw64ISD::VROTR: ++ return "Sw64ISD::VROTR"; ++ case Sw64ISD::VROTRi: ++ return "Sw64ISD::VROTRi"; ++ case Sw64ISD::V8SRA: ++ return "Sw64ISD::V8SRA"; ++ case Sw64ISD::V8SRAi: ++ return "Sw64ISD::V8SRAi"; ++ case Sw64ISD::VROLB: ++ return "Sw64ISD::VROLB"; ++ case Sw64ISD::VROLBi: ++ return "Sw64ISD::VROLBi"; ++ case Sw64ISD::VROLH: ++ return "Sw64ISD::VROLH"; ++ case Sw64ISD::VROLHi: ++ return "Sw64ISD::VROLHi"; ++ case Sw64ISD::VROLL: ++ return "Sw64ISD::VROLL"; ++ case Sw64ISD::VROLLi: ++ return "Sw64ISD::VROLLi"; ++ case Sw64ISD::VCTPOP: ++ return "Sw64ISD::VCTPOP"; ++ case Sw64ISD::VCTLZ: ++ return "Sw64ISD::VCTLZ"; ++ ++ case Sw64ISD::VLOG: ++ return "Sw64ISD::VLOG"; ++ case Sw64ISD::VSETGE: ++ return "Sw64ISD::VSETGE"; ++ ++ case Sw64ISD::VSELEQW: ++ return "Sw64ISD::VSELEQW"; ++ case Sw64ISD::VSELLTW: ++ return "Sw64ISD::VSELLTW"; ++ case Sw64ISD::VSELLEW: ++ return "Sw64ISD::VSELLEW"; ++ case Sw64ISD::VSELLBCW: ++ return "Sw64ISD::VSELLBCW"; ++ ++ case Sw64ISD::VFCMOVEQ: ++ return "Sw64ISD::VFCMOVEQ"; ++ case Sw64ISD::VFCMOVLE: ++ return "Sw64ISD::VFCMOVLE"; ++ case Sw64ISD::VFCMOVLT: ++ return "Sw64ISD::VFCMOVLT"; ++ ++ case Sw64ISD::VECT_VUCADDW: ++ return "Sw64ISD::VECT_VUCADDW"; ++ case Sw64ISD::VECT_VUCADDH: ++ return "Sw64ISD::VECT_VUCADDH"; ++ case Sw64ISD::VECT_VUCADDB: ++ return "Sw64ISD::VECT_VUCADDB"; ++ case Sw64ISD::VECT_VUCSUBW: ++ return "Sw64ISD::VECT_VUCSUBW"; ++ case Sw64ISD::VECT_VUCSUBH: ++ return "Sw64ISD::VECT_VUCSUBH"; ++ case Sw64ISD::VECT_VUCSUBB: ++ return "Sw64ISD::VECT_VUCSUBB"; ++ ++ case Sw64ISD::VECREDUCE_FADD: ++ return "Sw64ISD::VECREDUCE_FADD"; ++ case Sw64ISD::VSHL_BY_SCALAR: ++ return "Sw64ISD::VSHL_BY_SCALAR"; ++ case Sw64ISD::VSRL_BY_SCALAR: ++ return "Sw64ISD::VSRL_BY_SCALAR"; ++ case Sw64ISD::VSRA_BY_SCALAR: ++ return "Sw64ISD::VSRA_BY_SCALAR"; ++ case Sw64ISD::VEXTRACT_SEXT_ELT: ++ return "Sw64ISD::VEXTRACT_SEXT_ELT"; ++ case Sw64ISD::VBROADCAST: ++ return "Sw64ISD::VBROADCAST"; ++ case Sw64ISD::VBROADCAST_LD: ++ return "Sw64ISD::VBROADCAST_LD"; ++ case Sw64ISD::VTRUNCST: ++ return "Sw64ISD::VTRUNCST"; ++ } ++ ++ return nullptr; ++} ++ ++Sw64TargetLowering::Sw64TargetLowering(const TargetMachine &TM, ++ const Sw64Subtarget &Subtarget) ++ : TargetLowering(TM), 
TM(TM), Subtarget(Subtarget) { ++ if (Subtarget.hasSIMD()) { ++ // Expand all truncating stores and extending loads. ++ for (MVT VT0 : MVT::vector_valuetypes()) { ++ for (MVT VT1 : MVT::vector_valuetypes()) { ++ setTruncStoreAction(VT0, VT1, Expand); ++ setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); ++ setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); ++ setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand); ++ } ++ } ++ } ++ ++ // addRegisterClass(MVT::v32i8, &Sw64::V256LRegClass); ++ // addRegisterClass(MVT::v16i16, &Sw64::V256LRegClass); ++ // addRegisterClass(MVT::v8i32, &Sw64::V256LRegClass); ++ // addRegisterClass(MVT::v4i64, &Sw64::V256LRegClass); ++ ++ // Set up the TargetLowering object. ++ // I am having problems with shr n i8 1 ++ setBooleanContents(ZeroOrOneBooleanContent); ++ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); ++ ++ addRegisterClass(MVT::i64, &Sw64::GPRCRegClass); ++ addRegisterClass(MVT::f64, &Sw64::F8RCRegClass); ++ addRegisterClass(MVT::f32, &Sw64::F4RCRegClass); ++ // We want to custom lower some of our intrinsics. ++ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); ++ ++ // Loads ++ for (MVT VT : MVT::integer_valuetypes()) { ++ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); ++ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); ++ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); ++ } ++ ++ setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i8, Expand); // ldbu ++ setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i16, Expand); // ldhu ++ setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, MVT::i32, Expand); // ldwu ++ ++ if (Subtarget.hasCore4() && Subtarget.enablePostInc()) { ++ for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64}) { ++ setIndexedLoadAction(ISD::POST_INC, VT, Legal); ++ setIndexedStoreAction(ISD::POST_INC, VT, Legal); ++ } ++ } ++ ++ setTruncStoreAction(MVT::f32, MVT::f16, Expand); ++ setTruncStoreAction(MVT::f64, MVT::f16, Expand); ++ setTruncStoreAction(MVT::f64, MVT::f32, Expand); ++ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); ++ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); ++ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); ++ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); ++ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); ++ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); ++ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); ++ ++ for (MVT VT : MVT::fp_valuetypes()) { ++ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); ++ } ++ setTruncStoreAction(MVT::f64, MVT::f32, Expand); ++ setOperationAction(ISD::BR_JT, MVT::Other, Expand); ++ setOperationAction(ISD::BR_CC, MVT::i32, Expand); ++ setOperationAction(ISD::BR_CC, MVT::i64, Expand); ++ setOperationAction(ISD::BR_CC, MVT::f32, Expand); ++ setOperationAction(ISD::BR_CC, MVT::f64, Expand); ++ ++ // Sw64 wants to turn select_cc of INT/FP into sel/fsel when possible. 
++ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); ++ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); ++ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); ++ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); ++ ++ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); ++ ++ setOperationAction(ISD::FREM, MVT::f32, Expand); ++ setOperationAction(ISD::FREM, MVT::f64, Expand); ++ ++ if (Subtarget.hasCore4() && Subtarget.enableFloatCmov()) { ++ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); ++ } else { ++ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); ++ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); ++ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); ++ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); ++ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); ++ } ++ ++ setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom); ++ setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom); ++ ++ setOperationAction(ISD::CTPOP, MVT::i64, Expand); ++ setOperationAction(ISD::CTTZ, MVT::i64, Expand); ++ setOperationAction(ISD::CTLZ, MVT::i64, Expand); ++ setOperationAction(ISD::SDIVREM, MVT::i32, Expand); ++ setOperationAction(ISD::SDIVREM, MVT::i64, Expand); ++ setOperationAction(ISD::UDIVREM, MVT::i32, Expand); ++ setOperationAction(ISD::UDIVREM, MVT::i64, Expand); ++ ++ setOperationAction(ISD::UDIV, MVT::i128, Custom); ++ setOperationAction(ISD::SDIV, MVT::i128, Custom); ++ setOperationAction(ISD::UREM, MVT::i128, Custom); ++ setOperationAction(ISD::SREM, MVT::i128, Custom); ++ ++ if (!Subtarget.hasCore4() || !Subtarget.enableIntAri()) { ++ setOperationAction(ISD::SREM, MVT::i64, Custom); ++ setOperationAction(ISD::UREM, MVT::i64, Custom); ++ setOperationAction(ISD::SDIV, MVT::i64, Custom); ++ setOperationAction(ISD::UDIV, MVT::i64, Custom); ++ } ++ ++ if (Subtarget.hasCore4() && Subtarget.enableByteInst()) { ++ setOperationAction(ISD::BSWAP, MVT::i64, Legal); ++ setOperationAction(ISD::BSWAP, MVT::i32, Legal); ++ setOperationAction(ISD::BSWAP, MVT::i16, Legal); ++ } else { ++ setOperationAction(ISD::BSWAP, MVT::i64, Expand); ++ } ++ ++ if (Subtarget.hasCore4() && Subtarget.enableFloatRound()) { ++ for (MVT Ty : {MVT::f32, MVT::f64}) { ++ setOperationAction(ISD::FFLOOR, Ty, Legal); ++ setOperationAction(ISD::FNEARBYINT, Ty, Legal); ++ setOperationAction(ISD::FCEIL, Ty, Legal); ++ setOperationAction(ISD::FTRUNC, Ty, Legal); ++ setOperationAction(ISD::FROUND, Ty, Legal); ++ } ++ } ++ ++ setOperationAction(ISD::ADDC, MVT::i64, Expand); ++ setOperationAction(ISD::ADDE, MVT::i64, Expand); ++ setOperationAction(ISD::SUBC, MVT::i64, Expand); ++ setOperationAction(ISD::SUBE, MVT::i64, Expand); ++ ++ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); ++ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); ++ ++ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); ++ setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); ++ setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); ++ ++ setOperationAction(ISD::TRAP, MVT::Other, Legal); ++ ++ // We don't support sin/cos/sqrt/pow ++ setOperationAction(ISD::FSIN, MVT::f64, Expand); ++ setOperationAction(ISD::FCOS, MVT::f64, Expand); ++ setOperationAction(ISD::FSIN, MVT::f32, Expand); ++ setOperationAction(ISD::FCOS, MVT::f32, Expand); ++ ++ setOperationAction(ISD::FSQRT, MVT::f64, Legal); ++ setOperationAction(ISD::FSQRT, MVT::f32, Legal); ++ setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal); ++ setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal); ++ ++ setOperationAction(ISD::FPOW, 
MVT::f32, Expand); ++ setOperationAction(ISD::FPOW, MVT::f64, Expand); ++ ++ // We have fused multiply-addition for f32 and f64 but not f128. ++ setOperationAction(ISD::FMA, MVT::f64, Legal); ++ setOperationAction(ISD::FMA, MVT::f32, Legal); ++ setOperationAction(ISD::FMA, MVT::f128, Expand); ++ ++ setOperationAction(ISD::SETCC, MVT::f32, Promote); ++ ++ setOperationAction(ISD::BITCAST, MVT::f32, Promote); ++ // Not implemented yet. ++ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); ++ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); ++ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); ++ // We want to legalize GlobalAddress and ConstantPool and ++ // ExternalSymbols nodes into the appropriate instructions to ++ // materialize the address. ++ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); ++ setOperationAction(ISD::ConstantPool, MVT::i64, Custom); ++ setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom); ++ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); ++ setOperationAction(ISD::BlockAddress, MVT::i64, Custom); ++ setOperationAction(ISD::VASTART, MVT::Other, Custom); ++ setOperationAction(ISD::VAEND, MVT::Other, Expand); ++ setOperationAction(ISD::VACOPY, MVT::Other, Custom); ++ setOperationAction(ISD::VAARG, MVT::Other, Custom); ++ setOperationAction(ISD::VAARG, MVT::i32, Custom); ++ ++ setOperationAction(ISD::JumpTable, MVT::i64, Custom); ++ setOperationAction(ISD::JumpTable, MVT::i32, Custom); ++ ++ setOperationAction(ISD::PREFETCH, MVT::Other, Custom); ++ ++ setOperationAction(ISD::ATOMIC_LOAD, MVT::i8, Custom); ++ setOperationAction(ISD::ATOMIC_STORE, MVT::i8, Custom); ++ ++ setOperationAction(ISD::ATOMIC_LOAD, MVT::i16, Custom); ++ setOperationAction(ISD::ATOMIC_STORE, MVT::i16, Custom); ++ ++ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); ++ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); ++ ++ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); ++ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Custom); ++ ++ setOperationAction(ISD::FSIN, MVT::f64, Expand); ++ setOperationAction(ISD::FSINCOS, MVT::f64, Expand); ++ setOperationAction(ISD::FSIN, MVT::f32, Expand); ++ setOperationAction(ISD::FSINCOS, MVT::f32, Expand); ++ ++ setOperationAction(ISD::FADD, MVT::f128, Custom); ++ setOperationAction(ISD::FADD, MVT::i128, Custom); ++ setStackPointerRegisterToSaveRestore(Sw64::R30); ++ ++ if (Subtarget.hasSIMD() || Subtarget.hasCore4()) { ++ // We want to custom lower some of our intrinsics. 
++ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); ++ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, ++ Custom); // for builtin_sw64_load ++ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); ++ } ++ ++ if (Subtarget.hasSIMD()) { ++ addSIMDIntType(MVT::v32i8, &Sw64::V256LRegClass); ++ addSIMDIntType(MVT::v16i16, &Sw64::V256LRegClass); ++ addSIMDIntType(MVT::v8i32, &Sw64::V256LRegClass); ++ addSIMDIntType(MVT::v4i64, &Sw64::V256LRegClass); ++ addSIMDFloatType(MVT::v4f32, &Sw64::V256LRegClass); ++ addSIMDFloatType(MVT::v4f64, &Sw64::V256LRegClass); ++ ++ setTargetDAGCombine(ISD::AND); ++ setTargetDAGCombine(ISD::OR); ++ setTargetDAGCombine(ISD::SRA); ++ setTargetDAGCombine(ISD::VSELECT); ++ setTargetDAGCombine(ISD::XOR); ++ ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i32, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Legal); ++ ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i16, Expand); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i8, Expand); ++ ++ setOperationAction(ISD::SETCC, MVT::v8i32, Legal); ++ setOperationAction(ISD::SETCC, MVT::v4i64, Expand); ++ setOperationAction(ISD::SETCC, MVT::v4f32, Legal); ++ setOperationAction(ISD::SETCC, MVT::v4f64, Expand); ++ ++ if (Subtarget.hasCore4()) ++ for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64}) { ++ addRegisterClass(VT, &Sw64::V256LRegClass); ++ setOperationAction(ISD::SRL, VT, Custom); ++ setOperationAction(ISD::SHL, VT, Custom); ++ setOperationAction(ISD::SRA, VT, Custom); ++ setOperationAction(ISD::BUILD_VECTOR, VT, Expand); ++ } ++ else { ++ addRegisterClass(MVT::v8i32, &Sw64::V256LRegClass); ++ setOperationAction(ISD::SRL, MVT::v8i32, Custom); ++ setOperationAction(ISD::SHL, MVT::v8i32, Custom); ++ setOperationAction(ISD::SRA, MVT::v8i32, Custom); ++ setOperationAction(ISD::BUILD_VECTOR, MVT::v8i32, Custom); ++ } ++ } ++ ++ setOperationAction(ISD::FNEG, MVT::v4f32, Legal); ++ setOperationAction(ISD::FNEG, MVT::v4f64, Legal); ++ ++ setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal); ++ setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal); ++ ++ if (Subtarget.hasCore4() && Subtarget.enableIntShift()) { ++ setOperationAction(ISD::ROTR, MVT::i64, Expand); ++ setOperationAction(ISD::ROTL, MVT::i32, Custom); ++ } else { ++ setOperationAction(ISD::ROTL, MVT::i64, Expand); ++ setOperationAction(ISD::ROTR, MVT::i64, Expand); ++ } ++ if (Subtarget.hasCore4() && Subtarget.enableFloatAri()) { ++ setOperationAction(ISD::FDIV, MVT::f32, Legal); ++ setOperationAction(ISD::FDIV, MVT::f64, Legal); ++ } ++ ++ // return R ++ setLibcallName(RTLIB::OEQ_F128, "_OtsEqlX"); ++ setLibcallName(RTLIB::UNE_F128, "_OtsNeqX"); ++ setLibcallName(RTLIB::UO_F128, "_OtsNeqX"); ++ setLibcallName(RTLIB::OLE_F128, "_OtsLeqX"); ++ setLibcallName(RTLIB::OLT_F128, "_OtsLssX"); ++ setLibcallName(RTLIB::OGE_F128, "_OtsGeqX"); ++ setLibcallName(RTLIB::OGT_F128, "_OtsGtrX"); ++ // return R16+R17 ++ setLibcallName(RTLIB::FPEXT_F64_F128, "_OtsConvertFloatTX"); ++ setLibcallName(RTLIB::FPEXT_F32_F128, "_OtsConvertFloatTX"); ++ setLibcallName(RTLIB::UINTTOFP_I64_F128, "_OtsCvtQUX"); ++ setLibcallName(RTLIB::UINTTOFP_I32_F128, "_OtsCvtQUX"); ++ setLibcallName(RTLIB::SINTTOFP_I32_F128, "_OtsCvtQX"); ++ setLibcallName(RTLIB::SINTTOFP_I64_F128, "_OtsCvtQX"); ++ // add round return R ++ setLibcallName(RTLIB::FPTOSINT_F128_I64, "_OtsCvtXQ"); ++ 
setLibcallName(RTLIB::FPTOUINT_F128_I64, "_OtsCvtXQ"); ++ setLibcallName(RTLIB::FPROUND_F128_F64, "_OtsConvertFloatXT"); ++ setLibcallName(RTLIB::FPROUND_F128_F32, "_OtsConvertFloatXT"); ++ // add round return R16+R17 ++ setLibcallName(RTLIB::ADD_F128, "_OtsAddX"); ++ setLibcallName(RTLIB::SUB_F128, "_OtsSubX"); ++ setLibcallName(RTLIB::MUL_F128, "_OtsMulX"); ++ setLibcallName(RTLIB::DIV_F128, "_OtsDivX"); ++ setOperationAction(ISD::CTPOP, MVT::i32, Promote); ++ setOperationAction(ISD::CTPOP, MVT::i64, Legal); ++ ++ // setOperationAction(ISD::CTLZ, MVT::i32, Promote); ++ // setOperationAction(ISD::CTLZ, MVT::i64, Legal); ++ // ++ // setOperationAction(ISD::CTTZ, MVT::i32, Promote); ++ // setOperationAction(ISD::CTTZ, MVT::i64, Legal); ++ ++ setMinStackArgumentAlignment(Align(32)); ++ setMinFunctionAlignment(Align(8)); ++ setTargetDAGCombine(ISD::MUL); ++ ++ computeRegisterProperties(Subtarget.getRegisterInfo()); ++ MaxStoresPerMemsetOptSize = 16; ++ MaxStoresPerMemset = 16; ++ MaxStoresPerMemcpy = 4; ++ MaxStoresPerMemcpyOptSize = 4; ++} ++ ++bool Sw64TargetLowering::generateFMAsInMachineCombiner( ++ EVT VT, CodeGenOpt::Level OptLevel) const { ++ return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector(); ++} ++ ++EVT Sw64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, ++ EVT VT) const { ++ // Refer to other. ++ if (!VT.isVector()) ++ return MVT::i64; ++ ++ return VT.changeVectorElementTypeToInteger(); ++} ++ ++// http://www.cs.arizona.edu/computer.help/policy/DIGITAL_unix/ ++// AA-PY8AC-TET1_html/callCH3.html#BLOCK21 ++ ++// For now, just use variable size stack frame format ++ ++// In a standard call, the first six items are passed in registers $16 ++//- $21 and/or registers $f16 - $f21. (See Section 4.1.2 for details ++// of argument-to-register correspondence.) The remaining items are ++// collected in a memory argument list that is a naturally aligned ++// array of quadwords. In a standard call, this list, if present, must ++// be passed at 0(SP). ++// 7 ... n 0(SP) ... (n-7)*8(SP) ++ ++// //#define FP $15 ++// //#define RA $26 ++// //#define PV $27 ++// //#define GP $29 ++// //#define SP $30 ++ ++#include "Sw64GenCallingConv.inc" ++ ++static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, ++ SelectionDAG &DAG, unsigned Flags) { ++ ++ return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); ++} ++ ++static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, ++ SelectionDAG &DAG, unsigned Flags) { ++ ++ return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), ++ Flags); ++} ++ ++static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, ++ SelectionDAG &DAG, unsigned Flag) { ++ ++ return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); ++} ++ ++static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, ++ SelectionDAG &DAG, unsigned Flags) { ++ ++ return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), ++ N->getOffset(), Flags); ++} ++ ++/// This function returns true if CallSym is a long double emulation routine. ++static bool isF128SoftLibCall_void(const char *CallSym) { ++ const char *const LibCalls[] = { ++ "_OtsAddX", "_OtsConvertFloatTX", "_OtsCvtQUX", "_OtsCvtQX", ++ "_OtsDivX", "_OtsMulX", "_OtsSubX"}; ++ ++ // Check that LibCalls is sorted betically. 
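The isF128SoftLibCall_* helpers look the symbol up with std::binary_search, which is only correct when the table is sorted under the same comparator, hence the assert that follows. A standalone illustration, not part of the patch, using the same routine names as the table above:

#include <algorithm>
#include <cassert>
#include <cstring>
#include <iterator>

int main() {
  // Same routine names as the emulation table; the comparator must match the
  // order the table is written in, or binary_search may miss entries.
  const char *const LibCalls[] = {"_OtsAddX", "_OtsConvertFloatTX",
                                  "_OtsCvtQUX", "_OtsCvtQX",
                                  "_OtsDivX",  "_OtsMulX", "_OtsSubX"};
  auto Comp = [](const char *A, const char *B) { return strcmp(A, B) < 0; };
  assert(std::is_sorted(std::begin(LibCalls), std::end(LibCalls), Comp));
  assert(std::binary_search(std::begin(LibCalls), std::end(LibCalls),
                            "_OtsMulX", Comp));
  assert(!std::binary_search(std::begin(LibCalls), std::end(LibCalls),
                             "_OtsCvtXQ", Comp)); // only in the _round table
}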
++ auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; }; ++ assert(std::is_sorted(std::begin(LibCalls), std::end(LibCalls), Comp)); ++ ++ return std::binary_search(std::begin(LibCalls), std::end(LibCalls), CallSym, ++ Comp); ++} ++ ++/// This function returns true if CallSym is a long double emulation routine. ++static bool isF128SoftLibCall_round(const char *CallSym) { ++ const char *const LibCalls[] = { ++ "_OtsAddX", "_OtsConvertFloatTX", "_OtsConvertFloatXT", ++ "_OtsCvtXQ", "_OtsDivX", "_OtsMulX", ++ "_OtsSubX"}; ++ ++ // Check that LibCalls is sorted betically. ++ auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; }; ++ assert(std::is_sorted(std::begin(LibCalls), std::end(LibCalls), Comp)); ++ ++ return std::binary_search(std::begin(LibCalls), std::end(LibCalls), CallSym, ++ Comp); ++} ++ ++// Enable SIMD support for the given integer type and Register class. ++void Sw64TargetLowering::addSIMDIntType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC) { ++ addRegisterClass(Ty, RC); ++ ++ // Expand all builtin opcodes. ++ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) ++ setOperationAction(Opc, Ty, Expand); ++ ++ // for vfcmpxxs ++ setTruncStoreAction(MVT::v4i64, MVT::v4i32, Custom); ++ ++ setOperationAction(ISD::BITCAST, Ty, Legal); ++ setOperationAction(ISD::LOAD, Ty, Legal); ++ setOperationAction(ISD::STORE, Ty, Legal); ++ ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Custom); ++ // setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); ++ ++ // if (Ty == MVT::v8i32) { ++ // } ++ setOperationAction(ISD::ROTL, Ty, Custom); ++ setOperationAction(ISD::ROTR, Ty, Expand); ++ setOperationAction(ISD::ADD, Ty, Legal); ++ setOperationAction(ISD::AND, Ty, Legal); ++ setOperationAction(ISD::MUL, Ty, Legal); ++ setOperationAction(ISD::OR, Ty, Legal); ++ setOperationAction(ISD::SDIV, Ty, Legal); ++ setOperationAction(ISD::SREM, Ty, Legal); ++ setOperationAction(ISD::SUB, Ty, Legal); ++ // setOperationAction(ISD::SMAX, Ty, Legal); ++ // setOperationAction(ISD::SMIN, Ty, Legal); ++ setOperationAction(ISD::UDIV, Ty, Legal); ++ setOperationAction(ISD::UREM, Ty, Legal); ++ setOperationAction(ISD::UMAX, Ty, Legal); ++ setOperationAction(ISD::UMIN, Ty, Legal); ++ setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); ++ // setOperationAction(ISD::VSELECT, Ty, Legal); ++ setOperationAction(ISD::XOR, Ty, Legal); ++ ++ setOperationAction(ISD::VECREDUCE_ADD, Ty, Legal); ++ ++ if (Ty == MVT::v8i32 || Ty == MVT::v4i64) { ++ setOperationAction(ISD::FP_TO_SINT, Ty, Legal); ++ setOperationAction(ISD::FP_TO_UINT, Ty, Legal); ++ setOperationAction(ISD::SINT_TO_FP, Ty, Legal); ++ setOperationAction(ISD::UINT_TO_FP, Ty, Legal); ++ } ++ // setOperationAction(ISD::SETCC, Ty, Legal); ++ setCondCodeAction(ISD::SETNE, Ty, Expand); ++ setCondCodeAction(ISD::SETGE, Ty, Expand); ++ setCondCodeAction(ISD::SETGT, Ty, Expand); ++ setCondCodeAction(ISD::SETUGE, Ty, Expand); ++ setCondCodeAction(ISD::SETUGT, Ty, Expand); ++} ++ ++// Enable SIMD support for the given floating-point type and Register class. ++void Sw64TargetLowering::addSIMDFloatType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC) { ++ addRegisterClass(Ty, RC); ++ ++ // Expand all builtin opcodes. 
++ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) ++ setOperationAction(Opc, Ty, Expand); ++ ++ setOperationAction(ISD::LOAD, Ty, Legal); ++ setOperationAction(ISD::STORE, Ty, Legal); ++ setOperationAction(ISD::BITCAST, Ty, Legal); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Custom); ++ setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); ++ ++ setOperationAction(ISD::FCOPYSIGN, Ty, Legal); ++ ++ if (Ty != MVT::v16f16) { ++ setOperationAction(ISD::FABS, Ty, Expand); ++ setOperationAction(ISD::FADD, Ty, Legal); ++ setOperationAction(ISD::FDIV, Ty, Legal); ++ setOperationAction(ISD::FEXP2, Ty, Legal); ++ setOperationAction(ISD::FLOG2, Ty, Legal); ++ setOperationAction(ISD::FMA, Ty, Legal); ++ setOperationAction(ISD::FMUL, Ty, Legal); ++ setOperationAction(ISD::FRINT, Ty, Legal); ++ setOperationAction(ISD::FSQRT, Ty, Legal); ++ setOperationAction(ISD::FSUB, Ty, Legal); ++ setOperationAction(ISD::VSELECT, Ty, Legal); ++ ++ setOperationAction(ISD::SETCC, Ty, Legal); ++ setCondCodeAction(ISD::SETO, Ty, Custom); ++ setCondCodeAction(ISD::SETOGE, Ty, Expand); ++ setCondCodeAction(ISD::SETOGT, Ty, Expand); ++ setCondCodeAction(ISD::SETUGE, Ty, Expand); ++ setCondCodeAction(ISD::SETUGT, Ty, Expand); ++ setCondCodeAction(ISD::SETGE, Ty, Expand); ++ setCondCodeAction(ISD::SETGT, Ty, Expand); ++ setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); ++ } ++} ++ ++// Fold zero extensions into Sw64ISD::VEXTRACT_[SZ]EXT_ELT ++// ++// Performs the following transformations: ++// - Changes Sw64ISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its ++// sign/zero-extension is completely overwritten by the new one performed by ++// the ISD::AND. ++// - Removes redundant zero extensions performed by an ISD::AND. ++static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const Sw64Subtarget &Subtarget) { ++ return SDValue(); ++ // llvm_unreachable("ANDCombine Optimization not implemented"); ++} ++ ++// Perform combines where ISD::OR is the root node. ++// ++// Performs the following transformations: ++// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b) ++// where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit ++// vector type. ++static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const Sw64Subtarget &Subtarget) { ++ return SDValue(); ++ // llvm_unreachable("ORCCombine Optimization not implemented"); ++} ++ ++static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT, ++ SelectionDAG &DAG, ++ const Sw64Subtarget &Subtarget) { ++ unsigned MaxSteps = 4; ++ SmallVector WorkStack(1, C); ++ unsigned Steps = 0; ++ unsigned BitWidth = C.getBitWidth(); ++ ++ while (!WorkStack.empty()) { ++ APInt Val = WorkStack.pop_back_val(); ++ ++ if (Val == 0 || Val == 1) ++ continue; ++ ++ if (Steps >= MaxSteps) ++ return false; ++ ++ if (Val.isPowerOf2()) { ++ ++Steps; ++ continue; ++ } ++ ++ APInt Floor = APInt(BitWidth, 1) << Val.logBase2(); ++ APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0) ++ : APInt(BitWidth, 1) << C.ceilLogBase2(); ++ if ((Val - Floor).ule(Ceil - Val)) { ++ WorkStack.push_back(Floor); ++ WorkStack.push_back(Val - Floor); ++ } else { ++ WorkStack.push_back(Ceil); ++ WorkStack.push_back(Ceil - Val); ++ } ++ ++ ++Steps; ++ } ++ // If the value being multiplied is not supported natively, we have to pay ++ // an additional legalization cost, conservatively assume an increase in the ++ // cost of 3 instructions per step. 
This values for this heuristic were ++ // determined experimentally. ++ unsigned RegisterSize = DAG.getTargetLoweringInfo() ++ .getRegisterType(*DAG.getContext(), VT) ++ .getSizeInBits(); ++ Steps *= (VT.getSizeInBits() != RegisterSize) * 3; ++ if (Steps > 27) ++ return false; ++ ++ return true; ++} ++ ++static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT, ++ EVT ShiftTy, SelectionDAG &DAG) { ++ // Return 0. ++ if (C == 0) ++ return DAG.getConstant(0, DL, VT); ++ ++ // Return x. ++ if (C == 1) ++ return X; ++ ++ // If c is power of 2, return (shl x, log2(c)). ++ if (C.isPowerOf2()) ++ return DAG.getNode(ISD::SHL, DL, VT, X, ++ DAG.getConstant(C.logBase2(), DL, ShiftTy)); ++ ++ unsigned BitWidth = C.getBitWidth(); ++ APInt Floor = APInt(BitWidth, 1) << C.logBase2(); ++ APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) ++ : APInt(BitWidth, 1) << C.ceilLogBase2(); ++ ++ // If |c - floor_c| <= |c - ceil_c|, ++ // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))), ++ // return (add constMult(x, floor_c), constMult(x, c - floor_c)). ++ if ((C - Floor).ule(Ceil - C)) { ++ SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG); ++ SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG); ++ return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); ++ } ++ ++ // If |c - floor_c| > |c - ceil_c|, ++ // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)). ++ SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG); ++ SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG); ++ return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); ++} ++ ++static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const Sw64Subtarget &Subtarget) { ++ EVT VT = N->getValueType(0); ++ ++ if (Subtarget.enOptMul()) ++ if (ConstantSDNode *C = dyn_cast(N->getOperand(1))) ++ if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs( ++ C->getAPIntValue(), VT, DAG, Subtarget)) ++ return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT, ++ MVT::i64, DAG); ++ ++ return SDValue(N, 0); ++} ++ ++static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const Sw64Subtarget &Subtarget) { ++ return SDValue(); ++ // llvm_unreachable("SHLCombine Optimization not implemented"); ++} ++ ++static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const Sw64Subtarget &Subtarget) { ++ return SDValue(); ++ // llvm_unreachable("SRACombine Optimization not implemented"); ++} ++ ++static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const Sw64Subtarget &Subtarget) { ++ return SDValue(); ++ // llvm_unreachable("SRLCombine Optimization not implemented"); ++} ++ ++static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { ++ return SDValue(); ++ // llvm_unreachable("SETCCCombine Optimization not implemented"); ++} ++ ++static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { ++ return SDValue(); ++ // llvm_unreachable("VSELECTCombine Optimization not implemented"); ++} ++ ++static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, ++ const Sw64Subtarget &Subtarget) { ++ return SDValue(); ++ // llvm_unreachable("XORCombine Optimization not implemented"); ++} ++ ++SDValue Sw64TargetLowering::PerformDAGCombine(SDNode *N, ++ DAGCombinerInfo &DCI) const { ++ SelectionDAG &DAG = DCI.DAG; ++ SDValue Val; ++ ++ switch (N->getOpcode()) { ++ case ISD::AND: ++ Val = 
performANDCombine(N, DAG, DCI, Subtarget); ++ break; ++ case ISD::OR: ++ Val = performORCombine(N, DAG, DCI, Subtarget); ++ break; ++ case ISD::MUL: ++ return performMULCombine(N, DAG, DCI, Subtarget); ++ case ISD::SHL: ++ Val = performSHLCombine(N, DAG, DCI, Subtarget); ++ break; ++ case ISD::SRA: ++ return performSRACombine(N, DAG, DCI, Subtarget); ++ case ISD::SRL: ++ return performSRLCombine(N, DAG, DCI, Subtarget); ++ case ISD::VSELECT: ++ return performVSELECTCombine(N, DAG); ++ case ISD::XOR: ++ Val = performXORCombine(N, DAG, Subtarget); ++ break; ++ case ISD::SETCC: ++ Val = performSETCCCombine(N, DAG); ++ break; ++ } ++ ++ if (Val.getNode()) { ++ LLVM_DEBUG(dbgs() << "\nSw64 DAG Combine:\n"; ++ N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n"; ++ Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n"); ++ return Val; ++ } ++ ++ return Sw64TargetLowering::PerformDAGCombineV(N, DCI); ++} ++ ++/// ------------------------- scaler ------------------------------ /// ++ ++static SDValue performDivRemCombineV(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const Sw64Subtarget &Subtarget) { ++ return SDValue(); ++ // llvm_unreachable("DivRemCombineV Optimization not implemented"); ++} ++ ++static SDValue performSELECTCombineV(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const Sw64Subtarget &Subtarget) { ++ return SDValue(); ++ // llvm_unreachable("SELECTCombineV Optimization not implemented"); ++} ++ ++static SDValue performANDCombineV(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const Sw64Subtarget &Subtarget) { ++ return SDValue(); ++ // llvm_unreachable("ANDCombineV Optimization not implemented"); ++} ++ ++static SDValue performORCombineV(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const Sw64Subtarget &Subtarget) { ++ return SDValue(); ++ // llvm_unreachable("ORCCombineV Optimization not implemented"); ++} ++ ++static SDValue performADDCombineV(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const Sw64Subtarget &Subtarget) { ++ return SDValue(); ++ // llvm_unreachable("ADDCombineV Optimization not implemented"); ++} ++ ++static SDValue performSHLCombineV(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const Sw64Subtarget &Subtarget) { ++ return SDValue(); ++ // llvm_unreachable("SHLCombineV Optimization not implemented"); ++} ++ ++SDValue Sw64TargetLowering::PerformDAGCombineV(SDNode *N, ++ DAGCombinerInfo &DCI) const { ++ SelectionDAG &DAG = DCI.DAG; ++ unsigned Opc = N->getOpcode(); ++ ++ switch (Opc) { ++ default: ++ break; ++ case ISD::SDIVREM: ++ case ISD::UDIVREM: ++ return performDivRemCombineV(N, DAG, DCI, Subtarget); ++ case ISD::SELECT: ++ return performSELECTCombineV(N, DAG, DCI, Subtarget); ++ case ISD::AND: ++ return performANDCombineV(N, DAG, DCI, Subtarget); ++ case ISD::OR: ++ return performORCombineV(N, DAG, DCI, Subtarget); ++ case ISD::ADD: ++ return performADDCombineV(N, DAG, DCI, Subtarget); ++ case ISD::SHL: ++ return performSHLCombineV(N, DAG, DCI, Subtarget); ++ // case ISD::SUB: ++ // return performSUBCombine(N, DAG, DCI, Subtarget); ++ } ++ ++ return SDValue(); ++} ++ ++SDValue Sw64TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ++ SmallVectorImpl &InVals) const { ++ ++ SelectionDAG &DAG = CLI.DAG; ++ SDLoc &dl = CLI.DL; ++ SmallVectorImpl &Outs = CLI.Outs; ++ SmallVectorImpl &OutVals = CLI.OutVals; ++ SmallVectorImpl &Ins = CLI.Ins; ++ SDValue Chain = CLI.Chain; ++ 
SDValue Callee = CLI.Callee;
++  bool &isTailCall = CLI.IsTailCall;
++  CallingConv::ID CallConv = CLI.CallConv;
++  bool isVarArg = CLI.IsVarArg;
++  EVT PtrVT = getPointerTy(DAG.getDataLayout());
++
++  MachineFunction &MF = DAG.getMachineFunction();
++  // Sw64 target does not yet support tail call optimization.
++  isTailCall = false;
++
++  // Analyze operands of the call, assigning locations to each operand.
++  SmallVector<CCValAssign, 16> ArgLocs;
++  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
++                 *DAG.getContext());
++
++  CCInfo.AnalyzeCallOperands(Outs, CC_Sw64);
++
++  // Get a count of how many bytes are to be pushed on the stack.
++  unsigned NumBytes = CCInfo.getStackSize();
++  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
++  SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
++  SmallVector<SDValue, 8> MemOpChains;
++  SDValue StackPtr;
++  RegsToPass.push_back(std::make_pair((unsigned)Sw64::R27, Callee));
++
++  // Walk the register/memloc assignments, inserting copies/loads.
++  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
++    CCValAssign &VA = ArgLocs[i];
++
++    SDValue Arg = OutVals[i];
++
++    // Promote the value if needed.
++    switch (VA.getLocInfo()) {
++    default:
++      assert(0 && "Unknown loc info!");
++    case CCValAssign::Full:
++      break;
++    case CCValAssign::SExt:
++      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
++      break;
++    case CCValAssign::ZExt:
++      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
++      break;
++    case CCValAssign::AExt:
++      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
++      break;
++    }
++    // Arguments that can be passed in a register must be kept in the
++    // RegsToPass vector.
++    if (VA.isRegLoc()) {
++      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
++    } else {
++      assert(VA.isMemLoc());
++
++      if (StackPtr.getNode() == 0)
++        StackPtr = DAG.getCopyFromReg(Chain, dl, Sw64::R30, MVT::i64);
++
++      SDValue PtrOff =
++          DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
++                      StackPtr,
++                      DAG.getIntPtrConstant(VA.getLocMemOffset(), dl));
++
++      MemOpChains.push_back(
++          DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
++    }
++  }
++  const ExternalSymbolSDNode *ES =
++      dyn_cast_or_null<ExternalSymbolSDNode>(Callee.getNode());
++  if (ES && isF128SoftLibCall_round(ES->getSymbol())) {
++    RegsToPass.push_back(std::make_pair(((unsigned)Sw64::R16) + ArgLocs.size(),
++                                        DAG.getConstant(2, dl, MVT::i64)));
++  }
++
++  // FIXME: Fix the error for clang-repl.
++
++  // Transform all store nodes into one single node because all store nodes
++  // are independent of each other.
++  if (!MemOpChains.empty())
++    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
++
++  // Build a sequence of copy-to-reg nodes chained together with token chain
++  // and flag operands which copy the outgoing args into registers. The InFlag
++  // is necessary since all emitted instructions must be stuck together.
++  SDValue InFlag;
++  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
++    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
++                             RegsToPass[i].second, InFlag);
++    InFlag = Chain.getValue(1);
++  }
++
++  // Returns a chain & a flag for retval copy to use.
++  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
++
++  SmallVector<SDValue, 8> Ops;
++  Ops.push_back(Chain);
++  // Fix the error for clang-repl.
++  // Ops.push_back(Callee);
++
++  // Add argument registers to the end of the list so that they are
++  // known live into the call.
++ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) ++ Ops.push_back(DAG.getRegister(RegsToPass[i].first, ++ RegsToPass[i].second.getValueType())); ++ ++ if (!isTailCall) { ++ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); ++ const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); ++ assert(Mask && "Missing call preserved mask for calling convention"); ++ Ops.push_back(DAG.getRegisterMask(Mask)); ++ } ++ ++ if (InFlag.getNode()) ++ Ops.push_back(InFlag); ++ Chain = DAG.getNode(Sw64ISD::JmpLink, dl, NodeTys, Ops); ++ InFlag = Chain.getValue(1); ++ ++ // Create the CALLSEQ_END node. ++ Chain = DAG.getCALLSEQ_END( ++ Chain, ++ DAG.getConstant(NumBytes, dl, getPointerTy(DAG.getDataLayout()), true), ++ DAG.getConstant(0, dl, getPointerTy(DAG.getDataLayout()), true), InFlag, ++ dl); ++ InFlag = Chain.getValue(1); ++ ++ // Handle result values, copying them out of physregs into vregs that we ++ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, ++ InVals, CLI.Callee.getNode(), CLI.RetTy); ++} ++ ++/// LowerCallResult - Lower the result values of a call into the ++/// appropriate copies out of appropriate physical registers. ++/// ++SDValue Sw64TargetLowering::LowerCallResult( ++ SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, ++ const SmallVectorImpl &Ins, SDLoc &dl, SelectionDAG &DAG, ++ SmallVectorImpl &InVals, const SDNode *CallNode, ++ const Type *RetTy) const { ++ // Assign locations to each value returned by this call. ++ SmallVector RVLocs; ++ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, ++ *DAG.getContext()); ++ const ExternalSymbolSDNode *ES = ++ dyn_cast_or_null(CallNode); ++ ++ if (ES && isF128SoftLibCall_void(ES->getSymbol())) ++ CCInfo.AnalyzeCallResult(Ins, RetCC_F128Soft_Sw64); ++ else ++ ++ CCInfo.AnalyzeCallResult(Ins, RetCC_Sw64); ++ ++ // Copy all of the result registers out of their specified physreg. ++ for (unsigned i = 0; i != RVLocs.size(); ++i) { ++ CCValAssign &VA = RVLocs[i]; ++ ++ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), InFlag) ++ .getValue(1); ++ ++ SDValue RetValue = Chain.getValue(0); ++ InFlag = Chain.getValue(2); ++ ++ // If this is an 8/16/32-bit value, it is really passed promoted to 64 ++ // bits. Insert an assert[sz]ext to capture this, then truncate to the ++ // right size. 
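++    // For example, an i32 value that the calling convention marks as
++    // sign-extended arrives as the low 32 bits of a 64-bit register, so the
++    // code below wraps it as (truncate (AssertSext reg, i32)) rather than
++    // re-extending it in the caller.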
++ ++ if (VA.getLocInfo() == CCValAssign::SExt) ++ RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue, ++ DAG.getValueType(VA.getValVT())); ++ else if (VA.getLocInfo() == CCValAssign::ZExt) ++ RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue, ++ DAG.getValueType(VA.getValVT())); ++ ++ if (VA.getLocInfo() != CCValAssign::Full) ++ RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue); ++ ++ InVals.push_back(RetValue); ++ } ++ ++ return Chain; ++} ++ ++SDValue Sw64TargetLowering::LowerFormalArguments( ++ SDValue Chain, CallingConv::ID CallConv, bool isVarArg, ++ const SmallVectorImpl &Ins, const SDLoc &dl, ++ SelectionDAG &DAG, SmallVectorImpl &InVals) const { ++ ++ MachineFunction &MF = DAG.getMachineFunction(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ Sw64MachineFunctionInfo *FuncInfo = MF.getInfo(); ++ ++ unsigned args_int[] = {Sw64::R16, Sw64::R17, Sw64::R18, ++ Sw64::R19, Sw64::R20, Sw64::R21}; ++ unsigned args_float[] = {Sw64::F16, Sw64::F17, Sw64::F18, ++ Sw64::F19, Sw64::F20, Sw64::F21}; ++ unsigned args_vector[] = {Sw64::V16, Sw64::V17, Sw64::V18, ++ Sw64::V19, Sw64::V20, Sw64::V21}; ++ ++ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { ++ SDValue argt; ++ EVT ObjectVT = Ins[ArgNo].VT; ++ SDValue ArgVal; ++ if (ArgNo < 6) { ++ switch (ObjectVT.getSimpleVT().SimpleTy) { ++ default: ++ assert(false && "Invalid value type!"); ++ case MVT::f64: ++ args_float[ArgNo] = ++ AddLiveIn(MF, args_float[ArgNo], &Sw64::F8RCRegClass); ++ ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT); ++ break; ++ case MVT::f32: ++ args_float[ArgNo] = ++ AddLiveIn(MF, args_float[ArgNo], &Sw64::F4RCRegClass); ++ ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT); ++ break; ++ case MVT::i64: ++ args_int[ArgNo] = AddLiveIn(MF, args_int[ArgNo], &Sw64::GPRCRegClass); ++ ArgVal = DAG.getCopyFromReg(Chain, dl, args_int[ArgNo], MVT::i64); ++ break; ++ case MVT::v32i8: ++ case MVT::v16i16: ++ case MVT::v8i32: ++ case MVT::v4i64: ++ case MVT::v4f32: ++ case MVT::v4f64: ++ args_vector[ArgNo] = ++ AddLiveIn(MF, args_vector[ArgNo], &Sw64::V256LRegClass); ++ ArgVal = DAG.getCopyFromReg(Chain, dl, args_vector[ArgNo], ObjectVT); ++ // args_float[ArgNo] = ++ // AddLiveIn(MF, args_float[ArgNo], &Sw64::F8RCRegClass); ++ // ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ++ // ObjectVT); ++ break; ++ } ++ } else { // more args ++ // Create the frame index object for this incoming parameter... 
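++      // Stack arguments follow the six register arguments, so incoming
++      // argument N (N >= 6) is the naturally aligned quadword at
++      // (N - 6) * 8(SP), matching the argument-list layout described above.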
++ int FI = MFI.CreateFixedObject(8, 8 * (ArgNo - 6), true); ++ ++ // Create the SelectionDAG nodes corresponding to a load ++ // from this parameter ++ SDValue FIN = DAG.getFrameIndex(FI, MVT::i64); ++ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); ++ } ++ InVals.push_back(ArgVal); ++ } ++ ++ // If the functions takes variable number of arguments, copy all regs to stack ++ if (isVarArg) { ++ FuncInfo->setVarArgsOffset(Ins.size() * 8); ++ std::vector LS; ++ for (int i = 0; i < 6; ++i) { ++ if (Register::isPhysicalRegister(args_int[i])) ++ args_int[i] = AddLiveIn(MF, args_int[i], &Sw64::GPRCRegClass); ++ SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64); ++ int FI = MFI.CreateFixedObject(8, -8 * (6 - i), true); ++ if (i == 0) ++ FuncInfo->setVarArgsBase(FI); ++ SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64); ++ LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo())); ++ if (Register::isPhysicalRegister(args_float[i])) ++ args_float[i] = AddLiveIn(MF, args_float[i], &Sw64::F8RCRegClass); ++ argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64); ++ FI = MFI.CreateFixedObject(8, -8 * (12 - i), true); ++ SDFI = DAG.getFrameIndex(FI, MVT::i64); ++ LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo())); ++ } ++ // Set up a token factor with all the stack traffic ++ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LS); ++ } ++ ++ return Chain; ++} ++ ++//===----------------------------------------------------------------------===// ++// Return Value Calling Convention Implementation ++//===----------------------------------------------------------------------===// ++ ++bool Sw64TargetLowering::CanLowerReturn( ++ CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, ++ const SmallVectorImpl &Outs, LLVMContext &Context) const { ++ SmallVector RVLocs; ++ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); ++ return CCInfo.CheckReturn(Outs, RetCC_Sw64); ++} ++ ++SDValue ++Sw64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, ++ bool isVarArg, ++ const SmallVectorImpl &Outs, ++ const SmallVectorImpl &OutVals, ++ const SDLoc &dl, SelectionDAG &DAG) const { ++ ++ SDValue Copy = DAG.getCopyToReg( ++ Chain, dl, Sw64::R26, DAG.getNode(Sw64ISD::GlobalRetAddr, dl, MVT::i64), ++ SDValue()); ++ SmallVector RetOps(1, Chain); ++ ++ SDValue Flag; ++ unsigned outSize = Outs.size(); ++ unsigned *ArgReg = new unsigned[outSize]; ++ for (unsigned j = 0, r = 0, f = 0, v = 0; j != outSize; j++) { ++ EVT ArgVT = Outs[j].VT; ++ switch (ArgVT.getSimpleVT().SimpleTy) { ++ default: ++ if (ArgVT.isInteger()) ++ ArgReg[j] = Sw64::R0 + r++; ++ else ++ ArgReg[j] = Sw64::F0 + f++; ++ Copy = ++ DAG.getCopyToReg(Copy, dl, ArgReg[j], OutVals[j], Copy.getValue(1)); ++ ++ if (ArgVT.isInteger()) ++ RetOps.push_back(DAG.getRegister(ArgReg[j], MVT::i64)); ++ else ++ RetOps.push_back(DAG.getRegister(ArgReg[j], ArgVT.getSimpleVT())); ++ break; ++ ++ case MVT::v32i8: ++ case MVT::v16i16: ++ case MVT::v8i32: ++ case MVT::v4i64: ++ case MVT::v4f32: ++ case MVT::v4f64: ++ ArgReg[j] = Sw64::V0 + v++; ++ Copy = ++ DAG.getCopyToReg(Copy, dl, ArgReg[j], OutVals[j], Copy.getValue(1)); ++ RetOps.push_back(DAG.getRegister(ArgReg[j], ArgVT.getSimpleVT())); ++ break; ++ } ++ } ++ ++ RetOps[0] = Copy; ++ RetOps.push_back(Copy.getValue(1)); ++ return DAG.getNode(Sw64ISD::Ret, dl, MVT::Other, RetOps); ++} ++ ++void Sw64TargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr, ++ SelectionDAG &DAG) const { ++ ++ SDLoc dl(N); 
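++  // The va_list this lowering assumes is a (base, offset) pair: the pointer
++  // to the argument save area lives at 0(va_list) and a 32-bit byte offset
++  // at 8(va_list). The next argument is read from base + offset, except that
++  // floating-point arguments still inside the first 6*8 bytes come from the
++  // FP save area at base + offset - 48. The offset is then advanced by the
++  // argument's alignment (at least 8) and stored back.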
++ Chain = N->getOperand(0); ++ SDValue VAListP = N->getOperand(1); ++ const Value *VAListS = cast(N->getOperand(2))->getValue(); ++ unsigned Align = cast(N->getOperand(3))->getZExtValue(); ++ Align = std::max(Align,8u); ++ ++ SDValue Base = ++ DAG.getLoad(MVT::i64, dl, Chain, VAListP, MachinePointerInfo(VAListS)); ++ SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, ++ DAG.getConstant(8, dl, MVT::i64)); ++ SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1), ++ Tmp, MachinePointerInfo(), MVT::i32); ++ DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset); ++ if (N->getValueType(0).isFloatingPoint()) { ++ // if fp && Offset < 6*8, then subtract 6*8 from DataPtr ++ SDValue FPDataPtr = DAG.getNode(ISD::SUB, dl, MVT::i64, DataPtr, ++ DAG.getConstant(8 * 6, dl, MVT::i64)); ++ SDValue CC = DAG.getSetCC(dl, MVT::i64, Offset, ++ DAG.getConstant(8 * 6, dl, MVT::i64), ISD::SETLT); ++ DataPtr = DAG.getNode(ISD::SELECT, dl, MVT::i64, CC, FPDataPtr, DataPtr); ++ } ++ SDValue NewOffset = DAG.getNode( ++ ISD::ADD, dl, MVT::i64, Offset, ++ DAG.getConstant(Align, dl, MVT::i64)); ++ Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp, ++ MachinePointerInfo(), MVT::i32); ++} ++ ++/// LowerOperation - Provide custom lowering hooks for some operations. ++/// ++SDValue Sw64TargetLowering::LowerOperation(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc dl(Op); ++ switch (Op.getOpcode()) { ++ default: ++ llvm_unreachable("Wasn't expecting to be able to lower this!"); ++ case ISD::JumpTable: ++ return LowerJumpTable(Op, DAG); ++ case ISD::INTRINSIC_WO_CHAIN: ++ return LowerINTRINSIC_WO_CHAIN(Op, DAG); ++ case ISD::INTRINSIC_W_CHAIN: ++ return LowerINTRINSIC_W_CHAIN(Op, DAG); ++ case ISD::INTRINSIC_VOID: ++ return LowerINTRINSIC_VOID(Op, DAG); ++ case ISD::SRL_PARTS: ++ return LowerSRL_PARTS(Op, DAG); ++ case ISD::SRA_PARTS: ++ return LowerSRA_PARTS(Op, DAG); ++ case ISD::SHL_PARTS: ++ return LowerSHL_PARTS(Op, DAG); ++ case ISD::SINT_TO_FP: ++ return LowerSINT_TO_FP(Op, DAG); ++ case ISD::FP_TO_SINT: ++ return LowerFP_TO_SINT(Op, DAG); ++ case ISD::FP_TO_SINT_SAT: ++ case ISD::FP_TO_UINT_SAT: ++ return LowerFP_TO_INT_SAT(Op, DAG); ++ case ISD::ConstantPool: ++ return LowerConstantPool(Op, DAG); ++ case ISD::BlockAddress: ++ return LowerBlockAddress(Op, DAG); ++ case ISD::GlobalTLSAddress: ++ return LowerGlobalTLSAddress(Op, DAG); ++ case ISD::GlobalAddress: ++ return LowerGlobalAddress(Op, DAG); ++ case ISD::ExternalSymbol: ++ return LowerExternalSymbol(Op, DAG); ++ case ISD::ATOMIC_FENCE: ++ return LowerATOMIC_FENCE(Op, DAG); ++ case ISD::ATOMIC_LOAD: ++ return LowerATOMIC_LOAD(Op, DAG); ++ case ISD::ATOMIC_STORE: ++ return LowerATOMIC_STORE(Op, DAG); ++ case ISD::OR: ++ return LowerOR(Op, DAG); ++ case ISD::UREM: ++ case ISD::SREM: ++ return LowerSUREM(Op, DAG); ++ // fall through ++ case ISD::SDIV: ++ case ISD::UDIV: ++ return LowerSUDIV(Op, DAG); ++ case ISD::VAARG: ++ return LowerVAARG(Op, DAG); ++ case ISD::VACOPY: ++ return LowerVACOPY(Op, DAG); ++ case ISD::VASTART: ++ return LowerVASTART(Op, DAG); ++ case ISD::RETURNADDR: ++ return DAG.getNode(Sw64ISD::GlobalRetAddr, dl, MVT::i64); ++ case ISD::FRAMEADDR: ++ return LowerFRAMEADDR(Op, DAG); ++ case ISD::PREFETCH: ++ return LowerPREFETCH(Op, DAG); ++ case ISD::EXTRACT_VECTOR_ELT: ++ return LowerEXTRACT_VECTOR_ELT(Op, DAG); ++ case ISD::INSERT_VECTOR_ELT: ++ return LowerINSERT_VECTOR_ELT(Op, DAG); ++ case ISD::BUILD_VECTOR: ++ return LowerBUILD_VECTOR(Op, DAG); ++ case ISD::SHL: ++ case ISD::SRL: ++ 
case ISD::SRA: ++ case ISD::ROTL: ++ return LowerVectorShift(Op, DAG); ++ case ISD::VECTOR_SHUFFLE: ++ return LowerVECTOR_SHUFFLE(Op, DAG); ++ case ISD::SETCC: ++ return LowerSETCC(Op, DAG); ++ case ISD::STORE: ++ return LowerSTORE(Op, DAG); ++ } ++ ++ return SDValue(); ++} ++ ++SDValue Sw64TargetLowering::LowerVectorShift(SDValue Op, ++ SelectionDAG &DAG) const { ++ // Look for cases where a vector shift can use the *_BY_SCALAR form. ++ // SDValue Op0 = Op.getOperand(0); ++ // SDValue Op1 = Op.getOperand(1); ++ SDLoc DL(Op); ++ EVT VT = Op.getValueType(); ++ // unsigned ElemBitSize = VT.getScalarSizeInBits(); ++ ++ // See whether the shift vector is a splat represented as BUILD_VECTOR. ++ switch (Op.getOpcode()) { ++ default: ++ llvm_unreachable("unexpect vecotr opcode"); ++ case ISD::ROTL: ++ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, ++ DAG.getConstant(Intrinsic::sw64_vrol, DL, MVT::i64), ++ Op.getOperand(0), Op.getOperand(1)); ++ case ISD::SHL: ++ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, ++ DAG.getConstant(Intrinsic::sw64_vsll, DL, MVT::i64), ++ Op.getOperand(0), Op.getOperand(1)); ++ case ISD::SRL: ++ case ISD::SRA: ++ unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::sw64_vsra ++ : Intrinsic::sw64_vsrl; ++ ++ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, ++ DAG.getConstant(Opc, DL, MVT::i64), Op.getOperand(0), ++ Op.getOperand(1)); ++ } ++ ++ // Otherwise just treat the current form as legal. ++ return Op; ++} ++ ++// Lower Operand specifics ++SDValue Sw64TargetLowering::LowerJumpTable(SDValue Op, ++ SelectionDAG &DAG) const { ++ LLVM_DEBUG(dbgs() << "Sw64:: begin lowJumpTable----\n"); ++ JumpTableSDNode *JT = cast(Op); ++ // FIXME there isn't really any debug info here ++ SDLoc dl(Op); ++ return getAddr(JT, DAG); ++} ++ ++SDValue Sw64TargetLowering::LowerConstantPool(SDValue Op, ++ SelectionDAG &DAG) const { ++ LLVM_DEBUG(dbgs() << "Sw64:: begin lowConstantPool----\n"); ++ SDLoc dl(Op); ++ SDLoc DL(Op); ++ ConstantPoolSDNode *N = cast(Op); ++ // FIXME there isn't really any debug info here ++ return getAddr(N, DAG); ++} ++ ++SDValue Sw64TargetLowering::LowerBlockAddress(SDValue Op, ++ SelectionDAG &DAG) const { ++ LLVM_DEBUG(dbgs() << "Sw64:: begin lowBlockAddress----\n"); ++ SDLoc dl(Op); ++ SDLoc DL(Op); ++ ++ BlockAddressSDNode *BA = cast(Op); ++ return getAddr(BA, DAG); ++} ++ ++SDValue Sw64TargetLowering::LowerGlobalAddress(SDValue Op, ++ SelectionDAG &DAG) const { ++ LLVM_DEBUG(dbgs() << "Sw64:: begin lowGlobalAddress----\n"); ++ SDLoc dl(Op); ++ GlobalAddressSDNode *GSDN = cast(Op); ++ const GlobalValue *GV = GSDN->getGlobal(); ++ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64, GSDN->getOffset()); ++ // FIXME there isn't really any debug info here ++ if (GV->hasLocalLinkage()) { ++ return getAddr(GSDN, DAG); ++ } else ++ return DAG.getNode(Sw64ISD::RelLit, dl, MVT::i64, GA, ++ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)); ++} ++ ++template ++SDValue Sw64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG) const { ++ LLVM_DEBUG(dbgs() << "Sw64TargetLowering:: getAddr"); ++ EVT Ty = getPointerTy(DAG.getDataLayout()); ++ SDLoc DL(N); ++ ++ switch (getTargetMachine().getCodeModel()) { ++ default: ++ report_fatal_error("Unsupported code model for lowering"); ++ case CodeModel::Small: ++ case CodeModel::Medium: { ++ SDValue Hi = getTargetNode(N, DL, Ty, DAG, Sw64II::MO_GPREL_HI); ++ SDValue Lo = getTargetNode(N, DL, Ty, DAG, Sw64II::MO_GPREL_LO); ++ SDValue MNHi = DAG.getNode(Sw64ISD::LDIH, DL, Ty, Hi); ++ return DAG.getNode(Sw64ISD::LDI, DL, 
Ty, MNHi, Lo); ++ } ++ } ++} ++ ++SDValue Sw64TargetLowering::LowerGlobalTLSAddress(SDValue Op, ++ SelectionDAG &DAG) const { ++ ++ // If the relocation model is PIC, use the General Dynamic TLS Model or ++ // Local Dynamic TLS model, otherwise use the Initial Exec or ++ // Local Exec TLS Model. ++ ++ GlobalAddressSDNode *GSDN = cast(Op); ++ if (DAG.getTarget().useEmulatedTLS()) ++ return LowerToTLSEmulatedModel(GSDN, DAG); ++ ++ SDLoc dl(Op); ++ const GlobalValue *GV = GSDN->getGlobal(); ++ ++ EVT PtrVT = getPointerTy(DAG.getDataLayout()); ++ ++ TLSModel::Model model = getTargetMachine().getTLSModel(GV); ++ ++ if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { ++ // General Dynamic == tlsgd ++ // LocalDynamic == tlsldm ++ // GA == TGA ++ SDValue Argument; ++ if (model == TLSModel::GeneralDynamic) { ++ SDValue Addr = ++ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Sw64II::MO_TLSGD); ++ Argument = ++ SDValue(DAG.getMachineNode(Sw64::LDA, dl, MVT::i64, Addr, ++ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)), ++ 0); ++ } else { ++ SDValue Addr = ++ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Sw64II::MO_TLSLDM); ++ Argument = ++ SDValue(DAG.getMachineNode(Sw64::LDA, dl, MVT::i64, Addr, ++ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)), ++ 0); ++ } ++ unsigned PtrSize = PtrVT.getSizeInBits(); ++ IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); ++ SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT); ++ ArgListTy Args; ++ ArgListEntry Entry; ++ Entry.Node = Argument; ++ Entry.Ty = PtrTy; ++ Args.push_back(Entry); ++ TargetLowering::CallLoweringInfo CLI(DAG); ++ CLI.setDebugLoc(dl) ++ .setChain(DAG.getEntryNode()) ++ .setLibCallee(CallingConv::C, PtrTy, TlsGetAddr, std::move(Args)); ++ std::pair CallResult = LowerCallTo(CLI); ++ ++ SDValue Ret = CallResult.first; ++ if (model != TLSModel::LocalDynamic) ++ return Ret; ++ ++ SDValue DTPHi = DAG.getTargetGlobalAddress( ++ GV, dl, MVT::i64, GSDN->getOffset(), Sw64II::MO_DTPREL_HI); ++ SDValue DTPLo = DAG.getTargetGlobalAddress( ++ GV, dl, MVT::i64, GSDN->getOffset(), Sw64II::MO_DTPREL_LO); ++ ++ SDValue Hi = ++ SDValue(DAG.getMachineNode(Sw64::LDAH, dl, MVT::i64, DTPHi, Ret), 0); ++ return SDValue(DAG.getMachineNode(Sw64::LDA, dl, MVT::i64, DTPLo, Hi), 0); ++ } ++ ++ if (model == TLSModel::InitialExec) { ++ // Initial Exec TLS Model //gottprel ++ SDValue Gp = DAG.getGLOBAL_OFFSET_TABLE(MVT::i64); ++ SDValue Addr = ++ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Sw64II::MO_GOTTPREL); ++ SDValue RelDisp = ++ SDValue(DAG.getMachineNode(Sw64::LDL, dl, MVT::i64, Addr, Gp), 0); ++ SDValue SysCall = DAG.getNode(Sw64ISD::SysCall, dl, MVT::i64, ++ DAG.getConstant(0x9e, dl, MVT::i64)); ++ return SDValue( ++ DAG.getMachineNode(Sw64::ADDQr, dl, MVT::i64, RelDisp, SysCall), 0); ++ } else { ++ // Local Exec TLS Model //tprelHi tprelLo ++ assert(model == TLSModel::LocalExec); ++ SDValue SysCall = DAG.getNode(Sw64ISD::SysCall, dl, MVT::i64, ++ DAG.getConstant(0x9e, dl, MVT::i64)); ++ SDValue TPHi = DAG.getTargetGlobalAddress( ++ GV, dl, MVT::i64, GSDN->getOffset(), Sw64II::MO_TPREL_HI); ++ SDValue TPLo = DAG.getTargetGlobalAddress( ++ GV, dl, MVT::i64, GSDN->getOffset(), Sw64II::MO_TPREL_LO); ++ SDValue Hi = ++ SDValue(DAG.getMachineNode(Sw64::LDAH, dl, MVT::i64, TPHi, SysCall), 0); ++ return SDValue(DAG.getMachineNode(Sw64::LDA, dl, MVT::i64, TPLo, Hi), 0); ++ } ++} ++ ++static bool isCrossINSMask(ArrayRef M, EVT VT) { ++ // unsigned EltSz = VT.getScalarSizeInBits(); ++ unsigned NumElts = VT.getVectorNumElements(); ++ // 
unsigned BlockElts = NumElts * 2;
++  for (unsigned i = 0; i < NumElts; i++) {
++    unsigned idx = i / 2;
++    if (M[i] < 0)
++      return false;
++    if (M[i] != idx && (M[i] - NumElts) != idx)
++      return false;
++  }
++  return true;
++}
++
++static SDValue GenerateVectorShuffle(SDValue Op, EVT VT, SelectionDAG &DAG,
++                                     SDLoc dl) {
++  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
++  ArrayRef<int> ShuffleMask = SVN->getMask();
++  if (ShuffleMask.size() > 8)
++    return SDValue();
++
++  unsigned NewMask = 0;
++  if (VT == MVT::v8i32) {
++    for (int i = (ShuffleMask.size() - 1); i >= 0; i--) {
++      NewMask = NewMask << 4;
++      int idx = ShuffleMask[i];
++      int bits = idx > 7 ? 1 : 0;
++      idx = idx > 7 ? (idx - 8) : idx;
++      NewMask |= (bits << 3) | idx;
++    }
++  } else if (VT == MVT::v4i64 || VT == MVT::v4f32 || VT == MVT::v4f64) {
++    for (int i = ShuffleMask.size() * 2 - 1; i >= 0; i--) {
++      NewMask = NewMask << 4;
++      int idx = ShuffleMask[i / 2];
++      int bits = idx > 3 ? 1 : 0;
++      int mod = i % 2;
++      idx = idx > 3 ? (idx * 2 + mod - 8) : idx * 2 + mod;
++      NewMask |= (bits << 3) | idx;
++    }
++  }
++
++  SDValue ConstMask = DAG.getConstant(NewMask, dl, MVT::i64);
++  return DAG.getNode(Sw64ISD::VSHF, dl, VT, Op.getOperand(0), Op.getOperand(1),
++                     ConstMask);
++}
++
++SDValue Sw64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
++                                                SelectionDAG &DAG) const {
++  SDLoc dl(Op);
++  EVT VT = Op.getValueType();
++
++  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
++  // Convert shuffles that are natively supported to target-specific
++  // DAG nodes, instead of keeping them as shuffles and matching them again
++  // during code selection. This is more efficient and avoids the possibility
++  // of inconsistencies between legalization and selection.
++  ArrayRef<int> ShuffleMask = SVN->getMask();
++
++  SDValue V1 = Op.getOperand(0);
++  SDValue V2 = Op.getOperand(1);
++  assert(V1.getValueType() == VT && "Unexpected VECTOR_SHUFFLE type!");
++  assert(ShuffleMask.size() == VT.getVectorNumElements() &&
++         "Unexpected VECTOR_SHUFFLE mask size!");
++
++  if (SVN->isSplat()) {
++    int Lane = SVN->getSplatIndex();
++    // If this is an undef splat, generate it via "just" vdup, if possible.
++    if (Lane == -1)
++      Lane = 0;
++
++    if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
++      return DAG.getNode(Sw64ISD::VBROADCAST, dl, V1.getValueType(),
++                         V1.getOperand(0));
++
++    // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
++    // constant. If so, we can just reference the lane's definition directly.
++    if (V1.getOpcode() == ISD::BUILD_VECTOR &&
++        !isa<ConstantSDNode>(V1.getOperand(Lane))) {
++      SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, V1.getValueType(),
++                                V1.getOperand(Lane));
++      return DAG.getNode(Sw64ISD::VBROADCAST, dl, VT, Ext);
++    }
++  }
++  if (isCrossINSMask(ShuffleMask, VT))
++    return DAG.getNode(Sw64ISD::VINSECTL, dl, VT, V1, V2);
++
++  // SmallVector NewMask;
++  SDValue Tmp1 = GenerateVectorShuffle(Op, VT, DAG, dl);
++
++  return Tmp1;
++}
++
++SDValue Sw64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
++                                                    SelectionDAG &DAG) const {
++  SDLoc dl(Op);
++  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
++  unsigned NewIntrinsic;
++  EVT VT = Op.getValueType();
++  switch (IntNo) {
++  default:
++    break; // Don't custom lower most intrinsics.
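++  // sw64_umulh maps directly onto the generic MULHU node. The crc32* and
++  // sbt/cbt intrinsics are only turned into their Sw64ISD nodes when the
++  // subtarget is core4 with the matching feature enabled; otherwise the
++  // LLVM_FALLTHROUGH chain below ends at a plain "return Op" and the
++  // intrinsic is left as-is.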
++ case Intrinsic::sw64_umulh: ++ return DAG.getNode(ISD::MULHU, dl, MVT::i64, Op.getOperand(1), ++ Op.getOperand(2)); ++ LLVM_FALLTHROUGH; ++ case Intrinsic::sw64_crc32b: ++ if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) ++ return DAG.getNode(Sw64ISD::CRC32B, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ LLVM_FALLTHROUGH; ++ case Intrinsic::sw64_crc32h: ++ if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) ++ return DAG.getNode(Sw64ISD::CRC32H, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ LLVM_FALLTHROUGH; ++ case Intrinsic::sw64_crc32w: ++ if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) ++ return DAG.getNode(Sw64ISD::CRC32W, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ LLVM_FALLTHROUGH; ++ case Intrinsic::sw64_crc32l: ++ if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) ++ return DAG.getNode(Sw64ISD::CRC32L, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ LLVM_FALLTHROUGH; ++ case Intrinsic::sw64_crc32cb: ++ if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) ++ return DAG.getNode(Sw64ISD::CRC32CB, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ LLVM_FALLTHROUGH; ++ case Intrinsic::sw64_crc32ch: ++ if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) ++ return DAG.getNode(Sw64ISD::CRC32CH, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ LLVM_FALLTHROUGH; ++ case Intrinsic::sw64_crc32cw: ++ if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) ++ return DAG.getNode(Sw64ISD::CRC32CW, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ LLVM_FALLTHROUGH; ++ case Intrinsic::sw64_crc32cl: ++ if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) ++ return DAG.getNode(Sw64ISD::CRC32CL, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ LLVM_FALLTHROUGH; ++ case Intrinsic::sw64_sbt: ++ if (Subtarget.hasCore4() && Subtarget.enableSCbtInst()) ++ return DAG.getNode(Sw64ISD::SBT, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ LLVM_FALLTHROUGH; ++ case Intrinsic::sw64_cbt: ++ if (Subtarget.hasCore4() && Subtarget.enableSCbtInst()) ++ return DAG.getNode(Sw64ISD::CBT, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ return Op; ++ case Intrinsic::sw64_vsllb: ++ case Intrinsic::sw64_vsllh: ++ case Intrinsic::sw64_vsllw: ++ case Intrinsic::sw64_vslll: ++ NewIntrinsic = Intrinsic::sw64_vsll; ++ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ++ DAG.getConstant(NewIntrinsic, dl, MVT::i64), ++ Op.getOperand(1), Op.getOperand(2)); ++ case Intrinsic::sw64_vsrlb: ++ case Intrinsic::sw64_vsrlh: ++ case Intrinsic::sw64_vsrlw: ++ case Intrinsic::sw64_vsrll: ++ NewIntrinsic = Intrinsic::sw64_vsrl; ++ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ++ DAG.getConstant(NewIntrinsic, dl, MVT::i64), ++ Op.getOperand(1), Op.getOperand(2)); ++ // Fallthough ++ case Intrinsic::sw64_vsrab: ++ case Intrinsic::sw64_vsrah: ++ case Intrinsic::sw64_vsraw: ++ case Intrinsic::sw64_vsral: ++ NewIntrinsic = Intrinsic::sw64_vsra; ++ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ++ DAG.getConstant(NewIntrinsic, dl, MVT::i64), ++ Op.getOperand(1), Op.getOperand(2)); ++ case Intrinsic::sw64_vrolb: ++ case Intrinsic::sw64_vrolh: ++ case Intrinsic::sw64_vrolw: ++ case Intrinsic::sw64_vroll: ++ NewIntrinsic = Intrinsic::sw64_vrol; ++ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ++ DAG.getConstant(NewIntrinsic, dl, MVT::i64), ++ Op.getOperand(1), Op.getOperand(2)); ++ case 
Intrinsic::sw64_vlogzz: ++ return DAG.getNode(Sw64ISD::VLOG, dl, VT, Op.getOperand(1), ++ Op.getOperand(2), Op.getOperand(3), Op.getOperand(4)); ++ case Intrinsic::sw64_vmaxb: ++ case Intrinsic::sw64_vmaxh: ++ case Intrinsic::sw64_vmaxw: ++ case Intrinsic::sw64_vmaxl: ++ return DAG.getNode(Sw64ISD::VMAX, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::sw64_vumaxb: ++ case Intrinsic::sw64_vumaxh: ++ case Intrinsic::sw64_vumaxw: ++ case Intrinsic::sw64_vumaxl: ++ return DAG.getNode(Sw64ISD::VUMAX, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::sw64_vminb: ++ case Intrinsic::sw64_vminh: ++ case Intrinsic::sw64_vminw: ++ case Intrinsic::sw64_vminl: ++ return DAG.getNode(Sw64ISD::VMIN, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::sw64_vuminb: ++ case Intrinsic::sw64_vuminh: ++ case Intrinsic::sw64_vuminw: ++ case Intrinsic::sw64_vuminl: ++ return DAG.getNode(Sw64ISD::VUMIN, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::sw64_vmaxs: ++ case Intrinsic::sw64_vmaxd: ++ return DAG.getNode(Sw64ISD::VMAXF, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::sw64_vmins: ++ case Intrinsic::sw64_vmind: ++ return DAG.getNode(Sw64ISD::VMINF, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ ++ case Intrinsic::sw64_vseleqw: ++ case Intrinsic::sw64_vseleqwi: ++ return DAG.getNode(Sw64ISD::VSELEQW, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); ++ case Intrinsic::sw64_vselltw: ++ case Intrinsic::sw64_vselltwi: ++ return DAG.getNode(Sw64ISD::VSELLTW, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); ++ case Intrinsic::sw64_vsellew: ++ case Intrinsic::sw64_vsellewi: ++ return DAG.getNode(Sw64ISD::VSELLEW, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); ++ case Intrinsic::sw64_vsellbcw: ++ case Intrinsic::sw64_vsellbcwi: ++ return DAG.getNode(Sw64ISD::VSELLBCW, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); ++ case Intrinsic::sw64_vsqrts: ++ case Intrinsic::sw64_vsqrtd: ++ return DAG.getNode(Sw64ISD::VSQRT, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ ++ case Intrinsic::sw64_vsums: ++ case Intrinsic::sw64_vsumd: ++ return DAG.getNode(Sw64ISD::VSUMF, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ ++ case Intrinsic::sw64_vfrecs: ++ case Intrinsic::sw64_vfrecd: ++ return DAG.getNode(Sw64ISD::VFREC, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ ++ case Intrinsic::sw64_vfcmpeqs: ++ case Intrinsic::sw64_vfcmpeqd: ++ return DAG.getNode(Sw64ISD::VFCMPEQ, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::sw64_vfcmples: ++ case Intrinsic::sw64_vfcmpled: ++ return DAG.getNode(Sw64ISD::VFCMPLE, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::sw64_vfcmplts: ++ case Intrinsic::sw64_vfcmpltd: ++ return DAG.getNode(Sw64ISD::VFCMPLT, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::sw64_vfcmpuns: ++ case Intrinsic::sw64_vfcmpund: ++ return DAG.getNode(Sw64ISD::VFCMPUN, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ ++ case Intrinsic::sw64_vfcvtsd: ++ return DAG.getNode(Sw64ISD::VFCVTSD, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::sw64_vfcvtds: ++ return DAG.getNode(Sw64ISD::VFCVTDS, dl, 
Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::sw64_vfcvtls: ++ return DAG.getNode(Sw64ISD::VFCVTLS, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::sw64_vfcvtld: ++ return DAG.getNode(Sw64ISD::VFCVTLD, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::sw64_vfcvtsh: ++ return DAG.getNode(Sw64ISD::VFCVTSH, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); ++ case Intrinsic::sw64_vfcvths: ++ return DAG.getNode(Sw64ISD::VFCVTHS, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ ++ case Intrinsic::sw64_vfcvtdl: ++ return DAG.getNode(Sw64ISD::VFCVTDL, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::sw64_vfcvtdl_g: ++ return DAG.getNode(Sw64ISD::VFCVTDLG, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::sw64_vfcvtdl_p: ++ return DAG.getNode(Sw64ISD::VFCVTDLP, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::sw64_vfcvtdl_z: ++ return DAG.getNode(Sw64ISD::VFCVTDLZ, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::sw64_vfcvtdl_n: ++ return DAG.getNode(Sw64ISD::VFCVTDLN, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ ++ case Intrinsic::sw64_vfris: ++ return DAG.getNode(Sw64ISD::VFRIS, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::sw64_vfris_g: ++ return DAG.getNode(Sw64ISD::VFRISG, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::sw64_vfris_p: ++ return DAG.getNode(Sw64ISD::VFRISP, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::sw64_vfris_z: ++ return DAG.getNode(Sw64ISD::VFRISZ, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::sw64_vfris_n: ++ return DAG.getNode(Sw64ISD::VFRISN, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::sw64_vfrid: ++ return DAG.getNode(Sw64ISD::VFRID, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::sw64_vfrid_g: ++ return DAG.getNode(Sw64ISD::VFRIDG, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::sw64_vfrid_p: ++ return DAG.getNode(Sw64ISD::VFRIDP, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::sw64_vfrid_z: ++ return DAG.getNode(Sw64ISD::VFRIDZ, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::sw64_vfrid_n: ++ return DAG.getNode(Sw64ISD::VFRIDN, dl, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::sw64_vextw: ++ case Intrinsic::sw64_vextl: ++ case Intrinsic::sw64_vextfs: ++ case Intrinsic::sw64_vextfd: ++ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::sw64_vfseleqs: ++ case Intrinsic::sw64_vfseleqd: ++ return DAG.getNode(Sw64ISD::VFCMOVEQ, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); ++ case Intrinsic::sw64_vfselles: ++ case Intrinsic::sw64_vfselled: ++ return DAG.getNode(Sw64ISD::VFCMOVLE, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); ++ case Intrinsic::sw64_vfsellts: ++ case Intrinsic::sw64_vfselltd: ++ return DAG.getNode(Sw64ISD::VFCMOVLT, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); ++ case Intrinsic::sw64_vshfw: ++ return DAG.getNode(Sw64ISD::VSHF, dl, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); ++ } ++ return Op; ++} ++ ++SDValue Sw64TargetLowering::LowerVectorMemIntr(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc dl(Op); ++ unsigned IntNo = 
cast(Op.getOperand(1))->getZExtValue(); ++ EVT VT = Op.getValueType(); ++ LLVM_DEBUG(dbgs() << "Custom Lower Vector Memory Intrinsics\n"; Op.dump();); ++ SDValue Args = Op.getOperand(2); ++ switch (IntNo) { ++ default: ++ break; ++ case Intrinsic::sw64_vload: ++ return DAG.getNode(ISD::LOAD, dl, VT, Args); ++ } ++ // switch(VT) { ++ // case MVT::v8i32: ++ // case MVT::v4i64: ++ // case MVT::v4f64: ++ // case MVT::v32i8: ++ // case MVT::v16i16: ++ // SDNode ArgP = DAG.getNode(ISD::BITCAST, dl, Args.getSimpleValueType(), ++ // Args); return DAG.getNode(ISD::LOAD, dl, VT, Args); ++ // } ++ return Op; ++} ++ ++SDValue Sw64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc dl(Op); ++ unsigned IntNo = Op.getConstantOperandVal(1); ++ unsigned NewIntrinsic; ++ EVT VT = Op.getValueType(); ++ switch (IntNo) { ++ default: ++ break; // Don't custom lower most intrinsics. ++ case Intrinsic::sw64_vloadu: { ++ SDValue Chain = Op->getOperand(0); ++ SDVTList VTs = DAG.getVTList(VT.getSimpleVT().SimpleTy, MVT::Other); ++ NewIntrinsic = Intrinsic::sw64_vload_u; ++ SDValue VLOAD_U1 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, VTs, Chain, ++ DAG.getConstant(NewIntrinsic, dl, MVT::i64), ++ Op.getOperand(2)); ++ SDValue Hiaddr = ++ DAG.getNode(ISD::ADD, dl, MVT::i64, ++ DAG.getConstant((VT == MVT::v4f32 ? 16 : 32), dl, MVT::i64), ++ Op->getOperand(2)); ++ SDValue VLOAD_U2 = ++ DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, VTs, Chain, ++ DAG.getConstant(NewIntrinsic, dl, MVT::i64), Hiaddr); ++ ++ switch (VT.getSimpleVT().SimpleTy) { ++ default: ++ break; ++ case MVT::v8i32: ++ NewIntrinsic = Intrinsic::sw64_vconw; ++ break; ++ case MVT::v4f32: ++ NewIntrinsic = Intrinsic::sw64_vcons; ++ break; ++ case MVT::v4f64: ++ case MVT::v4i64: ++ NewIntrinsic = Intrinsic::sw64_vcond; ++ break; ++ } ++ return DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, VTs, Chain, ++ DAG.getConstant(NewIntrinsic, dl, MVT::i64), VLOAD_U1, ++ VLOAD_U2, Op->getOperand(2)); ++ } ++ } ++ return SDValue(); ++} ++ ++SDValue Sw64TargetLowering::LowerINTRINSIC_VOID(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc dl(Op); ++ unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); ++ unsigned NewIntrinsic; ++ EVT VT = Op.getValueType(); ++ EVT VTOperand2 = Op.getOperand(2).getValueType(); ++ switch (IntNo) { ++ case Intrinsic::sw64_vstoreu: { ++ NewIntrinsic = Intrinsic::sw64_vstoreul; ++ SDValue VSTOREUL = ++ DAG.getNode(ISD::INTRINSIC_VOID, dl, VT, Op.getOperand(0), ++ DAG.getConstant(NewIntrinsic, dl, MVT::i64), ++ Op.getOperand(2), Op.getOperand(3)); ++ ++ SDValue Hiaddr = DAG.getNode( ++ ISD::ADD, dl, MVT::i64, ++ DAG.getConstant((VTOperand2 == MVT::v4f32 ? 
16 : 32), dl, MVT::i64), ++ Op->getOperand(3)); ++ NewIntrinsic = Intrinsic::sw64_vstoreuh; ++ return DAG.getNode(ISD::INTRINSIC_VOID, dl, VT, VSTOREUL, ++ DAG.getConstant(NewIntrinsic, dl, MVT::i64), ++ Op.getOperand(2), Hiaddr); ++ } ++ default: ++ break; ++ } ++ return Op; ++} ++ ++SDValue Sw64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc dl(Op); ++ SDValue Vec = Op.getOperand(0); ++ MVT VecVT = Vec.getSimpleValueType(); ++ SDValue Idx = Op.getOperand(1); ++ MVT EltVT = VecVT.getVectorElementType(); ++ if (EltVT != MVT::i32 && EltVT != MVT::f32 && EltVT != MVT::f64) ++ return SDValue(); ++ ++ if (!dyn_cast(Idx)) ++ return SDValue(); ++ ++ SDValue tmp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Vec, Idx); ++ return tmp; ++ // return DAG.getAnyExtOrTrunc(tmp, dl, MVT::i32); ++} ++ ++SDValue Sw64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc dl(Op); ++ SDValue Idx = Op.getOperand(2); ++ ++ if (!dyn_cast(Idx)) ++ return SDValue(); ++ ++ return Op; ++} ++ ++static bool isConstantOrUndef(const SDValue Op) { ++ if (Op->isUndef()) ++ return true; ++ if (isa(Op)) ++ return true; ++ if (isa(Op)) ++ return true; ++ return false; ++} ++ ++static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { ++ for (unsigned i = 0; i < Op->getNumOperands(); ++i) ++ if (isConstantOrUndef(Op->getOperand(i))) ++ return true; ++ return false; ++} ++ ++SDValue Sw64TargetLowering::LowerBUILD_VECTOR(SDValue Op, ++ SelectionDAG &DAG) const { ++ BuildVectorSDNode *Node = cast(Op); ++ SDLoc dl(Op); ++ MVT VecVT = Op.getSimpleValueType(); ++ EVT ResTy = Op->getValueType(0); ++ SDLoc DL(Op); ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ ++ if (!Subtarget.hasSIMD() || !ResTy.is256BitVector()) ++ return SDValue(); ++ ++ if (VecVT.isInteger()) { ++ // Certain vector constants, used to express things like logical NOT and ++ // arithmetic NEG, are passed through unmodified. This allows special ++ // patterns for these operations to match, which will lower these constants ++ // to whatever is proven necessary. ++ BuildVectorSDNode *BVN = cast(Op.getNode()); ++ if (BVN->isConstant()) ++ if (ConstantSDNode *Const = BVN->getConstantSplatNode()) { ++ unsigned BitSize = VecVT.getVectorElementType().getSizeInBits(); ++ APInt Val(BitSize, ++ Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue()); ++ if (Val.isZero() || Val.isAllOnes()) ++ return Op; ++ } ++ } ++ MVT ElemTy = Op->getSimpleValueType(0).getScalarType(); ++ unsigned ElemBits = ElemTy.getSizeInBits(); ++ ++ if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, ++ 8, false) && ++ SplatBitSize <= 64 && ElemBits == SplatBitSize) { ++ // We can only cope with 8, 16, 32, or 64-bit elements ++ if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && ++ SplatBitSize != 64) ++ return SDValue(); ++ ++ // If the value isn't an integer type we will have to bitcast ++ // from an integer type first. Also, if there are any undefs, we must ++ // lower them to defined values first. 
++    if (ResTy.isInteger() && !HasAnyUndefs) {
++      // SDValue TConst = DAG.getConstant(SplatValue, dl, VecVT);
++      return DAG.getNode(Sw64ISD::VBROADCAST, dl, ResTy, Op.getOperand(1));
++    }
++
++    EVT ViaVecTy;
++
++    switch (SplatBitSize) {
++    default:
++      return SDValue();
++    case 8:
++      ViaVecTy = MVT::v32i8;
++      break;
++    case 16:
++      ViaVecTy = MVT::v16i16;
++      break;
++    case 32:
++      ViaVecTy = MVT::v8i32;
++      break;
++    case 64:
++      ViaVecTy = MVT::v4i64;
++      break;
++    }
++
++    // SelectionDAG::getConstant will promote SplatValue appropriately.
++    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
++
++    // Bitcast to the type we originally wanted.
++    if (ViaVecTy != ResTy)
++      Result = DAG.getNode(ISD::BITCAST, dl, ResTy, Result);
++
++    return Result;
++  } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false)) {
++    return DAG.getNode(Sw64ISD::VBROADCAST, dl, ResTy, Op.getOperand(1));
++  } else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
++    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
++    // The resulting code is the same length as the expansion, but it doesn't
++    // use memory operations.
++    EVT ResTy = Node->getValueType(0);
++
++    assert(ResTy.isVector());
++
++    unsigned NumElts = ResTy.getVectorNumElements();
++    SDValue Vector = DAG.getUNDEF(ResTy);
++    for (unsigned i = 0; i < NumElts; ++i) {
++      Vector =
++          DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
++                      Node->getOperand(i), DAG.getConstant(i, DL, MVT::i64));
++    }
++    return Vector;
++  }
++
++  return SDValue();
++  // SDValue tmp = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::i64, Vec, Idx);
++}
++
++SDValue Sw64TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
++  StoreSDNode &Nd = *cast<StoreSDNode>(Op);
++
++  if (Nd.getMemoryVT() != MVT::v4i32)
++    return Op;
++
++  // Replace a v4i64-to-v4i32 truncating store with a VTRUNCST node.
++  SDLoc DL(Op);
++
++  // SDValue ChainIn = Op->getOperand(0);
++  SDValue Val = Op->getOperand(1);
++  // SDValue Value = Op->getOperand(2);
++
++  return DAG.getMemIntrinsicNode(Sw64ISD::VTRUNCST, DL,
++                                 DAG.getVTList(MVT::Other),
++                                 {Nd.getChain(), Val, Nd.getBasePtr()},
++                                 Nd.getMemoryVT(), Nd.getMemOperand());
++}
++
++SDValue Sw64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
++  // Sw64 does not produce a generic v4i64 setcc result: the hardware float
++  // compares produce a v4f64/v4f32 value of 2.0 for "true", so an additional
++  // compare is needed to turn that into the expected integer mask.
++  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
++  SDLoc DL(Op);
++  SDValue LHS = Op.getOperand(0);
++  SDValue RHS = Op.getOperand(1);
++
++  // TODO: Truncate v4i64 compares to v4f64.
++  // Sw64 does not have a v4i64 compare, but LLVM legalizes vector comparisons
++  // as integers, so a compare of vectors with 64-bit elements produces a
++  // v4i64 result. Bail out on that case for now.
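++  // For SETO the ordered result is synthesized from its complement: compare
++  // unordered (SETUO), which yields 2.0 in every unordered lane, bitcast that
++  // to v4f64 and compare it against zero with SETOEQ so the ordered lanes
++  // come back true. All other conditions are left to the existing patterns.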
++ if (LHS.getValueType() == MVT::v4i64 && RHS.getValueType() == MVT::v4i64) { ++ return SDValue(); ++ } ++ ++ if (CC != ISD::SETO) ++ return Op; ++ ++ SDValue Res = DAG.getSetCC(DL, MVT::v4i64, Op.getOperand(0), Op.getOperand(1), ++ ISD::SETUO); ++ SDValue Zero = DAG.getRegister(Sw64::V31, MVT::v4f64); ++ SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::v4f64, Res); ++ return DAG.getSetCC(DL, MVT::v4i64, Cast, Zero, ISD::SETOEQ); ++} ++ ++SDValue Sw64TargetLowering::LowerSHL_PARTS(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ MVT VT = MVT::i64; ++ ++ SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); ++ SDValue Shamt = Op.getOperand(2); ++ // if shamt < (VT.bits): ++ // lo = (shl lo, shamt) ++ // hi = (or (shl hi, shamt) (srl (srl lo, 1), (xor shamt, (VT.bits-1))) ++ // else: ++ // lo = 0 ++ // hi = (shl lo, shamt[4:0]) ++ SDValue Not = ++ DAG.getNode(ISD::XOR, DL, MVT::i64, Shamt, ++ DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i64)); ++ SDValue ShiftRight1Lo = ++ DAG.getNode(ISD::SRL, DL, VT, Lo, DAG.getConstant(1, DL, VT)); ++ SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, Not); ++ SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); ++ SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); ++ SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); ++ SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i64, Shamt, ++ DAG.getConstant(VT.getSizeInBits(), DL, MVT::i64)); ++ Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, DAG.getConstant(0, DL, VT), ++ ShiftLeftLo); ++ Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftLeftLo, Or); ++ ++ SDValue Ops[2] = {Lo, Hi}; ++ return DAG.getMergeValues(Ops, DL); ++} ++ ++SDValue Sw64TargetLowering::LowerSRL_PARTS(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc dl(Op); ++ SDValue ShOpLo = Op.getOperand(0); ++ SDValue ShOpHi = Op.getOperand(1); ++ SDValue ShAmt = Op.getOperand(2); ++ SDValue bm = DAG.getNode(ISD::SUB, dl, MVT::i64, ++ DAG.getConstant(64, dl, MVT::i64), ShAmt); ++ SDValue BMCC = DAG.getSetCC(dl, MVT::i64, bm, ++ DAG.getConstant(0, dl, MVT::i64), ISD::SETLE); ++ // if 64 - shAmt <= 0 ++ SDValue Hi_Neg = DAG.getConstant(0, dl, MVT::i64); ++ SDValue ShAmt_Neg = ++ DAG.getNode(ISD::SUB, dl, MVT::i64, DAG.getConstant(0, dl, MVT::i64), bm); ++ SDValue Lo_Neg = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt_Neg); ++ // else ++ SDValue carries = DAG.getNode(ISD::SHL, dl, MVT::i64, ShOpHi, bm); ++ SDValue Hi_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt); ++ SDValue Lo_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpLo, ShAmt); ++ Lo_Pos = DAG.getNode(ISD::OR, dl, MVT::i64, Lo_Pos, carries); ++ // Merge ++ SDValue Hit = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Hi_Neg, Hi_Pos); ++ SDValue Lot = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Lo_Neg, Lo_Pos); ++ SDValue BMCC1 = DAG.getSetCC(dl, MVT::i64, ShAmt, ++ DAG.getConstant(0, dl, MVT::i64), ISD::SETEQ); ++ SDValue BMCC2 = DAG.getSetCC(dl, MVT::i64, ShAmt, ++ DAG.getConstant(64, dl, MVT::i64), ISD::SETEQ); ++ SDValue Hit1 = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC1, ShOpHi, Hit); ++ SDValue Lot1 = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC1, ShOpLo, Lot); ++ SDValue Hi = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC2, ++ DAG.getConstant(0, dl, MVT::i64), Hit1); ++ SDValue Lo = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC2, ShOpHi, Lot1); ++ ++ SDValue Ops[2] = {Lo, Hi}; ++ return DAG.getMergeValues(Ops, dl); ++} ++ ++SDValue Sw64TargetLowering::LowerSRA_PARTS(SDValue Op, ++ SelectionDAG &DAG) const { ++ EVT 
VT = Op.getValueType(); ++ unsigned VTBits = VT.getSizeInBits(); ++ SDLoc dl(Op); ++ SDValue ShOpLo = Op.getOperand(0); ++ SDValue ShOpHi = Op.getOperand(1); ++ SDValue ShAmt = Op.getOperand(2); ++ SDValue bm = DAG.getNode(ISD::SUB, dl, MVT::i64, ++ DAG.getConstant(64, dl, MVT::i64), ShAmt); ++ SDValue BMCC = DAG.getSetCC(dl, MVT::i64, bm, ++ DAG.getConstant(0, dl, MVT::i64), ISD::SETLE); ++ // if 64 - shAmt <= 0 ++ SDValue Hi_Neg = DAG.getNode(ISD::SRA, dl, VT, ShOpHi, ++ DAG.getConstant(VTBits - 1, dl, MVT::i64)); ++ SDValue ShAmt_Neg = ++ DAG.getNode(ISD::SUB, dl, MVT::i64, DAG.getConstant(0, dl, MVT::i64), bm); ++ SDValue Lo_Neg = DAG.getNode(ISD::SRA, dl, MVT::i64, ShOpHi, ShAmt_Neg); ++ // else ++ SDValue carries = DAG.getNode(ISD::SHL, dl, MVT::i64, ShOpHi, bm); ++ SDValue Hi_Pos = DAG.getNode(ISD::SRA, dl, MVT::i64, ShOpHi, ShAmt); ++ SDValue Lo_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpLo, ShAmt); ++ Lo_Pos = DAG.getNode(ISD::OR, dl, MVT::i64, Lo_Pos, carries); ++ // Merge ++ SDValue Hit = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Hi_Neg, Hi_Pos); ++ SDValue Lot = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Lo_Neg, Lo_Pos); ++ SDValue BMCC1 = DAG.getSetCC(dl, MVT::i64, ShAmt, ++ DAG.getConstant(0, dl, MVT::i64), ISD::SETEQ); ++ SDValue BMCC2 = DAG.getSetCC(dl, MVT::i64, ShAmt, ++ DAG.getConstant(64, dl, MVT::i64), ISD::SETEQ); ++ SDValue Hit1 = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC1, ShOpHi, Hit); ++ SDValue Lot1 = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC1, ShOpLo, Lot); ++ SDValue Hi = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC2, ++ DAG.getNode(ISD::SRA, dl, MVT::i64, ShOpHi, ++ DAG.getConstant(63, dl, MVT::i64)), ++ Hit1); ++ SDValue Lo = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC2, ShOpHi, Lot1); ++ SDValue Ops[2] = {Lo, Hi}; ++ return DAG.getMergeValues(Ops, dl); ++} ++ ++SDValue Sw64TargetLowering::LowerSINT_TO_FP(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc dl(Op); ++ assert(Op.getOperand(0).getValueType() == MVT::i64 && ++ "Unhandled SINT_TO_FP type in custom expander!"); ++ SDValue LD; ++ bool isDouble = Op.getValueType() == MVT::f64; ++ LD = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0)); ++ SDValue FP = DAG.getNode(isDouble ? Sw64ISD::CVTQT_ : Sw64ISD::CVTQS_, dl, ++ isDouble ? 
                             MVT::f64 : MVT::f32, LD);
++  return FP;
++}
++
++SDValue Sw64TargetLowering::LowerFP_TO_SINT(SDValue Op,
++                                            SelectionDAG &DAG) const {
++  SDLoc dl(Op);
++  bool isDouble = Op.getOperand(0).getValueType() == MVT::f64;
++  SDValue src = Op.getOperand(0);
++
++  if (!isDouble) // Promote
++    src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, src);
++
++  src = DAG.getNode(Sw64ISD::CVTTQ_, dl, MVT::f64, src);
++
++  return DAG.getNode(ISD::BITCAST, dl, MVT::i64, src);
++}
++
++SDValue Sw64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
++                                               SelectionDAG &DAG) const {
++  SDValue width = Op.getOperand(1);
++
++  if (width.getValueType() != MVT::i64)
++    width = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), MVT::i64, width);
++
++  return expandFP_TO_INT_SAT(Op.getNode(), DAG);
++}
++
++// ----------------------------------------------------------
++// Construct a new call chain to libgcc to replace the old one:
++// udiv/sdiv i128, i128 becomes a call taking an sret slot plus the
++// two i128 operands.
++// ----------------------------------------------------------
++SDValue Sw64TargetLowering::LowerSUDIVI128(SDValue Op,
++                                           SelectionDAG &DAG) const {
++  SDLoc dl(Op);
++
++  if (!Op.getValueType().isInteger())
++    return SDValue();
++  RTLIB::Libcall LC;
++  bool isSigned;
++  switch (Op->getOpcode()) {
++  default:
++    llvm_unreachable("Unexpected request for libcall!");
++  case ISD::SDIV:
++    isSigned = true;
++    LC = RTLIB::SDIV_I128;
++    break;
++  case ISD::UDIV:
++    isSigned = false;
++    LC = RTLIB::UDIV_I128;
++    break;
++  case ISD::SREM:
++    isSigned = true;
++    LC = RTLIB::SREM_I128;
++    break;
++  case ISD::UREM:
++    isSigned = false;
++    LC = RTLIB::UREM_I128;
++    break;
++  }
++  SDValue InChain = DAG.getEntryNode();
++
++  // Create an extra stack object to store the libcall result.
++  SDValue DemoteStackSlot;
++  TargetLowering::ArgListTy Args;
++  auto &DL = DAG.getDataLayout();
++  uint64_t TySize = 16;
++  MachineFunction &MF = DAG.getMachineFunction();
++  int DemoteStackIdx =
++      MF.getFrameInfo().CreateStackObject(TySize, Align(8), false);
++  EVT ArgVT = Op->getOperand(0).getValueType();
++  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
++  Type *StackSlotPtrType = PointerType::get(ArgTy, DL.getAllocaAddrSpace());
++  // Save the sret information.
++  DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));
++  ArgListEntry Entry;
++  Entry.Node = DemoteStackSlot;
++  Entry.Ty = StackSlotPtrType;
++  Entry.IsSRet = true;
++  Entry.Alignment = Align(8);
++  Args.push_back(Entry);
++
++  // Pass the udiv/sdiv operands as call arguments.
++  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
++    ArgListEntry Entry;
++    ArgVT = Op->getOperand(i).getValueType();
++    assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 &&
++           "Unexpected argument type for lowering");
++    Entry.Node = Op->getOperand(i);
++    Entry.Ty = IntegerType::get(*DAG.getContext(), 128);
++    Entry.IsInReg = true;
++    Entry.IsSExt = isSigned;
++    Entry.IsZExt = false;
++    Args.push_back(Entry);
++  }
++
++  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
++                                         getPointerTy(DAG.getDataLayout()));
++  // Create a new libcall to process the udiv/sdiv.
++  TargetLowering::CallLoweringInfo CLI(DAG);
++  CLI.setDebugLoc(dl)
++      .setChain(InChain)
++      .setLibCallee(
++          getLibcallCallingConv(LC),
++          static_cast<EVT>(MVT::isVoid).getTypeForEVT(*DAG.getContext()),
++          Callee, std::move(Args))
++      .setNoReturn(true)
++      .setSExtResult(isSigned)
++      .setZExtResult(!isSigned);
++
++  SDValue CallInfo = LowerCallTo(CLI).second;
++  return LowerCallExtraResult(CallInfo, DemoteStackSlot, DemoteStackIdx, DAG)
++      .first;
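// Shape of the rewrite performed above, in rough IR form (the callee name
// is illustrative only; the real symbol comes from getLibcallName(LC)):
//   %q = sdiv i128 %a, %b
// becomes
//   call void @__sdiv_i128(ptr sret(i128) %slot, i128 %a, i128 %b)
//   %q = load i128, ptr %slot
// where %slot is the 16-byte stack object created with CreateStackObject
// and the trailing load is built by LowerCallExtraResult below.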
++} ++ ++// -------------------------------------------------------------------- ++// when a call using sret arugments pass in register, the call result ++// must be handled, create a load node and tokenfactor to pass the call ++// result ++// -------------------------------------------------------------------- ++std::pair Sw64TargetLowering::LowerCallExtraResult( ++ SDValue &Chain, SDValue &DemoteStackSlot, unsigned DemoteStackIdx, ++ SelectionDAG &DAG) const { ++ SmallVector Chains(1), ReturnValues(1); ++ SDLoc DL(Chain); ++ SDNodeFlags Flags; ++ Flags.setNoUnsignedWrap(true); ++ SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, DemoteStackSlot, ++ DAG.getConstant(0, DL, MVT::i64), Flags); ++ SDValue L = DAG.getLoad(MVT::i128, DL, Chain, Add, ++ MachinePointerInfo::getFixedStack( ++ DAG.getMachineFunction(), DemoteStackIdx, 0), ++ /* Alignment = */ 8); ++ Chains[0] = L.getValue(1); ++ ReturnValues[0] = L; ++ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); ++ ++ SDValue Res = DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(MVT::i128), ++ ReturnValues); ++ return std::make_pair(Res, Chain); ++} ++ ++SDValue Sw64TargetLowering::LowerExternalSymbol(SDValue Op, ++ SelectionDAG &DAG) const { ++ LLVM_DEBUG(dbgs() << "Sw64:: begin lowExternalSymbol----\n"); ++ SDLoc dl(Op); ++ return DAG.getNode(Sw64ISD::RelLit, dl, MVT::i64, ++ DAG.getTargetExternalSymbol( ++ cast(Op)->getSymbol(), MVT::i64), ++ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)); ++} ++ ++SDValue Sw64TargetLowering::LowerATOMIC_FENCE(SDValue Op, ++ SelectionDAG &DAG) const { ++ // FIXME: Need pseudo-fence for 'singlethread' fences ++ // FIXME: Set SType for weaker fences where supported/appropriate. ++ SDLoc DL(Op); ++ return DAG.getNode(Sw64ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); ++} ++ ++SDValue Sw64TargetLowering::LowerATOMIC_LOAD(SDValue Op, ++ SelectionDAG &DAG) const { ++ AtomicSDNode *N = cast(Op); ++ assert(N->getOpcode() == ISD::ATOMIC_LOAD && "Bad Atomic OP"); ++ assert((N->getSuccessOrdering() == AtomicOrdering::Unordered || ++ N->getSuccessOrdering() == AtomicOrdering::Monotonic) && ++ "setInsertFencesForAtomic(true) expects unordered / monotonic"); ++ EVT VT = N->getMemoryVT(); ++ SDValue Result; ++ if (VT != MVT::i64) ++ Result = ++ DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(Op), MVT::i64, N->getChain(), ++ N->getBasePtr(), N->getPointerInfo(), VT, N->getAlign(), ++ N->getMemOperand()->getFlags(), N->getAAInfo()); ++ else ++ Result = DAG.getLoad(MVT::i64, SDLoc(Op), N->getChain(), N->getBasePtr(), ++ N->getPointerInfo(), N->getAlign(), ++ N->getMemOperand()->getFlags(), N->getAAInfo(), ++ N->getRanges()); ++ return Result; ++} ++ ++SDValue Sw64TargetLowering::LowerATOMIC_STORE(SDValue Op, ++ SelectionDAG &DAG) const { ++ AtomicSDNode *N = cast(Op); ++ assert(N->getOpcode() == ISD::ATOMIC_STORE && "Bad Atomic OP"); ++ assert((N->getSuccessOrdering() == AtomicOrdering::Unordered || ++ N->getSuccessOrdering() == AtomicOrdering::Monotonic) && ++ "setInsertFencesForAtomic(true) expects unordered / monotonic"); ++ ++ return DAG.getStore(N->getChain(), SDLoc(Op), N->getVal(), N->getBasePtr(), ++ N->getPointerInfo(), N->getAlign(), ++ N->getMemOperand()->getFlags(), N->getAAInfo()); ++} ++MachineMemOperand::Flags ++Sw64TargetLowering::getTargetMMOFlags(const Instruction &I) const { ++ // Because of how we convert atomic_load and atomic_store to normal loads and ++ // stores in the DAG, we need to ensure that the MMOs are marked volatile ++ // since DAGCombine hasn't been updated to account for atomic, but non ++ 
// volatile loads. (See D57601) ++ if (auto *SI = dyn_cast(&I)) ++ if (SI->isAtomic()) ++ return MachineMemOperand::MOVolatile; ++ if (auto *LI = dyn_cast(&I)) ++ if (LI->isAtomic()) ++ return MachineMemOperand::MOVolatile; ++ if (auto *AI = dyn_cast(&I)) ++ if (AI->isAtomic()) ++ return MachineMemOperand::MOVolatile; ++ if (auto *AI = dyn_cast(&I)) ++ if (AI->isAtomic()) ++ return MachineMemOperand::MOVolatile; ++ return MachineMemOperand::MONone; ++} ++ ++SDValue Sw64TargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) const { ++ SDValue N0 = Op->getOperand(0); ++ SDValue N1 = Op->getOperand(1); ++ EVT VT = N1.getValueType(); ++ SDLoc dl(Op); ++ if (auto *C1 = dyn_cast(N1)) { ++ const APInt &C1Val = C1->getAPIntValue(); ++ if (C1Val.isPowerOf2()) { ++ SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), dl, VT); ++ return DAG.getNode(Sw64ISD::SBT, dl, VT, N0, ShAmtC); ++ } ++ } ++ // if ((or (srl shl)) || (or (shl srl)) then rolw ++ if ((N0->getOpcode() == ISD::SRL && N1->getOpcode() == ISD::SRL) || ++ (N0->getOpcode() == ISD::SRL && N1->getOpcode() == ISD::SHL)) ++ if (N0->getOperand(1)->getOperand(0)->getOpcode() == ISD::SUB && ++ N0->getOperand(1)->getOperand(0)->getConstantOperandVal(0) == 32) ++ return DAG.getNode(Sw64ISD::ROLW, dl, VT, N1->getOperand(0), ++ N1->getOperand(1)->getOperand(0)); ++ return SDValue(); ++} ++ ++SDValue Sw64TargetLowering::LowerSUREM(SDValue Op, SelectionDAG &DAG) const { ++ SDLoc dl(Op); ++ // Expand only on constant case ++ // modify the operate of div 0 ++ if (Op.getOperand(1).getOpcode() == ISD::Constant && ++ cast(Op.getNode()->getOperand(1))->getAPIntValue() != 0) { ++ ++ EVT VT = Op.getNode()->getValueType(0); ++ ++ SmallVector Built; ++ SDValue Tmp1 = Op.getNode()->getOpcode() == ISD::UREM ++ ? BuildUDIV(Op.getNode(), DAG, false, Built) ++ : BuildSDIV(Op.getNode(), DAG, false, Built); ++ ++ Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Op.getOperand(1)); ++ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Op.getOperand(0), Tmp1); ++ ++ return Tmp1; ++ } ++ ++ return LowerSUDIV(Op, DAG); ++} ++ ++SDValue Sw64TargetLowering::LowerSUDIV(SDValue Op, SelectionDAG &DAG) const { ++ SDLoc dl(Op); ++ ++ if (!Op.getValueType().isInteger()) ++ return SDValue(); ++ ++ // modify the operate of div 0 ++ if (Op.getOperand(1).getOpcode() == ISD::Constant && ++ cast(Op.getNode()->getOperand(1))->getAPIntValue() != 0) { ++ SmallVector Built; ++ return Op.getOpcode() == ISD::SDIV ++ ? 
BuildSDIV(Op.getNode(), DAG, true, Built) ++ : BuildUDIV(Op.getNode(), DAG, true, Built); ++ } ++ ++ const char *opstr = 0; ++ switch (Op.getOpcode()) { ++ case ISD::UREM: ++ opstr = "__remlu"; ++ break; ++ case ISD::SREM: ++ opstr = "__reml"; ++ break; ++ case ISD::UDIV: ++ opstr = "__divlu"; ++ break; ++ case ISD::SDIV: ++ opstr = "__divl"; ++ break; ++ } ++ ++ SDValue Tmp1 = Op.getOperand(0); ++ SDValue Tmp2 = Op.getOperand(1); ++ SDValue Addr = DAG.getExternalSymbol(opstr, MVT::i64); ++ return DAG.getNode(Sw64ISD::DivCall, dl, MVT::i64, Addr, Tmp1, Tmp2); ++} ++ ++SDValue Sw64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { ++ SDLoc dl(Op); ++ SDValue Chain, DataPtr; ++ LowerVAARG(Op.getNode(), Chain, DataPtr, DAG); ++ SDValue Result; ++ if (Op.getValueType() == MVT::i32) ++ Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr, ++ MachinePointerInfo(), MVT::i32); ++ else if (Op.getValueType() == MVT::f32) { ++ Result = DAG.getLoad(MVT::f64, dl, Chain, DataPtr, MachinePointerInfo()); ++ SDValue InFlags = Result.getValue(1); ++ SmallVector Ops; ++ Ops.push_back(InFlags); ++ Ops.push_back(Result); ++ SDVTList NodeTys = DAG.getVTList(MVT::f32, MVT::Other); ++ Result = DAG.getNode(Sw64ISD::CVTTS_, dl, NodeTys, Ops); ++ } else { ++ Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, ++ MachinePointerInfo()); ++ } ++ return Result; ++} ++ ++SDValue Sw64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { ++ SDLoc dl(Op); ++ SDValue Chain = Op.getOperand(0); ++ SDValue DestP = Op.getOperand(1); ++ SDValue SrcP = Op.getOperand(2); ++ const Value *DestS = cast(Op.getOperand(3))->getValue(); ++ const Value *SrcS = cast(Op.getOperand(4))->getValue(); ++ SDValue Val = DAG.getLoad(getPointerTy(DAG.getDataLayout()), dl, Chain, SrcP, ++ MachinePointerInfo(SrcS)); ++ SDValue Result = ++ DAG.getStore(Val.getValue(1), dl, Val, DestP, MachinePointerInfo(DestS)); ++ SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP, ++ DAG.getConstant(8, dl, MVT::i64)); ++ Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result, NP, ++ MachinePointerInfo(), MVT::i32); ++ SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP, ++ DAG.getConstant(8, dl, MVT::i64)); ++ return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, MachinePointerInfo(), ++ MVT::i32); ++} ++ ++SDValue Sw64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { ++ SDLoc dl(Op); ++ MachineFunction &MF = DAG.getMachineFunction(); ++ Sw64MachineFunctionInfo *FuncInfo = MF.getInfo(); ++ ++ SDValue Chain = Op.getOperand(0); ++ SDValue VAListP = Op.getOperand(1); ++ const Value *VAListS = cast(Op.getOperand(2))->getValue(); ++ ++ // vastart stores the address of the VarArgsBase and VarArgsOffset ++ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsBase(), MVT::i64); ++ SDValue S1 = ++ DAG.getStore(Chain, dl, FR, VAListP, MachinePointerInfo(VAListS)); ++ SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, ++ DAG.getConstant(8, dl, MVT::i64)); ++ ++ return DAG.getTruncStore( ++ S1, dl, DAG.getConstant(FuncInfo->getVarArgsOffset(), dl, MVT::i64), SA2, ++ MachinePointerInfo(), MVT::i32); ++} ++ ++// Prefetch operands are: ++// 1: Address to prefetch ++// 2: bool isWrite ++// 3: int locality (0 = no locality ... 
3 = extreme locality) ++// 4: bool isDataCache ++SDValue Sw64TargetLowering::LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ unsigned IsWrite = cast(Op.getOperand(2))->getZExtValue(); ++ // unsigned Locality = cast(Op.getOperand(3))->getZExtValue(); ++ unsigned IsData = cast(Op.getOperand(4))->getZExtValue(); ++ ++ unsigned Code = IsData ? Sw64ISD::Z_S_FILLCS : Sw64ISD::Z_FILLCS; ++ if (IsWrite == 1 && IsData == 1) ++ Code = Sw64ISD::Z_FILLDE; ++ if (IsWrite == 0 && IsData == 1) ++ Code = Sw64ISD::Z_FILLCS; ++ if (IsWrite == 1 && IsData == 0) ++ Code = Sw64ISD::Z_S_FILLDE; ++ if (IsWrite == 0 && IsData == 0) ++ Code = Sw64ISD::Z_FILLCS; ++ ++ unsigned PrfOp = 0; ++ ++ return DAG.getNode(Code, DL, MVT::Other, Op.getOperand(0), ++ DAG.getConstant(PrfOp, DL, MVT::i64), Op.getOperand(1)); ++} ++ ++SDValue Sw64TargetLowering::LowerROLW(SDNode *N, SelectionDAG &DAG) const { ++ SDLoc DL(N); ++ ++ SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); ++ SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); ++ SDValue NewRes = DAG.getNode(Sw64ISD::ROLW, DL, MVT::i64, NewOp0, NewOp1); ++ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); ++} ++ ++SDValue Sw64TargetLowering::LowerFRAMEADDR(SDValue Op, ++ SelectionDAG &DAG) const { ++ // check the depth ++ if (cast(Op.getOperand(0))->getZExtValue() != 0) { ++ DAG.getContext()->emitError( ++ "return address can be determined only for current frame"); ++ return SDValue(); ++ } ++ ++ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); ++ MFI.setFrameAddressIsTaken(true); ++ EVT VT = Op.getValueType(); ++ SDLoc DL(Op); ++ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Sw64::R15, VT); ++ return FrameAddr; ++} ++ ++void Sw64TargetLowering::ReplaceNodeResults(SDNode *N, ++ SmallVectorImpl &Results, ++ SelectionDAG &DAG) const { ++ SDLoc dl(N); ++ switch (N->getOpcode()) { ++ default: ++ break; ++ case ISD::SDIV: ++ case ISD::UDIV: ++ case ISD::SREM: ++ case ISD::UREM: { ++ SDValue Res = LowerSUDIVI128(SDValue(N, 0), DAG); ++ Results.push_back(Res); ++ return; ++ } ++ case ISD::ATOMIC_LOAD: ++ case ISD::ATOMIC_STORE: ++ case ISD::FP_TO_SINT_SAT: ++ case ISD::FP_TO_UINT_SAT: ++ return; ++ case ISD::FP_TO_SINT: { ++ SDValue NewRes = ++ DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i64, N->getOperand(0)); ++ Results.push_back( ++ DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), NewRes)); ++ return; ++ } ++ case ISD::ROTL: ++ SDValue Res = LowerROLW(N, DAG); ++ Results.push_back(Res); ++ return; ++ } ++ assert(N->getValueType(0) == MVT::i32 && N->getOpcode() == ISD::VAARG && ++ "Unknown node to custom promote!"); ++ ++ SDValue Chain, DataPtr; ++ LowerVAARG(N, Chain, DataPtr, DAG); ++ ++ SDValue Res = ++ DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, MachinePointerInfo()); ++ ++ Results.push_back(Res); ++ Results.push_back(SDValue(Res.getNode(), 1)); ++} ++ ++/// getConstraintType - Given a constraint letter, return the type of ++/// constraint it is for this target. 
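// For reference, these constraints correspond to GCC-style inline asm such
// as (mnemonics illustrative only, not taken from this patch):
//   long g;   asm("bis %1, %1, %0"   : "=r"(g) : "r"(g));   // 'r' -> GPRC
//   double d; asm("fcpys %1, %1, %0" : "=f"(d) : "f"(d));   // 'f' -> F4RC/F8RC
// Explicit registers can also be requested with the brace syntax handled by
// parseRegForInlineAsmConstraint below, e.g. "{$16}" or "{$f0}".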
++Sw64TargetLowering::ConstraintType ++Sw64TargetLowering::getConstraintType(const std::string &Constraint) const { ++ if (Constraint.size() == 1) { ++ switch (Constraint[0]) { ++ default: ++ break; ++ case 'f': ++ case 'r': ++ return C_RegisterClass; ++ } ++ } ++ return TargetLowering::getConstraintType(Constraint); ++} ++ ++unsigned Sw64TargetLowering::MatchRegName(StringRef Name) const { ++ unsigned Reg = StringSwitch(Name.lower()) ++ .Case("$0", Sw64::R0) ++ .Case("$1", Sw64::R1) ++ .Case("$2", Sw64::R2) ++ .Case("$3", Sw64::R3) ++ .Case("$4", Sw64::R4) ++ .Case("$5", Sw64::R5) ++ .Case("$6", Sw64::R6) ++ .Case("$7", Sw64::R7) ++ .Case("$8", Sw64::R8) ++ .Case("$9", Sw64::R9) ++ .Case("$10", Sw64::R10) ++ .Case("$11", Sw64::R11) ++ .Case("$12", Sw64::R12) ++ .Case("$13", Sw64::R13) ++ .Case("$14", Sw64::R14) ++ .Case("$15", Sw64::R15) ++ .Case("$16", Sw64::R16) ++ .Case("$17", Sw64::R17) ++ .Case("$18", Sw64::R18) ++ .Case("$19", Sw64::R19) ++ .Case("$20", Sw64::R20) ++ .Case("$21", Sw64::R21) ++ .Case("$22", Sw64::R22) ++ .Case("$23", Sw64::R23) ++ .Case("$24", Sw64::R24) ++ .Case("$25", Sw64::R25) ++ .Case("$26", Sw64::R26) ++ .Case("$27", Sw64::R27) ++ .Case("$28", Sw64::R28) ++ .Case("$29", Sw64::R29) ++ .Case("$30", Sw64::R30) ++ .Case("$31", Sw64::R31) ++ .Default(0); ++ return Reg; ++} ++Register ++Sw64TargetLowering::getRegisterByName(const char *RegName, LLT VT, ++ const MachineFunction &MF) const { ++ Register Reg = MatchRegName(StringRef(RegName)); ++ if (Reg) ++ return Reg; ++ ++ report_fatal_error("Sw Invalid register name global variable"); ++} ++/// Examine constraint type and operand type and determine a weight value. ++/// This object must already have been set up with the operand type ++/// and the current alternative constraint selected. ++TargetLowering::ConstraintWeight ++Sw64TargetLowering::getSingleConstraintMatchWeight( ++ AsmOperandInfo &info, const char *constraint) const { ++ ConstraintWeight weight = CW_Invalid; ++ Value *CallOperandVal = info.CallOperandVal; ++ // If we don't have a value, we can't do a match, ++ // but allow it at the lowest weight. ++ if (CallOperandVal == NULL) ++ return CW_Default; ++ // Look at the constraint type. ++ switch (*constraint) { ++ default: ++ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); ++ break; ++ case 'f': ++ weight = CW_Register; ++ break; ++ } ++ return weight; ++} ++ ++Instruction *Sw64TargetLowering::emitLeadingFence(IRBuilderBase &Builder, ++ Instruction *Inst, ++ AtomicOrdering Ord) const { ++ if (isa(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) ++ return Builder.CreateFence(AtomicOrdering::AcquireRelease); ++ if (isa(Inst) && isReleaseOrStronger(Ord)) ++ return Builder.CreateFence(AtomicOrdering::Release); ++ return nullptr; ++} ++ ++Instruction *Sw64TargetLowering::emitTrailingFence(IRBuilderBase &Builder, ++ Instruction *Inst, ++ AtomicOrdering Ord) const { ++ if (isa(Inst) && isAcquireOrStronger(Ord)) ++ return Builder.CreateFence(AtomicOrdering::AcquireRelease); ++ if (isa(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) ++ return Builder.CreateFence(AtomicOrdering::Release); ++ return nullptr; ++} ++ ++/// This is a helper function to parse a physical register string and split it ++/// into non-numeric and numeric parts (Prefix and Reg). The first boolean flag ++/// that is returned indicates whether parsing was successful. The second flag ++/// is true if the numeric part exists. 
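// Worked examples (derived from the implementation below): for C = "{$f12}"
// the braces are stripped, Prefix becomes "$f", Reg becomes 12 and the
// result is {true, true}; for C = "{$sp}" there is no numeric part, so the
// result is {true, false}; for C = "$3" (no braces) it is {false, false}.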
++static std::pair parsePhysicalReg(StringRef C, StringRef &Prefix, ++ unsigned long long &Reg) { ++ if (C.front() != '{' || C.back() != '}') ++ return std::make_pair(false, false); ++ ++ // Search for the first numeric character. ++ StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1; ++ I = std::find_if(B, E, isdigit); ++ ++ Prefix = StringRef(B, I - B); ++ ++ // The second flag is set to false if no numeric characters were found. ++ if (I == E) ++ return std::make_pair(true, false); ++ ++ // Parse the numeric characters. ++ return std::make_pair(!getAsUnsignedInteger(StringRef(I, E - I), 10, Reg), ++ true); ++} ++ ++std::pair ++Sw64TargetLowering::parseRegForInlineAsmConstraint(StringRef C, MVT VT) const { ++ const TargetRegisterClass *RC; ++ StringRef Prefix; ++ unsigned long long Reg; ++ ++ std::pair R = parsePhysicalReg(C, Prefix, Reg); ++ ++ if (!R.first) ++ return std::make_pair(0U, nullptr); ++ ++ if (!R.second) ++ return std::make_pair(0U, nullptr); ++ ++ if (Prefix == "$f") { // Parse $f0-$f31. ++ // The size of FP registers is 64-bit or Reg is an even number, select ++ // the 64-bit register class. ++ if (VT == MVT::Other) ++ VT = MVT::f64; ++ ++ RC = getRegClassFor(VT); ++ ++ } else { // Parse $0-$31. ++ assert(Prefix == "$"); ++ // Sw64 has only i64 register. ++ RC = getRegClassFor(MVT::i64); ++ StringRef name((C.data() + 1), (C.size() - 2)); ++ ++ return std::make_pair(MatchRegName(name), RC); ++ } ++ ++ assert(Reg < RC->getNumRegs()); ++ return std::make_pair(*(RC->begin() + Reg), RC); ++} ++/// Given a register class constraint, like 'r', if this corresponds directly ++/// to an LLVM register class, return a register of 0 and the register class ++/// pointer. ++std::pair ++Sw64TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, ++ StringRef Constraint, ++ MVT VT) const { ++ if (Constraint.size() == 1) { ++ switch (Constraint[0]) { ++ case 'r': ++ return std::make_pair(0U, &Sw64::GPRCRegClass); ++ case 'f': ++ return VT == MVT::f64 ? 
std::make_pair(0U, &Sw64::F8RCRegClass) ++ : std::make_pair(0U, &Sw64::F4RCRegClass); ++ } ++ } ++ ++ std::pair R; ++ R = parseRegForInlineAsmConstraint(Constraint, VT); ++ ++ if (R.second) ++ return R; ++ ++ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); ++} ++ ++//===----------------------------------------------------------------------===// ++// Other Lowering Code ++//===----------------------------------------------------------------------===// ++ ++MachineBasicBlock * ++Sw64TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ switch (MI.getOpcode()) { ++ default: ++ llvm_unreachable("Unexpected instr type to insert"); ++ ++ case Sw64::FILLCS: ++ case Sw64::FILLDE: ++ case Sw64::S_FILLCS: ++ case Sw64::S_FILLDE: ++ return emitPrefetch(MI, BB); ++ ++ // I64 && I32 ++ case Sw64::ATOMIC_LOAD_ADD_I32: ++ case Sw64::LAS32: ++ return emitAtomicBinary(MI, BB); ++ case Sw64::ATOMIC_LOAD_ADD_I64: ++ case Sw64::LAS64: ++ return emitAtomicBinary(MI, BB); ++ ++ case Sw64::ATOMIC_SWAP_I32: ++ case Sw64::SWAP32: ++ return emitAtomicBinary(MI, BB); ++ case Sw64::ATOMIC_SWAP_I64: ++ case Sw64::SWAP64: ++ return emitAtomicBinary(MI, BB); ++ case Sw64::ATOMIC_CMP_SWAP_I32: ++ case Sw64::CAS32: ++ return emitAtomicCmpSwap(MI, BB, 4); ++ case Sw64::ATOMIC_CMP_SWAP_I64: ++ case Sw64::CAS64: ++ return emitAtomicCmpSwap(MI, BB, 8); ++ ++ case Sw64::ATOMIC_LOAD_AND_I32: ++ return emitAtomicBinary(MI, BB); ++ case Sw64::ATOMIC_LOAD_AND_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case Sw64::ATOMIC_LOAD_OR_I32: ++ return emitAtomicBinary(MI, BB); ++ case Sw64::ATOMIC_LOAD_OR_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case Sw64::ATOMIC_LOAD_SUB_I32: ++ return emitAtomicBinary(MI, BB); ++ case Sw64::ATOMIC_LOAD_SUB_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case Sw64::ATOMIC_LOAD_XOR_I32: ++ return emitAtomicBinary(MI, BB); ++ case Sw64::ATOMIC_LOAD_XOR_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case Sw64::ATOMIC_LOAD_UMAX_I64: ++ case Sw64::ATOMIC_LOAD_MAX_I64: ++ case Sw64::ATOMIC_LOAD_UMIN_I64: ++ case Sw64::ATOMIC_LOAD_MIN_I64: ++ case Sw64::ATOMIC_LOAD_NAND_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case Sw64::ATOMIC_LOAD_UMAX_I32: ++ case Sw64::ATOMIC_LOAD_MAX_I32: ++ case Sw64::ATOMIC_LOAD_UMIN_I32: ++ case Sw64::ATOMIC_LOAD_MIN_I32: ++ case Sw64::ATOMIC_LOAD_NAND_I32: ++ return emitAtomicBinary(MI, BB); ++ ++ case Sw64::ATOMIC_LOAD_UMAX_I16: ++ case Sw64::ATOMIC_LOAD_MAX_I16: ++ case Sw64::ATOMIC_LOAD_UMIN_I16: ++ case Sw64::ATOMIC_LOAD_MIN_I16: ++ case Sw64::ATOMIC_LOAD_NAND_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ ++ case Sw64::ATOMIC_LOAD_UMAX_I8: ++ case Sw64::ATOMIC_LOAD_MAX_I8: ++ case Sw64::ATOMIC_LOAD_UMIN_I8: ++ case Sw64::ATOMIC_LOAD_MIN_I8: ++ case Sw64::ATOMIC_LOAD_NAND_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ ++ // I8 ++ case Sw64::ATOMIC_LOAD_ADD_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case Sw64::ATOMIC_SWAP_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case Sw64::ATOMIC_LOAD_AND_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case Sw64::ATOMIC_LOAD_OR_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case Sw64::ATOMIC_LOAD_SUB_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case Sw64::ATOMIC_LOAD_XOR_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case Sw64::ATOMIC_CMP_SWAP_I8: ++ return emitAtomicCmpSwapPartword(MI, BB, 1); ++ ++ // I16 ++ case Sw64::ATOMIC_LOAD_ADD_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case 
Sw64::ATOMIC_SWAP_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case Sw64::ATOMIC_LOAD_AND_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case Sw64::ATOMIC_LOAD_OR_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case Sw64::ATOMIC_LOAD_SUB_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case Sw64::ATOMIC_LOAD_XOR_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case Sw64::ATOMIC_CMP_SWAP_I16: ++ return emitAtomicCmpSwapPartword(MI, BB, 2); ++ } ++} ++ ++MachineBasicBlock * ++Sw64TargetLowering::emitPrefetch(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ ++ Register RA, RB, RC; ++ MachineFunction *MF = BB->getParent(); ++ // MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ MachineRegisterInfo &MRI = MF->getRegInfo(); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(1).getReg()); ++ ++ // %11:gprc = PHI %10:gprc, %bb.1, %15:gprc, %bb.4 ++ // FILLCS 128, %11:gprc ++ // it should be directed return. ++ if (!(DefMI->getOpcode() == Sw64::LDA && DefMI->getOperand(1).isImm())) ++ return BB; ++ ++ int Imm = DefMI->getOperand(1).getImm(); ++ int Distance = Imm + MI.getOperand(0).getImm(); ++ Register Address = DefMI->getOperand(2).getReg(); ++ ++ MachineInstr *MII = MI.getNextNode(); ++ if (MII) ++ MII = MII->getNextNode(); ++ else ++ return BB; ++ ++ if (MII) { ++ if (MII->getOpcode() == Sw64::LDL || MII->getOpcode() == Sw64::LDW || ++ MII->getOpcode() == Sw64::LDHU || MII->getOpcode() == Sw64::LDBU) { ++ int MIImm = MII->getOperand(1).getImm(); ++ if (MIImm > 1000 || MIImm < -1000) { ++ MI.eraseFromParent(); ++ return BB; ++ } ++ } ++ } ++ ++ if (Distance > 1500 || Distance < -1500) { ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++ } ++ ++ BuildMI(*BB, MI, DL, TII->get(MI.getOpcode())) ++ .addImm(Distance) ++ .addReg(Address); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++MachineBasicBlock * ++Sw64TargetLowering::emitReduceSum(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ Register RB = MI.getOperand(0).getReg(); ++ Register RA = MI.getOperand(1).getReg(); ++ ++ Register RC = RegInfo.createVirtualRegister(&Sw64::F4RCRegClass); ++ Register RD = RegInfo.createVirtualRegister(&Sw64::F4RCRegClass); ++ Register RE = RegInfo.createVirtualRegister(&Sw64::GPRCRegClass); ++ ++ MachineBasicBlock::iterator II(MI); ++ ++ BuildMI(*BB, II, DL, TII->get(MI.getOpcode())) ++ .addReg(RB, RegState::Define | RegState::EarlyClobber) ++ .addReg(RA, RegState::Kill) ++ .addReg(RC, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(RD, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(RE, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead); ++ ++ MI.eraseFromParent(); // The instruction is gone now. 
++ ++ return BB; ++} ++ ++MachineBasicBlock * ++Sw64TargetLowering::emitITOFSInstruct(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++#if 0 ++ Register RA, RB, RC, RD; ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ Register Scratch = ++ RegInfo.createVirtualRegister(&Sw64::F4RCRegClass); ++ ++ switch (MI.getOpcode()) { ++ default: ++ llvm_unreachable("Unknown instruct to expand."); ++ case Sw64::VROLW: ++ case Sw64::SLLOW: ++ case Sw64::SRLOW: { ++ RC = MI.getOperand(0).getReg(); ++ RA = MI.getOperand(1).getReg(); ++ RB = MI.getOperand(2).getReg(); ++ ++ BuildMI(*BB, MI, DL, TII->get(Sw64::ITOFS)).addReg(Scratch, RegState::Define).addReg(RB); ++ BuildMI(*BB, MI, DL, TII->get(MI.getOpcode())).addReg(RC, RegState::Define).addReg(RA).addReg(Scratch); ++ break; ++ } ++ case Sw64::VINSW: { ++ RC = MI.getOperand(0).getReg(); ++ RA = MI.getOperand(1).getReg(); ++ RB = MI.getOperand(2).getReg(); ++ int64_t Imm = MI.getOperand(3).getImm(); ++ ++ BuildMI(*BB, MI, DL, TII->get(Sw64::ITOFS)).addReg(Scratch, RegState::Define).addReg(RA); ++ BuildMI(*BB, MI, DL, TII->get(Sw64::VINSW)).addReg(RC, RegState::Define).addReg(Scratch).addReg(RB).addImm(Imm); ++ ++ break; ++ } ++ case Sw64::VEXTW: { ++ RC = MI.getOperand(0).getReg(); ++ RA = MI.getOperand(1).getReg(); ++ int64_t Imm = MI.getOperand(2).getImm(); ++ ++ BuildMI(*BB, MI, DL, TII->get(Sw64::VEXTW)).addReg(Scratch, RegState::Define).addReg(RA).addImm(Imm); ++ BuildMI(*BB, MI, DL, TII->get(Sw64::FTOIS)).addReg(RC, RegState::Define).addReg(Scratch); ++ ++ break; ++ } ++ case Sw64::VCPYW: { ++ RC = MI.getOperand(0).getReg(); ++ RA = MI.getOperand(1).getReg(); ++ ++ BuildMI(*BB, MI, DL, TII->get(Sw64::ITOFS)).addReg(Scratch, RegState::Define).addReg(RA); ++ BuildMI(*BB, MI, DL, TII->get(Sw64::VCPYW)).addReg(RC, RegState::Define).addReg(Scratch); ++ ++ break; ++ } ++ case Sw64::VCMPGEW: { ++ RC = MI.getOperand(0).getReg(); ++ RA = MI.getOperand(1).getReg(); ++ RB = MI.getOperand(2).getReg(); ++ ++ BuildMI(*BB, MI, DL, TII->get(MI.getOpcode())).addReg(Scratch, RegState::Define).addReg(RA).addReg(RB); ++ BuildMI(*BB, MI, DL, TII->get(Sw64::FTOIS)).addReg(RC, RegState::Define).addReg(Scratch); ++ ++ break; ++ } ++ case Sw64::VCMPGEWi: { ++ RC = MI.getOperand(0).getReg(); ++ RA = MI.getOperand(1).getReg(); ++ int64_t Imm = MI.getOperand(2).getImm(); ++ ++ BuildMI(*BB, MI, DL, TII->get(MI.getOpcode())).addReg(Scratch, RegState::Define).addReg(RA).addImm(Imm); ++ BuildMI(*BB, MI, DL, TII->get(Sw64::FTOIS)).addReg(RC, RegState::Define).addReg(Scratch); ++ ++ break; ++ } ++ case Sw64::VSHFW: { ++ RD = MI.getOperand(0).getReg(); ++ RA = MI.getOperand(1).getReg(); ++ RB = MI.getOperand(2).getReg(); ++ RC = MI.getOperand(3).getReg(); ++ ++ BuildMI(*BB, MI, DL, TII->get(Sw64::ITOFT)).addReg(Scratch, RegState::Define).addReg(RC); ++ BuildMI(*BB, MI, DL, TII->get(Sw64::VSHFW)).addReg(RD, RegState::Define).addReg(RA).addReg(RB).addReg(Scratch); ++ ++ break; ++ } ++ } ++ MI.eraseFromParent(); // The pseudo instruction is gone now. 
++#endif ++ return BB; ++} ++ ++MachineBasicBlock * ++Sw64TargetLowering::emitFSTOIInstruct(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ ++ Register RA, RC; ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned Opc = Sw64::CTPOPOW; ++ Register Scratch = RegInfo.createVirtualRegister(&Sw64::F4RCRegClass); ++ ++ RC = MI.getOperand(0).getReg(); ++ RA = MI.getOperand(1).getReg(); ++ ++ if (MI.getOpcode() != Opc) ++ Opc = Sw64::CTLZOW; ++ ++ BuildMI(*BB, MI, DL, TII->get(Opc)) ++ .addReg(Scratch, RegState::Define) ++ .addReg(RA); ++ BuildMI(*BB, MI, DL, TII->get(Sw64::FTOIS)) ++ .addReg(RC, RegState::Define) ++ .addReg(Scratch); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++MachineBasicBlock *Sw64TargetLowering::emitAtomicBinaryPartword( ++ MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { ++ assert((Size == 1 || Size == 2) && ++ "Unsupported size for EmitAtomicBinaryPartial."); ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetRegisterClass *RC = getRegClassFor(MVT::i64); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned OldVal = MI.getOperand(0).getReg(); ++ unsigned Ptr = MI.getOperand(1).getReg(); ++ unsigned Incr = MI.getOperand(2).getReg(); ++ ++ unsigned StoreVal = RegInfo.createVirtualRegister(RC); ++ unsigned LockVal = RegInfo.createVirtualRegister(RC); ++ unsigned Reg_bic = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch = RegInfo.createVirtualRegister(RC); ++ ++ unsigned AtomicOp = 0; ++ switch (MI.getOpcode()) { ++ case Sw64::ATOMIC_LOAD_ADD_I8: ++ AtomicOp = Sw64::ATOMIC_LOAD_ADD_I8_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_SUB_I8: ++ AtomicOp = Sw64::ATOMIC_LOAD_SUB_I8_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_AND_I8: ++ AtomicOp = Sw64::ATOMIC_LOAD_AND_I8_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_OR_I8: ++ AtomicOp = Sw64::ATOMIC_LOAD_OR_I8_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_XOR_I8: ++ AtomicOp = Sw64::ATOMIC_LOAD_XOR_I8_POSTRA; ++ break; ++ case Sw64::ATOMIC_SWAP_I8: ++ AtomicOp = Sw64::ATOMIC_SWAP_I8_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_ADD_I16: ++ AtomicOp = Sw64::ATOMIC_LOAD_ADD_I16_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_SUB_I16: ++ AtomicOp = Sw64::ATOMIC_LOAD_SUB_I16_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_AND_I16: ++ AtomicOp = Sw64::ATOMIC_LOAD_AND_I16_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_OR_I16: ++ AtomicOp = Sw64::ATOMIC_LOAD_OR_I16_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_XOR_I16: ++ AtomicOp = Sw64::ATOMIC_LOAD_XOR_I16_POSTRA; ++ break; ++ case Sw64::ATOMIC_SWAP_I16: ++ AtomicOp = Sw64::ATOMIC_SWAP_I16_POSTRA; ++ break; ++ ++ case Sw64::ATOMIC_LOAD_UMAX_I16: ++ AtomicOp = Sw64::ATOMIC_LOAD_UMAX_I16_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_MAX_I16: ++ AtomicOp = Sw64::ATOMIC_LOAD_MAX_I16_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_UMIN_I16: ++ AtomicOp = Sw64::ATOMIC_LOAD_UMIN_I16_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_MIN_I16: ++ AtomicOp = Sw64::ATOMIC_LOAD_MIN_I16_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_NAND_I16: ++ AtomicOp = Sw64::ATOMIC_LOAD_NAND_I16_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_UMAX_I8: ++ AtomicOp = Sw64::ATOMIC_LOAD_UMAX_I8_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_MAX_I8: ++ AtomicOp = Sw64::ATOMIC_LOAD_MAX_I8_POSTRA; ++ break; ++ case 
Sw64::ATOMIC_LOAD_UMIN_I8: ++ AtomicOp = Sw64::ATOMIC_LOAD_UMIN_I8_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_MIN_I8: ++ AtomicOp = Sw64::ATOMIC_LOAD_MIN_I8_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_NAND_I8: ++ AtomicOp = Sw64::ATOMIC_LOAD_NAND_I8_POSTRA; ++ break; ++ default: ++ llvm_unreachable("Unknown pseudo atomic for replacement!"); ++ } ++ ++ MachineBasicBlock::iterator II(MI); ++ ++ unsigned t_Ptr = MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); ++ BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_Ptr).addReg(Ptr).addReg(Ptr); ++ ++ unsigned t_Incr = MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); ++ BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_Incr).addReg(Incr).addReg(Incr); ++ ++ BuildMI(*BB, II, DL, TII->get(AtomicOp)) ++ .addReg(OldVal, RegState::Define | RegState::EarlyClobber) ++ .addReg(t_Ptr, RegState::EarlyClobber) ++ .addReg(t_Incr, RegState::EarlyClobber) ++ .addReg(StoreVal, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(LockVal, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Reg_bic, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Scratch, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead); ++ ++ MI.eraseFromParent(); // The instruction is gone now. ++ ++ return BB; ++} ++ ++MachineBasicBlock *Sw64TargetLowering::emitAtomicCmpSwapPartword( ++ MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { ++ assert((Size == 1 || Size == 2) && ++ "Unsupported size for EmitAtomicCmpSwapPartial."); ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetRegisterClass *RC = getRegClassFor(MVT::i64); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Dest = MI.getOperand(0).getReg(); ++ unsigned Ptr = MI.getOperand(1).getReg(); ++ unsigned OldVal = MI.getOperand(2).getReg(); ++ unsigned NewVal = MI.getOperand(3).getReg(); ++ ++ // unsigned Success = RegInfo.createVirtualRegister(RC); ++ unsigned Reg_bic = RegInfo.createVirtualRegister(RC); ++ unsigned Reg_ins = RegInfo.createVirtualRegister(RC); ++ unsigned LockVal = RegInfo.createVirtualRegister(RC); ++ unsigned Reg_cmp = RegInfo.createVirtualRegister(RC); ++ unsigned Reg_mas = RegInfo.createVirtualRegister(RC); ++ // unsigned Reg_bis = RegInfo.createVirtualRegister(RC); ++ ++ unsigned AtomicOp = 0; ++ switch (MI.getOpcode()) { ++ case Sw64::ATOMIC_CMP_SWAP_I8: ++ AtomicOp = Sw64::ATOMIC_CMP_SWAP_I8_POSTRA; ++ break; ++ case Sw64::ATOMIC_CMP_SWAP_I16: ++ AtomicOp = Sw64::ATOMIC_CMP_SWAP_I16_POSTRA; ++ break; ++ default: ++ llvm_unreachable("Unknown pseudo atomic for replacement!"); ++ } ++ ++ MachineBasicBlock::iterator II(MI); ++ ++ unsigned t_Ptr = MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); ++ BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_Ptr).addReg(Ptr).addReg(Ptr); ++ unsigned t_OldVal = ++ MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); ++ BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_OldVal) ++ .addReg(OldVal) ++ .addReg(OldVal); ++ unsigned t_NewVal = ++ MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); ++ BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_NewVal) ++ .addReg(NewVal) ++ .addReg(NewVal); ++ ++ BuildMI(*BB, II, DL, TII->get(AtomicOp)) ++ .addReg(Dest, RegState::Define | RegState::EarlyClobber) ++ .addReg(t_Ptr, RegState::EarlyClobber) ++ .addReg(t_OldVal, 
RegState::EarlyClobber) ++ .addReg(t_NewVal, RegState::EarlyClobber) ++ .addReg(Reg_bic, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Reg_ins, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(LockVal, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Reg_cmp, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Reg_mas, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead); ++ ++ MI.eraseFromParent(); // The instruction is gone now. ++ ++ return BB; ++} ++ ++// This function also handles Sw64::ATOMIC_SWAP_I32 (when BinOpcode == 0), and ++// Sw64::SWAP32 ++MachineBasicBlock * ++Sw64TargetLowering::emitAtomicBinary(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetRegisterClass *RC = getRegClassFor(MVT::i64); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned AtomicOp; ++ switch (MI.getOpcode()) { ++ case Sw64::ATOMIC_LOAD_ADD_I32: ++ case Sw64::LAS32: ++ AtomicOp = Sw64::ATOMIC_LOAD_ADD_I32_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_SUB_I32: ++ AtomicOp = Sw64::ATOMIC_LOAD_SUB_I32_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_AND_I32: ++ AtomicOp = Sw64::ATOMIC_LOAD_AND_I32_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_OR_I32: ++ AtomicOp = Sw64::ATOMIC_LOAD_OR_I32_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_XOR_I32: ++ AtomicOp = Sw64::ATOMIC_LOAD_XOR_I32_POSTRA; ++ break; ++ case Sw64::ATOMIC_SWAP_I32: ++ case Sw64::SWAP32: ++ AtomicOp = Sw64::ATOMIC_SWAP_I32_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_ADD_I64: ++ case Sw64::LAS64: ++ AtomicOp = Sw64::ATOMIC_LOAD_ADD_I64_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_SUB_I64: ++ AtomicOp = Sw64::ATOMIC_LOAD_SUB_I64_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_AND_I64: ++ AtomicOp = Sw64::ATOMIC_LOAD_AND_I64_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_OR_I64: ++ AtomicOp = Sw64::ATOMIC_LOAD_OR_I64_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_XOR_I64: ++ AtomicOp = Sw64::ATOMIC_LOAD_XOR_I64_POSTRA; ++ break; ++ case Sw64::ATOMIC_SWAP_I64: ++ case Sw64::SWAP64: ++ AtomicOp = Sw64::ATOMIC_SWAP_I64_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_UMAX_I64: ++ AtomicOp = Sw64::ATOMIC_LOAD_UMAX_I64_POSTRA; ++ break; ++ ++ case Sw64::ATOMIC_LOAD_MAX_I64: ++ AtomicOp = Sw64::ATOMIC_LOAD_MAX_I64_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_UMIN_I64: ++ AtomicOp = Sw64::ATOMIC_LOAD_UMIN_I64_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_MIN_I64: ++ AtomicOp = Sw64::ATOMIC_LOAD_MIN_I64_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_NAND_I64: ++ AtomicOp = Sw64::ATOMIC_LOAD_NAND_I64_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_UMAX_I32: ++ AtomicOp = Sw64::ATOMIC_LOAD_UMAX_I32_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_MAX_I32: ++ AtomicOp = Sw64::ATOMIC_LOAD_MAX_I32_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_UMIN_I32: ++ AtomicOp = Sw64::ATOMIC_LOAD_UMIN_I32_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_MIN_I32: ++ AtomicOp = Sw64::ATOMIC_LOAD_MIN_I32_POSTRA; ++ break; ++ case Sw64::ATOMIC_LOAD_NAND_I32: ++ AtomicOp = Sw64::ATOMIC_LOAD_NAND_I32_POSTRA; ++ break; ++ ++ default: ++ llvm_unreachable("Unknown pseudo atomic for replacement!"); ++ } ++ ++ unsigned OldVal = MI.getOperand(0).getReg(); ++ unsigned Ptr = MI.getOperand(1).getReg(); ++ unsigned Incr = MI.getOperand(2).getReg(); ++ ++ unsigned 
StoreVal = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch1 = RegInfo.createVirtualRegister(RC); ++ ++ MachineBasicBlock::iterator II(MI); ++ ++ unsigned t_Ptr = MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); ++ BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_Ptr).addReg(Ptr).addReg(Ptr); ++ ++ unsigned t_Incr = MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); ++ BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_Incr).addReg(Incr).addReg(Incr); ++ ++ BuildMI(*BB, II, DL, TII->get(AtomicOp)) ++ .addReg(OldVal, RegState::Define | RegState::EarlyClobber) ++ .addReg(t_Ptr, RegState::EarlyClobber) ++ .addReg(t_Incr, RegState::EarlyClobber) ++ .addReg(StoreVal, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Scratch, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Scratch1, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead); ++ ++ MI.eraseFromParent(); // The instruction is gone now. ++ ++ return BB; ++} ++ ++MachineBasicBlock *Sw64TargetLowering::emitAtomicCmpSwap(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ unsigned Size) const { ++ assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap."); ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetRegisterClass *RC = getRegClassFor(MVT::i64); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned AtomicOp; ++ ++ switch (MI.getOpcode()) { ++ case Sw64::CAS32: ++ case Sw64::ATOMIC_CMP_SWAP_I32: ++ AtomicOp = Sw64::ATOMIC_CMP_SWAP_I32_POSTRA; ++ break; ++ case Sw64::CAS64: ++ case Sw64::ATOMIC_CMP_SWAP_I64: ++ AtomicOp = Sw64::ATOMIC_CMP_SWAP_I64_POSTRA; ++ break; ++ default: ++ llvm_unreachable("Unknown pseudo atomic for replacement!"); ++ } ++ ++ /* ++ $0=Dest $16=Ptr $17=OldVal $18=NewVal ++ ++ memb ++ $BB0_1: ++ ldi $0,0($16) ++ lldw $0,0($0) ++ cmpeq $17,$0,$1 ++ wr_f $1 ++ bis $18,$18,$2 ++ lstw $2,0($16) ++ rd_f $2 ++ beq $1,$BB0_2 ++ beq $2,$BB0_1 ++ $BB0_2: ++ */ ++ ++ unsigned Dest = MI.getOperand(0).getReg(); ++ unsigned Ptr = MI.getOperand(1).getReg(); ++ unsigned OldVal = MI.getOperand(2).getReg(); ++ unsigned NewVal = MI.getOperand(3).getReg(); ++ unsigned Scratch = RegInfo.createVirtualRegister(RC); ++ unsigned Reg_cmp = RegInfo.createVirtualRegister(RC); ++ ++ MachineBasicBlock::iterator II(MI); ++ ++ unsigned t_Ptr = MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); ++ BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_Ptr).addReg(Ptr).addReg(Ptr); ++ unsigned t_OldVal = ++ MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); ++ BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_OldVal) ++ .addReg(OldVal) ++ .addReg(OldVal); ++ unsigned t_NewVal = ++ MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); ++ BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_NewVal) ++ .addReg(NewVal) ++ .addReg(NewVal); ++ ++ BuildMI(*BB, II, DL, TII->get(AtomicOp)) ++ .addReg(Dest, RegState::Define | RegState::EarlyClobber) ++ .addReg(t_Ptr, RegState::EarlyClobber) ++ .addReg(t_OldVal, RegState::EarlyClobber) ++ .addReg(t_NewVal, RegState::EarlyClobber) ++ .addReg(Scratch, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Reg_cmp, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead); ++ ++ MI.eraseFromParent(); // The instruction is gone 
now. ++ ++ return BB; ++} ++ ++MVT Sw64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL, ++ EVT LHSTy) const { ++ return MVT::i64; ++} ++ ++bool Sw64TargetLowering::isOffsetFoldingLegal( ++ const GlobalAddressSDNode *GA) const { ++ // The Sw64 target isn't yet aware of offsets. ++ return false; ++} ++ ++EVT Sw64TargetLowering::getOptimalMemOpType( ++ const MemOp &Op, const AttributeList & /*FuncAttributes*/) const { ++ if (Subtarget.enOptMemset()) ++ return MVT::i64; ++ return MVT::Other; ++} ++ ++bool Sw64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { ++ if (VT != MVT::f32 && VT != MVT::f64) ++ return false; ++ // +0.0 F31 ++ // +0.0f F31 ++ // -0.0 -F31 ++ // -0.0f -F31 ++ return Imm.isZero() || Imm.isNegZero(); ++} ++ ++SDValue Sw64TargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG, ++ int Enabled, ++ int &RefinementSteps) const { ++ EVT VT = Operand.getValueType(); ++ if ((VT == MVT::f32 || VT == MVT::f64) && Subtarget.hasCore4() && ++ Subtarget.enableFloatAri()) { ++ if (RefinementSteps == ReciprocalEstimate::Unspecified) { ++ if (VT.getScalarType() == MVT::f32) ++ RefinementSteps = 2; ++ if (VT.getScalarType() == MVT::f64) ++ RefinementSteps = 3; ++ } ++ if (VT.getScalarType() == MVT::f32) ++ return DAG.getNode(Sw64ISD::FRECS, SDLoc(Operand), VT, Operand); ++ if (VT.getScalarType() == MVT::f64) ++ return DAG.getNode(Sw64ISD::FRECD, SDLoc(Operand), VT, Operand); ++ } ++ return SDValue(); ++} ++ ++bool Sw64TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, ++ SDValue &Base, ++ SDValue &Offset, ++ ISD::MemIndexedMode &AM, ++ SelectionDAG &DAG) const { ++ EVT VT; ++ SDValue Ptr; ++ LSBaseSDNode *LSN = dyn_cast(N); ++ if (!LSN) ++ return false; ++ VT = LSN->getMemoryVT(); ++ bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || ++ VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64; ++ if (!IsLegalType) ++ return false; ++ if (Op->getOpcode() != ISD::ADD) ++ return false; ++ if (LoadSDNode *LD = dyn_cast(N)) { ++ VT = LD->getMemoryVT(); ++ Ptr = LD->getBasePtr(); ++ } else if (StoreSDNode *ST = dyn_cast(N)) { ++ VT = ST->getMemoryVT(); ++ Ptr = ST->getBasePtr(); ++ } else ++ return false; ++ ++ if (ConstantSDNode *RHS = dyn_cast(Op->getOperand(1))) { ++ uint64_t RHSC = RHS->getZExtValue(); ++ Base = Ptr; ++ Offset = DAG.getConstant(RHSC, SDLoc(N), MVT::i64); ++ AM = ISD::POST_INC; ++ return true; ++ } ++ ++ return false; ++} ++ ++const TargetRegisterClass *Sw64TargetLowering::getRepRegClassFor(MVT VT) const { ++ if (VT == MVT::Other) ++ return &Sw64::GPRCRegClass; ++ if (VT == MVT::i32) ++ return &Sw64::FPRC_loRegClass; ++ return TargetLowering::getRepRegClassFor(VT); ++} ++ ++bool Sw64TargetLowering::isLegalAddressingMode(const DataLayout &DL, ++ const AddrMode &AM, Type *Ty, ++ unsigned AS, ++ Instruction *I) const { ++ if (!Subtarget.hasCore4() || !Subtarget.enablePostInc()) ++ return llvm::TargetLoweringBase::isLegalAddressingMode(DL, AM, Ty, AS, I); ++ ++ // No global is ever allowed as a base. ++ if (AM.BaseGV) ++ return false; ++ ++ // Require a 12-bit signed offset. ++ if (!isInt<12>(AM.BaseOffs)) ++ return false; ++ ++ switch (AM.Scale) { ++ case 0: // "r+i" or just "i", depending on HasBaseReg. ++ break; ++ case 1: ++ if (!AM.HasBaseReg) // allow "r+i". ++ break; ++ return false; // disallow "r+r" or "r+r+i". 
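// Examples of what the checks above accept when post-increment is enabled:
// "reg + 2047" and "reg - 2048" are legal (12-bit signed immediate),
// "reg + 4096" is not, any global base is rejected via AM.BaseGV, and
// "reg + reg" or "reg + reg + imm" are rejected by the Scale == 1 case.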
++ default: ++ return false; ++ } ++ ++ return true; ++} ++ ++bool Sw64TargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, ++ EVT VT) const { ++ VT = VT.getScalarType(); ++ ++ if (!VT.isSimple()) ++ return false; ++ ++ switch (VT.getSimpleVT().SimpleTy) { ++ case MVT::f32: ++ case MVT::f64: ++ return true; ++ default: ++ break; ++ } ++ ++ return false; ++} ++ ++bool Sw64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F, ++ Type *Ty) const { ++ switch (Ty->getScalarType()->getTypeID()) { ++ case Type::FloatTyID: ++ case Type::DoubleTyID: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++bool Sw64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { ++ // Zexts are free if they can be combined with a load. ++ if (Subtarget.enOptExt()) { ++ if (auto *LD = dyn_cast(Val)) { ++ EVT MemVT = LD->getMemoryVT(); ++ if ((MemVT == MVT::i8 || MemVT == MVT::i16 || ++ (Subtarget.is64Bit() && MemVT == MVT::i32)) && ++ (LD->getExtensionType() == ISD::NON_EXTLOAD || ++ LD->getExtensionType() == ISD::ZEXTLOAD)) ++ return true; ++ } ++ } ++ ++ return TargetLowering::isZExtFree(Val, VT2); ++} ++ ++bool Sw64TargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { ++ if (Subtarget.enOptExt()) ++ return SrcVT == MVT::i32 && DstVT == MVT::i64; ++ return false; ++} ++ ++bool Sw64TargetLowering::isLegalICmpImmediate(int64_t Imm) const { ++ if (Subtarget.enOptExt()) ++ return Imm >= 0 && Imm <= 255; ++ return false; ++} ++ ++bool Sw64TargetLowering::isLegalAddImmediate(int64_t Imm) const { ++ if (Subtarget.enOptExt()) ++ return Imm >= 0 && Imm <= 255; ++ return false; ++} +diff --git a/llvm/lib/Target/Sw64/Sw64ISelLowering.h b/llvm/lib/Target/Sw64/Sw64ISelLowering.h +new file mode 100644 +index 000000000..cab44faf6 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64ISelLowering.h +@@ -0,0 +1,484 @@ ++//===-- Sw64ISelLowering.h - Sw64 DAG Lowering Interface ------*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the interfaces that Sw64 uses to lower LLVM code into a ++// selection DAG. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_SW64_SW64ISELLOWERING_H ++#define LLVM_LIB_TARGET_SW64_SW64ISELLOWERING_H ++ ++#include "Sw64.h" ++#include "llvm/CodeGen/SelectionDAG.h" ++#include "llvm/CodeGen/TargetLowering.h" ++ ++namespace llvm { ++ ++// Forward delcarations ++class Sw64Subtarget; ++class Sw64TargetMachine; ++ ++namespace Sw64ISD { ++enum NodeType : unsigned { ++ // Start the numbering where the builtin ops and target ops leave off. ++ FIRST_NUMBER = ISD::BUILTIN_OP_END, ++ ++ // These corrospond to the identical Instruction ++ CVTQT_, ++ CVTQS_, ++ CVTTQ_, ++ CVTTS_, ++ CVTST_, ++ /// GPRelHi/GPRelLo - These represent the high and low 16-bit ++ /// parts of a global address respectively. ++ GPRelHi, ++ GPRelLo, ++ /// TPRelHi/TPRelLo - These represent the high and low 16-bit ++ /// parts of a TLS global address respectively. ++ TPRelHi, ++ TPRelLo, ++ TLSGD, // SW ++ TLSLDM, // SW ++ DTPRelHi, ++ DTPRelLo, ++ RelGottp, // SW ++ SysCall, ++ /// RetLit - Literal Relocation of a Global ++ RelLit, ++ ++ /// GlobalRetAddr - used to restore the return address ++ GlobalRetAddr, ++ ++ /// CALL - Normal call. 
++ CALL, ++ ++ /// Jump and link (call) ++ JmpLink, ++ /// DIVCALL - used for special library calls for div and rem ++ DivCall, ++ /// return flag operand ++ RET_FLAG, ++ Ret, ++ LDAWC, ++ MEMBARRIER, ++ /// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This ++ /// corresponds to the COND_BRANCH pseudo instruction. ++ /// *PRC is the input register to compare to zero, ++ /// OPC is the branch opcode to use (e.g. Sw64::BEQ), ++ /// DESTBB is the destination block to branch to, and INFLAG is ++ /// an optional input flag argument. ++ COND_BRANCH_I, ++ COND_BRANCH_F, ++ ++ Z_S_FILLCS, ++ Z_S_FILLDE, ++ Z_FILLDE, ++ Z_FILLDE_E, ++ Z_FILLCS, ++ Z_FILLCS_E, ++ Z_E_FILLCS, ++ Z_E_FILLDE, ++ Z_FLUSHD, ++ ++ GPRel, ++ TPRel, ++ DTPRel, ++ LDIH, ++ LDI, ++ ++ FRECS, ++ FRECD, ++ ADDPI, ++ ADDPIS, ++ SBT, ++ CBT, ++ REVBH, ++ REVBW, ++ ++ ROLW, ++ CRC32B, ++ CRC32H, ++ CRC32W, ++ CRC32L, ++ CRC32CB, ++ CRC32CH, ++ CRC32CW, ++ CRC32CL, ++ ++ VBROADCAST_LD, ++ VBROADCAST, ++ ++ // Vector load. ++ VLDWE, ++ VLDSE, ++ VLDDE, ++ ++ // Vector comparisons. ++ // These take a vector and return a boolean. ++ VALL_ZERO, ++ VANY_ZERO, ++ VALL_NONZERO, ++ VANY_NONZERO, ++ ++ // This is vcmpgew. ++ VSETGE, ++ ++ // These take a vector and return a vector bitmask. ++ VCEQ, ++ VCLE_S, ++ VCLE_U, ++ VCLT_S, ++ VCLT_U, ++ // These is vector select. ++ VFCMOVEQ, ++ VFCMOVLE, ++ VFCMOVLT, ++ VSELEQW, ++ VSELLTW, ++ VSELLEW, ++ VSELLBCW, ++ ++ VMAX, ++ VMIN, ++ VUMAX, ++ VUMIN, ++ VSQRT, ++ VSUMF, ++ VFREC, ++ VFCMPEQ, ++ VFCMPLE, ++ VFCMPLT, ++ VFCMPUN, ++ VFCVTSD, ++ VFCVTDS, ++ VFCVTLS, ++ VFCVTLD, ++ VFCVTSH, ++ VFCVTHS, ++ VFCVTDL, ++ VFCVTDLG, ++ VFCVTDLP, ++ VFCVTDLZ, ++ VFCVTDLN, ++ VFRIS, ++ VFRISG, ++ VFRISP, ++ VFRISZ, ++ VFRISN, ++ VFRID, ++ VFRIDG, ++ VFRIDP, ++ VFRIDZ, ++ VFRIDN, ++ VMAXF, ++ VMINF, ++ VINSECTL, ++ // VINSECTLH, ++ // VINSECTLB, ++ // VINSECTLL, ++ // VINSECTLW, ++ // VSHFQB, ++ // VSHFQ, ++ VCPYB, ++ VCPYH, ++ // Vector Shuffle with mask as an operand ++ VSHF, // Generic shuffle ++ SHF, // 4-element set shuffle. 
++ ILVEV, // Interleave even elements ++ ILVOD, // Interleave odd elements ++ ILVL, // Interleave left elements ++ ILVR, // Interleave right elements ++ PCKEV, // Pack even elements ++ PCKOD, // Pack odd elements ++ VCON_W, ++ VCON_S, ++ VCON_D, ++ ++ VSHL_BY_SCALAR, ++ VSRL_BY_SCALAR, ++ VSRA_BY_SCALAR, ++ // Vector Lane Copy ++ INSVE, // Copy element from one vector to another ++ ++ // Combined (XOR (OR $a, $b), -1) ++ VNOR, ++ VEQV, ++ VORNOT, ++ ++ VCTPOP, ++ VCTLZ, ++ ++ VLOG, ++ VCOPYF, ++ V8SLL, ++ V8SLLi, ++ V8SRL, ++ V8SRLi, ++ VROTR, ++ VROTRi, ++ V8SRA, ++ V8SRAi, ++ VROLB, ++ VROLBi, ++ VROLH, ++ VROLHi, ++ VROLL, ++ VROLLi, ++ VECREDUCE_FADD, ++ VECT_VUCADDW, ++ VECT_VUCADDH, ++ VECT_VUCADDB, ++ VECT_VUCSUBW, ++ VECT_VUCSUBH, ++ VECT_VUCSUBB, ++ // Extended vector element extraction ++ VEXTRACT_SEXT_ELT, ++ VEXTRACT_ZEXT_ELT, ++ ++ VTRUNCST = ISD::FIRST_TARGET_MEMORY_OPCODE ++}; ++} // namespace Sw64ISD ++ ++//===--------------------------------------------------------------------===// ++// TargetLowering Implementation ++//===--------------------------------------------------------------------===// ++class Sw64TargetLowering : public TargetLowering { ++ const TargetMachine &TM; ++ const Sw64Subtarget &Subtarget; ++ ++public: ++ explicit Sw64TargetLowering(const TargetMachine &TM, ++ const Sw64Subtarget &Subtarget); ++ ++ MVT getScalarShiftAmountTy(const DataLayout &DL, EVT LHSTy) const override; ++ ++ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }; ++ ++ bool generateFMAsInMachineCombiner(EVT VT, ++ CodeGenOpt::Level OptLevel) const override; ++ ++ /// getSetCCResultType - Get the SETCC result ValueType ++ virtual EVT getSetCCResultType(const DataLayout &, LLVMContext &, ++ EVT VT) const override; ++ bool isLegalICmpImmediate(int64_t Imm) const override; ++ bool isLegalAddImmediate(int64_t Imm) const override; ++ bool isZExtFree(SDValue Val, EVT VT2) const override; ++ bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override; ++ ++ /// LowerOperation - Provide custom lowering hooks for some operations. ++ /// ++ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; ++ ++ /// ReplaceNodeResults - Replace the results of node with an illegal result ++ /// type with new values built out of custom code. ++ /// ++ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, ++ SelectionDAG &DAG) const override; ++ ++ /// getTargetNodeName - This method returns the name of a target specific ++ /// DAG node. ++ const char *getTargetNodeName(unsigned Opcode) const override; ++ template SDValue getAddr(NodeTy *N, SelectionDAG &DAG) const; ++ SDValue LowerCallResult(SDValue Chain, SDValue InFlag, ++ CallingConv::ID CallConv, bool isVarArg, ++ const SmallVectorImpl &Ins, SDLoc &dl, ++ SelectionDAG &DAG, SmallVectorImpl &InVals, ++ const SDNode *CallNode, const Type *RetTy) const; ++ ConstraintType getConstraintType(const std::string &Constraint) const; ++ ++ unsigned MatchRegName(StringRef Name) const; ++ Register getRegisterByName(const char *RegName, LLT VT, ++ const MachineFunction &MF) const override; ++ /// Examine constraint string and operand type and determine a weight value. ++ /// The operand object must already have been set up with the operand type. 
++ ConstraintWeight ++ getSingleConstraintMatchWeight(AsmOperandInfo &info, ++ const char *constraint) const override; ++ ++ // Inline asm support ++ std::pair ++ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, ++ StringRef Constraint, MVT VT) const override; ++ ++ MachineBasicBlock * ++ EmitInstrWithCustomInserter(MachineInstr &MI, ++ MachineBasicBlock *BB) const override; ++ ++ virtual bool ++ isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; ++ ++ EVT getOptimalMemOpType( ++ const MemOp &Op, const AttributeList & /*FuncAttributes*/) const override; ++ ++ /// isFPImmLegal - Returns true if the target can instruction select the ++ /// specified FP immediate natively. If false, the legalizer will ++ /// materialize the FP immediate as a load from a constant pool. ++ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; ++ struct LTStr { ++ bool operator()(const char *S1, const char *S2) const { ++ return strcmp(S1, S2) < 0; ++ } ++ }; ++ /// If a physical register, this returns the register that receives the ++ /// exception address on entry to an EH pad. ++ Register ++ getExceptionPointerRegister(const Constant *PersonalityFn) const override { ++ return Sw64::R16; ++ } ++ ++ /// If a physical register, this returns the register that receives the ++ /// exception typeid on entry to a landing pad. ++ Register ++ getExceptionSelectorRegister(const Constant *PersonalityFn) const override { ++ return Sw64::R17; ++ } ++ SDValue PerformDAGCombineV(SDNode *N, DAGCombinerInfo &DCI) const; ++ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; ++ ++ /// Enable SIMD support for the given integer type and Register ++ /// class. ++ void addSIMDIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); ++ ++ /// Enable SIMD support for the given floating-point type and ++ /// Register class. ++ void addSIMDFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); ++ ++private: ++ // Helpers for custom lowering. 
++ void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr, ++ SelectionDAG &DAG) const; ++ ++ SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, ++ bool isVarArg, ++ const SmallVectorImpl &Ins, ++ const SDLoc &dl, SelectionDAG &DAG, ++ SmallVectorImpl &InVals) const override; ++ ++ virtual SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, ++ SmallVectorImpl &InVals) const override; ++ ++ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, ++ const SmallVectorImpl &Outs, ++ const SmallVectorImpl &OutVals, const SDLoc &dl, ++ SelectionDAG &DAG) const override; ++ ++ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, ++ bool isVarArg, ++ const SmallVectorImpl &ArgsFlags, ++ LLVMContext &Context) const override; ++ ++ // Lower Operand specifics ++ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerSUREM(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerSUDIV(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerOR(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerSUDIVI128(SDValue Op, SelectionDAG &DAG) const; ++ std::pair LowerCallExtraResult(SDValue &Chain, ++ SDValue &DemoteStackSlot, ++ unsigned DemoteStackIdx, ++ SelectionDAG &DAG) const; ++ SDValue LowerROLW(SDNode *N, SelectionDAG &DAG) const; ++ ++ SDValue LowerVectorShift(SDValue Op, SelectionDAG &DAG) const; ++ ++ ISD::NodeType getExtendForAtomicOps() const override { ++ return ISD::ANY_EXTEND; ++ } ++ ++ SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; ++ ++ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; ++ ++ MachineMemOperand::Flags ++ getTargetMMOFlags(const Instruction &I) const override; ++ ++ bool shouldInsertFencesForAtomic(const Instruction *I) const override { ++ return true; ++ } ++ Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, ++ AtomicOrdering Ord) const override; ++ Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, ++ AtomicOrdering Ord) const override; ++ /// This function parses registers that appear in inline-asm constraints. ++ /// It returns pair (0, 0) on failure. 
++ ++ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; ++ ++ SDValue LowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const; ++ ++ MachineBasicBlock *emitReduceSum(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ MachineBasicBlock *emitITOFSInstruct(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ MachineBasicBlock *emitFSTOIInstruct(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ SDValue LowerVectorMemIntr(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; ++ // Return true if an FMA operation is faster than a pair of fmul and fadd ++ /// instructions. fmuladd intrinsics will be expanded to FMAs when this method ++ /// returns true, otherwise fmuladd is expanded to fmul + fadd. ++ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, ++ EVT VT) const override; ++ bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override; ++ ++ std::pair ++ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const; ++ ++ MachineBasicBlock *emitAtomicBinary(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ MachineBasicBlock *emitAtomicCmpSwap(MachineInstr &MI, MachineBasicBlock *BB, ++ unsigned Size) const; ++ MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ unsigned Size) const; ++ MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ unsigned Size) const; ++ MachineBasicBlock *emitPrefetch(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, ++ int &RefinementSteps) const override; ++ bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, ++ SDValue &Offset, ISD::MemIndexedMode &AM, ++ SelectionDAG &DAG) const override; ++ const TargetRegisterClass *getRepRegClassFor(MVT VT) const override; ++ ++ SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const; ++ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, ++ unsigned AS, ++ Instruction *I = nullptr) const override; ++}; ++} // namespace llvm ++#endif +diff --git a/llvm/lib/Target/Sw64/Sw64InstrFormats.td b/llvm/lib/Target/Sw64/Sw64InstrFormats.td +new file mode 100644 +index 000000000..c7ec61ea5 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64InstrFormats.td +@@ -0,0 +1,452 @@ ++//===- Sw64InstrFormats.td - Sw64 Instruction Formats ----*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
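++//
++// Each format class in this file pins down one of the 32-bit field layouts
++// described in the ISA manual chapters quoted next to it; the concrete
++// opcode and function values are supplied by the instruction definitions
++// that instantiate these classes.  As a rough illustration (the opcode and
++// function values below are invented, not taken from the manual), a
++// three-register OForm instruction with opcode 0x10, function 0x00, RA=1,
++// RB=2, RC=3 would encode as
++//   (0x10 << 26) | (1 << 21) | (2 << 16) | (0x00 << 5) | 3 == 0x40220003.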
++// ++//===----------------------------------------------------------------------===// ++ ++def u5imm : Operand{ ++ let DecoderMethod = "decodeUImmOperand<5>"; ++} ++def u6imm : Operand{ ++ let DecoderMethod = "decodeUImmOperand<6>"; ++} ++def u8imm : Operand{ ++ let DecoderMethod = "decodeUImmOperand<8>"; ++} ++def u8immHex : Operand{ ++ let DecoderMethod = "decodeUImmOperand<8>"; ++ let PrintMethod = "printHexImm"; ++} ++def s8imm : Operand{ ++ let DecoderMethod = "decodeSImmOperand<8>"; ++} ++def s13imm : Operand{ ++ let DecoderMethod = "decodeSImmOperand<13>"; ++} ++def s12imm : Operand{ ++ let DecoderMethod = "decodeSImmOperand<12>"; ++} ++def s14imm : Operand{ ++ let DecoderMethod = "decodeSImmOperand<14>"; ++} ++def s16imm : Operand{ ++ let DecoderMethod = "decodeSImmOperand<16>"; ++ let OperandType = "OPERAND_PCREL"; ++} ++def s21imm : Operand{ ++ let DecoderMethod = "decodeSImmOperand<21>"; ++ let OperandType = "OPERAND_PCREL"; ++} ++def u26imm : Operand{ ++ let DecoderMethod = "decodeSImmOperand<26>"; ++} ++def s64imm : Operand{ ++ let DecoderMethod = "decodeSImmOperand<64>"; ++ let PrintMethod = "printMemoryArg"; ++} ++def u64imm : Operand{ ++ let DecoderMethod = "decodeSImmOperand<64>"; ++} ++ ++//===----------------------------------------------------------------------===// ++// Instruction format superclass ++//===----------------------------------------------------------------------===// ++// Sw64 instruction baseline ++class InstSw64 op, string opstr, string operands> : Instruction { ++ field bits<32> Inst; ++ let Namespace = "Sw64"; ++ let Inst{31-26} = op; ++ ++ let AsmString = opstr # " " # operands; ++ // Add Size: Number of bytes in encoding ++ let Size = 4; ++ // SoftFail is a field the disassembler can use to provide a way for ++ // instructions to not match without killing the whole decode process. It is ++ // mainly used for ARM, but Tablegen expects this field to exist or it fails ++ // to build the decode table. 
++ field bits<32> SoftFail = 0; ++} ++ ++//Chapter2.6.1 ++// sys_call |31 26|25 0| ++// | Opcode | Func | ++class PALForm opcode, dag iops, dag oops, ++ string opstr, string operands> ++ : InstSw64 { ++ let OutOperandList = oops; ++ let InOperandList = iops; ++ bits<26> disp; ++ ++ let Inst{25-0} = disp; ++} ++ ++// Branching beq/bge/bgt Chapter2.6.2 ++// COND_BRANCH |31 26|25 21|20 0| ++// | Opcode | RA/Fa | disp | ++ ++def JmpTargetAsmOperand : AsmOperandClass { ++ let Name = "JmpImm"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isImm"; ++ let ParserMethod = "parseJmpImm"; ++} ++ ++def target : Operand { ++ let ParserMatchClass = JmpTargetAsmOperand; ++ let EncoderMethod = "getBranchTargetOpValue"; ++ let DecoderMethod = "decodeSImmOperand<21>"; ++ let OperandType = "OPERAND_PCREL"; ++ let MCOperandPredicate = [{ ++ int64_t Imm; ++ if (MCOp.evaluateAsConstantImm(Imm)) ++ return isShiftedInt<22, 2>(Imm); ++ return MCOp.isBareSymbolRef(); ++ }]; ++} ++ ++class BForm opcode, dag iops, dag oops, ++ string opstr, string operands, list pattern=[]> ++ : InstSw64 { ++ let Pattern = pattern; ++ let OutOperandList = oops; ++ let InOperandList = iops; ++ bits<64> Opc; //dummy ++ bits<5> RA; ++ bits<21> disp; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-0} = disp; ++} ++ ++// LDL/LDW Chapter2.6.3 ++// Memory |31 26|25 21|20 16|15 0| ++// | Opcode | RA/Fa | RB | disp | ++class MForm opcode, dag iops, dag oops, ++ string opstr, string operands="", list pattern=[]> ++ : InstSw64 { ++ let Pattern = pattern; ++ let OutOperandList = oops; ++ let InOperandList = iops; ++ ++ bits<5> RA; ++ bits<16> DISP; ++ bits<5> RB; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-0} = DISP; ++} ++ ++class MfcForm opcode, bits<16> Func, dag iops, dag oops, ++ string opstr, string operands="", list pattern=[]> ++ : InstSw64 { ++ bits<16> Function=Func; ++ bits<5> RA; ++ bits<5> RB; ++ ++ let OutOperandList = oops; ++ let InOperandList = iops; ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-0} = Function; ++} ++ ++ ++// New Add, for atomic-op ++// Memory + Func |31 26|25 21|20 16|15 12|11 0| ++// | Opcode | RA/Fa | RB | Func | disp | ++class MFuncForm opcode, bits<4> func, dag iops, dag oops, ++ string opstr, string operands="", list pattern=[]> ++ : InstSw64 { ++ let Pattern = pattern; ++ let InOperandList = iops; ++ let OutOperandList = oops; ++ ++ bits<5> RA; ++ bits<12> disp; ++ bits<5> RB; ++ bits<4> Function = func; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-12} = Function; ++ let Inst{11-0} = disp; ++} ++ ++// New Add, for privilege inst ++// Memory + Func |31 26|25 21|20 16|15 12|11 0| ++// | Opcode | TH | RB | Func | disp | ++class MPrvlForm opcode, bits<4> func, dag iops, dag oops, ++ string opstr, string operands="", list pattern=[]> ++ : InstSw64 { ++ let Pattern = pattern; ++ let InOperandList = iops; ++ let OutOperandList = oops; ++ ++ bits<5> TH; ++ bits<12> disp; ++ bits<5> RB; ++ bits<4> Function = func; ++ ++ let Inst{25-21} = TH; ++ let Inst{20-16} = RB; ++ let Inst{15-12} = Function; ++ let Inst{11-0} = disp; ++} ++ ++// Chapter2.6.4 ++// simple_operation_form |31 26|25 21|20 16|15 13|12 5|4 0| ++// r + r : | Opcode | RA | RB | SBZ | Func | RC | ++class OForm opcode, bits<8> fun, dag iops, dag oops, ++ string opstr, string operands, list pattern=[]> ++ : InstSw64 { ++ let Pattern = pattern; ++ let OutOperandList = oops; ++ let InOperandList = iops; ++ ++ bits<5> RC; ++ bits<5> RA; ++ bits<5> RB; ++ bits<8> Function = fun; 
++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-13} = 0; ++ let Inst{12-5} = Function; ++ let Inst{4-0} = RC; ++} ++ ++ ++// Chapter2.6.4 ++// simple_operation_form |31 26|25 21|20 13|12 5|4 0| ++// r + i : | Opcode | RA | imm | Func | RC | ++class OFormL opcode, bits<8> fun, dag iops, dag oops, ++ string opstr, string operands, list pattern=[]> ++ : InstSw64 { ++ let Pattern = pattern; ++ let OutOperandList = oops; ++ let InOperandList = iops; ++ ++ bits<5> RC; ++ bits<5> RA; ++ bits<8> L; ++ bits<8> Function = fun; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-13} = L; ++ let Inst{12-5} = Function; ++ let Inst{4-0} = RC; ++} ++ ++// Chapter2.6.4 ++// simple_operation_form |31 26|25 13|12 5|4 0| ++// r + i : | Opcode | imm | Func | RC | ++class OFormI opcode, bits<8> fun, dag iops, dag oops, ++ string opstr, string operands, list pattern=[]> ++ : InstSw64 { ++ let Pattern = pattern; ++ let OutOperandList = oops; ++ let InOperandList = iops; ++ ++ bits<5> RC; ++ bits<13> L; ++ bits<8> Function = fun; ++ ++ let Inst{25-13} = L; ++ let Inst{12-5} = Function; ++ let Inst{4-0} = RC; ++} ++ ++ ++ ++// seleq/selne... Chapter2.6.5(1) ++// int_complex_operation_form |31 26|25 21|20 16|15 13|12 10|9 5|4 0| ++// r + r : | Opcode | RA | RB | SBZ | Func | RC | RD | ++class OForm4 opcode, bits<3> fun, dag iops, dag oops, ++ string opstr, string operands, list pattern=[]> ++ : InstSw64 { ++ let Pattern = pattern; ++ let OutOperandList = oops; ++ let InOperandList = iops; ++ ++ bits<5> RD; ++ bits<5> RA; ++ bits<5> RB; ++ bits<5> RC; ++ bits<3> Function = fun; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-13} = 0; ++ let Inst{12-10} = Function; ++ let Inst{9-5} = RC; ++ let Inst{4-0} = RD; ++} ++ ++// seleq/selne... Chapter2.6.5(2) ++// int_complex_operation_form |31 26|25 21|20 13|12 10|9 5|4 0| ++// r + i : | Opcode | RA | imm | Func | RC | RD | ++class OForm4L opcode, bits<3> fun, dag iops, dag oops, ++ string opstr, string operands, list pattern=[]> ++ : InstSw64 { ++ let Pattern = pattern; ++ let OutOperandList = oops; ++ let InOperandList = iops; ++ ++ bits<5> RD; ++ bits<5> RA; ++ bits<8> L; ++ bits<3> Function = fun; ++ bits<5> RC; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-13} = L; ++ let Inst{12-10} = Function; ++ let Inst{9-5} = RC; ++ let Inst{4-0} = RD; ++} ++ ++// fadds/faddd... 
Chapter2.6.4 ++// simple_operation_form |31 26|25 21|20 16|15 13|12 5|4 0| ++// r + r : | Opcode | Fa | Fb | SBZ | Func | Fc | ++class FPForm opcode, bits<8> fun, dag iops, dag oops, ++ string opstr, string operands, list pattern=[]> ++ : InstSw64 { ++ let Pattern = pattern; ++ let InOperandList = iops; ++ let OutOperandList = oops; ++ ++ bits<5> RC; ++ bits<5> RA; ++ bits<5> RB; ++ bits<8> Function = fun; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-13} = 0; ++ let Inst{12-5} = Function; ++ let Inst{4-0} = RC; ++} ++ ++class FPForm1 opcode, bits<8> fun, dag iops, dag oops, ++ string opstr, string operands, list pattern=[]> ++ : InstSw64 { ++ let Pattern = pattern; ++ let InOperandList = iops; ++ let OutOperandList = oops; ++ ++ bits<5> RC; ++ bits<5> RA; ++ bits<5> RB; ++ bits<8> Function = fun; ++ ++ let Inst{25-21} = RA; ++ let Inst{15-13} = 0; ++ let Inst{12-5} = Function; ++ let Inst{4-0} = RC; ++} ++ ++// New add fselXX Chapter2.6.5(3) ++// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| ++// r + r : | Opcode | Fa | Fb | Func | Fc | Fd | ++class FForm4 opcode, bits<6> func, dag iops, dag oops, ++ string opstr, string operands, list pattern=[]> ++ : InstSw64 { ++ let Pattern = pattern; ++ let OutOperandList = oops; ++ let InOperandList = iops; ++ ++ bits<5> RD; ++ bits<5> RC; ++ bits<5> RB; ++ bits<5> RA; ++ bits<6> Function = func; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-10} = Function; ++ let Inst{9-5} = RC; ++ let Inst{4-0} = RD; ++} ++ ++// New add fselXX Chapter2.6.5(4) ++// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| ++// r + i : | Opcode | Fa | Fb | Func | imm | Fd | ++class FForm4L opcode, bits<6> func, dag iops, dag oops, ++ string opstr, string operands, list pattern=[]> ++ : InstSw64 { ++ let Pattern = pattern; ++ let OutOperandList = oops; ++ let InOperandList = iops; ++ let Constraints = "$RFALSE = $RDEST"; ++ let DisableEncoding = "$RFALSE"; ++ ++ bits<5> RA; ++ bits<5> RB; ++ bits<5> LIT; ++ bits<5> RD; ++ bits<6> Function = func; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-10} = Function; ++ let Inst{9-5} = LIT; ++ let Inst{4-0} = RD; ++} ++ ++// New add CSRXX Chapter4.9.2 ++// fp_complex_operation_form |31 26|25 21|20 16|15 8|7 0| ++// : | Opcode | Ra | Rb | Func | Index | ++class CSRForm opcode, bits<8> func, dag iops, dag oops, ++ string opstr, string operands, list pattern=[]> ++ : InstSw64 { ++ let Pattern = pattern; ++ let OutOperandList = oops; ++ let InOperandList = iops; ++ ++ bits<5> RB; ++ bits<5> RA; ++ bits<8> Function = func; ++ bits<8> L; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = 0x1f; ++ let Inst{15-8} = Function; ++ let Inst{7-0} = L; ++} ++ ++// New add FCVTSH Chapter 4.6.3.3 ++// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| ++// r + i : | Opcode | Fa | Fb | Func | imm | Fd | ++class FCForm4L opcode, bits<6> func, dag iops, dag oops, ++ string opstr, string operands, list pattern=[]> ++ : InstSw64 { ++ let Pattern = pattern; ++ let OutOperandList = oops; ++ let InOperandList = iops; ++ ++ bits<5> RA; ++ bits<5> RB; ++ bits<5> LIT; ++ bits<5> RD; ++ bits<6> Function = func; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-10} = Function; ++ let Inst{9-5} = LIT; ++ let Inst{4-0} = RD; ++} ++ ++ ++// Pseudo instructions. 
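++// These carry no encoding of their own: PseudoInstSw64 below marks them
++// isCodeGenOnly, and each pseudo is rewritten into real Sw64 instructions
++// later, typically during pseudo expansion or MC lowering.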
++class PseudoInstSw64 pattern> ++ : InstSw64<0, opstr, ""> { ++ let OutOperandList = oops; ++ let InOperandList = iops; ++ let Pattern = pattern; ++ let isCodeGenOnly = 1; ++} +diff --git a/llvm/lib/Target/Sw64/Sw64InstrFormatsV.td b/llvm/lib/Target/Sw64/Sw64InstrFormatsV.td +new file mode 100644 +index 000000000..19c8fa114 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64InstrFormatsV.td +@@ -0,0 +1,400 @@ ++//===- Sw64InstrFormats.td - Sw64 Instruction Formats ----*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// SIMD Instruction format superclass ++//===----------------------------------------------------------------------===// ++ ++ ++class SIMDPseudo pattern>: ++ PseudoInstSw64 { ++// let EncodingPredicates = [HasStdEnc]; ++// let ASEPredicate = [HasSIMD]; ++} ++ ++ ++class InstSw64V op> : Instruction { ++ field bits<32> Inst; ++ let Namespace = "Sw64"; ++ let Inst{31-26} = op; ++ let Size = 4; ++ field bits<32> SoftFail = 0; ++} ++ ++class InstSw64VLog op> : Instruction { ++ field bits<32> Inst; ++ let Namespace = "Sw64"; ++ let Inst{31-28} = op; ++ let Size = 4; ++ field bits<32> SoftFail = 0; ++} ++ ++// VLDD/VLDW Chapter2.6.3 ++// Memory |31 26|25 21|20 16|15 0| ++// | Opcode | RA/RA | RB | disp | ++class MFormV opcode> : InstSw64V { ++ ++ bits<5> RA; ++ bits<21> addr; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = addr{20-16}; ++ let Inst{15-0} = addr{15-0}; ++} ++ ++// Memory + Func |31 26|25 21|20 16|15 12|11 0| ++// | Opcode | RA/RA | RB | Func | disp | ++class MFuncFormV opcode, bits<4> func> : InstSw64V { ++ ++ bits<5> RA; ++ bits<21> addr; ++ bits<4> Function = func; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = addr{20-16}; ++ let Inst{15-12} = Function; ++ let Inst{11-0} = addr{11-0}; ++} ++ ++// fadds/faddd... 
Chapter2.6.4 ++// simple_operation_form |31 26|25 21|20 16|15 13|12 5|4 0| ++// r + r : | Opcode | RA | RB | SBZ | Func | RC | ++class FPFormV opcode, bits<8> fun> : InstSw64V { ++ ++ bits<5> RC; ++ bits<5> RA; ++ bits<5> RB; ++ bits<8> Function = fun; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-13} = 0; ++ let Inst{12-5} = Function; ++ let Inst{4-0} = RC; ++} ++ ++class FPFormV_2RV opcode, bits<8> fun> : InstSw64V { ++ ++ bits<5> RC; ++ bits<5> RA; ++ bits<5> RB; ++ bits<8> Function = fun; ++ ++ let Inst{25-21} = RA; ++ let Inst{15-13} = 0; ++ let Inst{12-5} = Function; ++ let Inst{4-0} = RC; ++} ++ ++class FPFormV_2RV1 opcode, bits<8> fun> : InstSw64V { ++ ++ bits<5> RC; ++ bits<5> RA; ++ bits<5> RB; ++ bits<8> Function = fun; ++ ++ let Inst{20-16} = RB; ++ let Inst{15-13} = 0; ++ let Inst{12-5} = Function; ++ let Inst{4-0} = RC; ++} ++ ++class FPFormV_CT opcode, bits<8> fun> : InstSw64V { ++ ++ bits<5> RC; ++ bits<5> RA; ++ bits<5> RB; ++ bits<8> Function = fun; ++ ++ let Inst{25-21} = RA; ++ let Inst{15-13} = 0; ++ let Inst{12-5} = Function; ++ let Inst{4-0} = RC; ++} ++ ++ ++// Chapter2.6.4 ++// simple_operation_form |31 26|25 21|20 13|12 5|4 0| ++// r + i : | Opcode | RA | imm | Func | RC | ++class FPFormIV opcode, bits<8> fun> : InstSw64V { ++ ++ bits<5> RC; ++ bits<5> RA; ++ bits<8> Imm; ++ bits<8> Function = fun; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-13} = Imm; ++ let Inst{12-11} = Function{7-6}; ++ let Inst{10} = 1; ++ let Inst{9-5} = Function{4-0}; ++ let Inst{4-0} = RC; ++} ++ ++ ++// New add fselXX Chapter2.6.5(3) ++// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| ++// r + r : | Opcode | Va | Vb | Func | Vc | Vd | ++class FForm4V opcode, bits<6> func> : InstSw64V { ++ bits<5> RD; ++ bits<5> RA; ++ bits<5> RB; ++ bits<5> RC; ++ bits<6> Function = func; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-10} = Function; ++ let Inst{9-5} = RC; ++ let Inst{4-0} = RD; ++} ++ ++class FForm4VINSECTL opcode, bits<6> func> : InstSw64V { ++ bits<5> RD; ++ bits<5> RA; ++ bits<5> RB; ++ bits<5> RC; ++ bits<6> Function = func; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-10} = Function; ++ let Inst{9-5} = 31; ++} ++ ++class FForm4VCPY opcode, bits<6> func> : InstSw64V { ++ bits<5> RD; ++ bits<5> RA; ++ bits<5> RB; ++ bits<5> RC; ++ bits<6> Function = func; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = 31; ++ let Inst{15-10} = Function; ++ let Inst{9-5} = 31; ++ let Inst{4-0} = RD; ++} ++ ++// vcpyw/vcpys ++class FForm2V opcode, bits<6> func> : InstSw64V { ++ bits<5> RD; ++ bits<5> RA; ++ bits<5> RB; ++ bits<5> RC; ++ bits<6> Function = func; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = 0; ++ let Inst{15-10} = Function; ++ let Inst{9-5} = 0; ++ let Inst{4-0} = RC; ++} ++ ++ ++// New add fselXX Chapter2.6.5(4) ++// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| ++// r + i : | Opcode | Va | Vb | Func | imm | Vd | ++class FForm4LV opcode, bits<6> func> : InstSw64V { ++ ++// let Constraints = "$RFALSE = $RDEST"; ++// let DisableEncoding = "$RFALSE"; ++ ++ bits<5> RD; ++ bits<5> RA; ++ bits<5> RB; ++ bits<5> Imm; ++ bits<6> Function = func; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-10} = Function; ++ let Inst{9-5} = Imm; ++ let Inst{4-0} = RD; ++} ++ ++class FForm4LV1 opcode, bits<6> func> : InstSw64V { ++ ++// let Constraints = "$RFALSE = $RDEST"; ++// let DisableEncoding = "$RFALSE"; ++ ++ bits<5> RD; ++ bits<5> RA; ++ bits<5> RB; ++ bits<5> Imm; ++ bits<6> Function = 
func; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = 31; ++ let Inst{15-10} = Function; ++ let Inst{9-5} = Imm; ++ let Inst{4-0} = RD; ++} ++ ++class FForm4LV2 opcode, bits<6> func> : InstSw64V { ++ ++// let Constraints = "$RFALSE = $RDEST"; ++// let DisableEncoding = "$RFALSE"; ++ ++ bits<5> RD; ++ bits<5> RA; ++ bits<5> RB; ++ bits<5> RC; ++ bits<6> Function = func; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-10} = Function; ++ let Inst{9-5} = RC; ++ let Inst{4-0} = RD; ++} ++// New add vext Fix the RD to RC ++// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| ++// r + i : | Opcode | Va | Vb | Func | imm | Vd | ++class FForm4LVV opcode, bits<6> func> : InstSw64V { ++ ++ bits<5> RC; ++ bits<5> RA; ++ bits<5> RB; ++ bits<5> Imm; ++ bits<6> Function = func; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = 0; ++ let Inst{15-10} = Function; ++ let Inst{9-5} = Imm; ++ let Inst{4-0} = RC; ++} ++ ++// New add vlogxx ++// vlogxx: |31 28|27 26|25 21|20 16|15 10|9 5|4 0| ++// | Opcode | zz[7:6] | Va | Vb | zz[5:0] | Vc | Vd | ++class FForm_VANDW opcode> : InstSw64V { ++ bits<5> RD; ++ bits<5> RA; ++ bits<5> RB; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-10} = 0; ++ let Inst{9-5} = 31; ++ let Inst{4-0} = RD; ++} ++ ++class FForm_VBICW opcode> : InstSw64V { ++ bits<5> RD; ++ bits<5> RA; ++ bits<5> RB; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-10} = 0x30; ++ let Inst{9-5} = 31; ++ let Inst{4-0} = RD; ++} ++ ++class FForm_VBISW opcode> : InstSw64V { ++ bits<5> RD; ++ bits<5> RA; ++ bits<5> RB; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-10} = 0x3c; ++ let Inst{9-5} = 31; ++ let Inst{4-0} = RD; ++} ++ ++class FForm_VXORW opcode> : InstSw64V { ++ bits<5> RD; ++ bits<5> RA; ++ bits<5> RB; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-10} = 0x3c; ++ let Inst{9-5} = 31; ++ let Inst{4-0} = RD; ++} ++ ++class FForm_VEQVW opcode> : InstSw64V { ++ bits<5> RD; ++ bits<5> RA; ++ bits<5> RB; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-10} = 0x03; ++ let Inst{9-5} = 31; ++ let Inst{4-0} = RD; ++} ++ ++class FForm_VORNOTW opcode> : InstSw64V { ++ bits<5> RD; ++ bits<5> RA; ++ bits<5> RB; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-10} = 0x33; ++ let Inst{9-5} = 31; ++ let Inst{4-0} = RD; ++} ++ ++ ++class FForm4LVLogZZ opcode> : InstSw64VLog { ++ bits<5> RD; ++ bits<5> RA; ++ bits<5> RB; ++ bits<5> RC; ++ bits<8> Imm; ++ ++ let Inst{27-26} = Imm{7-6}; ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-10} = Imm{5-0}; ++ let Inst{9-5} = RC; ++ let Inst{4-0} = RD; ++} ++ ++class FForm4LVLog opcode, bits<8> zz> : InstSw64VLog { ++ bits<5> RC; ++ bits<5> RA; ++ bits<5> RB; ++ ++ let Inst{27-26} = zz{7-6}; ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15-10} = zz{5-0}; ++ let Inst{9-5} = 31; ++ let Inst{4-0} = RC; ++} ++ ++// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| ++// r + i : | Opcode | Va | Vb | Func | imm | Vd | ++class FForm4_VSELi opcode, bits<6> func> : InstSw64V { ++ ++ bits<5> RD; ++ bits<5> RA; ++ bits<5> RB; ++ bits<5> Imm; ++ bits<6> Function = func; ++ ++ let Inst{25-21} = RA; ++ let Inst{20-16} = RB; ++ let Inst{15} = 1; ++ let Inst{14-10} = Function{4-0}; ++ let Inst{9-5} = Imm; ++ let Inst{4-0} = RD; ++} ++ ++class VectorIndex : Operand, ImmLeaf; ++ ++def VectorIndexB : VectorIndex; ++def VectorIndexH : VectorIndex; ++def VectorIndexS : VectorIndex; ++def VectorIndexD : 
VectorIndex; +diff --git a/llvm/lib/Target/Sw64/Sw64InstrInfo.cpp b/llvm/lib/Target/Sw64/Sw64InstrInfo.cpp +new file mode 100644 +index 000000000..8107c0092 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64InstrInfo.cpp +@@ -0,0 +1,1012 @@ ++//===-- Sw64InstrInfo.cpp - Sw64 Instruction Information ----------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the Sw64 implementation of the TargetInstrInfo class. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "Sw64InstrInfo.h" ++#include "Sw64.h" ++#include "Sw64MachineFunctionInfo.h" ++#include "Sw64OptionRecord.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include "llvm/CodeGen/MachineCombinerPattern.h" ++#include "llvm/CodeGen/MachineConstantPool.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineMemOperand.h" ++#include "llvm/CodeGen/MachineModuleInfo.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/StackMaps.h" ++#include "llvm/CodeGen/TargetRegisterInfo.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/IR/Constants.h" ++#include "llvm/IR/DebugInfoMetadata.h" ++#include "llvm/IR/Function.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCInstBuilder.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "Sw64combinefma" ++ ++#define GET_INSTRINFO_CTOR_DTOR ++#include "Sw64GenInstrInfo.inc" ++ ++// Pin the vtable to this file. ++void Sw64InstrInfo::anchor() {} ++ ++Sw64InstrInfo::Sw64InstrInfo() ++ : Sw64GenInstrInfo(Sw64::ADJUSTSTACKDOWN, Sw64::ADJUSTSTACKUP), RI() {} ++ ++/// isLoadFromStackSlot - If the specified machine instruction is a direct ++/// load from a stack slot, return the virtual or physical register number of ++/// the destination along with the FrameIndex of the loaded stack slot. If ++/// not, return 0. This predicate must return 0 if the instruction has ++/// any side effects other than loading from the stack slot. ++unsigned Sw64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI, ++ int &FrameIndex) const { ++ switch (MI.getOpcode()) { ++ case Sw64::LDL: ++ case Sw64::LDW: ++ case Sw64::LDHU: ++ case Sw64::LDBU: ++ case Sw64::LDS: ++ case Sw64::LDD: ++ if (MI.getOperand(1).isFI()) { ++ FrameIndex = MI.getOperand(1).getIndex(); ++ return MI.getOperand(0).getReg(); ++ } ++ break; ++ } ++ ++ return 0; ++} ++ ++/// isStoreToStackSlot - If the specified machine instruction is a direct ++/// store to a stack slot, return the virtual or physical register number of ++/// the source reg along with the FrameIndex of the loaded stack slot. If ++/// not, return 0. This predicate must return 0 if the instruction has ++/// any side effects other than storing to the stack slot. 
++unsigned Sw64InstrInfo::isStoreToStackSlot(const MachineInstr &MI, ++ int &FrameIndex) const { ++ switch (MI.getOpcode()) { ++ case Sw64::STL: ++ case Sw64::STH: ++ case Sw64::STB: ++ case Sw64::STW: ++ case Sw64::STS: ++ case Sw64::STD: ++ if (MI.getOperand(1).isFI()) { ++ FrameIndex = MI.getOperand(1).getIndex(); ++ return MI.getOperand(0).getReg(); ++ } ++ break; ++ } ++ return 0; ++} ++ ++unsigned Sw64InstrInfo::insertBranch( ++ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ++ ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { ++ assert(TBB && "InsertBranch must not be told to insert a fallthrough"); ++ assert((Cond.size() == 2 || Cond.size() == 0) && ++ "Sw64 branch conditions have two components!"); ++ ++ // Unconditional branch. ++ if (Cond.empty()) { ++ MachineInstr &MI = *BuildMI(&MBB, DL, get(Sw64::PseudoBR)).addMBB(TBB); ++ if (BytesAdded) ++ *BytesAdded += getInstSizeInBytes(MI); ++ return 1; ++ } ++ ++ // Either a one or two-way conditional branch. ++ unsigned Opc = Cond[0].getImm(); ++ MachineInstr &CondMI = *BuildMI(&MBB, DL, get(Opc)).add(Cond[1]).addMBB(TBB); ++ if (BytesAdded) ++ *BytesAdded += getInstSizeInBytes(CondMI); ++ ++ // One-way conditional branch. ++ if (!FBB) ++ return 1; ++ ++ // Two-way conditional branch. ++ MachineInstr &MI = *BuildMI(&MBB, DL, get(Sw64::PseudoBR)).addMBB(FBB); ++ if (BytesAdded) ++ *BytesAdded += getInstSizeInBytes(MI); ++ return 2; ++} ++ ++void Sw64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI, ++ const DebugLoc &DL, MCRegister DestReg, ++ MCRegister SrcReg, bool KillSrc) const { ++ if ((Sw64::F4RCRegClass.contains(DestReg) || ++ Sw64::FPRC_loRegClass.contains(DestReg)) && // for rust and SIMD ++ Sw64::GPRCRegClass.contains(SrcReg)) { ++ BuildMI(MBB, MI, DL, get(Sw64::ITOFS), DestReg) ++ .addReg(SrcReg, getKillRegState(KillSrc)); ++ } else if (Sw64::F4RCRegClass.contains(SrcReg) && // for rust and SIMD ++ Sw64::GPRCRegClass.contains(DestReg)) { ++ BuildMI(MBB, MI, DL, get(Sw64::FTOIS), DestReg) ++ .addReg(SrcReg, getKillRegState(KillSrc)); ++ } else if (Sw64::FPRCRegClass.contains(SrcReg) && // for rust and SIMD ++ Sw64::GPRCRegClass.contains(DestReg)) { ++ BuildMI(MBB, MI, DL, get(Sw64::FTOIT), DestReg) ++ .addReg(SrcReg, getKillRegState(KillSrc)); ++ } else if (Sw64::FPRCRegClass.contains(DestReg) && // for rust and SIMD ++ Sw64::GPRCRegClass.contains(SrcReg)) { ++ BuildMI(MBB, MI, DL, get(Sw64::ITOFT), DestReg) ++ .addReg(SrcReg, getKillRegState(KillSrc)); ++ } else if (Sw64::FPRCRegClass.contains(DestReg) && // for rust and SIMD ++ Sw64::FPRC_loRegClass.contains(SrcReg)) { ++ BuildMI(MBB, MI, DL, get(Sw64::CPYSD), DestReg) ++ .addReg(SrcReg) ++ .addReg(SrcReg, getKillRegState(KillSrc)); ++ } else if (Sw64::FPRCRegClass.contains(SrcReg) && // for rust and SIMD ++ Sw64::FPRC_loRegClass.contains(DestReg)) { ++ BuildMI(MBB, MI, DL, get(Sw64::CPYSD), DestReg) ++ .addReg(SrcReg) ++ .addReg(SrcReg, getKillRegState(KillSrc)); ++ } else if (Sw64::GPRCRegClass.contains(DestReg, SrcReg)) { ++ BuildMI(MBB, MI, DL, get(Sw64::BISr), DestReg) ++ .addReg(SrcReg) ++ .addReg(SrcReg, getKillRegState(KillSrc)); ++ } else if (Sw64::F4RCRegClass.contains(DestReg, SrcReg)) { ++ unsigned int RC = MI->getOperand(1).getReg(); ++ unsigned int Opc = Sw64::CPYSS; ++ for (MachineBasicBlock::iterator MBBI = MI; MBBI != MBB.begin(); --MBBI) { ++ if (MBBI->getOpcode() == Sw64::VLDS || MBBI->getOpcode() == Sw64::VLDD) { ++ unsigned int RD = MBBI->getOperand(0).getReg(); ++ if (RC == RD) ++ Opc = 
Sw64::VCPYS; ++ break; ++ } ++ } ++ BuildMI(MBB, MI, DL, get(Opc), DestReg) ++ .addReg(SrcReg) ++ .addReg(SrcReg, getKillRegState(KillSrc)); ++ } else if (Sw64::F8RCRegClass.contains(DestReg, SrcReg)) { ++ BuildMI(MBB, MI, DL, get(Sw64::CPYSD), DestReg) ++ .addReg(SrcReg) ++ .addReg(SrcReg, getKillRegState(KillSrc)); ++ } else if (Sw64::FPRCRegClass.contains(DestReg, SrcReg)) { ++ BuildMI(MBB, MI, DL, get(Sw64::CPYSD), DestReg) ++ .addReg(SrcReg) ++ .addReg(SrcReg, getKillRegState(KillSrc)); ++ } else if (Sw64::V256LRegClass.contains(DestReg, SrcReg)) { ++ BuildMI(MBB, MI, DL, get(Sw64::VOR), DestReg) ++ .addReg(SrcReg) ++ .addReg(SrcReg, getKillRegState(KillSrc)); ++ } else { ++ llvm_unreachable("Attempt to copy register that is not GPR or FPR"); ++ } ++} ++ ++void Sw64InstrInfo::storeRegToStackSlot( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, ++ bool isKill, int FrameIdx, const TargetRegisterClass *RC, ++ const TargetRegisterInfo *TRI, Register VReg) const { ++ ++ DebugLoc DL; ++ if (MI != MBB.end()) ++ DL = MI->getDebugLoc(); ++ ++ unsigned Opc = 0; ++ ++ if (RC == &Sw64::F4RCRegClass) ++ Opc = Sw64::STS; ++ else if (RC == &Sw64::F8RCRegClass) ++ Opc = Sw64::STD; ++ else if (RC == &Sw64::GPRCRegClass) ++ Opc = Sw64::STL; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::i64) || ++ TRI->isTypeLegalForClass(*RC, MVT::f64)) ++ Opc = Sw64::STD; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::i32) || ++ TRI->isTypeLegalForClass(*RC, MVT::f32)) ++ Opc = Sw64::STS; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v8i32)) ++ Opc = Sw64::VSTD; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v4f32)) ++ Opc = Sw64::VSTS; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v4i64)) ++ Opc = Sw64::VSTD; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v4f64)) ++ Opc = Sw64::VSTD; ++ else ++ llvm_unreachable("Unhandled register class"); ++ ++ BuildMI(MBB, MI, DL, get(Opc)) ++ .addReg(SrcReg, getKillRegState(isKill)) ++ .addFrameIndex(FrameIdx) ++ .addReg(Sw64::R31); ++} ++ ++void Sw64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI, ++ Register DestReg, int FrameIdx, ++ const TargetRegisterClass *RC, ++ const TargetRegisterInfo *TRI, ++ Register VReg) const { ++ DebugLoc DL; ++ if (MI != MBB.end()) ++ DL = MI->getDebugLoc(); ++ ++ unsigned Opc = 0; ++ ++ if (RC == &Sw64::F4RCRegClass) ++ Opc = Sw64::LDS; ++ else if (RC == &Sw64::F8RCRegClass) ++ Opc = Sw64::LDD; ++ else if (RC == &Sw64::GPRCRegClass) ++ Opc = Sw64::LDL; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::i64) || ++ TRI->isTypeLegalForClass(*RC, MVT::f64)) ++ Opc = Sw64::LDD; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::i32) || ++ TRI->isTypeLegalForClass(*RC, MVT::f32)) ++ Opc = Sw64::LDS; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v8i32)) ++ Opc = Sw64::VLDD; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v4f32)) ++ Opc = Sw64::VLDS; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v4i64)) ++ Opc = Sw64::VLDD; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v4f64)) ++ Opc = Sw64::VLDD; ++ else ++ llvm_unreachable("Unhandled register class"); ++ ++ BuildMI(MBB, MI, DL, get(Opc), DestReg) ++ .addFrameIndex(FrameIdx) ++ .addReg(Sw64::R31); ++} ++ ++static unsigned Sw64RevCondCode(unsigned Opcode) { ++ switch (Opcode) { ++ case Sw64::BEQ: ++ return Sw64::BNE; ++ case Sw64::BNE: ++ return Sw64::BEQ; ++ case Sw64::BGE: ++ return Sw64::BLT; ++ case Sw64::BGT: ++ return Sw64::BLE; ++ case Sw64::BLE: ++ return Sw64::BGT; ++ case Sw64::BLT: ++ return Sw64::BGE; ++ case Sw64::BLBC: ++ 
return Sw64::BLBS; ++ case Sw64::BLBS: ++ return Sw64::BLBC; ++ case Sw64::FBEQ: ++ return Sw64::FBNE; ++ case Sw64::FBNE: ++ return Sw64::FBEQ; ++ case Sw64::FBGE: ++ return Sw64::FBLT; ++ case Sw64::FBGT: ++ return Sw64::FBLE; ++ case Sw64::FBLE: ++ return Sw64::FBGT; ++ case Sw64::FBLT: ++ return Sw64::FBGE; ++ default: ++ llvm_unreachable("Unknown opcode"); ++ } ++ return 0; // Not reached ++} ++ ++//===----------------------------------------------------------------------===// ++// Branch Analysis ++//===----------------------------------------------------------------------===// ++// ++ ++static bool isCondOpCode(unsigned Opcode) { ++ switch (Opcode) { ++ default: ++ return false; ++ case Sw64::BEQ: ++ case Sw64::BNE: ++ case Sw64::BGE: ++ case Sw64::BGT: ++ case Sw64::BLE: ++ case Sw64::BLT: ++ case Sw64::BLBC: ++ case Sw64::BLBS: ++ case Sw64::FBEQ: ++ case Sw64::FBNE: ++ case Sw64::FBGE: ++ case Sw64::FBGT: ++ case Sw64::FBLE: ++ case Sw64::FBLT: ++ return true; ++ } ++ return false; // Not reached ++} ++ ++static bool isUnCondOpCode(unsigned Opcode) { return Opcode == Sw64::PseudoBR; } ++ ++static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, ++ SmallVectorImpl &Cond) { ++ ++ Target = LastInst->getOperand(1).getMBB(); ++ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); ++ Cond.push_back(LastInst->getOperand(0)); ++} ++ ++bool Sw64InstrInfo::analyzeBranch(MachineBasicBlock &MBB, ++ MachineBasicBlock *&TBB, ++ MachineBasicBlock *&FBB, ++ SmallVectorImpl &Cond, ++ bool AllowModify) const { ++ // If the block has no terminators, it just falls into the block after it. ++ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); ++ if (I == MBB.end()) ++ return false; ++ ++ if (!isUnpredicatedTerminator(*I)) ++ return false; ++ ++ // Get the last instruction in the block. ++ MachineInstr *LastInst = &*I; ++ unsigned LastOpc = LastInst->getOpcode(); ++ // If there is only one terminator instruction, process it. ++ if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { ++ if (isUnCondOpCode(LastOpc)) { ++ TBB = LastInst->getOperand(0).getMBB(); ++ return false; ++ } else if (isCondOpCode(LastOpc)) { ++ parseCondBranch(LastInst, TBB, Cond); ++ return false; ++ } // Otherwise, don't know what this is. ++ return true; ++ } ++ ++ // Get the instruction before it if it's a terminator. ++ MachineInstr *SecondLastInst = &*I; ++ unsigned SecondLastOpc = SecondLastInst->getOpcode(); ++ ++ // If AllowModify is true and the block ends with two or more unconditional ++ // branches, delete all but the first unconditional branch. ++ if (AllowModify && isUnCondOpCode(LastOpc)) { ++ while (isUnCondOpCode(SecondLastOpc)) { ++ LastInst->eraseFromParent(); ++ LastInst = SecondLastInst; ++ LastOpc = LastInst->getOpcode(); ++ if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { ++ TBB = LastInst->getOperand(0).getMBB(); ++ return false; ++ } else { ++ SecondLastInst = &*I; ++ SecondLastOpc = SecondLastInst->getOpcode(); ++ } ++ } ++ } ++ ++ // If there are three terminators, we don't know what sort of block this is. ++ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I)) ++ return true; ++ ++ if (isCondOpCode(SecondLastOpc) && isUnCondOpCode(LastOpc)) { ++ parseCondBranch(SecondLastInst, TBB, Cond); ++ FBB = LastInst->getOperand(0).getMBB(); ++ return false; ++ } ++ ++ // If the block ends with two Sw64::BRs, handle it. The second one is not ++ // executed, so remove it. 
++ if (isUnCondOpCode(SecondLastOpc) && isUnCondOpCode(LastOpc)) { ++ TBB = SecondLastInst->getOperand(0).getMBB(); ++ I = LastInst; ++ if (AllowModify) ++ I->eraseFromParent(); ++ return false; ++ } ++ ++ // Otherwise, can't handle this. ++ return true; ++} ++ ++unsigned Sw64InstrInfo::removeBranch(MachineBasicBlock &MBB, ++ int *BytesRemoved) const { ++ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); ++ if (I == MBB.end()) ++ return false; ++ ++ if (I->getOpcode() != Sw64::PseudoBR && !isCondOpCode(I->getOpcode())) ++ return 0; ++ ++ // Remove the branch. ++ I->eraseFromParent(); ++ ++ I = MBB.end(); ++ ++ if (I == MBB.begin()) { ++ if (BytesRemoved) ++ *BytesRemoved = 4; ++ return 1; ++ } ++ --I; ++ if (!isCondOpCode(I->getOpcode())) { ++ if (BytesRemoved) ++ *BytesRemoved = 4; ++ return 1; ++ } ++ ++ // Remove the branch. ++ I->eraseFromParent(); ++ if (BytesRemoved) ++ *BytesRemoved = 8; ++ return 2; ++} ++ ++void Sw64InstrInfo::insertNoop(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI) const { ++ DebugLoc DL; ++ BuildMI(MBB, MI, DL, get(Sw64::BISr), Sw64::R31) ++ .addReg(Sw64::R31) ++ .addReg(Sw64::R31); ++} ++ ++bool Sw64InstrInfo::ReverseBranchCondition( ++ SmallVectorImpl &Cond) const { ++ assert(Cond.size() == 2 && "Invalid Sw64 branch opcode!"); ++ Cond[0].setImm(Sw64RevCondCode(Cond[0].getImm())); ++ return false; ++} ++ ++/// getGlobalBaseReg - Return a virtual register initialized with the ++/// the global base register value. Output instructions required to ++/// initialize the register in the function entry block, if necessary. ++/// ++unsigned Sw64InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { ++ Sw64MachineFunctionInfo *Sw64FI = MF->getInfo(); ++ unsigned GlobalBaseReg = Sw64FI->getGlobalBaseReg(*MF); ++ if (GlobalBaseReg != 0) ++ return GlobalBaseReg; ++ ++ // Insert the set of GlobalBaseReg into the first MBB of the function ++ GlobalBaseReg = Sw64::R29; ++ Sw64FI->setGlobalBaseReg(GlobalBaseReg); ++ return GlobalBaseReg; ++} ++ ++/// getGlobalRetAddr - Return a virtual register initialized with the ++/// the global base register value. Output instructions required to ++/// initialize the register in the function entry block, if necessary. ++/// ++unsigned Sw64InstrInfo::getGlobalRetAddr(MachineFunction *MF) const { ++ Sw64MachineFunctionInfo *Sw64FI = MF->getInfo(); ++ unsigned GlobalRetAddr = Sw64FI->getGlobalRetAddr(*MF); ++ if (GlobalRetAddr != 0) ++ return GlobalRetAddr; ++ ++ // Insert the set of GlobalRetAddr into the first MBB of the function ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ GlobalRetAddr = Sw64::R26; ++ RegInfo.addLiveIn(Sw64::R26); ++ Sw64FI->setGlobalRetAddr(GlobalRetAddr); ++ return GlobalRetAddr; ++} ++ ++MachineInstr *Sw64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, ++ int FrameIx, ++ uint64_t Offset, ++ const MDNode *MDPtr, ++ DebugLoc DL) const { ++ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Sw64::DBG_VALUE)) ++ .addFrameIndex(FrameIx) ++ .addImm(0) ++ .addImm(Offset) ++ .addMetadata(MDPtr); ++ return &*MIB; ++} ++ ++// for vector optimize. 
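++// The machine-combiner hooks below (getMachineCombinerPatterns,
++// genAlternativeCodeSequence and genFusedMultiply) rewrite a floating-point
++// or vector multiply that feeds an add/sub into one fused instruction when
++// unsafe FP math or fast FP-op fusion permits it (see
++// isCombineInstrCandidateFP).  Roughly, at the IR level (illustrative only,
++// not a test case):
++//   %i = fmul float %a, %b
++//   %r = fadd float %i, %c
++// which is first selected as MULS + ADDS, becomes a single FMAS whose
++// operands are %a, %b and %c.
++//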
++// Utility routine that checks if \param MO is defined by an ++// \param CombineOpc instruction in the basic block \param MBB ++static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, ++ unsigned CombineOpc) { ++ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); ++ MachineInstr *MI = nullptr; ++ ++ if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) ++ MI = MRI.getUniqueVRegDef(MO.getReg()); ++ ++ LLVM_DEBUG(dbgs() << "is MO reg?" << MO.isReg(); ++ dbgs() << "is Register Virtual?" ++ << Register::isVirtualRegister(MO.getReg())); ++ ++ // And it needs to be in the trace (otherwise, it won't have a depth). ++ if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc) ++ return false; ++ ++ // Must only used by the user we combine with. ++ if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) ++ return false; ++ ++ return true; ++} ++ ++// ++// Is \param MO defined by a floating-point multiply and can be combined? ++static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, ++ unsigned MulOpc) { ++ return canCombine(MBB, MO, MulOpc); ++} ++ ++// TODO: There are many more machine instruction opcodes to match: ++// 1. Other data types (integer, vectors) ++// 2. Other math / logic operations (xor, or) ++// 3. Other forms of the same operation (intrinsics and other variants) ++bool Sw64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst, ++ bool Invert) const { ++ if (Invert) ++ return false; ++ switch (Inst.getOpcode()) { ++ case Sw64::ADDD: ++ case Sw64::ADDS: ++ case Sw64::MULD: ++ case Sw64::MULS: ++ case Sw64::VADDS: ++ case Sw64::VADDD: ++ case Sw64::VMULS: ++ case Sw64::VMULD: ++ return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; ++ default: ++ return false; ++ } ++} ++ ++// FP Opcodes that can be combined with a FMUL ++static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { ++ switch (Inst.getOpcode()) { ++ default: ++ break; ++ case Sw64::ADDS: ++ case Sw64::ADDD: ++ case Sw64::SUBS: ++ case Sw64::SUBD: { ++ TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options; ++ return (Options.UnsafeFPMath || ++ Options.AllowFPOpFusion == FPOpFusion::Fast); ++ } ++ case Sw64::VADDS: ++ case Sw64::VADDD: ++ case Sw64::VSUBS: ++ case Sw64::VSUBD: ++ return true; ++ } ++ return false; ++} ++ ++/// Find instructions that can be turned into madd. 
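++/// For example, an ADDS whose first operand is produced by a MULS yields
++/// FMULADDS_OP1, and one whose second operand is yields FMULADDS_OP2; the
++/// SUBS/SUBD and vector VADD*/VSUB* opcodes are matched the same way.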
++static bool getFMAPatterns(MachineInstr &Root, ++ SmallVectorImpl &Patterns) { ++ ++ if (!isCombineInstrCandidateFP(Root)) ++ return false; ++ ++ MachineBasicBlock &MBB = *Root.getParent(); ++ bool Found = false; ++ ++ switch (Root.getOpcode()) { ++ default: ++ assert(false && "Unsupported FP instruction in combiner\n"); ++ break; ++ case Sw64::ADDS: ++ assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && ++ "FADDS does not have register operands"); ++ if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::MULS)) { ++ Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1); ++ Found = true; ++ } ++ if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::MULS)) { ++ Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2); ++ Found = true; ++ } ++ break; ++ ++ case Sw64::ADDD: ++ if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::MULD)) { ++ Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1); ++ Found = true; ++ } ++ if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::MULD)) { ++ Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2); ++ Found = true; ++ } ++ break; ++ ++ case Sw64::SUBS: ++ if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::MULS)) { ++ Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1); ++ Found = true; ++ } ++ if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::MULS)) { ++ Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2); ++ Found = true; ++ } ++ break; ++ ++ case Sw64::SUBD: ++ if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::MULD)) { ++ Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1); ++ Found = true; ++ } ++ if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::MULD)) { ++ Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2); ++ Found = true; ++ } ++ break; ++ case Sw64::VADDS: ++ assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && ++ "FADDS does not have register operands"); ++ if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::VMULS)) { ++ Patterns.push_back(MachineCombinerPattern::VMULADDS_OP1); ++ Found = true; ++ } ++ if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::VMULS)) { ++ Patterns.push_back(MachineCombinerPattern::VMULADDS_OP2); ++ Found = true; ++ } ++ break; ++ ++ case Sw64::VADDD: ++ if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::VMULD)) { ++ Patterns.push_back(MachineCombinerPattern::VMULADDD_OP1); ++ Found = true; ++ } ++ if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::VMULD)) { ++ Patterns.push_back(MachineCombinerPattern::VMULADDD_OP2); ++ Found = true; ++ } ++ break; ++ ++ case Sw64::VSUBS: ++ if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::VMULS)) { ++ Patterns.push_back(MachineCombinerPattern::VMULSUBS_OP1); ++ Found = true; ++ } ++ if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::VMULS)) { ++ Patterns.push_back(MachineCombinerPattern::VMULSUBS_OP2); ++ Found = true; ++ } ++ break; ++ case Sw64::VSUBD: ++ if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::VMULD)) { ++ Patterns.push_back(MachineCombinerPattern::VMULSUBD_OP1); ++ Found = true; ++ } ++ if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::VMULD)) { ++ Patterns.push_back(MachineCombinerPattern::VMULSUBD_OP2); ++ Found = true; ++ } ++ break; ++ } ++ return Found; ++} ++ ++/// Return true when a code sequence can improve throughput. It ++/// should be called only for instructions in loops. 
++/// \param Pattern - combiner pattern ++bool Sw64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { ++ switch (Pattern) { ++ default: ++ break; ++ case MachineCombinerPattern::FMULADDS_OP1: ++ case MachineCombinerPattern::FMULADDS_OP2: ++ case MachineCombinerPattern::FMULSUBS_OP1: ++ case MachineCombinerPattern::FMULSUBS_OP2: ++ case MachineCombinerPattern::FMULADDD_OP1: ++ case MachineCombinerPattern::FMULADDD_OP2: ++ case MachineCombinerPattern::FMULSUBD_OP1: ++ case MachineCombinerPattern::FMULSUBD_OP2: ++ case MachineCombinerPattern::FNMULSUBS_OP1: ++ case MachineCombinerPattern::FNMULSUBD_OP1: ++ case MachineCombinerPattern::VMULADDS_OP1: ++ case MachineCombinerPattern::VMULADDS_OP2: ++ case MachineCombinerPattern::VMULADDD_OP1: ++ case MachineCombinerPattern::VMULADDD_OP2: ++ case MachineCombinerPattern::VMULSUBS_OP1: ++ case MachineCombinerPattern::VMULSUBS_OP2: ++ case MachineCombinerPattern::VMULSUBD_OP1: ++ case MachineCombinerPattern::VMULSUBD_OP2: ++ return true; ++ } // end switch (Pattern) ++ return false; ++} ++ ++/// Return true when there is potentially a faster code sequence for an ++/// instruction chain ending in \p Root. All potential patterns are listed in ++/// the \p Pattern vector. Pattern should be sorted in priority order since the ++/// pattern evaluator stops checking as soon as it finds a faster sequence. ++bool Sw64InstrInfo::getMachineCombinerPatterns( ++ MachineInstr &Root, SmallVectorImpl &Patterns, ++ bool DoRegPressureReduce) const { ++ // Floating point patterns ++ if (getFMAPatterns(Root, Patterns)) ++ return true; ++ ++ return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, ++ DoRegPressureReduce); ++} ++ ++enum class FMAInstKind { Default, Indexed, Accumulator }; ++/// genFusedMultiply - Generate fused multiply instructions. ++/// This function supports both integer and floating point instructions. ++/// A typical example: ++/// F|MUL I=A,B,0 ++/// F|ADD R,I,C ++/// ==> F|MADD R,A,B,C ++/// \param MF Containing MachineFunction ++/// \param MRI Register information ++/// \param TII Target information ++/// \param Root is the F|ADD instruction ++/// \param [out] InsInstrs is a vector of machine instructions and will ++/// contain the generated madd instruction ++/// \param IdxMulOpd is index of operand in Root that is the result of ++/// the F|MUL. In the example above IdxMulOpd is 1. ++/// \param MaddOpc the opcode fo the f|madd instruction ++/// \param RC Register class of operands ++/// \param kind of fma instruction (addressing mode) to be generated ++/// \param ReplacedAddend is the result register from the instruction ++/// replacing the non-combined operand, if any. ++static MachineInstr * ++genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, ++ const TargetInstrInfo *TII, MachineInstr &Root, ++ SmallVectorImpl &InsInstrs, unsigned IdxMulOpd, ++ unsigned MaddOpc, const TargetRegisterClass *RC, ++ FMAInstKind kind = FMAInstKind::Default, ++ const unsigned *ReplacedAddend = nullptr) { ++ assert(IdxMulOpd == 1 || IdxMulOpd == 2); ++ ++ LLVM_DEBUG(dbgs() << "creating fma insn \n"); ++ LLVM_DEBUG(dbgs() << MaddOpc); ++ LLVM_DEBUG(dbgs() << "\n"); ++ ++ unsigned IdxOtherOpd = IdxMulOpd == 1 ? 
2 : 1; ++ MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); ++ unsigned ResultReg = Root.getOperand(0).getReg(); ++ unsigned SrcReg0 = MUL->getOperand(1).getReg(); ++ bool Src0IsKill = MUL->getOperand(1).isKill(); ++ unsigned SrcReg1 = MUL->getOperand(2).getReg(); ++ bool Src1IsKill = MUL->getOperand(2).isKill(); ++ ++ unsigned SrcReg2; ++ bool Src2IsKill; ++ if (ReplacedAddend) { ++ // If we just generated a new addend, we must be it's only use. ++ SrcReg2 = *ReplacedAddend; ++ Src2IsKill = true; ++ } else { ++ SrcReg2 = Root.getOperand(IdxOtherOpd).getReg(); ++ Src2IsKill = Root.getOperand(IdxOtherOpd).isKill(); ++ } ++ if (Register::isVirtualRegister(ResultReg)) ++ MRI.constrainRegClass(ResultReg, RC); ++ if (Register::isVirtualRegister(SrcReg0)) ++ MRI.constrainRegClass(SrcReg0, RC); ++ if (Register::isVirtualRegister(SrcReg1)) ++ MRI.constrainRegClass(SrcReg1, RC); ++ if (Register::isVirtualRegister(SrcReg2)) ++ MRI.constrainRegClass(SrcReg2, RC); ++ ++ MachineInstrBuilder MIB; ++ if (kind == FMAInstKind::Default) ++ MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) ++ .addReg(SrcReg0, getKillRegState(Src0IsKill)) ++ .addReg(SrcReg1, getKillRegState(Src1IsKill)) ++ .addReg(SrcReg2, getKillRegState(Src2IsKill)); ++ else if (kind == FMAInstKind::Indexed) ++ MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) ++ .addReg(SrcReg2, getKillRegState(Src2IsKill)) ++ .addReg(SrcReg0, getKillRegState(Src0IsKill)) ++ .addReg(SrcReg1, getKillRegState(Src1IsKill)) ++ .addImm(MUL->getOperand(3).getImm()); ++ else if (kind == FMAInstKind::Accumulator) ++ MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) ++ .addReg(SrcReg2, getKillRegState(Src2IsKill)) ++ .addReg(SrcReg0, getKillRegState(Src0IsKill)) ++ .addReg(SrcReg1, getKillRegState(Src1IsKill)); ++ else ++ assert(false && "Invalid FMA instruction kind \n"); ++ // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL) ++ InsInstrs.push_back(MIB); ++ return MUL; ++} ++ ++/// When getMachineCombinerPatterns() finds potential patterns, ++/// this function generates the instructions that could replace the ++/// original code sequence ++void Sw64InstrInfo::genAlternativeCodeSequence( ++ MachineInstr &Root, MachineCombinerPattern Pattern, ++ SmallVectorImpl &InsInstrs, ++ SmallVectorImpl &DelInstrs, ++ DenseMap &InstrIdxForVirtReg) const { ++ ++ LLVM_DEBUG(dbgs() << "combining float instring\n"); ++ MachineBasicBlock &MBB = *Root.getParent(); ++ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); ++ MachineFunction &MF = *MBB.getParent(); ++ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); ++ ++ MachineInstr *MUL; ++ const TargetRegisterClass *RC; ++ unsigned Opc; ++ switch (Pattern) { ++ default: ++ // Reassociate instructions. 
++ TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, ++ DelInstrs, InstrIdxForVirtReg); ++ return; ++ // Floating Point Support ++ case MachineCombinerPattern::FMULADDS_OP1: ++ case MachineCombinerPattern::FMULADDD_OP1: ++ // FMUL I=A,B ++ // FADD R,I,C ++ // ==> FMAx R,A,B,C ++ // --- Create(FMAx); ++ if (Pattern == MachineCombinerPattern::FMULADDS_OP1) { ++ Opc = Sw64::FMAS; ++ RC = &Sw64::F4RCRegClass; ++ } else { ++ Opc = Sw64::FMAD; ++ RC = &Sw64::F8RCRegClass; ++ } ++ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); ++ break; ++ case MachineCombinerPattern::FMULADDS_OP2: ++ case MachineCombinerPattern::FMULADDD_OP2: ++ // FMUL I=A,B ++ // FADD R,C,I ++ // ==> FMAx R,A,B,C ++ // --- Create(FMAx); ++ if (Pattern == MachineCombinerPattern::FMULADDS_OP2) { ++ Opc = Sw64::FMAS; ++ RC = &Sw64::F4RCRegClass; ++ } else { ++ Opc = Sw64::FMAD; ++ RC = &Sw64::F8RCRegClass; ++ } ++ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); ++ break; ++ ++ case MachineCombinerPattern::FMULSUBS_OP1: ++ case MachineCombinerPattern::FMULSUBD_OP1: { ++ // FMUL I=A,B,0 ++ // FSUB R,I,C ++ // ==> FMSx R,A,B,C // = A*B - C ++ // --- Create(FMSx); ++ if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) { ++ Opc = Sw64::FMSS; ++ RC = &Sw64::F4RCRegClass; ++ } else { ++ Opc = Sw64::FMSD; ++ RC = &Sw64::F8RCRegClass; ++ } ++ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); ++ break; ++ } ++ case MachineCombinerPattern::FMULSUBS_OP2: ++ case MachineCombinerPattern::FMULSUBD_OP2: { ++ // FMUL I=A,B,0 ++ // FSUB R,I,C ++ // ==> FNMAx R,A,B,C // = -A*B + C ++ // --- Create(FNMAx); ++ if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) { ++ Opc = Sw64::FNMAS; ++ RC = &Sw64::F4RCRegClass; ++ } else { ++ Opc = Sw64::FNMAD; ++ RC = &Sw64::F8RCRegClass; ++ } ++ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); ++ break; ++ } ++ ++ case MachineCombinerPattern::FNMULSUBS_OP1: ++ case MachineCombinerPattern::FNMULSUBD_OP1: { ++ // FNMUL I=A,B,0 ++ // FSUB R,I,C ++ // ==> FNMSx R,A,B,C // = -A*B - C ++ // --- Create(FNMSx); ++ if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) { ++ Opc = Sw64::FNMSS; ++ RC = &Sw64::F4RCRegClass; ++ } else { ++ Opc = Sw64::FNMSD; ++ RC = &Sw64::F8RCRegClass; ++ } ++ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); ++ break; ++ } ++ ++ case MachineCombinerPattern::VMULADDS_OP1: ++ case MachineCombinerPattern::VMULADDD_OP1: { ++ // VMULx I=A,B ++ // VADDx I,C,R ++ // ==> VMAx A,B,C,R // = A*B+C ++ // --- Create(VMAx); ++ Opc = Pattern == MachineCombinerPattern::VMULADDS_OP1 ? Sw64::VMAS ++ : Sw64::VMAD; ++ RC = &Sw64::V256LRegClass; ++ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); ++ break; ++ } ++ case MachineCombinerPattern::VMULADDS_OP2: ++ case MachineCombinerPattern::VMULADDD_OP2: { ++ // VMUL I=A,B ++ // VADD C,R,I ++ // ==> VMA A,B,C,R (computes C + A*B) ++ // --- Create(FMSUB); ++ Opc = Pattern == MachineCombinerPattern::VMULADDS_OP2 ? Sw64::VMAS ++ : Sw64::VMAD; ++ RC = &Sw64::V256LRegClass; ++ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); ++ break; ++ } ++ ++ case MachineCombinerPattern::VMULSUBS_OP1: ++ case MachineCombinerPattern::VMULSUBD_OP1: { ++ // VMULx I=A,B ++ // VSUBx I,C,R ++ // ==> VMSx A,B,C,R // = A*B-C ++ // --- Create(VMSx); ++ Opc = Pattern == MachineCombinerPattern::VMULSUBS_OP1 ? 
Sw64::VMSS ++ : Sw64::VMSD; ++ RC = &Sw64::V256LRegClass; ++ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); ++ break; ++ } ++ case MachineCombinerPattern::VMULSUBS_OP2: ++ case MachineCombinerPattern::VMULSUBD_OP2: { ++ // FMUL I=A,B,0 ++ // FSUB R,C,I ++ // ==> FMSUB R,A,B,C (computes C - A*B) ++ // --- Create(FMSUB); ++ Opc = Pattern == MachineCombinerPattern::VMULSUBS_OP2 ? Sw64::VNMAS ++ : Sw64::VNMAD; ++ RC = &Sw64::V256LRegClass; ++ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); ++ break; ++ } ++ } // end switch (Pattern) ++ // Record MUL and ADD/SUB for deletion ++ DelInstrs.push_back(MUL); ++ DelInstrs.push_back(&Root); ++} ++ ++bool Sw64InstrInfo::isSchedulingBoundary(const MachineInstr &MI, ++ const MachineBasicBlock *MBB, ++ const MachineFunction &MF) const { ++ if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF)) ++ return true; ++ ++ switch (MI.getOpcode()) { ++ case Sw64::MOVProgPCGp: ++ case Sw64::MOVaddrPCGp: ++ case Sw64::WMEMB: ++ case Sw64::IMEMB: ++ case Sw64::MB: ++ return true; ++ } ++ return false; ++} +diff --git a/llvm/lib/Target/Sw64/Sw64InstrInfo.h b/llvm/lib/Target/Sw64/Sw64InstrInfo.h +new file mode 100644 +index 000000000..69e9bd921 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64InstrInfo.h +@@ -0,0 +1,143 @@ ++//===-- Sw64InstrInfo.h - Sw64 Instruction Information --------*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the Sw64 implementation of the TargetInstrInfo class. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_SW64_SW64INSTRINFO_H ++#define LLVM_LIB_TARGET_SW64_SW64INSTRINFO_H ++ ++#include "Sw64RegisterInfo.h" ++#include "llvm/CodeGen/TargetInstrInfo.h" ++ ++#define GET_INSTRINFO_HEADER ++#include "Sw64GenInstrInfo.inc" ++ ++namespace llvm { ++ ++class Sw64InstrInfo : public Sw64GenInstrInfo { ++ const Sw64RegisterInfo RI; ++ virtual void anchor(); ++ ++public: ++ Sw64InstrInfo(); ++ ++ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As ++ /// such, whenever a client has an instance of instruction info, it should ++ /// always be able to get register info as well (through this method). ++ /// ++ const Sw64RegisterInfo &getRegisterInfo() const { return RI; } ++ ++ /// isLoadFromStackSlot - If the specified machine instruction is a direct ++ /// load from a stack slot, return the virtual or physical register number of ++ /// the destination along with the FrameIndex of the loaded stack slot. If ++ /// not, return 0. This predicate must return 0 if the instruction has ++ /// any side effects other than loading from the stack slot. ++ unsigned isLoadFromStackSlot(const MachineInstr &MI, ++ int &FrameIndex) const override; ++ ++ /// isStoreToStackSlot - If the specified machine instruction is a direct ++ /// store to a stack slot, return the virtual or physical register number of ++ /// the source reg along with the FrameIndex of the loaded stack slot. If ++ /// not, return 0. This predicate must return 0 if the instruction has ++ /// any side effects other than storing to the stack slot. 
++ unsigned isStoreToStackSlot(const MachineInstr &MI, ++ int &FrameIndex) const override; ++ ++ bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, ++ MachineBasicBlock *&FBB, ++ SmallVectorImpl &Cond, ++ bool AllowModify) const override; ++ ++ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, ++ MachineBasicBlock *FBB, ArrayRef Cond, ++ const DebugLoc &DL, ++ int *BytesAdded = nullptr) const override; ++ ++ unsigned removeBranch(MachineBasicBlock &MBB, ++ int *BytesRemoved = nullptr) const override; ++ ++ void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ++ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, ++ bool KillSrc) const override; ++ ++ void storeRegToStackSlot(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI, Register SrcReg, ++ bool isKill, int FrameIndex, ++ const TargetRegisterClass *RC, ++ const TargetRegisterInfo *TRI, ++ Register VReg) const override; ++ ++ void loadRegFromStackSlot(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI, Register DestReg, ++ int FrameIndex, const TargetRegisterClass *RC, ++ const TargetRegisterInfo *TRI, ++ Register VReg) const override; ++ ++ // Emit code before MBBI to load immediate value into physical register Reg. ++ // Returns an iterator to the new instruction. ++ MachineBasicBlock::iterator loadImmediate(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI, ++ unsigned Reg, uint64_t Value) const; ++ void insertNoop(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI) const override; ++ bool ReverseBranchCondition(SmallVectorImpl &Cond) const; ++ ++ /// getGlobalBaseReg - Return a virtual register initialized with the ++ /// the global base register value. Output instructions required to ++ /// initialize the register in the function entry block, if necessary. ++ /// ++ unsigned getGlobalBaseReg(MachineFunction *MF) const; ++ ++ /// getGlobalRetAddr - Return a virtual register initialized with the ++ /// the global return address register value. Output instructions required to ++ /// initialize the register in the function entry block, if necessary. ++ /// ++ unsigned getGlobalRetAddr(MachineFunction *MF) const; ++ ++ bool isSchedulingBoundary(const MachineInstr &MI, ++ const MachineBasicBlock *MBB, ++ const MachineFunction &MF) const override; ++ ++ /// Return true when a code sequence can improve throughput. It ++ /// should be called only for instructions in loops. ++ /// \param Pattern - combiner pattern ++ bool isThroughputPattern(MachineCombinerPattern Pattern) const override; ++ ++ /// Return true when there is potentially a faster code sequence ++ /// for an instruction chain ending in ``Root``. All potential patterns are ++ /// listed in the ``Patterns`` array. ++ bool ++ getMachineCombinerPatterns(MachineInstr &Root, ++ SmallVectorImpl &Patterns, ++ bool DoRegPressureReduce) const override; ++ ++ /// Return true when Inst is associative and commutative so that it can be ++ /// reassociated. ++ bool isAssociativeAndCommutative(const MachineInstr &Inst, ++ bool Invert) const override; ++ ++ /// When getMachineCombinerPatterns() finds patterns, this function generates ++ /// the instructions that could replace the original code sequence ++ void genAlternativeCodeSequence( ++ MachineInstr &Root, MachineCombinerPattern Pattern, ++ SmallVectorImpl &InsInstrs, ++ SmallVectorImpl &DelInstrs, ++ DenseMap &InstrIdxForVirtReg) const override; ++ /// Sw64 supports MachineCombiner. 
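++ /// As a rough illustration (the exact outcome depends on the pattern that
++ /// is matched and on the combiner's profitability estimate), a routine like
++ ///   double madd(double a, double b, double c) { return a * b + c; }
++ /// is intended to select a single FMAD through the FMULADDD_OP1 pattern
++ /// rather than a separate floating multiply and add, with FMAS and the
++ /// VMAS/VMAD forms covering the single-precision and vector cases.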
++ bool useMachineCombiner() const override { return true; } ++ ++ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, ++ int FrameIx, uint64_t Offset, ++ const MDNode *MDPtr, ++ DebugLoc DL) const; ++}; ++} // namespace llvm ++#endif // END LLVM_LIB_TARGET_SW64_SW64INSTRINFO_H +diff --git a/llvm/lib/Target/Sw64/Sw64InstrInfo.td b/llvm/lib/Target/Sw64/Sw64InstrInfo.td +new file mode 100644 +index 000000000..eb65f9bd3 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64InstrInfo.td +@@ -0,0 +1,2096 @@ ++//===- Sw64InstrInfo.td - The Sw64 Instruction Set -------*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// ++//===----------------------------------------------------------------------===// ++ ++include "Sw64InstrFormats.td" ++ ++//===----------------------------------------------------------------------===// ++// Sw64 Instruction Predicate Definitions. ++// ++def EnableIntShift : Predicate<"Subtarget->enableIntShift()">, ++ AssemblerPredicate<(all_of Featureintshift), "swIntShift">; ++ ++def EnableFloatCmov : Predicate<"Subtarget->enableFloatCmov()">; ++ ++def EnableCrcInst : Predicate<"Subtarget->enableCrcInst()">; ++ ++def EnableWmembInst : Predicate<"Subtarget->enableWmembInst()">; ++ ++def EnableCasInst : Predicate<"Subtarget->enableCasInst()">; ++ ++def HasSIMD : Predicate<"Subtarget->hasSIMD()">, ++ AssemblerPredicate<(all_of FeatureSIMD)>; ++//******************** ++//Custom DAG Nodes ++//******************** ++ ++def SDTFPUnaryOpUnC : SDTypeProfile<1, 1, [ ++ SDTCisFP<1>, SDTCisFP<0> ++]>; ++ ++def SDTIntTriOp : SDTypeProfile<1, 3, [ ++ SDTCisInt<1>, SDTCisInt<0>, SDTCisInt<2>, SDTCisInt<3> ++]>; ++ ++def Sw64_cvtqt : SDNode<"Sw64ISD::CVTQT_", SDTFPUnaryOpUnC, []>; ++def Sw64_cvtqs : SDNode<"Sw64ISD::CVTQS_", SDTFPUnaryOpUnC, []>; ++def Sw64_cvttq : SDNode<"Sw64ISD::CVTTQ_" , SDTFPUnaryOp, []>; ++def Sw64_cvtts : SDNode<"Sw64ISD::CVTTS_", SDTFPUnaryOpUnC, ++ [SDNPHasChain]>; ++def Sw64_cvtst : SDNode<"Sw64ISD::CVTST_", SDTFPUnaryOpUnC, ++ [SDNPHasChain]>; ++def Sw64_tprello : SDNode<"Sw64ISD::TPRelLo", SDTIntBinOp, []>; ++def Sw64_tprelhi : SDNode<"Sw64ISD::TPRelHi", SDTIntBinOp, []>; ++ ++def Sw64_tlsgd : SDNode<"Sw64ISD::TLSGD", SDTIntTriOp, []>; ++def Sw64_tlsldm : SDNode<"Sw64ISD::TLSLDM", SDTIntBinOp, []>; ++def Sw64_dtprello : SDNode<"Sw64ISD::DTPRelLo", SDTIntBinOp, []>; ++def Sw64_dtprelhi : SDNode<"Sw64ISD::DTPRelHi", SDTIntBinOp, []>; ++ ++def Sw64_syscall : SDNode<"Sw64ISD::SysCall", SDTIntUnaryOp, []>; ++def Sw64_LDAWithChain : SDNode<"Sw64ISD::LDAWC", SDTIntBinOp, [SDNPHasChain]>; ++def Sw64_gprello : SDNode<"Sw64ISD::GPRelLo", SDTIntUnaryOp>; ++def Sw64_gprelhi : SDNode<"Sw64ISD::GPRelHi", SDTIntUnaryOp>; ++def Sw64_rellit : SDNode<"Sw64ISD::RelLit", SDTIntUnaryOp>; ++ ++def Sw64_gprel : SDNode<"Sw64ISD::GPRel", SDTIntUnaryOp>; ++def Sw64_tprel : SDNode<"Sw64ISD::TPRel", SDTIntUnaryOp>; ++def Sw64_dtprel : SDNode<"Sw64ISD::DTPRel", SDTIntUnaryOp>; ++ ++def Sw64ldih : SDNode<"Sw64ISD::LDIH", SDTIntUnaryOp, []>; ++def Sw64ldi : SDNode<"Sw64ISD::LDI", SDTIntBinOp, [SDNPOutGlue]>; ++ ++def Sw64_relgottp : SDNode<"Sw64ISD::RelGottp", SDTIntBinOp, [SDNPMayLoad]>; ++def retflag : SDNode<"Sw64ISD::RET_FLAG", SDTNone, ++ [SDNPHasChain, SDNPOptInGlue]>; ++ ++// These are target-independent nodes, but have target-specific formats. 
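++// For orientation (illustrative only): every call is bracketed by the two
++// nodes below, so a plain call f(x, y) yields a chain roughly of the form
++//   callseq_start; copy x and y into the argument registers;
++//   Sw64ISD::JmpLink to f; callseq_end
++// and the ADJUSTSTACKDOWN/ADJUSTSTACKUP pseudos defined further down are
++// what these nodes select to, carrying the byte size of the outgoing
++// argument area in their immediate operands.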
++def SDT_Sw64CallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64> ]>; ++def SDT_Sw64CallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i64>, ++ SDTCisVT<1, i64> ]>; ++ ++def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_Sw64CallSeqStart, ++ [SDNPHasChain, SDNPOutGlue]>; ++def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_Sw64CallSeqEnd, ++ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; ++def Sw64_frecs : SDNode<"Sw64ISD::FRECS", SDTFPUnaryOp, []>; ++def Sw64_frecd : SDNode<"Sw64ISD::FRECD", SDTFPUnaryOp, []>; ++def Sw64_sbt : SDNode<"Sw64ISD::SBT", SDTIntBinOp, []>; ++def Sw64_cbt : SDNode<"Sw64ISD::CBT", SDTIntBinOp, []>; ++def Sw64_addpi : SDNode<"Sw64ISD::ADDPI", SDTIntUnaryOp, []>; ++def Sw64_addpis : SDNode<"Sw64ISD::ADDPIS", SDTIntUnaryOp, []>; ++ ++def Sw64_revbh : SDNode<"Sw64ISD::REVBH", SDTIntUnaryOp, []>; ++def Sw64_revbw : SDNode<"Sw64ISD::REVBW", SDTIntUnaryOp, []>; ++ ++def Sw64_rolw : SDNode<"Sw64ISD::ROLW", SDTIntBinOp, []>; ++ ++def Sw64_crc32b : SDNode<"Sw64ISD::CRC32B", SDTIntBinOp, []>; ++def Sw64_crc32h : SDNode<"Sw64ISD::CRC32H", SDTIntBinOp, []>; ++def Sw64_crc32w : SDNode<"Sw64ISD::CRC32W", SDTIntBinOp, []>; ++def Sw64_crc32l : SDNode<"Sw64ISD::CRC32L", SDTIntBinOp, []>; ++def Sw64_crc32cb : SDNode<"Sw64ISD::CRC32CB", SDTIntBinOp, []>; ++def Sw64_crc32ch : SDNode<"Sw64ISD::CRC32CH", SDTIntBinOp, []>; ++def Sw64_crc32cw : SDNode<"Sw64ISD::CRC32CW", SDTIntBinOp, []>; ++def Sw64_crc32cl : SDNode<"Sw64ISD::CRC32CL", SDTIntBinOp, []>; ++ ++def Sw64_casl : SDNode<"Sw64ISD::CASL", SDTIntBinOp, []>; ++def Sw64_casw : SDNode<"Sw64ISD::CASW", SDTIntBinOp, []>; ++ ++let Constraints = "$RD = $RC" in ++class inst_cas opc, bits<8> fun, string opstr> ++ : OForm ; ++ ++def CASW : inst_cas<0x10, 0x5e, "casw">; ++def CASL : inst_cas<0x10, 0x5f, "casl">; ++ ++def : Pat<(atomic_cmp_swap_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp), ++ (CASL GPRC:$cmp, GPRC:$ptr, GPRC:$swp)>, ++ Requires<[EnableCasInst, HasCore4]>; ++ ++def : Pat<(atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp), ++ (CASW GPRC:$cmp, GPRC:$ptr, GPRC:$swp)>, ++ Requires<[EnableCasInst, HasCore4]>; ++ ++def call_symbol : Operand; ++//******************** ++//Paterns for matching ++//******************** ++def invX : SDNodeXFormgetZExtValue(), SDLoc(N)); ++}]>; ++def negX : SDNodeXFormgetZExtValue() + 1, SDLoc(N)); ++}]>; ++def SExt32 : SDNodeXFormgetZExtValue() << 32) >> 32, SDLoc(N)); ++}]>; ++def SExt16 : SDNodeXFormgetZExtValue() << 48) >> 48, SDLoc(N)); ++}]>; ++def LL16 : SDNodeXFormgetZExtValue()), SDLoc(N)); ++}]>; ++def LH16 : SDNodeXFormgetZExtValue()), SDLoc(N)); ++}]>; ++def iZAPX : SDNodeXForm(N->getOperand(1)); ++ return getI64Imm(get_zapImm(SDValue(), RHS->getZExtValue()), SDLoc(N)); ++}]>; ++def nearP2X : SDNodeXFormgetZExtValue())), SDLoc(N)); ++}]>; ++def nearP2RemX : SDNodeXFormgetZExtValue() - getNearPower2((uint64_t)N->getZExtValue())); ++ return getI64Imm(Log2_64(x), SDLoc(N)); ++}]>; ++ ++def immUExt8 : PatLeaf<(imm), [{ //imm fits in 8 bit zero extended field ++ return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue(); ++}]>; ++def immUExt8inv : PatLeaf<(imm), [{ //inverted imm fits in 8 bit zero extended field ++ return (uint64_t)~N->getZExtValue() == (uint8_t)~N->getZExtValue(); ++}], invX>; ++def immUExt8neg : PatLeaf<(imm), [{ //negated imm fits in 8 bit zero extended field ++ return ((uint64_t)~N->getZExtValue() + 1) == ++ (uint8_t)((uint64_t)~N->getZExtValue() + 1); ++}], negX>; ++ ++def immUExt13 : PatLeaf<(imm), [{ ++ return (uint32_t)N->getZExtValue() < (1 << 13); ++}]>; ++ ++def immSExt12 : 
PatLeaf<(imm), [{ //imm fits in 12 bit sign extended field ++ return ((int64_t)N->getZExtValue() << 52) >> 52 == ++ (int64_t)N->getZExtValue(); ++}]>; ++ ++def immSExt16 : PatLeaf<(imm), [{ //imm fits in 16 bit sign extended field ++ return ((int64_t)N->getZExtValue() << 48) >> 48 == ++ (int64_t)N->getZExtValue(); ++}]>; ++ ++def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm), [{ ++ ConstantSDNode *RHS = dyn_cast(N->getOperand(1)); ++ if (!RHS) return 0; ++ uint64_t build = get_zapImm(N->getOperand(0), (uint64_t)RHS->getZExtValue()); ++ return build != 0; ++}]>; ++ ++def immFPZ : PatLeaf<(fpimm), [{ //the only fpconstant nodes are +/- 0.0 ++ (void)N; // silence warning. ++ return true; ++}]>; ++ ++def immRem1 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,0);}]>; ++def immRem2 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,0);}]>; ++def immRem3 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,0);}]>; ++def immRem4 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,0);}]>; ++def immRem5 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,0);}]>; ++def immRem1n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,1);}]>; ++def immRem2n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,1);}]>; ++def immRem3n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,1);}]>; ++def immRem4n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,1);}]>; ++def immRem5n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,1);}]>; ++ ++def immRemP2n : PatLeaf<(imm), [{ ++ return isPowerOf2_64(getNearPower2((uint64_t)N->getZExtValue()) - ++ N->getZExtValue()); ++}]>; ++def immRemP2 : PatLeaf<(imm), [{ ++ return isPowerOf2_64(N->getZExtValue() - ++ getNearPower2((uint64_t)N->getZExtValue())); ++}]>; ++def immUExt8ME : PatLeaf<(imm), [{ //use this imm for mulqi ++ int64_t d = abs64((int64_t)N->getZExtValue() - ++ (int64_t)getNearPower2((uint64_t)N->getZExtValue())); ++ if (isPowerOf2_64(d)) return false; ++ switch (d) { ++ case 1: case 3: case 5: return false; ++ default: return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue(); ++ }; ++}]>; ++ ++def intop : PatFrag<(ops node:$op), (sext_inreg node:$op, i32)>; ++def add4 : PatFrag<(ops node:$op1, node:$op2), ++ (add (shl node:$op1, (i64 2)), node:$op2)>; ++def sub4 : PatFrag<(ops node:$op1, node:$op2), ++ (sub (shl node:$op1, (i64 2)), node:$op2)>; ++def add8 : PatFrag<(ops node:$op1, node:$op2), ++ (add (shl node:$op1, (i64 3)), node:$op2)>; ++def sub8 : PatFrag<(ops node:$op1, node:$op2), ++ (sub (shl node:$op1, (i64 3)), node:$op2)>; ++ ++class ThridOpFrag : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>; ++class BinOpFrag : PatFrag<(ops node:$LHS, node:$RHS), res>; ++class CmpOpFrag : PatFrag<(ops node:$R), res>; ++ ++def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{ ++ return isOrEquivalentToAdd(N); ++}]>; ++def AddrFI : ComplexPattern; ++ ++//Pseudo ops for selection ++ ++def WTF : PseudoInstSw64<(outs), (ins variable_ops), "#wtf", []>, Sched<[]>; ++ ++let hasCtrlDep = 1, Defs = [R30], Uses = [R30] in { ++def ADJUSTSTACKUP : PseudoInstSw64<(outs), (ins s64imm:$amt1, s64imm:$amt2), ++ "; ADJUP $amt1", ++ [(callseq_end timm:$amt1, timm:$amt2)]>, Sched<[]>; ++def ADJUSTSTACKDOWN : PseudoInstSw64<(outs), (ins s64imm:$amt1, s64imm:$amt2), ++ "; ADJDOWN $amt1", ++ [(callseq_start (i64 timm:$amt1), (i64 timm:$amt2))]>, Sched<[]>; ++} ++ ++let isCodeGenOnly = 1 in { ++def ALTENT : PseudoInstSw64<(outs), (ins s64imm:$TARGET), 
"$$${TARGET}..ng:\n", ++ []>, Sched<[]>; ++def PCLABEL : PseudoInstSw64<(outs), (ins s64imm:$num), "PCMARKER_$num:\n",[]>, ++ Sched<[]>; ++def MEMLABEL : PseudoInstSw64<(outs), (ins s64imm:$i, s64imm:$j, s64imm:$k, ++ s64imm:$m), ++ "LSMARKER$$$i$$$j$$$k$$$m:", []>, Sched<[]>; ++} ++let hasNoSchedulingInfo = 1 in { ++let usesCustomInserter = 1 in { // Expanded after instruction selection. ++def ATOMIC_CMP_SWAP_I32 : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic cmpare and swap", ++ [(set GPRC:$dst, (atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))]>; ++ ++def ATOMIC_CMP_SWAP_I64 : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic compare and swap", ++ [(set GPRC:$dst, (atomic_cmp_swap_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))]>; ++ ++ ++def ATOMIC_LOAD_ADD_I32 : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic load add", ++ [(set GPRC:$dst, (atomic_load_add_32 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_ADD_I64 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic load add", ++ [(set GPRC:$dst, (atomic_load_add_64 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_LOAD_UMAX_I64 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 64-bit atomic load umax", ++ [(set GPRC:$dst, (atomic_load_umax_64 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_MAX_I64 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 64-bit atomic load max", ++ [(set GPRC:$dst, (atomic_load_max_64 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_UMIN_I64 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 64-bit atomic load umin", ++ [(set GPRC:$dst, (atomic_load_umin_64 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_MIN_I64 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 64-bit atomic load min", ++ [(set GPRC:$dst, (atomic_load_min_64 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_NAND_I64 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 64-bit atomic load nand", ++ [(set GPRC:$dst, (atomic_load_nand_64 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_LOAD_UMAX_I32 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 32-bit atomic load umax", ++ [(set GPRC:$dst, (atomic_load_umax_32 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_MAX_I32 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 32-bit atomic load max", ++ [(set GPRC:$dst, (atomic_load_max_32 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_UMIN_I32 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 32-bit atomic load umin", ++ [(set GPRC:$dst, (atomic_load_umin_32 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_MIN_I32 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 32-bit atomic load min", ++ [(set GPRC:$dst, (atomic_load_min_32 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_NAND_I32 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 32-bit atomic load nand", ++ [(set GPRC:$dst, (atomic_load_nand_32 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_LOAD_UMAX_I16 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 16-bit atomic load umax", ++ [(set GPRC:$dst, (atomic_load_umax_16 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_MAX_I16 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 16-bit atomic load max", ++ [(set GPRC:$dst, (atomic_load_max_16 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_UMIN_I16 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 16-bit atomic load 
umin", ++ [(set GPRC:$dst, (atomic_load_umin_16 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_MIN_I16 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 16-bit atomic load min", ++ [(set GPRC:$dst, (atomic_load_min_16 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_NAND_I16 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 16-bit atomic load nand", ++ [(set GPRC:$dst, (atomic_load_nand_16 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_LOAD_UMAX_I8 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 8-bit atomic load umax", ++ [(set GPRC:$dst, (atomic_load_umax_8 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_MAX_I8 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 8-bit atomic load max", ++ [(set GPRC:$dst, (atomic_load_max_8 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_UMIN_I8 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 8-bit atomic load umin", ++ [(set GPRC:$dst, (atomic_load_umin_8 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_MIN_I8 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 8-bit atomic load min", ++ [(set GPRC:$dst, (atomic_load_min_8 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_NAND_I8 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp),"# 8-bit atomic load nand", ++ [(set GPRC:$dst, (atomic_load_nand_8 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_SWAP_I32 : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic swap", ++ [(set GPRC:$dst, (atomic_swap_32 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_SWAP_I64 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic swap", ++ [(set GPRC:$dst, (atomic_swap_64 GPRC:$ptr, GPRC:$swp))]>; ++ ++ ++def ATOMIC_LOAD_AND_I32 : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic load and", ++ [(set GPRC:$dst, (atomic_load_and_32 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_AND_I64 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic load and", ++ [(set GPRC:$dst, (atomic_load_and_64 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_LOAD_OR_I32 : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic load or", ++ [(set GPRC:$dst, (atomic_load_or_32 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_OR_I64 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic load and", ++ [(set GPRC:$dst, (atomic_load_or_64 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_LOAD_SUB_I32 : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic load sub", ++ [(set GPRC:$dst, (atomic_load_sub_32 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_SUB_I64 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic load sub", ++ [(set GPRC:$dst, (atomic_load_sub_64 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_LOAD_XOR_I32 : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic load xor", ++ [(set GPRC:$dst, (atomic_load_xor_32 GPRC:$ptr, GPRC:$swp))]>; ++def ATOMIC_LOAD_XOR_I64 :PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic load xor", ++ [(set GPRC:$dst, (atomic_load_xor_64 GPRC:$ptr, GPRC:$swp))]>; ++ ++ ++//I8 ++def ATOMIC_LOAD_ADD_I8: PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic load add", ++ [(set GPRC:$dst, (atomic_load_add_8 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_SWAP_I8: PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic swap", ++ [(set GPRC:$dst, (atomic_swap_8 
GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_LOAD_AND_I8: PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic load and", ++ [(set GPRC:$dst, (atomic_load_and_8 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_LOAD_OR_I8: PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic load or", ++ [(set GPRC:$dst, (atomic_load_or_8 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_LOAD_SUB_I8: PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic load sub", ++ [(set GPRC:$dst, (atomic_load_sub_8 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_LOAD_XOR_I8: PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic load xor", ++ [(set GPRC:$dst, (atomic_load_xor_8 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_CMP_SWAP_I8 : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic compare and swap", ++ [(set GPRC:$dst, (atomic_cmp_swap_8 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))]>; ++ ++//I16 ++def ATOMIC_LOAD_ADD_I16: PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic load add", ++ [(set GPRC:$dst, (atomic_load_add_16 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_SWAP_I16: PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic swap", ++ [(set GPRC:$dst, (atomic_swap_16 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_LOAD_AND_I16: PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic and", ++ [(set GPRC:$dst, (atomic_load_and_16 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_LOAD_OR_I16: PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic load or", ++ [(set GPRC:$dst, (atomic_load_or_16 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_LOAD_SUB_I16: PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic load and sub", ++ [(set GPRC:$dst, (atomic_load_sub_16 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_LOAD_XOR_I16: PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic xor", ++ [(set GPRC:$dst, (atomic_load_xor_16 GPRC:$ptr, GPRC:$swp))]>; ++ ++def ATOMIC_CMP_SWAP_I16 : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic compare and swap", ++ [(set GPRC:$dst, (atomic_cmp_swap_16 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))]>; ++ ++ ++def CAS32 : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic compare and swap", ++ [(set GPRC:$dst, (atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))]>; ++def CAS64 : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic compare and swap", ++ [(set GPRC:$dst, (atomic_cmp_swap_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))]>; ++ ++def LAS32 : PseudoInstSw64<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), ++ "# 32-bit atomic load and sub", ++ [(set GPRC:$dst, (atomic_load_add_32 GPRC:$ptr, GPRC:$swp))]>; ++def LAS64 :PseudoInstSw64<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), ++ "# 64-bit atomic load and sub", ++ [(set GPRC:$dst, (atomic_load_add_64 GPRC:$ptr, GPRC:$swp))]>; ++ ++def SWAP32 : PseudoInstSw64<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), ++ "# 32-bit atomic swap", ++ [(set GPRC:$dst, (atomic_swap_32 GPRC:$ptr, GPRC:$swp))]>; ++def SWAP64 :PseudoInstSw64<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), ++ "# 64-bit atomic swap", ++ [(set GPRC:$dst, (atomic_swap_64 GPRC:$ptr, GPRC:$swp))]>; ++} ++ ++let mayLoad = 1, mayStore = 1 in { ++ def ATOMIC_LOAD_ADD_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), 
"# 32-bit atomic", []>; ++ def ATOMIC_LOAD_ADD_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic", []>; ++ ++ def ATOMIC_SWAP_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic", []>; ++ def ATOMIC_SWAP_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic", []>; ++ ++ def ATOMIC_LOAD_AND_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic", []>; ++ def ATOMIC_LOAD_AND_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic", []>; ++ ++ def ATOMIC_LOAD_OR_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic", []>; ++ def ATOMIC_LOAD_OR_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic", []>; ++ ++ def ATOMIC_LOAD_SUB_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic", []>; ++ def ATOMIC_LOAD_SUB_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic", []>; ++ ++ def ATOMIC_LOAD_XOR_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic", []>; ++ def ATOMIC_LOAD_XOR_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic", []>; ++ ++ def ATOMIC_CMP_SWAP_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic", []>; ++ def ATOMIC_CMP_SWAP_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic", []>; ++} ++def ATOMIC_LOAD_ADD_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic", []>; ++def ATOMIC_LOAD_ADD_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic", []>; ++ ++def ATOMIC_SWAP_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic", []>; ++def ATOMIC_SWAP_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic", []>; ++ ++def ATOMIC_LOAD_AND_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic", []>; ++def ATOMIC_LOAD_AND_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic", []>; ++ ++def ATOMIC_LOAD_OR_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic", []>; ++def ATOMIC_LOAD_OR_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic", []>; ++ ++def ATOMIC_LOAD_SUB_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic", []>; ++def ATOMIC_LOAD_SUB_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic", []>; ++ ++def ATOMIC_LOAD_XOR_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic", []>; ++def ATOMIC_LOAD_XOR_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic", []>; ++ ++def ATOMIC_CMP_SWAP_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic", []>; ++def ATOMIC_CMP_SWAP_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic", []>; ++ ++def ATOMIC_LOAD_UMAX_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic", []>; ++def 
ATOMIC_LOAD_MAX_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic", []>; ++def ATOMIC_LOAD_UMIN_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic", []>; ++def ATOMIC_LOAD_MIN_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic", []>; ++def ATOMIC_LOAD_NAND_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic", []>; ++ ++def ATOMIC_LOAD_UMAX_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic", []>; ++def ATOMIC_LOAD_MAX_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic", []>; ++def ATOMIC_LOAD_UMIN_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic", []>; ++def ATOMIC_LOAD_MIN_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic", []>; ++def ATOMIC_LOAD_NAND_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic", []>; ++ ++def ATOMIC_LOAD_UMAX_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic", []>; ++def ATOMIC_LOAD_MAX_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic", []>; ++def ATOMIC_LOAD_UMIN_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic", []>; ++def ATOMIC_LOAD_MIN_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic", []>; ++def ATOMIC_LOAD_NAND_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic", []>; ++ ++def ATOMIC_LOAD_UMAX_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic", []>; ++def ATOMIC_LOAD_MAX_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic", []>; ++def ATOMIC_LOAD_UMIN_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic", []>; ++def ATOMIC_LOAD_MIN_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic", []>; ++def ATOMIC_LOAD_NAND_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), ++ (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic", []>; ++ ++ ++ ++}// for atomic load/store. 
set hasNoSchedulingInfo ++ ++//*********************** ++//Real instructions ++//*********************** ++ ++//4 The base instruction system ++//4.1 system call instruction ++let hasSideEffects = 1 in ++class inst_syscall opcode, string opstr, string operands=""> ++ : PALForm; ++ ++let Defs=[R0] in ++def SYS_CALL : inst_syscall<0x00, "sys_call", "$disp">; ++ ++def LBR : inst_syscall<0x1d, "lbr", "$disp">; ++ ++def : Pat<(Sw64_syscall (i64 immUExt8:$N)),(SYS_CALL immUExt8:$N )>; ++ ++//4.2 control instruction ++ ++//4.2.1 jump ++// Call ++ ++def SDT_Sw64JmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>; ++def Sw64JmpLink : SDNode<"Sw64ISD::JmpLink", SDT_Sw64JmpLink, ++ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, ++ SDNPVariadic]>; ++ ++class arg_jmp opcode, list pattern> ++ : MForm; ++ ++class branch_i opcode, list pattern> ++ : BForm; ++ ++let isCall = 1, Defs = [R26], Uses = [R27] in ++def JSR : arg_jmp<"call", 0x01, []>; ++ ++let isReturn = 1, isTerminator = 1, isBarrier = 1 in ++def RET : arg_jmp<"ret", 0x02, []>; ++ ++def Sw64Ret : SDNode<"Sw64ISD::Ret", SDTNone, ++ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; ++ ++let RA = 31, DISP=0 in ++def JMP : arg_jmp<"jmp", 0x03, []>; ++ ++let isBranch = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in ++def BR : branch_i<"br", 0x04, []>; ++let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in ++def BSR : branch_i<"bsr", 0x05, []>; ++ ++// for expand Call target, we create two Insns like: ++// load R27,symbol(GP) defs R27 ++// call R26,R27,symbol use R27 ++// so we dont need to use R27, we actually def R27 ++let isBarrier = 1, isCall = 1, Defs = [R26, R27], Uses = [R29] in { ++def PseudoCall : PseudoInstSw64<(outs), (ins call_symbol:$func), "", ++ []>,Sched<[WriteJmp]>; ++} ++ ++let isBarrier = 1, isCall = 1, Defs = [R26], Uses = [R27, R29] in { ++ def PseudoCallIndirect : PseudoInstSw64<(outs), (ins GPRC:$RB), "", ++ [(Sw64JmpLink GPRC:$RB)]>, ++ PseudoInstExpansion<(JSR R26, GPRC:$RB, 0)>, ++ Sched<[WriteJmp]>; ++} ++ ++let isBarrier = 1, isBranch = 1, isTerminator = 1 in ++def PseudoBrind : PseudoInstSw64<(outs), (ins GPRC:$RB), "", ++ [(brind GPRC:$RB)]>, ++ PseudoInstExpansion<(JMP R31, GPRC:$RB, 0)>, ++ Sched<[WriteJmp]>; ++ ++//to match libgcc _div _rem ++let isBarrier = 1, isCall = 1, Defs = [R23, R24, R25, R27, R28] ++ , Uses = [R24, R25, R27] in ++def PseudoCallDiv : PseudoInstSw64<(outs), (ins), "", []>, ++ PseudoInstExpansion<(JSR R23, R27, 0)>, ++ Sched<[WriteJmp]>; ++ ++let isBranch = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in ++def PseudoBR : PseudoInstSw64<(outs), (ins target:$disp), "", [(br bb:$disp)]>, ++ PseudoInstExpansion<(BR R31, target:$disp)>, Sched<[WriteJmp]>; ++ ++let isBarrier = 1, isReturn = 1, isTerminator = 1 in ++def PseudoRet : PseudoInstSw64<(outs), (ins), "", [(Sw64Ret)]>, ++ PseudoInstExpansion<(RET R31, R26, 1)>, Sched<[WriteJmp]>; ++ ++////4.2.2 uncondition shift ++///////////////////////////////////////////////////////// ++//Branching ++///////////////////////////////////////////////////////// ++let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in{ ++class br_icc opc, string asmstr> ++ : BForm; ++class br_fcc opc, string asmstr> ++ : BForm; ++} ++def BEQ : br_icc<0x30, "beq">; ++def BGE : br_icc<0x35, "bge">; ++def BGT : br_icc<0x34, "bgt">; ++def BLBC : br_icc<0x36, "blbc">; ++def BLBS : br_icc<0x37, "blbs">; ++def BLE : br_icc<0x33, "ble">; ++def BLT : br_icc<0x32, "blt">; ++def BNE : br_icc<0x31, "bne">; ++ ++//Branches, float ++def FBEQ : br_fcc<0x38, "fbeq">; ++def FBGE : 
br_fcc<0x3D, "fbge">; ++def FBGT : br_fcc<0x3C, "fbgt">; ++def FBLE : br_fcc<0x3B, "fble">; ++def FBLT : br_fcc<0x3A, "fblt">; ++def FBNE : br_fcc<0x39, "fbne">; ++//4.3 load and store instruction ++//4.3.1 load integer ++ ++let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in ++class load_ri opcode, RegisterClass regtype, ++ SDPatternOperator loadop> ++ : MForm; ++ ++let hasSideEffects = 0, mayLoad = 1, mayStore = 0 ,Constraints = "$RB = $wback,@earlyclobber $wback" in ++class load_ri1 opcode, bits<4> func, ++ RegisterClass regtype, SDPatternOperator loadop> ++ : MFuncForm; ++ ++let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in ++class store_ri opcode, RegisterClass regtype, ++ SDPatternOperator storeop> ++ : MForm; ++ ++let hasSideEffects = 0, mayLoad = 0, mayStore = 1 ,Constraints = "$RB = $wback,@earlyclobber $wback" in ++class store_ri1 opcode, bits<4> func, ++ RegisterClass regtype, SDPatternOperator storeop> ++ : MFuncForm; ++ ++// integer load ++def LDL : load_ri<"ldl", 0x23, GPRC, load>; ++def LDW : load_ri<"ldw", 0x22, GPRC, sextloadi32>; ++def LDHU : load_ri<"ldhu", 0x21, GPRC, zextloadi16>; ++def LDBU : load_ri<"ldbu", 0x20, GPRC, zextloadi8>; ++def LDL_A : load_ri1<"ldl_a", 0x1E, 0x3, GPRC, load>; ++def LDW_A : load_ri1<"ldw_a", 0x1E, 0x2, GPRC, sextloadi32>; ++def LDHU_A : load_ri1<"ldhu_a", 0x1E, 0x1, GPRC, zextloadi16>; ++def LDBU_A : load_ri1<"ldbu_a", 0x1E, 0x0, GPRC, zextloadi8>; ++ ++// float load ++def LDS : load_ri<"flds", 0x26, F4RC, load>; ++def LDD : load_ri<"fldd", 0x27, F8RC, load>; ++def LDS_A : load_ri1<"flds_a", 0x1E, 0x4, F4RC, load>; ++def LDD_A : load_ri1<"fldd_a", 0x1E, 0x5, F8RC, load>; ++ ++// integer store ++def STL : store_ri<"stl", 0x2B, GPRC, store>; ++def STW : store_ri<"stw", 0x2A, GPRC, truncstorei32>; ++def STH : store_ri<"sth", 0x29, GPRC, truncstorei16>; ++def STB : store_ri<"stb", 0x28, GPRC, truncstorei8>; ++def STL_A : store_ri1<"stl_a", 0x1E, 0x9, GPRC, store>; ++def STW_A : store_ri1<"stw_a", 0x1E, 0x8, GPRC, truncstorei32>; ++def STH_A : store_ri1<"sth_a", 0x1E, 0x7, GPRC, truncstorei16>; ++def STB_A : store_ri1<"stb_a", 0x1E, 0x6, GPRC, truncstorei8>; ++ ++// float store ++def STS : store_ri<"fsts", 0x2E, F4RC, store>; ++def STD : store_ri<"fstd", 0x2F, F8RC, store>; ++def STS_A : store_ri1<"fsts_a", 0x1E, 0xA, F4RC, store>; ++def STD_A : store_ri1<"fstd_a", 0x1E, 0xB, F8RC, store>; ++ ++// imm inst ++//let mayLoad = 1 in { ++def LDA : MForm<0x3E,(ins s64imm:$DISP, GPRC:$RB) , (outs GPRC:$RA), ++ "ldi", "$RA,${DISP}(${RB})", ++ [(set GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))]>; ++def LDAH : MForm<0x3F,(ins s64imm:$DISP, GPRC:$RB) , (outs GPRC:$RA), ++ "ldih", "$RA,${DISP}(${RB})", []>; ++//} ++ ++let Uses = [R29] in { ++def LOADgprel : PseudoInstSw64<(outs GPRC:$dst), (ins s64imm:$addr), "", ++ [(set GPRC:$dst, (Sw64_gprel tglobaladdr:$addr))]>, Sched<[WriteLD]>; ++ ++def LOADconstant : PseudoInstSw64<(outs GPRC:$dst), (ins s64imm:$addr), "", []>, ++ Sched<[WriteAdrLD]>; ++ ++def LOADlit : PseudoInstSw64<(outs GPRC:$dst), (ins s64imm:$addr), "", ++ [(set GPRC:$dst, (Sw64_rellit tglobaladdr:$addr))]>, Sched<[WriteLD]>; ++ ++def LOADlitSym : PseudoInstSw64<(outs GPRC:$dst), (ins s64imm:$addr), "", ++ [(set GPRC:$dst, (Sw64_rellit texternalsym:$addr))]>, Sched<[WriteLD]>; ++ ++ ++// The MOVaddr instruction should match only when the add is not folded ++// into a load or store address. 
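++// For example (purely illustrative): for a C access such as
++//   long get(long *p) { return p[1]; }
++// the selection DAG is (load (add $p, 8)), and the immSExt16 patterns
++// further down fold the add straight into the displacement, giving
++// (LDL 8, $p); only addresses that escape this folding are materialised
++// through the MOVaddr* pseudos that follow.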
++def MOVaddrGP ++: PseudoInstSw64<(outs GPRC:$dst), (ins i64imm:$hi, i64imm:$low), "", ++ [(set GPRC:$dst, (Sw64ldi (Sw64ldih tglobaladdr:$hi), ++ tglobaladdr:$low))]>, Sched<[WriteAdrAdr]>; ++ ++def MOVaddrCP ++: PseudoInstSw64<(outs GPRC:$dst), (ins i64imm:$hi, i64imm:$low), "", ++ [(set GPRC:$dst, (Sw64ldi (Sw64ldih tconstpool:$hi), ++ tconstpool:$low))]>, Sched<[WriteAdrAdr]>; ++ ++def MOVaddrBA ++: PseudoInstSw64<(outs GPRC:$dst), (ins i64imm:$hi, i64imm:$low), "", ++ [(set GPRC:$dst, (Sw64ldi (Sw64ldih tblockaddress:$hi), ++ tblockaddress:$low))]>, Sched<[WriteAdrAdr]>; ++ ++def MOVaddrEXT ++: PseudoInstSw64<(outs GPRC:$dst), (ins i64imm:$hi, i64imm:$low), "", ++ [(set GPRC:$dst, (Sw64ldi (Sw64ldih texternalsym:$hi), ++ texternalsym:$low))]>, Sched<[WriteAdrAdr]>; ++ ++def MOVaddrJT ++: PseudoInstSw64<(outs GPRC:$dst), (ins i64imm:$hi, i64imm:$low), "", ++ [(set GPRC:$dst, (Sw64ldi (Sw64ldih tjumptable:$hi), ++ tjumptable:$low))]>, Sched<[WriteAdrAdr]>; ++} ++ ++//TODO: for core3 target, sw64 need gpdisp to get global address table ++// we cannot change Prologue disp, or it will cause error ++let isBarrier = 1, hasNoSchedulingInfo = 1, Defs = [R29] in ++def MOVProgPCGp : PseudoInstSw64<(outs), ++ (ins s16imm:$DISP, s16imm:$NUM , GPRC:$dst_reg), "", []>, ++ Sched<[WriteAdrAdr]>; ++ ++let Defs = [R29] in ++def MOVaddrPCGp : PseudoInstSw64<(outs), ++ (ins s16imm:$DISP, s16imm:$NUM , GPRC:$dst_reg), "", []>, ++ Sched<[WriteAdrAdr]>; ++ ++// def patterns ++//def : Pat<(Sw64JmpLink (i64 tglobaladdr:$dst)), (PseudoCall tglobaladdr:$dst)>; ++ ++def : Pat<(Sw64JmpLink tglobaladdr:$func), ++ (PseudoCall tglobaladdr:$func)>; ++def : Pat<(Sw64JmpLink texternalsym:$func), ++ (PseudoCall texternalsym:$func)>; ++ ++def : Pat<(Sw64_LDAWithChain GPRC:$RB, immSExt16:$DISP), ++ (LDA immSExt16:$DISP, GPRC:$RB)>; ++ ++ ++def : Pat<(i64 (extloadi8 (add GPRC:$RB, immSExt16:$DISP))), ++ (LDBU immSExt16:$DISP, GPRC:$RB)>; ++def : Pat<(i64 (extloadi16 (add GPRC:$RB, immSExt16:$DISP))), ++ (LDHU immSExt16:$DISP, GPRC:$RB)>; ++def : Pat<(i64 (extloadi32 (add GPRC:$RB, immSExt16:$DISP))), ++ (LDW immSExt16:$DISP, GPRC:$RB)>; ++def : Pat<(i64 (zextloadi8 (add GPRC:$RB, immSExt16:$DISP))), ++ (LDBU immSExt16:$DISP, GPRC:$RB)>; ++def : Pat<(i64 (zextloadi16 (add GPRC:$RB, immSExt16:$DISP))), ++ (LDHU immSExt16:$DISP, GPRC:$RB)>; ++def : Pat<(i64 (sextloadi32 (add GPRC:$RB, immSExt16:$DISP))), ++ (LDW immSExt16:$DISP, GPRC:$RB)>; ++ ++def : Pat<(i64 (load GPRC:$addr)), ++ (LDL 0, GPRC:$addr)>; ++def : Pat<(i64 (sextloadi32 GPRC:$addr)), ++ (LDW 0, GPRC:$addr)>; ++def : Pat<(i64 (extloadi32 GPRC:$addr)), ++ (LDW 0, GPRC:$addr)>; ++def : Pat<(i64 (zextloadi16 GPRC:$addr)), ++ (LDHU 0, GPRC:$addr)>; ++def : Pat<(i64 (extloadi16 GPRC:$addr)), ++ (LDHU 0, GPRC:$addr)>; ++def : Pat<(i64 (zextloadi8 GPRC:$addr)), ++ (LDBU 0, GPRC:$addr)>; ++def : Pat<(i64 (extloadi8 GPRC:$addr)), ++ (LDBU 0, GPRC:$addr)>; ++ ++//4.3.5 s float load ++def : Pat<(f32 (load GPRC:$addr)), ++ (LDS 0, GPRC:$addr)>; ++def : Pat<(f64 (load GPRC:$addr)), ++ (LDD 0, GPRC:$addr)>; ++//4.3.3 store integer ++def : Pat<(store GPRC:$DATA, GPRC:$addr), ++ (STL GPRC:$DATA, 0, GPRC:$addr)>; ++def : Pat<(truncstorei32 GPRC:$DATA, GPRC:$addr), ++ (STW GPRC:$DATA, 0, GPRC:$addr)>; ++def : Pat<(truncstorei16 GPRC:$DATA, GPRC:$addr), ++ (STH GPRC:$DATA, 0, GPRC:$addr)>; ++def : Pat<(truncstorei8 GPRC:$DATA, GPRC:$addr), ++ (STB GPRC:$DATA, 0, GPRC:$addr)>; ++def : Pat<(store F4RC:$DATA, GPRC:$addr), ++ (STS F4RC:$DATA, 0, GPRC:$addr)>; ++def : Pat<(store 
F8RC:$DATA, GPRC:$addr), ++ (STD F8RC:$DATA, 0, GPRC:$addr)>; ++multiclass LdPat { ++ def : Pat<(VT (LoadOp GPRC:$addr)), (Inst 0, GPRC:$addr)>; ++ def : Pat<(VT (LoadOp AddrFI:$addr)), (Inst 0, AddrFI:$addr)>; ++ def : Pat<(VT (LoadOp (add GPRC:$addr, immSExt16:$DISP))), ++ (Inst immSExt16:$DISP, GPRC:$addr)>; ++ def : Pat<(VT (LoadOp (add AddrFI:$addr, immSExt16:$DISP))), ++ (Inst immSExt16:$DISP, AddrFI:$addr)>; ++ def : Pat<(VT (LoadOp (IsOrAdd AddrFI:$addr, immSExt16:$DISP))), ++ (Inst immSExt16:$DISP, AddrFI:$addr)>; ++} ++ ++multiclass StPat { ++ def : Pat<(StoreOp StTy:$rs2, GPRC:$rs1), (Inst StTy:$rs2, 0, GPRC:$rs1)>; ++ def : Pat<(StoreOp StTy:$rs2, AddrFI:$rs1), (Inst StTy:$rs2, 0, AddrFI:$rs1)>; ++ def : Pat<(StoreOp StTy:$rs2, (add GPRC:$rs1, immSExt16:$DISP)), ++ (Inst StTy:$rs2, immSExt16:$DISP, GPRC:$rs1)>; ++ def : Pat<(StoreOp StTy:$rs2, (add AddrFI:$rs1, immSExt16:$DISP)), ++ (Inst StTy:$rs2, immSExt16:$DISP, AddrFI:$rs1)>; ++ def : Pat<(StoreOp StTy:$rs2, (IsOrAdd AddrFI:$rs1, immSExt16:$DISP)), ++ (Inst StTy:$rs2, immSExt16:$DISP, AddrFI:$rs1)>; ++} ++ ++defm : LdPat; ++defm : LdPat; ++defm : LdPat; ++defm : LdPat; ++defm : LdPat; ++defm : LdPat; ++defm : LdPat; ++defm : LdPat; ++defm : LdPat; ++ ++defm : StPat; ++defm : StPat; ++defm : StPat; ++defm : StPat; ++defm : StPat; ++defm : StPat; ++//4.5 int caculate instruction ++multiclass inst_rr_ri fun, string opstr, ++ SDPatternOperator OpNode = null_frag> { ++ def r : OForm <0x10, fun, (ins GPRC:$RA, GPRC:$RB), (outs GPRC:$RC), ++ opstr, "$RA,$RB,$RC", ++ [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))]>; ++ def i : OFormL<0x12, fun,(ins GPRC:$RA, u8imm:$L), (outs GPRC:$RC), ++ opstr, "$RA,$L,$RC", ++ [(set GPRC:$RC, (OpNode GPRC:$RA, (i64 immUExt8:$L)))]>; ++} ++ ++let Predicates = [EnableCrcInst] in ++class inst_rr_r fun, string opstr, ++ SDPatternOperator OpNode> ++ : OForm <0x10, fun, (ins GPRC:$RA, GPRC:$RB), (outs GPRC:$RC), ++ opstr, "$RA,$RB,$RC", ++ [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))]>; ++ ++multiclass inst_rr_rin funL, bits<8> funQ, string opstr, ++ SDPatternOperator OpNode > { ++ def Q : OForm <0x10, funQ, (ins GPRC:$RA, GPRC:$RB), (outs GPRC:$RC), ++ opstr # "l", "$RA,$RB,$RC", ++ [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))]>; ++ def L : OForm <0x10, funL, (ins GPRC:$RA, GPRC:$RB), (outs GPRC:$RC), ++ opstr # "w", "$RA,$RB,$RC", ++ [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))]>; ++} ++ ++class inst_rr_13 fun, string opstr, ++ SDPatternOperator OpNode=null_frag> ++ : OFormI <0x10, fun, (ins s13imm:$L), (outs GPRC:$RC), ++ opstr, "$L, $RC", ++ [(set GPRC:$RC, (OpNode immUExt13:$L))]>; ++ ++let RA = 31 in ++class inst_rr_2 fun, string opstr, ValueType vt> ++ : OForm <0x10, fun, (ins GPRC:$RB), (outs GPRC:$RC), ++ opstr, "$RB, $RC", ++ [(set GPRC:$RC, (sext_inreg GPRC:$RB, vt))]>; ++ ++let RA = 31 in ++class inst_rr_3 fun, string opstr, ++ SDPatternOperator OpNode=null_frag> ++ : OForm <0x10, fun, (ins GPRC:$RB), (outs GPRC:$RC), ++ opstr, "$RB, $RC", ++ [(set GPRC:$RC, (OpNode GPRC:$RB))]>; ++ ++multiclass inst_lw funL, bits<8> funQ, string opstr, ++ SDPatternOperator OpNode> { ++ defm L : inst_rr_ri; ++ defm Q : inst_rr_ri; ++} ++ ++defm ADD : inst_lw<0x00, 0x08,"add", add>; ++defm SUB : inst_lw<0x01, 0x09,"sub", sub>; ++defm S4ADD: inst_lw<0x02, 0x0a,"s4add", add4>; ++defm S4SUB: inst_lw<0x03, 0x0b,"s4sub", sub4>; ++defm S8ADD: inst_lw<0x04, 0x0c,"s8add", add8>; ++defm S8SUB: inst_lw<0x05, 0x0d,"s8sub", sub8>; ++defm MUL : inst_lw<0x10, 0x18,"mul", mul>; ++ ++let mayRaiseFPException = 1, 
hasSideEffects = 1 in { ++ defm DIV : inst_rr_rin<0x11, 0x1a, "div", sdiv>; ++ defm UDIV : inst_rr_rin<0x12, 0x1b, "udiv", udiv>; ++ defm REM : inst_rr_rin<0x13, 0x1c, "rem", srem>; ++ defm UREM : inst_rr_rin<0x14, 0x1d, "urem", urem>; ++} ++ ++def ADDPI : inst_rr_13<0x1e, "addpi", Sw64_addpi>; ++def ADDPIS : inst_rr_13<0x1f, "addpis", Sw64_addpis>; ++ ++defm SBT : inst_rr_ri<0x2d, "sbt", Sw64_sbt>; ++defm CBT : inst_rr_ri<0x2e, "cbt", Sw64_cbt>; ++ ++defm UMULH : inst_rr_ri<0x19, "umulh", mulhu>; ++ ++let RA=31 in ++class inst_ct func, string opstr, SDNode OpNode> ++ : OForm<0x10, func, (ins GPRC:$RB), (outs GPRC:$RC), ++ opstr, "$RB,$RC", [(set GPRC:$RC, (OpNode GPRC:$RB))]>; ++ ++def CTTZ : inst_ct<0x5A, "cttz", cttz>; ++def CTLZ : inst_ct<0x59, "ctlz", ctlz>; ++def CTPOP : inst_ct<0x58, "ctpop", ctpop>; ++ ++defm ZAP : inst_rr_ri<0x68, "zap">; ++defm ZAPNOT : inst_rr_ri<0x69, "zapnot">; ++ ++def SEXTB : inst_rr_2<0x6A, "sextb", i8>; ++def SEXTH : inst_rr_2<0x6B, "sexth", i16>; ++ ++//4.5.2 integer cmp ++defm CMPEQ : inst_rr_ri<0x28, "cmpeq", seteq>; ++defm CMPLT : inst_rr_ri<0x29, "cmplt", setlt>; ++defm CMPLE : inst_rr_ri<0x2A, "cmple", setle>; ++defm CMPULT : inst_rr_ri<0x2B, "cmpult", setult>; ++defm CMPULE : inst_rr_ri<0x2C, "cmpule", setule>; ++ ++//4.5.3 integer order ++defm AND : inst_rr_ri<0x38, "and", and>; ++defm BIC : inst_rr_ri<0x39, "bic", ++ BinOpFrag<(and node:$LHS, (not node:$RHS))>>; ++defm BIS : inst_rr_ri<0x3A, "bis", or>; ++defm ORNOT : inst_rr_ri<0x3B, "ornot", ++ BinOpFrag<(or node:$LHS, (not node:$RHS))>>; ++defm XOR : inst_rr_ri<0x3C, "xor", xor>; ++defm EQV : inst_rr_ri<0x3D, "eqv", ++ BinOpFrag<(not (xor node:$LHS, node:$RHS))>>; ++ ++//4.5.4 integer move position ++defm SL : inst_rr_ri<0x48, "sll", shl>; ++defm SRA : inst_rr_ri<0x4A, "sra", sra>; ++defm SRL : inst_rr_ri<0x49, "srl", srl>; ++defm ROLL : inst_rr_ri<0x4B, "roll", rotl>; ++defm SLLW : inst_rr_ri<0x4C, "sllw">; ++defm SRLW : inst_rr_ri<0x4D, "srlw">; ++defm SRAW : inst_rr_ri<0x4E, "sraw">; ++defm ROLW : inst_rr_ri<0x4F, "rolw", Sw64_rolw>; ++ ++def sexti32 : ComplexPattern; ++ ++def zexti32 : ComplexPattern; ++ ++def : Pat<(srem (sexti32 (i64 GPRC:$RA)), (sexti32 (i64 GPRC:$RB))), ++ (REML GPRC:$RA, GPRC:$RB)>; ++ ++def : Pat<(sext_inreg (sdiv GPRC:$RA, GPRC:$RB), i32), ++ (DIVL GPRC:$RA, GPRC:$RB)>; ++ ++def : Pat<(sext_inreg (udiv (and GPRC:$RA,0xffffffff), (and GPRC:$RB,0xffffffff)), i32), ++ (UDIVL GPRC:$RA, GPRC:$RB)>; ++ ++def : Pat<(sext_inreg (shl GPRC:$RA, GPRC:$RB), i32), ++ (SLLWr GPRC:$RA, GPRC:$RB)>, ++ Requires<[EnableIntShift, HasCore4]>; ++ ++def : Pat<(sext_inreg (shl GPRC:$RA, (i64 immUExt8:$L)), i32), ++ (SLLWi GPRC:$RA, (i64 immUExt8:$L))>, ++ Requires<[EnableIntShift, HasCore4]>; ++ ++def : Pat<(sext_inreg (srl GPRC:$RA, GPRC:$RB), i32), ++ (SRLWr GPRC:$RA, GPRC:$RB)>, ++ Requires<[EnableIntShift, HasCore4]>; ++ ++def : Pat<(srl (i64 (zexti32 GPRC:$RA)), (i64 immUExt8:$L)), ++ (SRLWi GPRC:$RA, (i64 immUExt8:$L))>, ++ Requires<[EnableIntShift, HasCore4]>; ++ ++def : Pat<(sra (i64 (sexti32 GPRC:$RA)), (i64 GPRC:$RB)), ++ (SRAWr GPRC:$RA, GPRC:$RB)>, ++ Requires<[EnableIntShift, HasCore4]>; ++ ++def : Pat<(sra (i64 (sexti32 GPRC:$RA)), (i64 immUExt8:$L)), ++ (SRAWi GPRC:$RA, (i64 immUExt8:$L))>, ++ Requires<[EnableIntShift, HasCore4]>; ++ ++def : Pat<(sext_inreg (rotl GPRC:$RA, GPRC:$RB), i32), ++ (ROLWr GPRC:$RA, GPRC:$RB)>, ++ Requires<[EnableIntShift, HasCore4]>; ++ ++def : Pat<(sext_inreg (rotl GPRC:$RA, (i64 immUExt8:$L)), i32), ++ (ROLWi GPRC:$RA, (i64 immUExt8:$L))>, ++ 
Requires<[EnableIntShift, HasCore4]>; ++ ++multiclass PatGprInst{ ++def : Pat<(OpNode GPRC:$rs1, GPRC:$rs2), ++ (!cast(Inst # "Qr") ++ GPRC:$rs1, GPRC:$rs2)>; ++ ++def : Pat<(intop (OpNode GPRC:$rs1, GPRC:$rs2)), ++ (!cast(Inst # "Lr") ++ GPRC:$rs1, GPRC:$rs2)>; ++ ++def : Pat<(OpNode GPRC:$rs1, immUExt8:$imm8), ++ (!cast(Inst # "Qi") ++ GPRC:$rs1, immUExt8:$imm8)>; ++ ++def : Pat<(intop (OpNode GPRC:$rs1, immUExt8:$imm8)), ++ (!cast(Inst # "Li") ++ GPRC:$rs1, immUExt8:$imm8)>; ++} ++ ++multiclass PatGprInstn{ ++def : Pat<(OpNode GPRC:$rs1, GPRC:$rs2), ++ (!cast(Inst # "Q") ++ GPRC:$rs1, GPRC:$rs2)>; ++ ++def : Pat<(intop (OpNode GPRC:$rs1, GPRC:$rs2)), ++ (!cast(Inst # "L") ++ GPRC:$rs1, GPRC:$rs2)>; ++} ++ ++defm : PatGprInst; ++defm : PatGprInst; ++defm : PatGprInst; ++defm : PatGprInst; ++defm : PatGprInst; ++defm : PatGprInst; ++defm : PatGprInst; ++//defm : PatGprInstn; ++//defm : PatGprInstn; ++//defm : PatGprInstn; ++//defm : PatGprInstn; ++ ++//Const cases since legalize does sub x, int -> add x, inv(int) + 1 ++def : Pat<(intop (add GPRC:$RA, immUExt8neg:$L)), ++ (SUBLi GPRC:$RA, immUExt8neg:$L)>; ++def : Pat<(add GPRC:$RA, immUExt8neg:$L), (SUBQi GPRC:$RA, immUExt8neg:$L)>; ++def : Pat<(intop (add4 GPRC:$RA, immUExt8neg:$L)), ++ (S4SUBLi GPRC:$RA, immUExt8neg:$L)>; ++def : Pat<(add4 GPRC:$RA, immUExt8neg:$L), (S4SUBQi GPRC:$RA, immUExt8neg:$L)>; ++def : Pat<(intop (add8 GPRC:$RA, immUExt8neg:$L)), ++ (S8SUBLi GPRC:$RA, immUExt8neg:$L)>; ++def : Pat<(add8 GPRC:$RA, immUExt8neg:$L), (S8SUBQi GPRC:$RA, immUExt8neg:$L)>; ++// Define the pattern that produces ZAPNOTi. ++ ++def : Pat<(cttz_zero_undef i64:$Rn), (CTTZ $Rn)>; ++ ++def : Pat<(zappat:$imm GPRC:$RA), ++ (ZAPNOTi GPRC:$RA, (iZAPX GPRC:$imm))>; ++ ++def : Pat<(sext_inreg GPRC:$RB, i32), ++ (ADDLi GPRC:$RB, 0)>; ++ ++def : Pat<(sext_inreg (add GPRC:$RA, GPRC:$RB), i32), ++ (ADDLr GPRC:$RA, GPRC:$RB)>; ++ ++def : Pat<(setueq GPRC:$X, GPRC:$Y), (CMPEQr GPRC:$X, GPRC:$Y)>; ++def : Pat<(setueq GPRC:$X, immUExt8:$Y), (CMPEQi GPRC:$X, immUExt8:$Y)>; ++def : Pat<(setueq immUExt8:$Y, GPRC:$X), (CMPEQi GPRC:$X, immUExt8:$Y)>; ++ ++def : Pat<(seteq GPRC:$X, GPRC:$Y), (CMPEQr GPRC:$X, GPRC:$Y)>; ++def : Pat<(seteq GPRC:$X, immUExt8:$Y), (CMPEQi GPRC:$X, immUExt8:$Y)>; ++def : Pat<(seteq immUExt8:$Y, GPRC:$X), (CMPEQi GPRC:$X, immUExt8:$Y)>; ++ ++def : Pat<(setugt GPRC:$X, GPRC:$Y), (CMPULTr GPRC:$Y, GPRC:$X)>; ++def : Pat<(setugt immUExt8:$X, GPRC:$Y), (CMPULTi GPRC:$Y, immUExt8:$X)>; ++def : Pat<(brcond (i64 (setugt GPRC:$Y, immUExt8:$X)), bb:$DISP), (BEQ (CMPULEi GPRC:$Y, immUExt8:$X), bb:$DISP)>; ++ ++def : Pat<(setuge GPRC:$X, GPRC:$Y), (CMPULEr GPRC:$Y, GPRC:$X)>; ++def : Pat<(setuge immUExt8:$X, GPRC:$Y), (CMPULEi GPRC:$Y, immUExt8:$X)>; ++def : Pat<(brcond (i64 (setuge GPRC:$Y, immUExt8:$X)), bb:$DISP), (BEQ (CMPULTi GPRC:$Y, immUExt8:$X), bb:$DISP)>; ++ ++def : Pat<(setgt GPRC:$X, GPRC:$Y), (CMPLTr GPRC:$Y, GPRC:$X)>; ++def : Pat<(setgt immUExt8:$X, GPRC:$Y), (CMPLTi GPRC:$Y, immUExt8:$X)>; ++def : Pat<(brcond (i64 (setgt GPRC:$Y, immUExt8:$X)), bb:$DISP), (BEQ (CMPLEi GPRC:$Y, immUExt8:$X), bb:$DISP)>; ++ ++def : Pat<(setge GPRC:$X, GPRC:$Y), (CMPLEr GPRC:$Y, GPRC:$X)>; ++def : Pat<(setge immUExt8:$X, GPRC:$Y), (CMPLEi GPRC:$Y, immUExt8:$X)>; ++def : Pat<(brcond (i64 (setgt GPRC:$Y, immUExt8:$X)), bb:$DISP), (BEQ (CMPLTi GPRC:$Y, immUExt8:$X), bb:$DISP)>; ++ ++def : Pat<(setne GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQr GPRC:$X, GPRC:$Y), 0)>; ++def : Pat<(setne GPRC:$X, immUExt8:$Y), ++ (CMPEQi (CMPEQi GPRC:$X, immUExt8:$Y), 0)>; ++def : 
Pat<(brcond (i64 (setne GPRC:$X, immUExt8:$Y)), bb:$DISP), (BEQ (CMPEQi GPRC:$X, immUExt8:$Y), bb:$DISP)>; ++def : Pat<(brcond (i64 (setne immUExt8:$Y, GPRC:$X)), bb:$DISP), (BEQ (CMPEQi GPRC:$X, immUExt8:$Y), bb:$DISP)>; ++ ++def : Pat<(setune GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQr GPRC:$X, GPRC:$Y), 0)>; ++def : Pat<(setune GPRC:$X, immUExt8:$Y), ++ (CMPEQi (CMPEQr GPRC:$X, immUExt8:$Y), 0)>; ++ ++//after put here because SLi ++//Stupid crazy arithmetic stuff: ++let AddedComplexity = 1 in { ++def : Pat<(mul GPRC:$RA, 5), (S4ADDQr GPRC:$RA, GPRC:$RA)>; ++def : Pat<(sext_inreg (mul GPRC:$RA, 5), i32), (S4ADDLr GPRC:$RA, GPRC:$RA)>; ++def : Pat<(mul GPRC:$RA, 9), (S8ADDQr GPRC:$RA, GPRC:$RA)>; ++def : Pat<(sext_inreg (mul GPRC:$RA, 9), i32), (S8ADDLr GPRC:$RA, GPRC:$RA)>; ++def : Pat<(mul GPRC:$RA, 3), (S4SUBQr GPRC:$RA, GPRC:$RA)>; ++def : Pat<(sext_inreg (mul GPRC:$RA, 3), i32), (S4SUBLr GPRC:$RA, GPRC:$RA)>; ++def : Pat<(mul GPRC:$RA, 7), (S8SUBQr GPRC:$RA, GPRC:$RA)>; ++def : Pat<(sext_inreg (mul GPRC:$RA, 7), i32), (S8SUBLr GPRC:$RA, GPRC:$RA)>; ++//slight tree expansion if we are multiplying near to a power of 2 ++ ++def : Pat<(mul GPRC:$RA, immRem1:$imm), ++ (ADDQr (SLi GPRC:$RA, (nearP2X immRem1:$imm)), GPRC:$RA)>; ++def : Pat<(mul GPRC:$RA, immRem2:$imm), ++ (ADDQr (SLi GPRC:$RA, (nearP2X immRem2:$imm)), ++ (ADDQr GPRC:$RA, GPRC:$RA))>; ++def : Pat<(mul GPRC:$RA, immRem3:$imm), ++ (ADDQr (SLi GPRC:$RA, (nearP2X immRem3:$imm)), ++ (S4SUBQr GPRC:$RA, GPRC:$RA))>; ++def : Pat<(mul GPRC:$RA, immRem4:$imm), ++ (S4ADDQr GPRC:$RA, (SLi GPRC:$RA, (nearP2X immRem4:$imm)))>; ++def : Pat<(mul GPRC:$RA, immRem5:$imm), ++ (ADDQr (SLi GPRC:$RA, (nearP2X immRem5:$imm)), ++ (S4ADDQr GPRC:$RA, GPRC:$RA))>; ++def : Pat<(mul GPRC:$RA, immRemP2:$imm), ++ (ADDQr (SLi GPRC:$RA, (nearP2X immRemP2:$imm)), ++ (SLi GPRC:$RA, (nearP2RemX immRemP2:$imm)))>; ++ ++} //Added complexity ++//4.5.5 integer cond select ++// RA:COND RB:TRUE RC:FALSE RD:DEST ++multiclass select_inst< bits<3> fun, string opstr, SDPatternOperator OpNode> { ++def r : OForm4 <0x11, fun, (ins GPRC:$RA, GPRC:$RB, GPRC:$RC), ++ (outs GPRC:$RD), opstr, "$RA,$RB,$RC,$RD", ++ [(set GPRC:$RD, ++ (select (i64 (OpNode GPRC:$RA)), GPRC:$RB, GPRC:$RC))]>; ++def i : OForm4L<0x13, fun, (ins GPRC:$RA, u8imm:$L, GPRC:$RC), ++ (outs GPRC:$RD), opstr, "$RA,$L,$RC,$RD", ++ [(set GPRC:$RD, ++ (select (i64 (OpNode GPRC:$RA)), immUExt8:$L, GPRC:$RC))]>; ++} ++ ++defm SELEQ : select_inst<0x0, "seleq", CmpOpFrag<(seteq node:$R, 0)>>; ++defm SELNE : select_inst<0x5, "selne", CmpOpFrag<(setne node:$R, 0)>>; ++defm SELLT : select_inst<0x4, "sellt", CmpOpFrag<(setlt node:$R, 0)>>; ++defm SELLE : select_inst<0x3, "selle", CmpOpFrag<(setle node:$R, 0)>>; ++defm SELGT : select_inst<0x2, "selgt", CmpOpFrag<(setgt node:$R, 0)>>; ++defm SELGE : select_inst<0x1, "selge", CmpOpFrag<(setge node:$R, 0)>>; ++defm SELLBC : select_inst<0x6, "sellbc", CmpOpFrag<(xor node:$R, 1)>>; ++defm SELLBS : select_inst<0x7, "sellbs", CmpOpFrag<(and node:$R, 1)>>; ++ ++//General pattern for select ++def : Pat<(select GPRC:$which, GPRC:$src1, GPRC:$src2), ++ (SELNEr GPRC:$which, GPRC:$src1, GPRC:$src2)>; ++def : Pat<(select GPRC:$which, GPRC:$src1, immUExt8:$src2), ++ (SELEQi GPRC:$which, immUExt8:$src2, GPRC:$src1)>; ++def : Pat<(select (i64 (setne GPRC:$RCOND, 0)), GPRC:$RTRUE, immUExt8:$RFALSE), ++ (SELEQi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; ++def : Pat<(select (i64 (setgt GPRC:$RCOND, 0)), GPRC:$RTRUE, immUExt8:$RFALSE), ++ (SELLEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; 
++def : Pat<(select (i64 (setge GPRC:$RCOND, 0)), GPRC:$RTRUE, immUExt8:$RFALSE), ++ (SELLTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; ++def : Pat<(select (i64 (setlt GPRC:$RCOND, 0)), GPRC:$RTRUE, immUExt8:$RFALSE), ++ (SELGEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; ++def : Pat<(select (i64 (setle GPRC:$RCOND, 0)), GPRC:$RTRUE, immUExt8:$RFALSE), ++ (SELGTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; ++ ++def : Pat<(mulhs GPRC:$RA, GPRC:$RB), ++ (SUBQr (UMULHr GPRC:$RA, GPRC:$RB), ++ (ADDQr (SELGEr GPRC:$RB, R31, GPRC:$RA), ++ (SELGEr GPRC:$RA, R31, GPRC:$RB)))>; ++ ++defm CMPBGE : inst_rr_ri<0x6C, "cmpgeb">; ++ ++defm EXTLB : inst_rr_ri<0x50, "extlb">; ++defm EXTLH : inst_rr_ri<0x51, "extlh">; ++defm EXTLW : inst_rr_ri<0x52, "extlw">; ++defm EXTLL : inst_rr_ri<0x53, "extll">; ++ ++defm EXTHB : inst_rr_ri<0x54, "exthb">; ++defm EXTHH : inst_rr_ri<0x55, "exthh">; ++defm EXTHW : inst_rr_ri<0x56, "exthw">; ++defm EXTHL : inst_rr_ri<0x57, "exthl">; ++ ++defm INSLB: inst_rr_ri<0x40, "inslb">; ++defm INSLH: inst_rr_ri<0x41, "inslh">; ++defm INSLW: inst_rr_ri<0x42, "inslw">; ++defm INSLL: inst_rr_ri<0x43, "insll">; ++defm INSHB: inst_rr_ri<0x44, "inshb">; ++defm INSHH: inst_rr_ri<0x45, "inshh">; ++defm INSHW: inst_rr_ri<0x46, "inshw">; ++defm INSHL: inst_rr_ri<0x47, "inshl">; ++ ++def REVBH: inst_rr_3<0x5B, "revbh", Sw64_revbh>; ++def REVBW: inst_rr_3<0x5C, "revbw", Sw64_revbw>; ++def REVBL: inst_rr_3<0x5D, "revbl", bswap>; ++ ++def : Pat<(sra (bswap GPRC:$RB), (i64 32)), ++ (REVBW GPRC:$RB)>; ++def : Pat<(sra (bswap GPRC:$RB), (i64 48)), ++ (REVBH GPRC:$RB)>; ++def : Pat<(srl (bswap GPRC:$RB), (i64 32)), ++ (REVBW GPRC:$RB)>; ++def : Pat<(srl (bswap GPRC:$RB), (i64 48)), ++ (REVBH GPRC:$RB)>; ++ ++defm MASKLB: inst_rr_ri<0x60, "masklb">; ++defm MASKLH: inst_rr_ri<0x61, "masklh">; ++defm MASKLW: inst_rr_ri<0x62, "masklw">; ++defm MASKLL: inst_rr_ri<0x63, "maskll">; ++defm MASKHB: inst_rr_ri<0x64, "maskhb">; ++defm MASKHH: inst_rr_ri<0x65, "maskhh">; ++defm MASKHW: inst_rr_ri<0x66, "maskhw">; ++defm MASKHL: inst_rr_ri<0x67, "maskhl">; ++ ++// 4.5.7 crc32 instruction ++def CRC32B: inst_rr_r<0x20, "crc32b", Sw64_crc32b>; ++def CRC32H: inst_rr_r<0x21, "crc32h", Sw64_crc32h>; ++def CRC32W: inst_rr_r<0x22, "crc32w", Sw64_crc32w>; ++def CRC32L: inst_rr_r<0x23, "crc32l", Sw64_crc32l>; ++def CRC32CB: inst_rr_r<0x24, "crc32cb", Sw64_crc32cb>; ++def CRC32CH: inst_rr_r<0x25, "crc32ch", Sw64_crc32ch>; ++def CRC32CW: inst_rr_r<0x26, "crc32cw", Sw64_crc32cw>; ++def CRC32CL: inst_rr_r<0x27, "crc32cl", Sw64_crc32cl>; ++ ++def : Pat<(Sw64_crc32b GPRC:$rs1, GPRC:$rs2), ++ (CRC32B GPRC:$rs1, GPRC:$rs2)>, ++ Requires<[EnableCrcInst, HasCore4]>; ++def : Pat<(Sw64_crc32h GPRC:$rs1, GPRC:$rs2), ++ (CRC32H GPRC:$rs1, GPRC:$rs2)>, ++ Requires<[EnableCrcInst, HasCore4]>; ++def : Pat<(Sw64_crc32w GPRC:$rs1, GPRC:$rs2), ++ (CRC32W GPRC:$rs1, GPRC:$rs2)>, ++ Requires<[EnableCrcInst, HasCore4]>; ++def : Pat<(Sw64_crc32l GPRC:$rs1, GPRC:$rs2), ++ (CRC32L GPRC:$rs1, GPRC:$rs2)>, ++ Requires<[EnableCrcInst, HasCore4]>; ++def : Pat<(Sw64_crc32cb GPRC:$rs1, GPRC:$rs2), ++ (CRC32CB GPRC:$rs1, GPRC:$rs2)>, ++ Requires<[EnableCrcInst, HasCore4]>; ++def : Pat<(Sw64_crc32ch GPRC:$rs1, GPRC:$rs2), ++ (CRC32CH GPRC:$rs1, GPRC:$rs2)>, ++ Requires<[EnableCrcInst, HasCore4]>; ++def : Pat<(Sw64_crc32cw GPRC:$rs1, GPRC:$rs2), ++ (CRC32CW GPRC:$rs1, GPRC:$rs2)>, ++ Requires<[EnableCrcInst, HasCore4]>; ++def : Pat<(Sw64_crc32cl GPRC:$rs1, GPRC:$rs2), ++ (CRC32CL GPRC:$rs1, GPRC:$rs2)>, ++ Requires<[EnableCrcInst, HasCore4]>; ++ 
++//4.6 float calculate instruction ++//4.6.1 float calculate ++class inst_fpu<bits<8> func, string opstr, RegisterClass regtype> ++ : FPForm<0x18, func, (ins regtype:$RA, regtype:$RB), ++ (outs regtype:$RC), opstr, "$RA,$RB,$RC">; ++ ++class inst_fpu_rr<bits<8> func, string opstr, RegisterClass regtype, ++ SDPatternOperator OpNode> ++ : FPForm<0x18, func, (ins regtype:$RA, regtype:$RB), ++ (outs regtype:$RC), opstr, "$RA,$RB,$RC", ++ [(set regtype:$RC, (OpNode regtype:$RA, regtype:$RB))]>; ++ ++class inst_fpu_rr_rev64<bits<8> func, string opstr, ++ SDPatternOperator OpNode> ++ : FPForm<0x18, func, (ins F8RC:$RA, F8RC:$RB), ++ (outs F8RC:$RC), opstr, "$RA,$RB,$RC", ++ [(set F8RC:$RC, (OpNode F8RC:$RB, F8RC:$RA))]>; ++ ++let DecoderNamespace = "FP32" in { ++class inst_fpu_rr_rev32<bits<8> func, string opstr, ++ SDPatternOperator OpNode> ++ : FPForm<0x18, func, (ins F4RC:$RA, F4RC:$RB), ++ (outs F4RC:$RC), opstr, "$RA,$RB,$RC", ++ [(set F4RC:$RC, (OpNode F4RC:$RB, F4RC:$RA))]>; ++} ++ ++multiclass inst_fpu_sd<bits<8> funcS, bits<8> funcD, ++ string opstr, SDPatternOperator OpNode>{ ++ def S : inst_fpu_rr; ++ def D : inst_fpu_rr; ++} ++ ++defm ADD : inst_fpu_sd<0x00, 0x01, "fadd", fadd>; ++defm SUB : inst_fpu_sd<0x02, 0x03, "fsub", fsub>; ++defm MUL : inst_fpu_sd<0x04, 0x05, "fmul", fmul>; ++defm DIV : inst_fpu_sd<0x06, 0x07, "fdiv", fdiv>; ++ ++def CPYSD : inst_fpu_rr_rev64<0x30, "fcpys", fcopysign>; ++def CPYSED : inst_fpu_rr_rev64<0x31, "fcpyse", fcopysign>; ++def CPYSND : inst_fpu_rr_rev64<0x32, "fcpysn", null_frag>; ++def CPYSS : inst_fpu_rr_rev32<0x30, "fcpys", fcopysign>; ++def CPYSES : inst_fpu_rr_rev32<0x31, "fcpyse", fcopysign>; ++def CPYSNS : inst_fpu_rr_rev32<0x32, "fcpysn", null_frag>; ++ ++def CMPTEQ : inst_fpu<0x10, "fcmpeq", F8RC>; ++def CMPTLE : inst_fpu<0x11, "fcmple", F8RC>; ++def CMPTLT : inst_fpu<0x12, "fcmplt", F8RC>; ++def CMPTUN : inst_fpu<0x13, "fcmpun", F8RC>; ++ ++//4.6.2 float cmp ++ ++def : Pat<(fabs F8RC:$RB), ++ (CPYSD F31, F8RC:$RB)>; ++def : Pat<(fabs F4RC:$RB), ++ (CPYSS F31, F4RC:$RB)>; ++def : Pat<(fneg F8RC:$RB), ++ (CPYSND F8RC:$RB, F8RC:$RB)>; ++def : Pat<(fneg F4RC:$RB), ++ (CPYSNS F4RC:$RB, F4RC:$RB)>; ++ ++def : Pat<(fcopysign F4RC:$A, (fneg F4RC:$B)), ++ (CPYSNS F4RC:$B, F4RC:$A)>; ++def : Pat<(fcopysign F8RC:$A, (fneg F8RC:$B)), ++ (CPYSND F8RC:$B, F8RC:$A)>; ++ ++def : Pat<(fneg (fcopysign F4RC:$A, F4RC:$B)), ++ (CPYSNS F4RC:$B, F4RC:$A)>; ++def : Pat<(fneg (fcopysign F8RC:$A, F8RC:$B)), ++ (CPYSND F8RC:$B, F8RC:$A)>; ++ ++def : Pat<(fneg (fcopysign F4RC:$A, F8RC:$B)), ++ (CPYSNS (COPY_TO_REGCLASS F8RC:$B, F4RC), F4RC:$A)>; ++def : Pat<(fneg (fcopysign F8RC:$A, F4RC:$B)), ++ (CPYSND (COPY_TO_REGCLASS F4RC:$B, F8RC), F8RC:$A)>; ++ ++def : Pat<(fcopysign F4RC:$A, F8RC:$B), ++ (CPYSS (COPY_TO_REGCLASS F8RC:$B, F4RC), F4RC:$A)>; ++def : Pat<(fcopysign F8RC:$A, F4RC:$B), ++ (CPYSD (COPY_TO_REGCLASS F4RC:$B, F8RC), F8RC:$A)>; ++ ++//4.6.3 float convert ++ ++let RA = 31 in ++class inst_fpu_cvt<bits<8> func, string opstr, ++ RegisterClass regorg, RegisterClass regdst, ++ SDPatternOperator OpNode=null_frag> ++ : FPForm<0x18, func, (ins regorg:$RB), (outs regdst:$RC), ++ opstr, "$RB,$RC", ++ [(set regdst:$RC, (OpNode regorg:$RB))]>; ++ ++let RA = 31 in ++class inst_fpu_cmov<bits<8> func, string opstr, ++ RegisterClass regorg, RegisterClass regdst, ++ SDPatternOperator OpNode=null_frag> ++ : FPForm<0x10, func, (ins regorg:$RB), (outs regdst:$RC), ++ opstr, "$RB,$RC", ++ [(set (i64 regdst:$RC), (OpNode regorg:$RB))]>; ++ ++let RA = 31 in ++class inst_flw<bits<8> func, string opstr, RegisterClass regtype> ++ : FPForm<0x18,
func, (ins regtype:$RB), ++ (outs regtype:$RC), opstr, "$RB,$RC">; ++ ++let RB = 31 in ++class inst_fpu_cvt1 func, string opstr, ++ RegisterClass regorg, RegisterClass regdst, ++ SDPatternOperator OpNode=null_frag> ++ : FPForm<0x18, func, (ins regorg:$RA), (outs regdst:$RC), ++ opstr, "$RA,$RC", ++ [(set regdst:$RC, (OpNode regorg:$RA))]>; ++ ++def CVTQS : inst_fpu_cvt<0x2D, "fcvtls", F8RC, F4RC, Sw64_cvtqs>; ++def CVTQT : inst_fpu_cvt<0x2F, "fcvtld", F8RC, F8RC, Sw64_cvtqt>; ++def CVTTQ : inst_fpu_cvt<0x24, "fcvtdl_z", F8RC, F8RC, Sw64_cvttq>; ++def CVTST : inst_fpu_cvt<0x20, "fcvtsd", F4RC, F8RC, fpextend>; ++def CVTTS : inst_fpu_cvt<0x21, "fcvtds", F8RC, F4RC, fpround>; ++def : Pat<(Sw64_cvtts F8RC:$RB), (CVTTS F8RC:$RB)>; ++def : Pat<(Sw64_cvtst F4RC:$RB), (CVTST F4RC:$RB)>; ++ ++def FCVTWL : inst_flw<0x28, "fcvtwl", F8RC>; ++def FCVTLW : inst_flw<0x29, "fcvtlw", F8RC>; ++ ++def FCTTDL_G : inst_fpu_cvt<0x22, "fcvtdl_g", F8RC, F8RC>; ++def FCTTDL_P : inst_fpu_cvt<0x23, "fcvtdl_p", F8RC, F8RC>; ++def FCTTDL_N : inst_fpu_cvt<0x25, "fcvtdl_n", F8RC, F8RC>; ++def FCTTDL : inst_fpu_cvt<0x27, "fcvtdl", F8RC, F8RC>; ++ ++def FCVTHS : inst_fpu_cvt<0x2E, "fcvths", F8RC, F8RC>; ++def FCVTSH : FCForm4L<0x1B, 0x37, (ins F8RC:$RA, F8RC:$RB, u6imm:$LIT), ++ (outs F8RC:$RD), "fcvtsh", "$RA,$RB,$LIT,$RD">; ++ ++def CMOVDL : inst_fpu_cmov<0x72, "cmovdl", F8RC, GPRC>; ++def CMOVDL_G : inst_fpu_cmov<0x74, "cmovdl_g", F8RC, GPRC>; ++def CMOVDL_P : inst_fpu_cmov<0x7A, "cmovdl_p", F8RC, GPRC>; ++def CMOVDL_Z : inst_fpu_cmov<0x7C, "cmovdl_z", F8RC, GPRC>; ++def CMOVDL_N : inst_fpu_cmov<0x80, "cmovdl_n", F8RC, GPRC>; ++def CMOVDLU : inst_fpu_cmov<0x81, "cmovdlu", F8RC, GPRC>; ++def CMOVDLU_G : inst_fpu_cmov<0x82, "cmovdlu_g", F8RC, GPRC>; ++def CMOVDLU_P : inst_fpu_cmov<0x83, "cmovdlu_p", F8RC, GPRC>; ++def CMOVDLU_Z : inst_fpu_cmov<0x84, "cmovdlu_z", F8RC, GPRC>; ++def CMOVDLU_N : inst_fpu_cmov<0x85, "cmovdlu_n", F8RC, GPRC>; ++def CMOVDWU : inst_fpu_cmov<0x86, "cmovdwu", F8RC, GPRC>; ++def CMOVDWU_G : inst_fpu_cmov<0x87, "cmovdwu_g", F8RC, GPRC>; ++def CMOVDWU_P : inst_fpu_cmov<0x88, "cmovdwu_p", F8RC, GPRC>; ++def CMOVDWU_Z : inst_fpu_cmov<0x89, "cmovdwu_z", F8RC, GPRC>; ++def CMOVDWU_N : inst_fpu_cmov<0x8A, "cmovdwu_n", F8RC, GPRC>; ++def CMOVDW : inst_fpu_cmov<0x8B, "cmovdw", F8RC, GPRC>; ++def CMOVDW_G : inst_fpu_cmov<0x8C, "cmovdw_g", F8RC, GPRC>; ++def CMOVDW_P : inst_fpu_cmov<0x8D, "cmovdw_p", F8RC, GPRC>; ++def CMOVDW_Z : inst_fpu_cmov<0x8E, "cmovdw_z", F8RC, GPRC>; ++def CMOVDW_N : inst_fpu_cmov<0x8F, "cmovdw_n", F8RC, GPRC>; ++ ++// f64 to i64 ++def : Pat<(i64 (fp_to_sint F8RC:$Rn)), (CMOVDL_Z F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(i64 (fp_to_sint (fround F8RC:$Rn))), (CMOVDL_G F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(i64 (fp_to_sint (fceil F8RC:$Rn))), (CMOVDL_P F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(i64 (fp_to_sint (ffloor F8RC:$Rn))), (CMOVDL_N F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(i64 (fp_to_sint (fnearbyint F8RC:$Rn))), (CMOVDL F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++ ++// f64 to u64 ++def : Pat<(i64 (fp_to_uint F8RC:$Rn)), (CMOVDLU_Z F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(i64 (fp_to_uint (fround F8RC:$Rn))), (CMOVDLU_G F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(i64 (fp_to_uint (fceil F8RC:$Rn))), (CMOVDLU_P F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(i64 (fp_to_uint (ffloor F8RC:$Rn))), (CMOVDLU_N F8RC:$Rn)>, 
Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(i64 (fp_to_uint (fnearbyint F8RC:$Rn))), (CMOVDLU F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++ ++// f32 to i64 ++def : Pat<(i64 (fp_to_sint F4RC:$Rn)), (CMOVDL_Z (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(i64 (fp_to_sint (fround F4RC:$Rn))), (CMOVDL_G (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(i64 (fp_to_sint (fceil F4RC:$Rn))), (CMOVDL_P (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(i64 (fp_to_sint (ffloor F4RC:$Rn))), (CMOVDL_N (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(i64 (fp_to_sint (fnearbyint F4RC:$Rn))), (CMOVDL (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++ ++// f32 to u64 ++def : Pat<(i64 (fp_to_uint F4RC:$Rn)), (CMOVDLU_Z (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(i64 (fp_to_uint (fround F4RC:$Rn))), (CMOVDLU_G (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(i64 (fp_to_uint (fceil F4RC:$Rn))), (CMOVDLU_P (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(i64 (fp_to_uint (ffloor F4RC:$Rn))), (CMOVDLU_N (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(i64 (fp_to_uint (fnearbyint F4RC:$Rn))), (CMOVDLU (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++ ++// f64 to u32 ++def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint F8RC:$Rn))), i32), ++ (CMOVDWU_Z F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (fround F8RC:$Rn)))), i32), ++ (CMOVDWU_G F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (fceil F8RC:$Rn)))), i32), ++ (CMOVDWU_P F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (ffloor F8RC:$Rn)))), i32), ++ (CMOVDWU_N F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (fnearbyint F8RC:$Rn)))), i32), ++ (CMOVDWU F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++ ++// f64 to i32 ++def : Pat<(sext_inreg (fp_to_sint F8RC:$Rn), i32), ++ (CMOVDW_Z F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(sext_inreg (fp_to_sint (fround F8RC:$Rn)), i32), ++ (CMOVDW_G F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(sext_inreg (fp_to_sint (fceil F8RC:$Rn)), i32), ++ (CMOVDW_P F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(sext_inreg (fp_to_sint (ffloor F8RC:$Rn)), i32), ++ (CMOVDW_N F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(sext_inreg (fp_to_sint (fnearbyint F8RC:$Rn)), i32), ++ (CMOVDW F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++ ++// f32 to u32 ++def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint F4RC:$Rn))), i32), ++ (CMOVDWU_Z (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (fround F4RC:$Rn)))), i32), ++ (CMOVDWU_G (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (fceil F4RC:$Rn)))), i32), ++ (CMOVDWU_P (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (ffloor F4RC:$Rn)))), i32), ++ (CMOVDWU_N (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (fnearbyint F4RC:$Rn)))), i32), ++ (CMOVDWU (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++ ++// f32 to i32 
++def : Pat<(sext_inreg (fp_to_sint F4RC:$Rn), i32), ++ (CMOVDW_Z (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(sext_inreg (fp_to_sint (fround F4RC:$Rn)), i32), ++ (CMOVDW_G (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(sext_inreg (fp_to_sint (fceil F4RC:$Rn)), i32), ++ (CMOVDW_P (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(sext_inreg (fp_to_sint (ffloor F4RC:$Rn)), i32), ++ (CMOVDW_N (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(sext_inreg (fp_to_sint (fnearbyint F4RC:$Rn)), i32), ++ (CMOVDW (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; ++ ++def CMOVLS : inst_fpu_cvt<0x48, "cmovls", GPRC, F4RC>; ++def CMOVWS : inst_fpu_cvt<0x49, "cmovws", GPRC, F4RC>; ++def CMOVLD : inst_fpu_cvt<0x4a, "cmovld", GPRC, F8RC>; ++def CMOVWD : inst_fpu_cvt<0x4b, "cmovwd", GPRC, F8RC>; ++def CMOVULS : inst_fpu_cvt<0x4c, "cmovuls", GPRC, F4RC>; ++def CMOVULD : inst_fpu_cvt<0x4e, "cmovuld", GPRC, F8RC>; ++def CMOVUWS : inst_fpu_cvt<0x4d, "cmovuws", GPRC, F4RC>; ++def CMOVUWD : inst_fpu_cvt<0x4f, "cmovuwd", GPRC, F8RC>; ++ ++def : Pat<(f32 (sint_to_fp GPRC:$Rn)), (CMOVLS GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(f32 (sint_to_fp (assertsext GPRC:$Rn))), (CMOVWS GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(f64 (sint_to_fp GPRC:$Rn)), (CMOVLD GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(f64 (sint_to_fp (assertsext GPRC:$Rn))), (CMOVWD GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(f32 (uint_to_fp GPRC:$Rn)), (CMOVULS GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(f64 (uint_to_fp GPRC:$Rn)), (CMOVULD GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(f32 (uint_to_fp (and (assertsext GPRC:$Rn), 0xffffffff))), (CMOVUWS GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++def : Pat<(f64 (uint_to_fp (and (assertsext GPRC:$Rn), 0xffffffff))), (CMOVUWD GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; ++ ++def FRIS : inst_fpu_cvt<0x5A, "fris", F4RC, F4RC, fnearbyint>; ++def FRIS_G : inst_fpu_cvt<0x5B, "fris_g", F4RC, F4RC, fround>; ++def FRIS_P : inst_fpu_cvt<0x5C, "fris_p", F4RC, F4RC, fceil>; ++def FRIS_Z : inst_fpu_cvt<0x5D, "fris_z", F4RC, F4RC, ftrunc>; ++def FRIS_N : inst_fpu_cvt<0x5F, "fris_n", F4RC, F4RC, ffloor>; ++ ++def FRID : inst_fpu_cvt<0x60, "frid", F8RC, F8RC, fnearbyint>; ++def FRID_G : inst_fpu_cvt<0x61, "frid_g", F8RC, F8RC, fround>; ++def FRID_P : inst_fpu_cvt<0x62, "frid_p", F8RC, F8RC, fceil>; ++def FRID_Z : inst_fpu_cvt<0x63, "frid_z", F8RC, F8RC, ftrunc>; ++def FRID_N : inst_fpu_cvt<0x64, "frid_n", F8RC, F8RC, ffloor>; ++ ++def SQRTSS : inst_flw<0x08, "fsqrts", F4RC>; ++def SQRTSD : inst_flw<0x09, "fsqrtd", F8RC>; ++ ++def FRECS : inst_fpu_cvt1<0x58, "frecs", F4RC, F4RC, Sw64_frecs>; ++def FRECD : inst_fpu_cvt1<0x59, "frecd", F8RC, F8RC, Sw64_frecd>; ++ ++def : Pat<(fsqrt F4RC:$RB), (SQRTSS F4RC:$RB)>; ++def : Pat<(fsqrt F8RC:$RB), (SQRTSD F8RC:$RB)>; ++ ++//4.6.6 int 2 float or float 2 int ++ ++let RB = 31 in ++class inst_fpu_fi opc, bits<8> func, string opstr, ++ RegisterClass regorg, RegisterClass regdst, ++ SDPatternOperator OpNode> ++ : FPForm; ++ ++def ITOFS : inst_fpu_fi<0x18, 0x40, "ifmovs", GPRC, F4RC, bitconvert>; ++def ITOFT : inst_fpu_fi<0x18, 0x41, "ifmovd", GPRC, F8RC, bitconvert>; ++def FTOIS : inst_fpu_fi<0x10, 0x70, "fimovs", F4RC, GPRC, bitconvert>; ++def FTOIT : inst_fpu_fi<0x10, 0x78, "fimovd", F8RC, GPRC, bitconvert>; ++ ++let DecoderNamespace = "SIMD" in { 
++class inst_fpu_fi_simd opc, bits<8> func, string opstr, ++ RegisterClass regorg, RegisterClass regdst, ++ SDPatternOperator OpNode> ++ : FPForm1; ++} ++ ++def ITOFStmp : inst_fpu_fi_simd<0x18, 0x40, "ifmovs", GPRC, FPRC, null_frag>; ++def ITOFTtmp : inst_fpu_fi_simd<0x18, 0x41, "ifmovd", GPRC, FPRC, null_frag>; ++def FTOIStmp : inst_fpu_fi_simd<0x10, 0x70, "fimovs", FPRC, GPRC, null_frag>; ++def FTOITtmp : inst_fpu_fi_simd<0x10, 0x78, "fimovd", FPRC, GPRC, null_frag>; ++ ++multiclass inst_fpu_fma funcS, bits<6> funcD, string opstr> { ++ def S : FForm4<0x19, funcS, (ins F4RC:$RA, F4RC:$RB, F4RC:$RC), ++ (outs F4RC:$RD), opstr # "s", "$RA,$RB,$RC,$RD">; ++ ++ def D : FForm4<0x19, funcD, (ins F8RC:$RA, F8RC:$RB, F8RC:$RC), ++ (outs F8RC:$RD), opstr # "d", "$RA,$RB,$RC,$RD">; ++} ++ ++defm FMA : inst_fpu_fma<0x00, 0x01, "fma">; ++defm FMS : inst_fpu_fma<0x02, 0x03, "fms">; ++defm FNMA : inst_fpu_fma<0x04, 0x05, "fnma">; ++defm FNMS : inst_fpu_fma<0x06, 0x07, "fnms">; ++ ++multiclass fma_pat { ++def : Pat<(fma regtype:$RA, regtype:$RB, regtype:$RC), ++ (!cast("FMA" # type) ++ regtype:$RA, regtype:$RB, regtype:$RC)>; ++ ++def : Pat<(fma regtype:$RA, regtype:$RB, ++ (fneg regtype:$RC)), ++ (!cast("FMS" # type) ++ regtype:$RA, regtype:$RB, regtype:$RC)>; ++ ++def : Pat<(fneg ++ (fma regtype:$RA, regtype:$RB, ++ (fneg regtype:$RC))), ++ (!cast("FNMA" # type) ++ regtype:$RA, regtype:$RB, regtype:$RC)>; ++ ++def : Pat<(fneg ++ (fma regtype:$RA, regtype:$RB, regtype:$RC)), ++ (!cast("FNMS" # type) ++ regtype:$RA, regtype:$RB, regtype:$RC)>; ++ ++} ++ ++defm : fma_pat; ++defm : fma_pat; ++ ++class inst_fpu_select64 fun, string opstr> ++ : FForm4 <0x19, fun, (ins F8RC:$RC, F8RC:$RB, F8RC:$RA), ++ (outs F8RC:$RD), opstr, "$RA,$RB,$RC,$RD">; ++ ++let DecoderNamespace = "FP32" in { ++class inst_fpu_select32 fun, string opstr> ++ : FForm4 <0x19, fun, (ins F4RC:$RC, F4RC:$RB, F8RC:$RA), ++ (outs F4RC:$RD), opstr, "$RA,$RB,$RC,$RD">; ++} ++def FSELEQD : inst_fpu_select64<0x10, "fseleq">; ++def FSELNED : inst_fpu_select64<0x11, "fselne">; ++def FSELLTD : inst_fpu_select64<0x12, "fsellt">; ++def FSELLED : inst_fpu_select64<0x13, "fselle">; ++def FSELGTD : inst_fpu_select64<0x14, "fselgt">; ++def FSELGED : inst_fpu_select64<0x15, "fselge">; ++def FSELEQS : inst_fpu_select32<0x10, "fseleq">; ++def FSELNES : inst_fpu_select32<0x11, "fselne">; ++def FSELLTS : inst_fpu_select32<0x12, "fsellt">; ++def FSELLES : inst_fpu_select32<0x13, "fselle">; ++def FSELGTS : inst_fpu_select32<0x14, "fselgt">; ++def FSELGES : inst_fpu_select32<0x15, "fselge">; ++ ++multiclass f_select_pat { ++def : Pat<(select (i64 (OpNode F8RC:$RA, F8RC:$RB)), F4RC:$st, F4RC:$sf), ++ (!cast(Inst # "S") ++ F4RC:$sf, F4RC:$st, (InstCmp F8RC:$RA, F8RC:$RB))>; ++def : Pat<(select (i64 (OpNode F8RC:$RA, F8RC:$RB)), F8RC:$st, F8RC:$sf), ++ (!cast(Inst # "D") ++ F8RC:$sf, F8RC:$st, (InstCmp F8RC:$RA, F8RC:$RB))>; ++} ++ ++multiclass f_select_pat_c { ++def : Pat<(select (i64 (OpNode F8RC:$RA, F8RC:$RB)), F4RC:$st, F4RC:$sf), ++ (!cast(Inst # "S") ++ F4RC:$sf, F4RC:$st, (InstCmp F8RC:$RB, F8RC:$RA))>; ++def : Pat<(select (i64 (OpNode F8RC:$RA, F8RC:$RB)), F8RC:$st, F8RC:$sf), ++ (!cast(Inst # "D") ++ F8RC:$sf, F8RC:$st, (InstCmp F8RC:$RB, F8RC:$RA))>; ++} ++defm : f_select_pat; ++defm : f_select_pat; ++defm : f_select_pat; ++defm : f_select_pat; ++ ++defm : f_select_pat_c; ++defm : f_select_pat_c; ++ ++def : Pat<(select GPRC:$RC, F8RC:$st, F8RC:$sf), ++ (f64 (FSELEQD F8RC:$st, F8RC:$sf, (ITOFT GPRC:$RC)))>; ++def : Pat<(select GPRC:$RC, F4RC:$st, 
F4RC:$sf), ++ (f32 (FSELEQS F4RC:$st, F4RC:$sf, (ITOFT GPRC:$RC)))>; ++ ++//4.6.9 read and write float register ++let RB=31, RC=31 in { ++def RFPCR : FPForm<0x18,0x50,(ins), (outs F8RC:$RA), "rfpcr", "$RA">; ++def WFPCR : FPForm<0x18,0x51,(ins F8RC:$RA), (outs), "wfpcr", "$RA">; ++} ++ ++let RA=31, RB = 31, RC=31 in { ++def SETFPEC0 : FPForm<0x18,0x54,(ins), (outs),"setfpec0","">; ++def SETFPEC1 : FPForm<0x18,0x55,(ins), (outs),"setfpec1","">; ++def SETFPEC2 : FPForm<0x18,0x56,(ins), (outs),"setfpec2","">; ++def SETFPEC3 : FPForm<0x18,0x57,(ins), (outs),"setfpec3","">; ++ ++def NOP : FPForm<0x10,0x3a,(ins), (outs),"nop", "">; ++} ++//4.7 sundry instruction ++let RA = 0, RB= 0, hasNoSchedulingInfo=1 in{ ++def WMEMB : MfcForm<0x06, 0x0002,(ins), (outs), "wmemb">; ++def IMEMB : MfcForm<0x06, 0x0001,(ins), (outs), "imemb">; ++def MB : MfcForm<0x06, 0x0000,(ins), (outs), "memb">; //memory barrier ++def HALT : MfcForm<0x06, 0x0080,(ins), (outs), "halt">; ++} ++def : Pat<(trap), (SYS_CALL 0x80)>; ++def : Pat<(atomic_fence (i64 5), (timm)), (WMEMB)>,Requires<[EnableWmembInst, HasCore4]>; ++def : Pat<(atomic_fence (timm), (timm)), (MB)>; ++let RB = 31 in { ++//4.7.3 read time counter ++def RPCC : MfcForm<0x06, 0x0020,(ins), (outs GPRC:$RA), "rtc", "$RA">; //Read process cycle counter ++//4.7.4 read cpu core id ++def RCID : MfcForm<0x06, 0x0040,(ins), (outs GPRC:$RA), "rcid", "$RA">; ++} ++//4.7.6 atom operate instruction ++ ++let mayLoad = 1 in { ++def LDQ_L : MFuncForm<0x08,0x1,(ins s64imm:$disp, GPRC:$RB),(outs GPRC:$RA), ++ "lldl", "$RA,${disp}(${RB})">; ++def LDL_L : MFuncForm<0x08,0x0,(ins s64imm:$disp, GPRC:$RB),(outs GPRC:$RA), ++ "lldw", "$RA,${disp}(${RB})">; ++} ++ ++ ++let mayStore = 1 in { ++def STQ_C : MFuncForm<0x08,0x9, (ins GPRC:$RA, s64imm:$disp, GPRC:$RB), (outs), ++ "lstl","$RA,${disp}(${RB})">; ++def STL_C : MFuncForm<0x08,0x8, (ins GPRC:$RA, s64imm:$disp, GPRC:$RB), (outs), ++ "lstw","$RA,${disp}(${RB})">; ++} ++ ++let RB = 31, hasNoSchedulingInfo = 1 in { ++def WR_F : MfcForm<0x06, 0x1020, (ins GPRC:$RA) , (outs), "wr_f", "$RA">; ++ ++def RD_F : MfcForm<0x06, 0x1000, (ins GPRC:$RA) , (outs), "rd_f", "$RA">; ++} ++ ++//4.8 cache control instruction ++// Prefetch ++def SDT_ZPrefetch : SDTypeProfile<0, 2, [ ++ SDTCisPtrTy<0>, SDTCisInt<1>, ++]>; ++ ++def z_s_fillcs : SDNode<"Sw64ISD::Z_S_FILLCS", SDT_ZPrefetch, [SDNPHasChain]>; ++def z_s_fillde : SDNode<"Sw64ISD::Z_S_FILLDE", SDT_ZPrefetch, [SDNPHasChain]>; ++def z_fillde : SDNode<"Sw64ISD::Z_FILLDE", SDT_ZPrefetch, [SDNPHasChain]>; ++def z_fillde_e : SDNode<"Sw64ISD::Z_FILLDE_E", SDT_ZPrefetch, [SDNPHasChain]>; ++def z_fillcs : SDNode<"Sw64ISD::Z_FILLCS", SDT_ZPrefetch, [SDNPHasChain]>; ++def z_fillcs_e : SDNode<"Sw64ISD::Z_FILLCS_E", SDT_ZPrefetch, [SDNPHasChain]>; ++def z_e_fillcs : SDNode<"Sw64ISD::Z_E_FILLCS", SDT_ZPrefetch, [SDNPHasChain]>; ++def z_e_fillde : SDNode<"Sw64ISD::Z_E_FILLDE", SDT_ZPrefetch, [SDNPHasChain]>; ++ ++def z_flushd : SDNode<"Sw64ISD::Z_FLUSHD", SDT_ZPrefetch, [SDNPHasChain]>; ++ ++let usesCustomInserter = 1, mayLoad = 1, mayStore = 0, RA = 31 in ++class Prefetch opcode, SDPatternOperator loadop> ++ : MForm; ++ ++def FILLCS : Prefetch<"fillcs", 0x09, z_fillcs>; ++def S_FILLDE : Prefetch<"s_fillde", 0x23, z_s_fillde>; ++def S_FILLCS : Prefetch<"s_fillcs", 0x22, z_s_fillcs>; ++def FILLDE : Prefetch<"fillde", 0x26, z_fillde>; ++def FILLDE_E : Prefetch<"fillde_e", 0x27, z_fillde_e>; ++def FILLCS_E : Prefetch<"fillcs_e", 0x0B, z_fillcs_e>; ++def E_FILLCS : Prefetch<"e_fillcs", 0x0A, z_e_fillcs>; ++def 
E_FILLDE : Prefetch<"e_fillde", 0x0C, z_e_fillde>; ++//def FLUSHD : Prefetch<"flushd", 0x20, z_flushd>; ++ ++// END Prefetch ++ ++ ++//4.9 privilege instruction ++def DPFHR : MPrvlForm<0x1e,0xe,(ins u5imm:$TH, s64imm:$disp, GPRC:$RB), (outs), ++ "dpfhr", "$TH,${disp}(${RB})">; ++def DPFHW : MPrvlForm<0x1e,0xf,(ins u5imm:$TH, s64imm:$disp, GPRC:$RB), (outs), ++ "dpfhw", "$TH,${disp}(${RB})">; ++ ++//4.9.1 csrr and csrw ++// ---------------------------------------------------------- ++def CSRR : CSRForm<0x06, 0xfe, (ins GPRC:$RA, u5imm:$L), (outs), "csrr", "$RA,$L">; ++def CSRW : CSRForm<0x06, 0xff, (ins GPRC:$RA, u5imm:$L), (outs), "csrw", "$RA,$L">; ++// ---------------------------------------------------------- ++ ++//4.9.2 csrws and csrwc ++// ---------------------------------------------------------- ++def CSRWS : CSRForm<0x06, 0xfc, (ins GPRC:$RA, u5imm:$L), (outs), "csrws", "$RA,$L">; ++def CSRWC : CSRForm<0x06, 0xfd, (ins GPRC:$RA, u5imm:$L), (outs), "csrwc", "$RA,$L">; ++// ---------------------------------------------------------- ++ ++class BrPat ++ : Pat<(brcond GPRC:$RA, bb:$DISP), ++ (Inst GPRC:$RA, bb:$DISP)>; ++ ++class BrPat_const ++ : Pat<(brcond (i64 (CondOp GPRC:$RA, 0)), bb:$DISP), ++ (Inst GPRC:$RA, bb:$DISP)>; ++ ++class BrPat_cond ++ : Pat<(brcond (i64 (CondOp regtype:$RB, regtype:$RA)), bb:$DISP), ++ (InstBr (InstCmp regtype:$RA, regtype:$RB), bb:$DISP)>; ++class BrPat_cond_i ++ : Pat<(brcond (i64 (CondOp regtype:$RA, regtype:$RB)), bb:$DISP), ++ (InstBr (InstCmp regtype:$RA, regtype:$RB), bb:$DISP)>; ++ ++class BrSwapPat_cond ++ : Pat<(brcond (i64 (CondOp regtype:$RB, regtype:$RA)), bb:$DISP), ++ (InstBr (InstCmp regtype:$RB, regtype:$RA), bb:$DISP)>; ++ ++class BrPat_f ++ : Pat<(brcond (i64 (CondOp F8RC:$RA, immFPZ)), bb:$DISP), ++ (InstBr F8RC:$RA, bb:$DISP)>; ++ ++ ++def : BrPat_const; ++def : BrPat_const; ++def : BrPat_const; ++def : BrPat_const; ++def : BrPat_const; ++def : BrPat_const; ++ ++def : BrPat; ++def : BrPat_cond_i; ++def : BrPat_cond_i; ++def : BrPat_cond_i; ++def : BrPat_cond_i; ++def : BrPat_cond_i; ++ ++def : BrPat_cond_i; ++def : BrPat_cond_i; ++ ++def : BrPat_f; ++def : BrPat_f; ++def : BrPat_f; ++def : BrPat_f; ++ ++def : BrPat_cond; ++def : BrPat_cond; ++def : BrPat_cond; ++def : BrPat_cond; ++ ++def : BrSwapPat_cond; ++def : BrSwapPat_cond; ++def : BrSwapPat_cond; ++def : BrSwapPat_cond; ++ ++def : BrPat_cond_i; ++def : BrPat_cond_i; ++ ++def : BrPat_cond_i; ++def : BrPat_cond_i; ++ ++ ++ class IBrPat_cond_i ++ : Pat<(brcond (i64(CondOp regtype:$RA, immUExt8:$RB)), bb:$DISP), ++ (InstBr (InstCmp regtype:$RA, immUExt8:$RB), bb:$DISP)>; ++ def : IBrPat_cond_i; ++ def : IBrPat_cond_i; ++ def : IBrPat_cond_i; ++ def : IBrPat_cond_i; ++ def : IBrPat_cond_i; ++ def : IBrPat_cond_i; ++ ++ def : IBrPat_cond_i; ++ def : IBrPat_cond_i; ++ def : IBrPat_cond_i; ++ def : IBrPat_cond_i; ++ ++ ++//Int cond patterns ++def : Pat<(brcond (i64 (and GPRC:$RA, 1)), bb:$DISP), ++ (BLBS GPRC:$RA, bb:$DISP)>; ++ ++def : Pat<(brcond (i64 (seteq (and GPRC:$RA, 1), 0)), bb:$DISP), ++ (BLBC GPRC:$RA, bb:$DISP)>; ++ ++//Constant handling ++def immSExt16int : PatLeaf<(imm), [{ //(int)imm fits in a 16 bit sign extended field ++ return ((int64_t)N->getZExtValue() << 48) >> 48 == ++ ((int64_t)N->getZExtValue() << 32) >> 32; ++}], SExt16>; ++ ++def immConst2PartInt : PatLeaf<(imm), [{ ++ //true if imm fits in a LDAH LDA pair with zeroext ++ uint64_t uval = N->getZExtValue(); ++ int32_t val32 = (int32_t)uval; ++ return ((uval >> 32) == 0 && //empty upper bits ++ val32 <= 
IMM_FULLHIGH); ++// val32 >= IMM_FULLLOW + IMM_LOW * IMM_MULT); //Always True ++}]>; ++ ++def immConst2Part : PatLeaf<(imm), [{ ++ //true if imm fits in a LDAH LDA pair ++ int64_t val = (int64_t)N->getZExtValue(); ++ return (val <= IMM_FULLHIGH && val >= IMM_FULLLOW); ++}]>; ++ ++ ++def : Pat<(i64 immConst2Part:$imm), ++ (LDA (LL16 immConst2Part:$imm), ++ (LDAH (LH16 immConst2Part:$imm), R31))>; ++ ++def : Pat<(i64 immSExt16:$imm), ++ (LDA immSExt16:$imm, R31)>; ++ ++def : Pat<(i64 immSExt16int:$imm), ++ (ZAPNOTi (LDA immSExt16int:$imm, R31), 15)>; ++ ++def : Pat<(i64 immConst2PartInt:$imm), ++ (ZAPNOTi (LDA (LL16 (i64 (SExt32 immConst2PartInt:$imm))), ++ (LDAH (LH16 (i64 (SExt32 immConst2PartInt:$imm))), ++ R31)), 15)>; ++ ++//===----------------------------------------------------------------------===// ++// Instruction aliases ++//===----------------------------------------------------------------------===// ++// 4.5.1 integer caculate ++def : InstAlias<"addw $RA,$L,$RC", ++ (ADDLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"subw $RA,$L,$RC", ++ (SUBLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"s4addw $RA,$L,$RC", ++ (S4ADDLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"s4subw $RA,$L,$RC", ++ (S4SUBLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"s8addw $RA,$L,$RC", ++ (S8ADDLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"s8subw $RA,$L,$RC", ++ (S8SUBLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"addl $RA,$L,$RC", ++ (ADDQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"subl $RA,$L,$RC", ++ (SUBQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"s4addl $RA,$L,$RC", ++ (S4ADDQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"s4subl $RA,$L,$RC", ++ (S4SUBQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"s8addl $RA,$L,$RC", ++ (S8ADDQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"s8subl $RA,$L,$RC", ++ (S8SUBQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"mulw $RA,$L,$RC", ++ (MULLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"mull $RA,$L,$RC", ++ (MULQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"umulh $RA,$L,$RC", ++ (UMULHi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"zap $RA,$L,$RC", ++ (ZAPi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"zapnot $RA,$L,$RC", ++ (ZAPNOTi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"addpi $L,$RC", ++ (ADDPI GPRC:$RC, s13imm:$L), 0>; ++def : InstAlias<"addpis $L,$RC", ++ (ADDPIS GPRC:$RC, s13imm:$L), 0>; ++def : InstAlias<"sbt $RA,$L,$RC", ++ (SBTi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"cbt $RA,$L,$RC", ++ (CBTi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++ ++// 4.5.2 integer cmp ++def : InstAlias<"cmpeq $RA,$L,$RC", ++ (CMPEQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"cmple $RA,$L,$RC", ++ (CMPLEi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"cmplt $RA,$L,$RC", ++ (CMPLTi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"cmpule $RA,$L,$RC", ++ (CMPULEi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"cmpult $RA,$L,$RC", ++ (CMPULTi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++ ++// 4.5.3 integer order ++def : InstAlias<"and $RA,$L,$RC", ++ (ANDi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"bic $RA,$L,$RC", ++ (BICi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"bis $RA,$L,$RC", ++ (BISi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"ornot $RA,$L,$RC", ++ (ORNOTi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"xor $RA,$L,$RC", ++ (XORi GPRC:$RA, GPRC:$RC, 
u8imm:$L), 0>; ++def : InstAlias<"eqv $RA,$L,$RC", ++ (EQVi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++ ++// 4.5.4 integer move position ++def : InstAlias<"sll $RA,$L,$RC", ++ (SLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"srl $RA,$L,$RC", ++ (SRLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++def : InstAlias<"sra $RA,$L,$RC", ++ (SRAi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; ++ ++// 4.5.5 integer cond select ++def : InstAlias<"seleq $RCOND,$RTRUE,$RFALSE,$RDEST", ++ (SELEQi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), ++ 0>; ++def : InstAlias<"seleq $RCOND,$RTRUE,$RFALSE,$RDEST", ++ (SELEQr GPRC:$RCOND, GPRC:$RTRUE, GPRC:$RFALSE, GPRC:$RDEST), ++ 0>; ++def : InstAlias<"selge $RCOND,$RTRUE,$RFALSE,$RDEST", ++ (SELGEi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), ++ 0>; ++def : InstAlias<"selgt $RCOND,$RTRUE,$RFALSE,$RDEST", ++ (SELGTi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), ++ 0>; ++def : InstAlias<"selle $RCOND,$RTRUE,$RFALSE,$RDEST", ++ (SELLEi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), ++ 0>; ++def : InstAlias<"sellt $RCOND,$RTRUE,$RFALSE,$RDEST", ++ (SELLTi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), ++ 0>; ++def : InstAlias<"selne $RCOND,$RTRUE,$RFALSE,$RDEST", ++ (SELNEi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), ++ 0>; ++def : InstAlias<"sellbc $RCOND,$RTRUE,$RFALSE,$RDEST", ++ (SELLBCi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), ++ 0>; ++def : InstAlias<"sellbs $RCOND,$RTRUE,$RFALSE,$RDEST", ++ (SELLBSi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), ++ 0>; ++ ++// 4.5.6 byte operate ++def : InstAlias<"nop", (BISr R31, R31, R31), 0>; ++ ++def : InstAlias<"ldi $RA, $imm", ++ (LDA GPRC:$RA, s64imm:$imm, R31), 0>; ++def : InstAlias<"br $disp", ++ (BR R31, target:$disp), 0>; ++def : InstAlias<"mov $imm,$RA", ++ (BISi GPRC:$RA, R31, u8imm:$imm), 0>; ++def : InstAlias<"mov $RB,$RA", ++ (BISr GPRC:$RA, R31, GPRC:$RB), 0>; ++def : InstAlias<"ret $31,($26), $imm", ++ (RET R31, R26, s16imm:$imm), 0>; ++def : InstAlias<"ret", ++ (RET R31, R26, 1), 0>; ++def : InstAlias<"ldgp $29,0(${RA})", ++ (MOVaddrPCGp 0, 0, GPRC:$RA), 0>; ++def : InstAlias<"clr $RA", ++ (BISr R31, R31, GPRC:$RA), 0>; ++ ++//===----------------------------------------------------------------------===// ++// Sw64 vector Definitions. ++//===----------------------------------------------------------------------===// ++ ++include "Sw64VectorVarDefine.td" ++include "Sw64InstrFormatsV.td" ++include "Sw64InstrVector.td" +diff --git a/llvm/lib/Target/Sw64/Sw64InstrVector.td b/llvm/lib/Target/Sw64/Sw64InstrVector.td +new file mode 100644 +index 000000000..574345fdb +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64InstrVector.td +@@ -0,0 +1,1970 @@ ++//===- Sw64InstrVector.td - SIMD instructions -*- tablegen ----------------*-=// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file describes Sw64 SIMD instructions. 
++// ++//===----------------------------------------------------------------------===// ++ ++class IsCommutable { ++ bit isCommutable = 1; ++} ++ ++def SDT_VSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, ++ SDTCisInt<1>, ++ SDTCisSameAs<1, 2>, ++ SDTCisVT<3, OtherVT>]>; ++def SDT_VFSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, ++ SDTCisFP<1>, ++ SDTCisSameAs<1, 2>, ++ SDTCisVT<3, OtherVT>]>; ++ ++def SDT_VSHF : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<3>, ++ SDTCisSameAs<0, 1>]>; ++ ++//def SDT_VSHFQ : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>, ++// SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>; ++ ++//def SDT_VCONW : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>, ++// SDTCisVec<1>, SDTCisFP<3>, ++// SDTCisSameAs<0, 2>, SDTCisSameAs<1, 2>]>; ++// ++//def SDT_VCON : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>, ++// SDTCisVec<1>, SDTCisFP<3>, ++// SDTCisSameAs<0, 2>, SDTCisSameAs<1, 2>]>; ++ ++def SDT_SHF : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, ++ SDTCisVT<1, i32>, SDTCisSameAs<0, 2>]>; ++def SDT_ILV : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, ++ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; ++def SDT_INSVE : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0, 1>, ++ SDTCisVT<2, i32>, SDTCisSameAs<0, 3>, ++ SDTCisVT<4, i32>]>; ++def SDT_VINSECTL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, ++ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>; ++ ++def SDT_VecReduce : SDTypeProfile<1, 1, [ // vector reduction ++ SDTCisFP<0>, SDTCisVec<1> ++]>; ++ ++def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; ++ ++def SDT_ZVecBinaryFp : SDTypeProfile<1, 2, [SDTCisVec<0>, ++ SDTCisSameAs<0, 1>, ++ SDTCisVT<2, f32>]>; ++ ++def SDT_ZVecBinaryInt : SDTypeProfile<1, 2, [SDTCisVec<0>, ++ SDTCisSameAs<0, 1>, ++ SDTCisVT<2, i64>]>; ++ ++def SDT_ZVecCT : SDTypeProfile<1, 1, [ // vector number of head 0/1. ++ SDTCisInt<0>, SDTCisVec<1> ++]>; ++def SDT_ZVecFREC : SDTypeProfile<1, 1, [ // vector number of head 0/1. 
++ SDTCisVec<0>, SDTCisFP<0>, SDTCisSameAs<0, 1> ++]>; ++ ++def SDT_Vlog : SDTypeProfile<1, 4, [ // vlogzz ++ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>, SDTCisInt<4> ++]>; ++ ++def SDT_ZVecFCMP : SDTypeProfile<1, 2, [ ++ SDTCisVec<0>, SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ++]>; ++def SDT_ZVecFCVT : SDTypeProfile<1, 1, [ ++ SDTCisVec<0>, SDTCisVec<1> ++]>; ++def SDT_ZVecFCVTDL : SDTypeProfile<1, 1, [ ++ SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCisFP<1> ++]>; ++def SDT_ZVecFCVTSH : SDTypeProfile<1, 3, [ ++ SDTCisVec<0>, SDTCisVec<1> ++]>; ++def SDT_ZVecFCVTHS : SDTypeProfile<1, 2, [ ++ SDTCisVec<0>, SDTCisVec<1> ++]>; ++ ++def SDT_ZVecFRI : SDTypeProfile<1, 1, [ ++ SDTCisVec<0>, SDTCisFP<0>, SDTCisSameAs<0, 1> ++]>; ++def SDT_ZVecCPY : SDTypeProfile<1, 1, [ ++ SDTCisVec<1> ++]>; ++ ++def SDT_VSELECT : SDTypeProfile<1, 3, [ ++ SDTCisInt<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ++]>; ++def SDT_VSQRT : SDTypeProfile<1, 1, [ ++ SDTCisVec<0>, SDTCisFP<0>, SDTCisSameAs<0, 1> ++]>; ++def SDT_VSUMF : SDTypeProfile<1, 1, [ ++ SDTCisFP<0>, SDTCisVec<1>, SDTCisFP<1> ++]>; ++def SDT_Sw64VTruncStore : SDTypeProfile<0, 2, [SDTCisPtrTy<1>]>; ++ ++def Sw64VBroadCastLd : SDNode<"Sw64ISD::VBROADCAST_LD", SDTLoad>; ++def Sw64VBroadCast : SDNode<"Sw64ISD::VBROADCAST", SDTVBroadcast>; ++ ++def Sw64VBroadCasti32: PatFrag<(ops node:$src), ++ (Sw64VBroadCastLd node:$src), [{ ++ return cast(N)->getMemoryVT().getStoreSize() == 4; ++}]>; ++ ++def Sw64VBroadCastf32: PatFrag<(ops node:$src), ++ (Sw64VBroadCastLd node:$src), [{ ++ return cast(N)->getMemoryVT().getStoreSize() == 4 ++ && cast(N)->getMemoryVT() == MVT::f32; ++}]>; ++ ++def Sw64VBroadCastf64: PatFrag<(ops node:$src), ++ (Sw64VBroadCastLd node:$src), [{ ++ return cast(N)->getMemoryVT().getStoreSize() == 8; ++}]>; ++ ++def Sw64VTruncStore : SDNode<"Sw64ISD::VTRUNCST", SDT_Sw64VTruncStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; ++ ++def z_ldwe : SDNode<"Sw64ISD::VLDWE", SDTLoad>; ++def z_ldse : SDNode<"Sw64ISD::VLDSE", SDTLoad>; ++def z_ldde : SDNode<"Sw64ISD::VLDDE", SDTLoad>; ++ ++def z_vlog : SDNode<"Sw64ISD::VLOG", SDT_Vlog>; ++ ++def z_ctpop : SDNode<"Sw64ISD::VCTPOP", SDT_ZVecCT>; ++def z_ctlz : SDNode<"Sw64ISD::VCTLZ", SDT_ZVecCT>; ++ ++def Sw64VNOR : SDNode<"Sw64ISD::VNOR", SDTIntBinOp, ++ [SDNPCommutative, SDNPAssociative]>; ++def Sw64VEQV : SDNode<"Sw64ISD::VEQV", SDTIntBinOp>; ++def Sw64VORNOT : SDNode<"Sw64ISD::VORNOT", SDTIntBinOp>; ++ ++ ++def Sw64VSHF : SDNode<"Sw64ISD::VSHF", SDT_VSHF>; ++//def Sw64VCONW : SDNode<"Sw64ISD::VCON_W", SDT_VCONW>; ++//def Sw64VCONS : SDNode<"Sw64ISD::VCON_S", SDT_VCON>; ++//def Sw64VCOND : SDNode<"Sw64ISD::VCON_D", SDT_VCON>; ++ ++def Sw64SHF : SDNode<"Sw64ISD::SHF", SDT_SHF>; ++def Sw64ILVEV : SDNode<"Sw64ISD::ILVEV", SDT_ILV>; ++def Sw64ILVOD : SDNode<"Sw64ISD::ILVOD", SDT_ILV>; ++def Sw64ILVL : SDNode<"Sw64ISD::ILVL", SDT_ILV>; ++def Sw64ILVR : SDNode<"Sw64ISD::ILVR", SDT_ILV>; ++def Sw64PCKEV : SDNode<"Sw64ISD::PCKEV", SDT_ILV>; ++def Sw64PCKOD : SDNode<"Sw64ISD::PCKOD", SDT_ILV>; ++def Sw64INSVE : SDNode<"Sw64ISD::INSVE", SDT_INSVE>; ++ ++def Sw64VFCMPEQ : SDNode<"Sw64ISD::VFCMPEQ", SDT_ZVecFCMP>; ++def Sw64VFCMPLE : SDNode<"Sw64ISD::VFCMPLE", SDT_ZVecFCMP>; ++def Sw64VFCMPLT : SDNode<"Sw64ISD::VFCMPLT", SDT_ZVecFCMP>; ++def Sw64VFCMPUN : SDNode<"Sw64ISD::VFCMPUN", SDT_ZVecFCMP>; ++ ++def Sw64VFCVTSD : SDNode<"Sw64ISD::VFCVTSD", SDT_ZVecFCVT>; ++def Sw64VFCVTDS : SDNode<"Sw64ISD::VFCVTDS", SDT_ZVecFCVT>; ++def Sw64VFCVTLS : 
SDNode<"Sw64ISD::VFCVTLS", SDT_ZVecFCVT>; ++def Sw64VFCVTLD : SDNode<"Sw64ISD::VFCVTLD", SDT_ZVecFCVT>; ++def Sw64VFCVTSH : SDNode<"Sw64ISD::VFCVTSH", SDT_ZVecFCVTSH>; ++def Sw64VFCVTHS : SDNode<"Sw64ISD::VFCVTHS", SDT_ZVecFCVTHS>; ++ ++def Sw64VFCVTDL : SDNode<"Sw64ISD::VFCVTDL", SDT_ZVecFCVTDL>; ++def Sw64VFCVTDLG : SDNode<"Sw64ISD::VFCVTDLG", SDT_ZVecFCVTDL>; ++def Sw64VFCVTDLP : SDNode<"Sw64ISD::VFCVTDLP", SDT_ZVecFCVTDL>; ++def Sw64VFCVTDLZ : SDNode<"Sw64ISD::VFCVTDLZ", SDT_ZVecFCVTDL>; ++def Sw64VFCVTDLN : SDNode<"Sw64ISD::VFCVTDLN", SDT_ZVecFCVTDL>; ++ ++def Sw64VFRIS : SDNode<"Sw64ISD::VFRIS", SDT_ZVecFRI>; ++def Sw64VFRISG : SDNode<"Sw64ISD::VFRISG", SDT_ZVecFRI>; ++def Sw64VFRISP : SDNode<"Sw64ISD::VFRISP", SDT_ZVecFRI>; ++def Sw64VFRISZ : SDNode<"Sw64ISD::VFRISZ", SDT_ZVecFRI>; ++def Sw64VFRISN : SDNode<"Sw64ISD::VFRISN", SDT_ZVecFRI>; ++def Sw64VFRID : SDNode<"Sw64ISD::VFRID", SDT_ZVecFRI>; ++def Sw64VFRIDG : SDNode<"Sw64ISD::VFRIDG", SDT_ZVecFRI>; ++def Sw64VFRIDP : SDNode<"Sw64ISD::VFRIDP", SDT_ZVecFRI>; ++def Sw64VFRIDZ : SDNode<"Sw64ISD::VFRIDZ", SDT_ZVecFRI>; ++def Sw64VFRIDN : SDNode<"Sw64ISD::VFRIDN", SDT_ZVecFRI>; ++ ++def vseleqw : SDNode<"Sw64ISD::VSELEQW", SDT_VSELECT>; ++def vselltw : SDNode<"Sw64ISD::VSELLTW", SDT_VSELECT>; ++def vsellew : SDNode<"Sw64ISD::VSELLEW", SDT_VSELECT>; ++def vsellbcw : SDNode<"Sw64ISD::VSELLBCW", SDT_VSELECT>; ++ ++def vfcmovlt : SDNode<"Sw64ISD::VFCMOVLT", SDTFPTernaryOp>; ++def vfcmovle : SDNode<"Sw64ISD::VFCMOVLE", SDTFPTernaryOp>; ++def vfcmoveq : SDNode<"Sw64ISD::VFCMOVEQ", SDTFPTernaryOp>; ++ ++def vect_vucaddw : SDNode<"Sw64ISD::VECT_VUCADDW", SDTIntBinOp>; ++def vect_vucaddh : SDNode<"Sw64ISD::VECT_VUCADDH", SDTIntBinOp>; ++def vect_vucaddb : SDNode<"Sw64ISD::VECT_VUCADDB", SDTIntBinOp>; ++def vect_vucsubw : SDNode<"Sw64ISD::VECT_VUCSUBW", SDTIntBinOp>; ++def vect_vucsubh : SDNode<"Sw64ISD::VECT_VUCSUBH", SDTIntBinOp>; ++def vect_vucsubb : SDNode<"Sw64ISD::VECT_VUCSUBB", SDTIntBinOp>; ++ ++def z_vshl_by_scalar : SDNode<"Sw64ISD::VSHL_BY_SCALAR", ++ SDT_ZVecBinaryFp>; ++def z_vsrl_by_scalar : SDNode<"Sw64ISD::VSRL_BY_SCALAR", ++ SDT_ZVecBinaryFp>; ++def z_vsra_by_scalar : SDNode<"Sw64ISD::VSRA_BY_SCALAR", ++ SDT_ZVecBinaryFp>; ++ ++def z_vcopyf : SDNode<"Sw64ISD::VCOPYF", ++ SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>, []>; ++ ++def z_v8sll : SDNode<"Sw64ISD::V8SLL", ++ SDT_ZVecBinaryInt>; ++ ++def z_v8srl : SDNode<"Sw64ISD::V8SRL", ++ SDT_ZVecBinaryInt>; ++ ++def z_v8sra : SDNode<"Sw64ISD::V8SRA", ++ SDT_ZVecBinaryInt>; ++ ++def z_vrotr : SDNode<"Sw64ISD::VROTR", ++ SDT_ZVecBinaryInt>; ++ ++def Sw64VINSECTL : SDNode<"Sw64ISD::VINSECTL", SDT_VINSECTL>; ++//def Sw64VINSECTLH : SDNode<"Sw64ISD::VINSECTLH", SDT_VINSECTL>; ++//def Sw64VINSECTLW : SDNode<"Sw64ISD::VINSECTLW", SDT_VINSECTL>; ++//def Sw64VINSECTLL : SDNode<"Sw64ISD::VINSECTLL", SDT_VINSECTL>; ++//def Sw64VINSECTLB : SDNode<"Sw64ISD::VINSECTLB", SDT_VINSECTL>; ++//def Sw64VSHFQB : SDNode<"Sw64ISD::VSHFQB", SDT_VINSECTL>; ++//def Sw64VSHFQ : SDNode<"Sw64ISD::VSHFQ", SDT_VSHFQ>; ++//def Sw64VCPYB : SDNode<"Sw64ISD::VCPYB", SDT_ZVecCPY>; ++//def Sw64VCPYH : SDNode<"Sw64ISD::VCPYH", SDT_ZVecCPY>; ++ ++ ++// ---- For immediate format. 
++ ++def SDT_ZV8X : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; ++ ++def Sw64V8SLL : SDNode<"Sw64ISD::V8SLLi", SDT_ZV8X>; ++def Sw64V8SRL : SDNode<"Sw64ISD::V8SRLi", SDT_ZV8X>; ++def Sw64V8SRA : SDNode<"Sw64ISD::V8SRAi", SDT_ZV8X>; ++def Sw64VROTR : SDNode<"Sw64ISD::VROTRi", SDT_ZV8X>; ++def Sw64VROLB : SDNode<"Sw64ISD::VROLBi", SDT_ZV8X>; ++def Sw64VROLH : SDNode<"Sw64ISD::VROLHi", SDT_ZV8X>; ++def Sw64VROLL : SDNode<"Sw64ISD::VROLLi", SDT_ZV8X>; ++ ++def z_v8slli : PatFrag<(ops node:$vec, node:$val), ++ (v8i32 (Sw64V8SLL node:$vec, node:$val))>; ++ ++def z_v8srli : PatFrag<(ops node:$vec, node:$val), ++ (v8i32 (Sw64V8SRL node:$vec, node:$val))>; ++ ++def z_v8srai : PatFrag<(ops node:$vec, node:$val), ++ (v8i32 (Sw64V8SRA node:$vec, node:$val))>; ++ ++def z_vrotri : PatFrag<(ops node:$vec, node:$val), ++ (v8i32 (Sw64VROTR node:$vec, node:$val))>; ++ ++def z_vrolbi : PatFrag<(ops node:$vec, node:$val), ++ (v32i8 (Sw64VROLB node:$vec, node:$val))>; ++def z_vrolhi : PatFrag<(ops node:$vec, node:$val), ++ (v16i16 (Sw64VROLH node:$vec, node:$val))>; ++def z_vrolli : PatFrag<(ops node:$vec, node:$val), ++ (v4i64 (Sw64VROLL node:$vec, node:$val))>; ++ ++def z_vslls : PatFrag<(ops node:$vec, node:$val), ++ (v4f32 (Sw64V8SLL node:$vec, node:$val))>; ++ ++def z_vslld : PatFrag<(ops node:$vec, node:$val), ++ (v4f64 (Sw64V8SLL node:$vec, node:$val))>; ++ ++def z_vsrls : PatFrag<(ops node:$vec, node:$val), ++ (v4f32 (Sw64V8SRL node:$vec, node:$val))>; ++ ++def z_vsrld : PatFrag<(ops node:$vec, node:$val), ++ (v4f64 (Sw64V8SRL node:$vec, node:$val))>; ++ ++// ---- ++ ++def Sw64VExtractSExt : SDNode<"Sw64ISD::VEXTRACT_SEXT_ELT", ++ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; ++def Sw64VExtractZExt : SDNode<"Sw64ISD::VEXTRACT_ZEXT_ELT", ++ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; ++ ++// Pattern fragments ++def vextract_sext_i8 : PatFrag<(ops node:$vec, node:$idx), ++ (Sw64VExtractSExt node:$vec, node:$idx, i8)>; ++def vextract_sext_i16 : PatFrag<(ops node:$vec, node:$idx), ++ (Sw64VExtractSExt node:$vec, node:$idx, i16)>; ++def vextract_sext_i32 : PatFrag<(ops node:$vec, node:$idx), ++ (Sw64VExtractSExt node:$vec, node:$idx, i32)>; ++def vextract_sext_i64 : PatFrag<(ops node:$vec, node:$idx), ++ (Sw64VExtractSExt node:$vec, node:$idx, i64)>; ++ ++def vextract_sext_f32 : PatFrag<(ops node:$vec, node:$idx), ++ (Sw64VExtractSExt node:$vec, node:$idx, f32)>; ++ ++def vextract_sext_f64 : PatFrag<(ops node:$vec, node:$idx), ++ (Sw64VExtractSExt node:$vec, node:$idx, f64)>; ++ ++ ++ ++def vextract_zext_i8 : PatFrag<(ops node:$vec, node:$idx), ++ (Sw64VExtractZExt node:$vec, node:$idx, i8)>; ++def vextract_zext_i16 : PatFrag<(ops node:$vec, node:$idx), ++ (Sw64VExtractZExt node:$vec, node:$idx, i16)>; ++def vextract_zext_i32 : PatFrag<(ops node:$vec, node:$idx), ++ (Sw64VExtractZExt node:$vec, node:$idx, i32)>; ++def vextract_zext_i64 : PatFrag<(ops node:$vec, node:$idx), ++ (Sw64VExtractZExt node:$vec, node:$idx, i64)>; ++ ++ ++def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>; ++def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>; ++ ++class vsetcc_type : ++ PatFrag<(ops node:$lhs, node:$rhs), ++ (vsetcc (ResTy node:$lhs), (ResTy node:$rhs), CC)>; ++ ++def SDT_VSetGE : SDTypeProfile<1, 2, [SDTCisInt<0>, ++ SDTCisVec<1>, ++ SDTCisSameAs<1, 2>]>; ++def vsetge_v8i32 : SDNode<"Sw64ISD::VSETGE", SDT_VSetGE>; ++ ++def z_vsetge : PatFrag<(ops node:$vec, node:$val), ++ (vsetge_v8i32 node:$vec, node:$val)>; ++ ++class Vector_2Op_Pat : ++ PatFrag<(ops node:$vec, node:$val), ++ (OpNode (Ty node:$vec), (Ty 
node:$val))>; ++ ++class Vector_1Op_Pat : ++ PatFrag<(ops node:$src), ++ (OpNode (Ty node:$src))>; ++ ++multiclass MultiVec2OpPat { ++ def v8i32 : Vector_2Op_Pat; ++ def v4i64 : Vector_2Op_Pat; ++ def v4f32 : Vector_2Op_Pat; ++ def v4f64 : Vector_2Op_Pat; ++ def v16i16 : Vector_2Op_Pat; ++ def v32i8 : Vector_2Op_Pat; ++} ++ ++defm add : MultiVec2OpPat; ++defm sub : MultiVec2OpPat; ++defm and : MultiVec2OpPat; ++defm xor : MultiVec2OpPat; ++defm or : MultiVec2OpPat; ++ ++def vbic : BinOpFrag<(and node:$LHS, (vnot node:$RHS))>; ++def vornot : BinOpFrag<(or node:$LHS, (vnot node:$RHS))>; ++def veqv : BinOpFrag<(vnot (xor node:$LHS, node:$RHS))>; ++ ++def vseteq_v8i32 : vsetcc_type; ++def vsetle_v8i32 : vsetcc_type; ++def vsetlt_v8i32 : vsetcc_type; ++def vsetule_v8i32 : vsetcc_type; ++def vsetult_v8i32 : vsetcc_type; ++def vsetueq_v32i8 : vsetcc_type; ++def vsetugt_v32i8 : vsetcc_type; ++ ++def SDT_VMAX : SDTypeProfile<1, 2, [SDTCisInt<0>,SDTCisVec<0>, ++ SDTCisSameAs<0,1>, ++ SDTCisSameAs<0,2>]>; ++def SDT_VFMAX : SDTypeProfile<1, 2, [SDTCisFP<0>,SDTCisVec<0>, ++ SDTCisSameAs<0,1>, ++ SDTCisSameAs<0,2>]>; ++def vmax : SDNode<"Sw64ISD::VMAX", SDT_VMAX>; ++def vmin : SDNode<"Sw64ISD::VMIN", SDT_VMAX>; ++def vumax : SDNode<"Sw64ISD::VUMAX", SDT_VMAX>; ++def vumin : SDNode<"Sw64ISD::VUMIN", SDT_VMAX>; ++ ++def vmaxf : SDNode<"Sw64ISD::VMAXF", SDT_VFMAX>; ++def vminf : SDNode<"Sw64ISD::VMINF", SDT_VFMAX>; ++ ++ ++class vfsetcc_type : ++ PatFrag<(ops node:$lhs, node:$rhs), ++ (ResTy (vfsetcc (OpTy node:$lhs), (OpTy node:$rhs), CC))>; ++ ++// ISD::SETFALSE cannot occur ++def vfseteq_v4f32 : vfsetcc_type; ++def vfseteq_v4f64 : vfsetcc_type; ++def vfsetge_v4f32 : vfsetcc_type; ++def vfsetge_v4f64 : vfsetcc_type; ++def vfsetgt_v4f32 : vfsetcc_type; ++def vfsetgt_v4f64 : vfsetcc_type; ++def vfsetle_v4f32 : vfsetcc_type; ++def vfsetle_v4f64 : vfsetcc_type; ++def vfsetlt_v4f32 : vfsetcc_type; ++def vfsetlt_v4f64 : vfsetcc_type; ++def vfsetne_v4f32 : vfsetcc_type; ++def vfsetne_v4f64 : vfsetcc_type; ++ ++def vfsetoeq_v4f32 : vfsetcc_type; ++def vfsetoeq_v4f64 : vfsetcc_type; ++def vfsetoge_v4f32 : vfsetcc_type; ++def vfsetoge_v4f64 : vfsetcc_type; ++def vfsetogt_v4f32 : vfsetcc_type; ++def vfsetogt_v4f64 : vfsetcc_type; ++def vfsetole_v4f32 : vfsetcc_type; ++def vfsetole_v4f64 : vfsetcc_type; ++def vfsetolt_v4f32 : vfsetcc_type; ++def vfsetolt_v4f64 : vfsetcc_type; ++def vfsetone_v4f32 : vfsetcc_type; ++def vfsetone_v4f64 : vfsetcc_type; ++def vfsetord_v4f32 : vfsetcc_type; ++def vfsetord_v4f64 : vfsetcc_type; ++def vfsetun_v4f32 : vfsetcc_type; ++def vfsetun_v4f64 : vfsetcc_type; ++def vfsetueq_v4f32 : vfsetcc_type; ++def vfsetueq_v4f64 : vfsetcc_type; ++def vfsetuge_v4f32 : vfsetcc_type; ++def vfsetuge_v4f64 : vfsetcc_type; ++def vfsetugt_v4f32 : vfsetcc_type; ++def vfsetugt_v4f64 : vfsetcc_type; ++def vfsetule_v4f32 : vfsetcc_type; ++def vfsetule_v4f64 : vfsetcc_type; ++def vfsetult_v4f32 : vfsetcc_type; ++def vfsetult_v4f64 : vfsetcc_type; ++def vfsetune_v4f32 : vfsetcc_type; ++def vfsetune_v4f64 : vfsetcc_type; ++// ISD::SETTRUE cannot occur ++// ISD::SETFALSE2 cannot occur ++// ISD::SETTRUE2 cannot occur ++ ++class SplatComplexPattern roots = [], ++ list props = []> : ++ ComplexPattern { ++ Operand OpClass = opclass; ++} ++ ++multiclass MultiVec1OpPat { ++ def v8i32 : Vector_1Op_Pat; ++ def v4i64 : Vector_1Op_Pat; ++ def v4f32 : Vector_1Op_Pat; ++ def v4f64 : Vector_1Op_Pat; ++ def v16i16 : Vector_1Op_Pat; ++ def v32i8 : Vector_1Op_Pat; ++} ++ ++defm vsplat : MultiVec1OpPat; ++ ++//def vsplatv8i32 : 
PatFrag<(ops node:$e0), ++// (v8i32 (build_vector node:$e0, node:$e0, ++// node:$e0, node:$e0, ++// node:$e0, node:$e0, ++// node:$e0, node:$e0))>; ++//def vsplatv4i64 : PatFrag<(ops node:$e0), ++// (v4i64 (build_vector node:$e0, node:$e0, ++// node:$e0, node:$e0))>; ++//def vsplatv4f32 : PatFrag<(ops node:$e0), ++// (v4f32 (build_vector node:$e0, node:$e0, ++// node:$e0, node:$e0))>; ++// ++//def vsplatv4f64 : PatFrag<(ops node:$e0), ++// (v4f64 (build_vector node:$e0, node:$e0, ++// node:$e0, node:$e0))>; ++ ++def vsplati64_simm8 : SplatComplexPattern; ++ ++def vsplati64_uimm8 : SplatComplexPattern; ++ ++def vsplati32_simm8 : SplatComplexPattern; ++ ++def vsplati32_uimm8 : SplatComplexPattern; ++ ++def vsplati16_uimm8 : SplatComplexPattern; ++ ++def vsplati8_uimm8 : SplatComplexPattern; ++ ++def AddSubImm8Pat : ComplexPattern", []>; ++def ComplexImmPat : ComplexPattern; ++ ++def addrimm10 : ComplexPattern; ++ ++def addrimm10lsl1 : ComplexPattern; ++ ++def addrimm16 : ComplexPattern; ++def addrimm12 : ComplexPattern; ++ ++def immZExt1Ptr : ImmLeaf(Imm);}]>; ++def immZExt2Ptr : ImmLeaf(Imm);}]>; ++def immZExt3Ptr : ImmLeaf(Imm);}]>; ++def immZExt4Ptr : ImmLeaf(Imm);}]>; ++def immZExt5Ptr : ImmLeaf(Imm);}]>; ++def immZExt8Ptr : ImmLeaf(Imm);}]>; ++ ++def vinsert_v8i32 : PatFrag<(ops node:$vec, node:$val, node:$idx), ++ (v8i32 (vector_insert node:$vec, node:$val, node:$idx))>; ++ ++def vinsert_v4f32 : PatFrag<(ops node:$vec, node:$val, node:$idx), ++ (v4f32 (vector_insert node:$vec, node:$val, node:$idx))>; ++ ++def vinsert_v4f64 : PatFrag<(ops node:$vec, node:$val, node:$idx), ++ (v4f64 (vector_insert node:$vec, node:$val, node:$idx))>; ++ ++def vinsert_v32i8 : PatFrag<(ops node:$vec, node:$val, node:$idx), ++ (v32i8 (vector_insert node:$vec, node:$val, node:$idx))>; ++ ++def vinsert_v16i16 : PatFrag<(ops node:$vec, node:$val, node:$idx), ++ (v16i16 (vector_insert node:$vec, node:$val, node:$idx))>; ++// Instruction desc. 
++// Memory instruction format ++class VectorStoreBASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROWD:$RA, MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $addr"); ++ list<dag> Pattern = [(OpNode (vt ROWD:$RA), Addr:$addr)]; ++} ++ ++class VectorLoadBASE { ++ dag OutOperandList = (outs ROWD:$RA); ++ dag InOperandList = (ins MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $addr"); ++ list<dag> Pattern = [(set ROWD:$RA, (vt (OpNode Addr:$addr)))]; ++} ++ ++let mayStore = 1 in ++class VectorStore opcode, string instr_asm, RegisterOperand ROWD, ++ ValueType vt, SDPatternOperator OpNode=null_frag> ++ : MFormV, ++ VectorStoreBASE; ++ ++let mayLoad = 1 in ++class VectorLoad opcode, string instr_asm, RegisterOperand ROWD, ++ ValueType vt, SDPatternOperator OpNode=null_frag> ++ : MFormV, ++ VectorLoadBASE; ++ ++let DecoderMethod = "DecodeFIXMEInstruction" in{ ++def VSTS : VectorStore<0x0E, "vsts", V256LOpnd, v4f32, store>; ++def VSTD : VectorStore<0x0F, "vstd", V256LOpnd, v4f64, store>; ++def VLDS : VectorLoad <0x0C, "vlds", V256LOpnd, v4f32, load>; ++def VLDD : VectorLoad <0x0D, "vldd", V256LOpnd, v4f64, load>; ++def VLDWE : VectorLoad <0x09, "ldwe", V256LOpnd, v8i32, Sw64VBroadCasti32>; ++def VLDSE : VectorLoad <0x0A, "ldse", V256LOpnd, v4f32, Sw64VBroadCastf32>; ++def VLDDE : VectorLoad <0x0B, "ldde", V256LOpnd, v4f64, Sw64VBroadCastf64>; ++} ++multiclass V256Pat { ++ def v32i8 : PatFrag<(ops node:$src), (v32i8 (OpNode node:$src))>; ++ def v16i16 : PatFrag<(ops node:$src), (v16i16 (OpNode node:$src))>; ++ def v8i32 : PatFrag<(ops node:$src), (v8i32 (OpNode node:$src))>; ++ def v4i64 : PatFrag<(ops node:$src), (v4i64 (OpNode node:$src))>; ++ def v4f64 : PatFrag<(ops node:$src), (v4f64 (OpNode node:$src))>; ++} ++ ++//def : Pat <(v8i32 (int_sw64_vload addrimm16:$src)), (VLDD addrimm16:$src)>; ++ ++//def : Pat <(vloadv32i8 addrimm16:$src), (VLDD addrimm16:$src)>; ++//def : Pat <(vloadv16i16 addrimm16:$src), (VLDD addrimm16:$src)>; ++//def : Pat <(vloadv8i32 addrimm16:$src), (VLDD addrimm16:$src)>; ++//def : Pat <(vloadv4i64 addrimm16:$src), (VLDD addrimm16:$src)>; ++// ++//def : Pat <(store (v32i8 V256L:$DST), addrimm16:$src), ++// (VSTD $DST, addrimm16:$src)>; ++//def : Pat <(store (v16i16 V256L:$DST), addrimm16:$src), ++// (VSTD $DST, addrimm16:$src)>; ++//def : Pat <(store (v8i32 V256L:$DST), addrimm16:$src), ++// (VSTD $DST, addrimm16:$src)>; ++//def : Pat <(store (v4i64 V256L:$DST), addrimm16:$src), ++// (VSTD $DST, addrimm16:$src)>; ++ ++//////////////////////////////////////////// ++// Extern Vector Memory Operation ++// ///////////////////////////////////////// ++// Memory instruction format with a function field ++let mayStore = 1 in ++class VectorStoreExt func, string instr_asm, ValueType vt, ++ SDPatternOperator OpNode=null_frag> ++ : MFuncFormV<0x1C, func>, ++ VectorStoreBASE; ++ ++let mayLoad = 1 in ++class VectorLoadExt func, string instr_asm, ValueType vt, ++ SDPatternOperator OpNode=null_frag> ++ : MFuncFormV<0x1C, func>, ++ VectorLoadBASE; ++ ++let DecoderMethod = "DecodeFIXMEInstruction" in{ ++def VLDWU : VectorLoadExt <0x00, "vldw_u" , v8i32>; ++def VLDSU : VectorLoadExt <0x02, "vlds_u" , v4f32>; ++def VLDDU : VectorLoadExt <0x04, "vldd_u" , v4f64>; ++def VLDDNC : VectorLoadExt <0x0e, "vldd_nc", v4f64>; ++def VSTWU : VectorStoreExt<0x01, "vstw_u" , v8i32>; ++def VSTSU : VectorStoreExt<0x03, "vsts_u" , v4f32>; ++def VSTDU : VectorStoreExt<0x05, "vstd_u" , v4f64>; ++def VSTWUL : VectorStoreExt<0x08, "vstw_ul", v8i32>; ++def VSTSUL : VectorStoreExt<0x0a, "vsts_ul", v4f32>; ++def VSTDUL
: VectorStoreExt<0x0c, "vstd_ul", v4f64>; ++def VSTWUH : VectorStoreExt<0x09, "vstw_uh", v8i32>; ++def VSTSUH : VectorStoreExt<0x0b, "vsts_uh", v4f32>; ++def VSTDUH : VectorStoreExt<0x0d, "vstd_uh", v4f64>; ++def VSTDNC : VectorStoreExt<0x0f, "vstd_nc", v4f64>; ++} ++class vload_pat ++ : Pat<(Vt (OpNode addrimm16:$src)), (Inst addrimm16:$src)>; ++ ++class vstore_pat ++ : Pat<(OpNode (Vt V256L:$DST), addrimm16:$src), (Inst $DST, addrimm16:$src)>; ++ ++ ++// commom pattern for load/store intrinsic ++multiclass vector_mem_multipat { ++def : vload_pat; ++def : vstore_pat; ++def : vload_pat; ++def : vstore_pat; ++} ++ ++multiclass vector_mem_intrpat { ++def : vload_pat; ++def : vstore_pat; ++} ++ ++// extension pattern for load_u/loade/store_u/storeuh/.. ++multiclass vector_mem_extension { ++def : vload_pat (LoadI#U)>; ++def : vload_pat (LoadI#E)>; ++def : vstore_pat(StoreI#U)>; ++def : vstore_pat(StoreI#UH)>; ++def : vstore_pat(StoreI#UL)>; ++} ++ ++defm : vector_mem_multipat; ++defm : vector_mem_multipat; ++defm : vector_mem_multipat; ++defm : vector_mem_multipat; ++ ++defm : vector_mem_intrpat; ++defm : vector_mem_intrpat; ++ ++defm : vector_mem_extension; ++defm : vector_mem_extension; ++defm : vector_mem_extension; ++defm : vector_mem_extension; ++ ++multiclass vector_mem_nc { ++def : vload_pat ; ++def : vstore_pat; ++} ++ ++defm : vector_mem_nc; ++defm : vector_mem_nc; ++defm : vector_mem_nc; ++defm : vector_mem_nc; ++defm : vector_mem_nc; ++ ++def : Pat<(v8i32 (Sw64VBroadCast (i64 (extloadi32 addrimm16:$src)))), ++ (VLDWE addrimm16:$src)>; ++def : Pat<(v4f32 (Sw64VBroadCast (f32 (load addrimm16:$src)))), ++ (VLDSE addrimm16:$src)>; ++def : Pat<(v4i64 (Sw64VBroadCast (i64 (load addrimm16:$src)))), ++ (VLDDE addrimm16:$src)>; ++def : Pat<(v4f64 (Sw64VBroadCast (f64 (load addrimm16:$src)))), ++ (VLDDE addrimm16:$src)>; ++ ++def : vstore_pat; ++ ++class SIMD_3RR_SAME { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA, ROB:$RB); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); ++ list Pattern = [(set (Vt ROC:$RC), (OpNode (Vt ROA:$RA), (Vt ROB:$RB)))]; ++} ++ ++class SIMD_3RI_SAME { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA, immtype:$Imm); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $Imm, $RC"); ++ list Pattern = [(set (Vt ROC:$RC), (OpNode (Vt ROA:$RA), immtype:$Imm))]; ++} ++ ++class SIMD_4RR_SAME { ++ dag OutOperandList = (outs ROC:$RD); ++ dag InOperandList = (ins ROC:$RA, ROC:$RB, ROC:$RC); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC, $RD"); ++ list Pattern = [(set (Vt ROC:$RD), ++ (OpNode (Vt ROC:$RA), (Vt ROC:$RB), (Vt ROC:$RC)))]; ++} ++ ++class Vector_2OP_Reg_Pat ++ : Pat<(OpNode (Vt OR:$RA), (Yt OR:$RB)), ++ (Inst (Vt OR:$RA), (Yt OR:$RB))>; ++ ++class Vector_1OP_Imm_Pat ++ : Pat<(Vt (OpNode (Vt OR:$RA), (i64 cpx:$Imm))), ++ (Inst (Vt OR:$RA), $Imm)>; ++ ++class Vector_2OP_Imm_VB_Pat ++ : Pat<(Vt (OpNode (Vt OR:$RA), (it (immop (i64 cpx:$Imm))))), ++ (Inst (Vt OR:$RA), $Imm)>; ++ ++class Vector_2OP_Reg_Scalar ++ : Pat<(Vt (OpNode (Vt ROA:$RA), (i64 GPRCOpnd:$RB))), ++ (Inst (Vt ROA:$RA), (i32 (COPY_TO_REGCLASS GPRCOpnd:$RB, ROB)))>; ++ ++class Vector_2OP_Reg_S32 ++ : Pat<(Vt (OpNode (Vt ROA:$RA), (Vt (Sw64VBroadCast (i64 GPRCOpnd:$RB))))), ++ (Inst (Vt ROA:$RA), (i32 (COPY_TO_REGCLASS GPRCOpnd:$RB, FPRC_lo)))>; ++ ++class Vector_3OP_SameReg_Pat ++ : Pat<(OpNode (Vt OR:$RA), (Vt OR:$RB), (Vt OR:$RC)), ++ (Inst OR:$RA, OR:$RB, OR:$RC)>; ++ ++//def : Pat<(v8i32 (int_sw64_vsll (v8i32 
V256LOpnd:$RA), ++// (v8i32 (Sw64VBroadCast (i64 GPRCOpnd:$RB))))), ++// (VSLLv8i32rr (v8i32 V256LOpnd:$RA), (i64 (ITOFStmp GPRCOpnd:$RB)))>; ++ ++multiclass SIMD_ARITH Opcode, bits<8>func, ++ string instr_asm, SDPatternOperator OpNode, ValueType Vt, ++ Operand immtype, RegisterOperand RO, ++ SDPatternOperator IOp = null_frag, ++ ComplexPattern cpx = AddSubImm8Pat> { ++ def rr : FPFormV, SIMD_3RR_SAME; ++ ++ def ri : FPFormIV, ++ SIMD_3RI_SAME; ++ ++ def : Vector_2OP_Reg_Pat(NAME # rr)>; ++ ++ def : Vector_2OP_Imm_VB_Pat(NAME # ri)>; ++ ++ def : Vector_2OP_Imm_VB_Pat(NAME # ri)>; ++} ++ ++defm VUCADDv16i16 : SIMD_ARITH<0x1A, 0x42, "vucaddh", add, v16i16, ++ s8imm, V256LOpnd, int_sw64_vucaddh_v16hi>; ++defm VUCSUBv16i16 : SIMD_ARITH<0x1A, 0x43, "vucsubh", sub, v16i16, ++ s8imm, V256LOpnd, int_sw64_vucsubh_v16hi>; ++defm VUCADDv32i8 : SIMD_ARITH<0x1A, 0x44, "vucaddb", add, v32i8, ++ s8imm, V256LOpnd, int_sw64_vucaddb_v32qi>; ++defm VUCSUBv32i8 : SIMD_ARITH<0x1A, 0x45, "vucsubb", sub, v32i8, ++ s8imm, V256LOpnd, int_sw64_vucsubb_v32qi>; ++defm VADDv8i32 : SIMD_ARITH<0x1A, 0x00, "vaddw", add, v8i32, ++ s8imm, V256LOpnd>; ++defm VSUBv8i32 : SIMD_ARITH<0x1A, 0x01, "vsubw", sub, v8i32, ++ s8imm, V256LOpnd>; ++defm VUCADDv8i32 : SIMD_ARITH<0x1A, 0x40, "vucaddw", add, v8i32, ++ s8imm, V256LOpnd, int_sw64_vucaddw>; ++defm VUCSUBv8i32 : SIMD_ARITH<0x1A, 0x41, "vucsubw", sub, v8i32, ++ s8imm, V256LOpnd, int_sw64_vucsubw>; ++defm VADDv4i64 : SIMD_ARITH<0x1A, 0x0E, "vaddl", add, v4i64, ++ s8imm, V256LOpnd>; ++defm VSUBv4i64 : SIMD_ARITH<0x1A, 0x0F, "vsubl", sub, v4i64, ++ s8imm, V256LOpnd>; ++ ++def : Vector_2OP_Reg_Pat; ++def : Vector_2OP_Reg_Pat; ++def : Vector_2OP_Reg_Pat; ++def : Vector_2OP_Reg_Pat; ++ ++def : Vector_1OP_Imm_Pat; ++def : Vector_1OP_Imm_Pat; ++def : Vector_1OP_Imm_Pat; ++def : Vector_1OP_Imm_Pat; ++ ++//def : Pat<(int_sw64_vaddw (v8i32 V256LOpnd:$RA), (v8i32 V256LOpnd:$RB)), ++// (VADDv32i8rr (v8i32 V256LOpnd:$RA), (v8i32 V256LOpnd:$RB))>; ++// ++//def : Pat<(int_sw64_vaddw (v8i32 V256LOpnd:$RA), (v8i32 ++// (vsplatv8i32 (i64 AddSubImm8Pat:$Imm)))), ++// (VADDv32i8ri (v8i32 V256LOpnd:$RA), s8imm:$Imm)>; ++ ++class SIMD_3RR_VCMPGEW { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA, ROB:$RB); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); ++// list Pattern = [(set (i64 ROC:$RC), (OpNode (Vt ROA:$RA), (Vt ROB:$RB)))]; ++} ++ ++class SIMD_3RI_VCMPGEW { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA, immtype:$Imm); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $Imm, $RC"); ++// list Pattern = [(set (i64 ROC:$RC), (OpNode (Vt ROA:$RA), immtype:$Imm))]; ++} ++ ++def VCMPGEWrr : FPFormV<0x1A, 0x02>, SIMD_3RR_VCMPGEW<"vcmpgew", null_frag, v8i32>; ++def VCMPGEWri : FPFormIV<0x1A, 0x02>, SIMD_3RI_VCMPGEW<"vcmpgew", null_frag, v8i32, u8imm>; ++ ++def : Pat<(int_sw64_vcmpgew (v8i32 V256LOpnd:$RA), (v8i32 V256LOpnd:$RB)), ++ (i64 (FTOIStmp (VCMPGEWrr (v8i32 V256LOpnd:$RA), (v8i32 V256LOpnd:$RB))))>; ++def : Pat<(int_sw64_vcmpgew (v8i32 V256LOpnd:$RA), (v8i32 (Sw64VBroadCast (i64 AddSubImm8Pat:$Imm)))), ++ (i64 (FTOIStmp (VCMPGEWri (v8i32 V256LOpnd:$RA), $Imm)))>; ++ ++defm VCMPEQW : SIMD_ARITH<0x1A, 0x03, "vcmpeqw", seteq, v8i32, ++ u8imm, V256LOpnd, int_sw64_vcmpeqw, AddSubImm8Pat>; ++defm VCMPLEW : SIMD_ARITH<0x1A, 0x04, "vcmplew", setle, v8i32, ++ u8imm, V256LOpnd, int_sw64_vcmplew, AddSubImm8Pat>; ++defm VCMPLTW : SIMD_ARITH<0x1A, 0x05, "vcmpltw", setlt, v8i32, ++ u8imm, V256LOpnd, int_sw64_vcmpltw, AddSubImm8Pat>; 
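Each SIMD_ARITH instantiation above produces two encodings: an "rr" form taking two vector registers and an "ri" form whose second operand is an 8-bit immediate, which the Vector_2OP_Imm_VB_Pat patterns fold out of a splatted build_vector. Note that the saturating forms (vucaddw/vucsubw and friends) are wired both to the generic add/sub nodes and to the int_sw64_vucadd*/vucsub* intrinsics. The sketch below is an illustration only, written in plain C++ with the Clang/GCC vector_size extension rather than anything defined by this patch; the instruction names in the comments describe what the patterns are intended to select, not a verified result.

    #include <stdint.h>

    typedef int32_t v8i32 __attribute__((vector_size(32)));   // 8 x i32, 256 bits

    // Two full vector operands: the "rr" pattern, e.g. "vaddw $RA, $RB, $RC".
    v8i32 add_rr(v8i32 a, v8i32 b) { return a + b; }

    // Adding a splatted small constant reaches the DAG as
    // (add a, (build_vector 5, 5, ..., 5)); the "ri" pattern folds the splat into
    // the 8-bit immediate field, e.g. "vaddw $RA, 5, $RC".
    v8i32 add_ri(v8i32 a) {
      v8i32 five = {5, 5, 5, 5, 5, 5, 5, 5};
      return a + five;
    }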
++defm VCMPULEW : SIMD_ARITH<0x1A, 0x06, "vcmpulew", setule, v8i32, ++ u8imm, V256LOpnd, int_sw64_vcmpulew, AddSubImm8Pat>; ++defm VCMPULTW : SIMD_ARITH<0x1A, 0x07, "vcmpultw", setult, v8i32, ++ u8imm, V256LOpnd, int_sw64_vcmpultw, AddSubImm8Pat>; ++ ++defm VCMPUEQB : SIMD_ARITH<0x1A, 0x4B, "vcmpueqb", null_frag, v32i8, ++ u8imm, V256LOpnd, int_sw64_vcmpueqb, AddSubImm8Pat>; ++defm VCMPUGTB : SIMD_ARITH<0x1A, 0x4C, "vcmpugtb", null_frag, v32i8, ++ u8imm, V256LOpnd, int_sw64_vcmpugtb, AddSubImm8Pat>; ++ ++class SIMD_2RR_BASE { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RC"); ++} ++ ++def CTPOPOW : FPFormV_CT<0x1A, 0x18>, ++ SIMD_2RR_BASE<"ctpopow", FPRCOpnd, V256LOpnd>; ++def CTLZOW : FPFormV_CT<0x1A, 0x19>, ++ SIMD_2RR_BASE<"ctlzow", FPRCOpnd, V256LOpnd>; ++ ++def VSUMv8i32 : FPFormV_CT<0x1A, 0x47>, ++ SIMD_2RR_BASE<"vsumw", FPRCOpnd, V256LOpnd>; ++def VSUMv4i64 : FPFormV_CT<0x1A, 0x48>, ++ SIMD_2RR_BASE<"vsuml", FPRCOpnd, V256LOpnd>; ++ ++def : Pat<(int_sw64_vsumw (v8i32 V256LOpnd:$RA)), ++ (i64 (FTOIStmp (i64 (VSUMv8i32 (v8i32 V256LOpnd:$RA)))))>; ++ ++def : Pat<(int_sw64_vsuml (v4i64 V256LOpnd:$RA)), ++ (i64 (FTOITtmp (i64 (VSUMv4i64 (v4i64 V256LOpnd:$RA)))))>; ++ ++def : Pat<(int_sw64_ctpopow (v4i64 V256LOpnd:$RA)), ++ (i64 (FTOIStmp (i64 (CTPOPOW (v4i64 V256LOpnd:$RA)))))>; ++ ++def : Pat<(int_sw64_ctlzow (v4i64 V256LOpnd:$RA)), ++ (i64 (FTOIStmp (i64 (CTLZOW (v4i64 V256LOpnd:$RA)))))>; ++ ++class SIMD_3RR_SCALER { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA, ROB:$RB); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); ++ list Pattern = [(set (Vt ROC:$RC), (OpNode (Vt ROA:$RA), ROB:$RB))]; ++} ++ ++multiclass SIMD_SHIFT Opcode, bits<8>func, ++ string instr_asm, SDPatternOperator OpNode, ValueType Vt, ++ Operand immtype, RegisterOperand ROA, RegisterOperand ROB, ++ SDPatternOperator IOp = null_frag> { ++ def rr : FPFormV, ++ SIMD_3RR_SCALER; ++ ++ def ri : FPFormIV, ++ SIMD_3RI_SAME; ++ ++ def : Vector_2OP_Reg_Scalar(NAME # rr)>; ++ ++ def : Vector_1OP_Imm_Pat(NAME # ri)>; ++} ++ ++multiclass SIMD_Shift_Multi funcW, bits<8> funcB,bits<8> funcH, ++ bits<8> funcL, string instr_asm, RegisterOperand RO, ++ SDPatternOperator OpNode> { ++defm v8i32 : SIMD_SHIFT<0x1A, funcW, instr_asm#w, OpNode, v8i32, ++ s8imm, V256LOpnd, RO>; ++defm v16i16 : SIMD_SHIFT<0x1A, funcH, instr_asm#h, OpNode, v16i16, ++ s8imm, V256LOpnd, RO>; ++defm v32i8 : SIMD_SHIFT<0x1A, funcB, instr_asm#b, OpNode, v32i8, ++ s8imm, V256LOpnd, RO>; ++defm v4i64 : SIMD_SHIFT<0x1A, funcL, instr_asm#l, OpNode, v4i64, ++ s8imm, V256LOpnd, RO>; ++ ++def : Vector_2OP_Imm_VB_Pat(NAME # v8i32 #ri)>; ++ ++def : Vector_2OP_Imm_VB_Pat(NAME # v16i16 #ri)>; ++ ++def : Vector_2OP_Imm_VB_Pat(NAME # v32i8 #ri)>; ++ ++def : Vector_2OP_Imm_VB_Pat(NAME # v4i64 #ri)>; ++} ++ ++defm VSLL : SIMD_Shift_Multi<0x08, 0x10, 0x14, 0x1A, "vsll", ++ FPRCloOpnd, int_sw64_vsll>; ++defm VSRL : SIMD_Shift_Multi<0x09, 0x11, 0x15, 0x1B, "vsrl", ++ FPRCloOpnd, int_sw64_vsrl>; ++defm VSRA : SIMD_Shift_Multi<0x0A, 0x12, 0x16, 0x1C, "vsra", ++ FPRCloOpnd, int_sw64_vsra>; ++defm VROL : SIMD_Shift_Multi<0x0B, 0x13, 0x17, 0x1D, "vrol", ++ FPRCloOpnd, int_sw64_vrol>; ++ ++//def : Pat<(v8i32 (int_sw64_vsll (v8i32 V256LOpnd:$RA), ++// (v8i32 (Sw64VBroadCast (i64 GPRCOpnd:$RB))))), ++// (v8i32 (VSLLv8i32rr (v8i32 V256LOpnd:$RA), ++// (i32 (ITOFStmp GPRCOpnd:$RB))))>; ++multiclass Vector_Shift_VB { ++def : Vector_1OP_Imm_Pat(InstName # ri)>; 
++def : Vector_2OP_Reg_S32(InstName # rr)>; ++} ++ ++multiclass Vector_Shift { ++defm : Vector_Shift_VB; ++defm : Vector_Shift_VB; ++defm : Vector_Shift_VB; ++defm : Vector_Shift_VB; ++} ++ ++defm : Vector_Shift; ++defm : Vector_Shift; ++defm : Vector_Shift; ++defm : Vector_Shift; ++ ++defm VSLLOW : SIMD_SHIFT<0x1A, 0x0C, "sllow", int_sw64_sllow, v4i64, ++ s8imm, V256LOpnd, FPRCloOpnd>; ++defm VSRLOW : SIMD_SHIFT<0x1A, 0x0D, "srlow", int_sw64_srlow, v4i64, ++ s8imm, V256LOpnd, FPRCloOpnd>; ++defm VSRAOW : SIMD_SHIFT<0x1A, 0x46, "sraow", int_sw64_sraow, v4i64, ++ s8imm, V256LOpnd, FPRCloOpnd>; ++ ++//def : Pat<(int_sw64_sllow (v4i64 V256LOpnd:$RA), (i64 GPRCOpnd:$RB)), ++// (VSLLOWrr (v4i64 V256LOpnd:$RA),(i32 (ITOFStmp GPRCOpnd:$RB)))>; ++def : Pat<(int_sw64_vslls (v4f32 V256LOpnd:$RA), (i64 AddSubImm8Pat:$Imm)), ++ (VSLLOWri V256LOpnd:$RA, $Imm)>; ++def : Pat<(int_sw64_vslld (v4f64 V256LOpnd:$RA), (i64 AddSubImm8Pat:$Imm)), ++ (VSLLOWri V256LOpnd:$RA, $Imm)>; ++ ++def : Pat<(int_sw64_vsrls (v4f32 V256LOpnd:$RA), (i64 AddSubImm8Pat:$Imm)), ++ (VSRLOWri V256LOpnd:$RA, $Imm)>; ++def : Pat<(int_sw64_vsrld (v4f64 V256LOpnd:$RA), (i64 AddSubImm8Pat:$Imm)), ++ (VSRLOWri V256LOpnd:$RA, $Imm)>; ++ ++//def : Pat<(int_sw64_srlow (v4i64 V256LOpnd:$RA), (i64 GPRCOpnd:$RB)), ++// (VSRLOWrr (v4i64 V256LOpnd:$RA),(i32 (ITOFStmp GPRCOpnd:$RB)))>; ++//def : Pat<(int_sw64_srlow (v4i64 V256LOpnd:$RA), (i64 immUExt8:$Imm)), ++// (VSRLOWri (v4i64 V256LOpnd:$RA), $Imm)>; ++// ++//def : Pat<(int_sw64_sraow (v4i64 V256LOpnd:$RA), (i64 GPRCOpnd:$RB)), ++// (VSRAOWrr (v4i64 V256LOpnd:$RA),(i32 (ITOFStmp GPRCOpnd:$RB)))>; ++//def : Pat<(int_sw64_sraow (v4i64 V256LOpnd:$RA), (i64 immUExt8:$Imm)), ++// (VSRAOWri (v4i64 V256LOpnd:$RA), $Imm)>; ++ ++multiclass SIMD_LOGIC OpFunc,string instr_asm, RegisterOperand RO, ++ SDPatternOperator OpNode> { ++def "" : FForm4LVLog<0x5, OpFunc>, ++ SIMD_3RR_SAME; ++ ++def : Vector_2OP_Reg_Pat(NAME)>; ++def : Vector_2OP_Reg_Pat(NAME)>; ++def : Vector_2OP_Reg_Pat(NAME)>; ++} ++ ++defm VOR : SIMD_LOGIC<0x54, "vbisw", V256LOpnd, or>; ++defm VAND : SIMD_LOGIC<0x40, "vandw", V256LOpnd, and>; ++defm VXOR : SIMD_LOGIC<0x1c, "vxorw", V256LOpnd, xor>; ++ ++defm VORNOT : SIMD_LOGIC<0x51, "vornotw", V256LOpnd, vornot>; ++defm VBIC : SIMD_LOGIC<0x10, "vbicw", V256LOpnd, vbic>; ++defm VEQV : SIMD_LOGIC<0x41, "veqvw", V256LOpnd, veqv>; ++//defm VNOT : SIMD_LOGIC<0x0f, "vnot", V256LOpnd, null_frag>; ++ ++def : Pat<(v8i32 immAllZerosV), (VOR (v8i32 V31) , (v8i32 V31))>; ++def : Pat<(v32i8 immAllZerosV), (VOR (v32i8 V31) , (v32i8 V31))>; ++def : Pat<(v16i16 immAllZerosV), (VOR (v16i16 V31), (v16i16 V31))>; ++def : Pat<(v4i64 immAllZerosV), (VOR (v4i64 V31) , (v4i64 V31))>; ++ ++def : Pat<(v8i32 immAllOnesV), (VEQV (v8i32 V31) , (v8i32 V31))>; ++def : Pat<(v32i8 immAllOnesV), (VEQV (v32i8 V31) , (v32i8 V31))>; ++def : Pat<(v16i16 immAllOnesV), (VEQV (v16i16 V31), (v16i16 V31))>; ++def : Pat<(v4i64 immAllOnesV), (VEQV (v4i64 V31) , (v4i64 V31))>; ++ ++//def : Vector_1OP_Imm_Pat; ++//def : Vector_1OP_Imm_Pat; ++//def : Vector_1OP_Imm_Pat; ++//def : Pat<(int_sw64_vsllw (v8i32 V256LOpnd:$RA), immUExt8:$Imm), ++// (VSLLv8i32ri (v8i32 V256LOpnd:$RA), $Imm)>; ++ ++class SIMD_INSERT_BASE { ++ dag OutOperandList = (outs V256LOpnd:$RD); ++ dag InOperandList = (ins FPO:$RA, V256LOpnd:$RB, ImmOp:$Imm); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $Imm, $RD"); ++ ++ list Pattern = [(set V256LOpnd:$RD, ++ (vector_insert (vectype V256LOpnd:$RB), ++ (eltVt FPO:$RA), ImmOp:$Imm))]; ++ ++// string 
Constraints = "@earlyclobber $RD"; ++} ++ ++multiclass SIMD_INSERT_Multi funcB, bits<6> funcH,bits<6> funcW, ++ bits<6> funcL, string instr_asm> { ++def E8 : FForm4LV<0x1B, funcB>, ++ SIMD_INSERT_BASE; ++ ++def E16 : FForm4LV<0x1B, funcH>, ++ SIMD_INSERT_BASE; ++ ++def E32 : FForm4LV<0x1B, funcW>, ++ SIMD_INSERT_BASE; ++ ++def E64 : FForm4LV<0x1B, funcL>, ++ SIMD_INSERT_BASE; ++} ++ ++defm VINS : SIMD_INSERT_Multi<0x2A, 0x2B, 0x20, 0x21, "vins">; ++ ++def : Pat<(vector_insert (v4f32 V256LOpnd:$RB), (f32 FPRCloOpnd:$RA), VectorIndexD:$idx), ++ (VINSE64 (f64 (COPY_TO_REGCLASS FPRCloOpnd:$RA, FPRC)), (v4f32 V256LOpnd:$RB), VectorIndexD:$idx)>; ++ ++def : Pat<(vector_insert (v4i64 V256LOpnd:$RB), (i64 FPRCOpnd:$RA), VectorIndexD:$idx), ++ (VINSE64 (i64 FPRCOpnd:$RA), (v4i64 V256LOpnd:$RB), VectorIndexD:$idx)>; ++ ++class vins_pat ++ : Pat<(OpNode GPRCOpnd:$RA, (vectype V256LOpnd:$RB), ImmOp:$idx), ++ (vectype (Inst (eltvt (COPY_TO_REGCLASS GPRCOpnd:$RA, RC)), (vectype V256LOpnd:$RB), ImmOp:$idx))>; ++ ++class vinselt ++ : Pat<(OpNode (vectype V256LOpnd:$RA), GPRCOpnd:$RB, ImmOp:$idx), ++ (vectype (Inst (eltvt (COPY_TO_REGCLASS GPRCOpnd:$RB, RC)), (vectype V256LOpnd:$RA), ImmOp:$idx))>; ++ ++def : vins_pat; ++def : vins_pat; ++def : vins_pat; ++def : vins_pat; ++ ++def : vinselt; ++def : vinselt; ++def : vinselt; ++def : vinselt; ++ ++def : Pat<(int_sw64_vinsfs (f32 FPRCloOpnd:$RA), ++ (v4f32 V256LOpnd:$RB), VectorIndexD:$idx), ++ (v4f32 (VINSE64 (f64 (COPY_TO_REGCLASS FPRCloOpnd:$RA, FPRC)), (v4f32 V256LOpnd:$RB), VectorIndexD:$idx))>; ++def : Pat<(int_sw64_vinsfd (f64 FPRCOpnd:$RA), ++ (v4f64 V256LOpnd:$RB), VectorIndexD:$idx), ++ (v4f64 (VINSE64 (f64 FPRCOpnd:$RA), (v4f64 V256LOpnd:$RB), VectorIndexD:$idx))>; ++ ++multiclass SIMD_COPY_Multi funcB, bits<6> funcH,bits<6> funcW, ++ bits<6> funcL, string instr_asm> { ++def E8 : FForm2V<0x1B, funcB>, ++ SIMD_2RR_BASE; ++ ++def E16 : FForm2V<0x1B, funcH>, ++ SIMD_2RR_BASE; ++ ++def E32 : FForm2V<0x1B, funcW>, ++ SIMD_2RR_BASE; ++ ++def E64 : FForm2V<0x1B, funcL>, ++ SIMD_2RR_BASE; ++ ++def : Pat <(v32i8 (Sw64VBroadCast GPRCOpnd:$RA)), ++ (v32i8 (!cast(NAME # E8) ++ (i64 (COPY_TO_REGCLASS GPRCOpnd:$RA, FPRC))))>; ++ ++def : Pat <(v16i16 (Sw64VBroadCast GPRCOpnd:$RA)), ++ (v16i16 (!cast(NAME # E16) ++ (i64 (COPY_TO_REGCLASS GPRCOpnd:$RA, FPRC))))>; ++ ++def : Pat <(v8i32 (Sw64VBroadCast GPRCOpnd:$RA)), ++ (v8i32 (!cast(NAME # E32) ++ (i32 (COPY_TO_REGCLASS GPRCOpnd:$RA, FPRC_lo))))>; ++ ++def : Pat <(v4i64 (Sw64VBroadCast GPRCOpnd:$RA)), ++ (v4i64 (!cast(NAME # E64) ++ (i64 (COPY_TO_REGCLASS GPRCOpnd:$RA, FPRC))))>; ++ ++def : Pat <(v4f64 (Sw64VBroadCast (f64 FPRCOpnd:$RA))), ++ (v4f64 (!cast(NAME # E64) (f64 FPRCOpnd:$RA)))>; ++ ++def : Pat <(v4f32 (Sw64VBroadCast (f32 FPRCloOpnd:$RA))), ++ (v4f32 (!cast(NAME # E64) (f64 (COPY_TO_REGCLASS FPRCloOpnd:$RA, FPRC))))>; ++} ++ ++defm VCPY : SIMD_COPY_Multi<0x32, 0x33, 0x24, 0x25, "vcpy">; ++ ++multiclass SIMD_VINSECT_Multi { ++def H : FForm4VINSECTL<0x1B, 0x2C>, ++ SIMD_3RR_SAME; ++ ++def W : FForm4VINSECTL<0x1B, 0x2D>, ++ SIMD_3RR_SAME; ++ ++def L : FForm4VINSECTL<0x1B, 0x2E>, ++ SIMD_3RR_SAME; ++ ++def B : FForm4VINSECTL<0x1B, 0x2F>, ++ SIMD_3RR_SAME; ++} ++ ++defm VINSECTL : SIMD_VINSECT_Multi<"vinsectl">; ++ ++def VSHFQB : FForm4VINSECTL<0x1B, 0x31>, ++ SIMD_3RR_SAME<"vshfqb", int_sw64_vshfqb, v32i8, V256LOpnd>; ++ ++class SIMD_4RI_BASE { ++ dag OutOperandList = (outs ROD:$RD); ++ dag InOperandList = (ins ROA:$RA, ROB:$RB, ImmOp:$Imm); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $Imm, 
$RD"); ++ ++ list Pattern = [(set ROD:$RD, ++ (OpNode (Vt ROA:$RA), (Vt ROB:$RB), Imm:$Imm))]; ++} ++ ++class SIMD_4RR_BASE { ++ ++ dag OutOperandList = (outs ROD:$RD); ++ dag InOperandList = (ins ROA:$RA, ROB:$RB, ROC:$RC); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC, $RD"); ++ ++// bit usesCustomInserter = Num; ++} ++ ++def VSHFQ : FForm4LV<0x1B, 0x30>, ++ SIMD_4RI_BASE<"vshfq", int_sw64_vshfq, v8i32, u5imm, immZExt4Ptr, V256LOpnd>; ++ ++def VCONW : FForm4LV2<0x1B, 0x26>, ++ SIMD_4RR_BASE<"vconw", FPRCOpnd, V256LOpnd>; ++def VCONS : FForm4LV2<0x1B, 0x28>, ++ SIMD_4RR_BASE<"vcons", FPRCOpnd, V256LOpnd>; ++def VCOND : FForm4LV2<0x1B, 0x29>, ++ SIMD_4RR_BASE<"vcond", FPRCOpnd, V256LOpnd>; ++def VSHFW : FForm4LV2<0x1B, 0x27>, ++ SIMD_4RR_BASE<"vshfw", FPRCOpnd, V256LOpnd>; ++ ++def : Pat<(int_sw64_vshfq (v8i32 V256LOpnd:$RA), ++ (v8i32 V256LOpnd:$RB), (i64 ComplexImmPat:$imm)), ++ (VSHFQ V256LOpnd:$RA, V256LOpnd:$RB, $imm)>; ++ ++def : Pat<(int_sw64_vconw (v8i32 V256LOpnd:$RA), ++ (v8i32 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), ++ (VCONW (v8i32 V256LOpnd:$RA), (v8i32 V256LOpnd:$RB), ++ (i64 (ITOFTtmp GPRCOpnd:$RC)))>; ++ ++def : Pat<(int_sw64_vcons (v4f32 V256LOpnd:$RA), ++ (v4f32 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), ++ (VCONS (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB), ++ (i64 (ITOFTtmp GPRCOpnd:$RC)))>; ++ ++def : Pat<(int_sw64_vcond (v4f64 V256LOpnd:$RA), ++ (v4f64 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), ++ (VCOND (v4f64 V256LOpnd:$RA), (v4f64 V256LOpnd:$RB), ++ (i64 (ITOFTtmp GPRCOpnd:$RC)))>; ++ ++def : Pat<(int_sw64_vconl (v4i64 V256LOpnd:$RA), ++ (v4i64 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), ++ (VCOND (v4i64 V256LOpnd:$RA), (v4i64 V256LOpnd:$RB), ++ (i64 (ITOFTtmp GPRCOpnd:$RC)))>; ++ ++def : Pat<(Sw64VSHF (v8i32 V256LOpnd:$RA), ++ (v8i32 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), ++ (VSHFW (v8i32 V256LOpnd:$RA), (v8i32 V256LOpnd:$RB), ++ (i64 (ITOFTtmp GPRCOpnd:$RC)))>; ++def : Pat<(Sw64VSHF (v4i64 V256LOpnd:$RA), ++ (v4i64 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), ++ (VSHFW (v4i64 V256LOpnd:$RA), (v4i64 V256LOpnd:$RB), ++ (i64 (ITOFTtmp GPRCOpnd:$RC)))>; ++def : Pat<(Sw64VSHF (v4f32 V256LOpnd:$RA), ++ (v4f32 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), ++ (VSHFW (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB), ++ (i64 (ITOFTtmp GPRCOpnd:$RC)))>; ++def : Pat<(Sw64VSHF (v4f64 V256LOpnd:$RA), ++ (v4f64 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), ++ (VSHFW (v4f64 V256LOpnd:$RA), (v4f64 V256LOpnd:$RB), ++ (i64 (ITOFTtmp GPRCOpnd:$RC)))>; ++ ++def VEXTW : FForm4LVV<0x1B, 0x22>, ++ SIMD_3RI_SAME<"vextw", null_frag, v8i32, u5imm, FPRCOpnd, V256LOpnd>; ++def VEXTF : FForm4LVV<0x1B, 0x23>, ++ SIMD_3RI_SAME<"vextf", null_frag, v4f32, u5imm, FPRCOpnd, V256LOpnd>; ++ ++multiclass Vector_extract_pat { ++def : Pat<(ext_vt (vector_extract (vecty V256LOpnd:$RA), Index:$Idx)), ++ (ext_vt (TransI (Inst (vecty V256LOpnd:$RA), Index:$Idx)))>; ++ ++def : Pat<(ext_vt (Intr (vecty V256LOpnd:$RA), Index:$Idx)), ++ (ext_vt (TransI (Inst (vecty V256LOpnd:$RA), Index:$Idx)))>; ++} ++ ++defm : Vector_extract_pat; ++defm : Vector_extract_pat; ++ ++// TODO: How to Combine it with class pattern? 
++def : Pat<(f64 (vector_extract (v4f64 V256LOpnd:$RA), VectorIndexD:$Idx)), ++ (f64 (VEXTF (v4f64 V256LOpnd:$RA), VectorIndexD:$Idx))>; ++def : Pat<(f64 (int_sw64_vextfd (v4f64 V256LOpnd:$RA), VectorIndexD:$Idx)), ++ (f64 (VEXTF (v4f64 V256LOpnd:$RA), VectorIndexD:$Idx))>; ++def : Pat<(f32 (vector_extract (v4f32 V256LOpnd:$RA), VectorIndexD:$Idx)), ++ (f32 (COPY_TO_REGCLASS (VEXTF (v4f32 V256LOpnd:$RA), VectorIndexD:$Idx), FPRC_lo))>; ++def : Pat<(f32 (int_sw64_vextfs (v4f32 V256LOpnd:$RA), VectorIndexD:$Idx)), ++ (f32 (COPY_TO_REGCLASS (VEXTF (v4f32 V256LOpnd:$RA), VectorIndexD:$Idx), FPRC_lo))>; ++ ++class SIMD_VLOGZZ { ++ dag OutOperandList = (outs ROD:$RD); ++ dag InOperandList = (ins ROD:$RA, ROD:$RB, ROD:$RC, ImmOp:$Imm); ++ string AsmString = !strconcat(instr_asm # "$Imm", "\t$RA, $RB, $RC, $RD"); ++ ++ list Pattern = [(set ROD:$RD, ++ (z_vlog (TyNode ROD:$RA), (TyNode ROD:$RB), ++ (TyNode ROD:$RC), Imm:$Imm))]; ++} ++ ++def VLOGZZ : FForm4LVLogZZ<0x5>, ++ SIMD_VLOGZZ<"vlog", u8immHex, immZExt8Ptr, v4i64, V256LOpnd>; ++ ++multiclass SIMD_Floating_3RR Opcode, bits<8>func, ++ string instr_asm, SDPatternOperator OpNode> { ++def "" : FPFormV, ++ SIMD_3RR_SAME; ++ ++def : Pat<(v4i64 (OpNode (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB))), ++ (v4i64 (!cast(NAME) V256LOpnd:$RA, V256LOpnd:$RB))>; ++ ++def : Pat<(v4i64 (OpNode (v4f64 V256LOpnd:$RA), (v4f64 V256LOpnd:$RB))), ++ (v4i64 (!cast(NAME) V256LOpnd:$RA, V256LOpnd:$RB))>; ++} ++ ++defm VFCMPEQ : SIMD_Floating_3RR<0x1A, 0x8C, "vfcmpeq", setoeq>; ++defm VFCMPLE : SIMD_Floating_3RR<0x1A, 0x8D, "vfcmple", setole>; ++defm VFCMPLT : SIMD_Floating_3RR<0x1A, 0x8E, "vfcmplt", setolt>; ++defm VFCMPUN : SIMD_Floating_3RR<0x1A, 0x8F, "vfcmpun", setuo>; ++ ++ ++multiclass Vector_compare_pat { ++def : Pat <(OpNode (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB)), ++ (Inst V256LOpnd:$RA, V256LOpnd:$RB)>; ++def : Pat <(OpNode (v4f64 V256LOpnd:$RA), (v4f64 V256LOpnd:$RB)), ++ (Inst V256LOpnd:$RA, V256LOpnd:$RB)>; ++} ++ ++defm : Vector_compare_pat; ++defm : Vector_compare_pat; ++defm : Vector_compare_pat; ++defm : Vector_compare_pat; ++ ++//def VFCMPEQ : FPFormV<0x1A, 0x8C>, SIMD_3RR_SAME<"vfcmpeq", Sw64VFCMPEQ, v4f64, V256LOpnd>; ++//def VFCMPLE : FPFormV<0x1A, 0x8D>, SIMD_3RR_SAME<"vfcmple", Sw64VFCMPLE, v4f64, V256LOpnd>; ++//def VFCMPLT : FPFormV<0x1A, 0x8E>, SIMD_3RR_SAME<"vfcmplt", Sw64VFCMPLT, v4f64, V256LOpnd>; ++//def VFCMPUN : FPFormV<0x1A, 0x8F>, SIMD_3RR_SAME<"vfcmpun", Sw64VFCMPUN, v4f64, V256LOpnd>; ++ ++//def : Pat<(Sw64VFCMPEQ (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB)), ++// (VFCMPEQ V256LOpnd:$RA, V256LOpnd:$RB)>; ++//def : Pat<(Sw64VFCMPLE (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB)), ++// (VFCMPLE V256LOpnd:$RA, V256LOpnd:$RB)>; ++//def : Pat<(Sw64VFCMPLT (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB)), ++// (VFCMPLT V256LOpnd:$RA, V256LOpnd:$RB)>; ++//def : Pat<(Sw64VFCMPUN (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB)), ++// (VFCMPUN V256LOpnd:$RA, V256LOpnd:$RB)>; ++ ++def VCPYS : FPFormV<0x1A, 0x90>, ++ SIMD_3RR_SAME<"vcpys", int_sw64_vcpysd, v4f64, V256LOpnd>; ++def VCPYSE : FPFormV<0x1A, 0x91>, ++ SIMD_3RR_SAME<"vcpyse", int_sw64_vcpysed, v4f64, V256LOpnd>; ++def VCPYSN : FPFormV<0x1A, 0x92>, ++ SIMD_3RR_SAME<"vcpysn", int_sw64_vcpysnd, v4f64, V256LOpnd>; ++ ++def : Pat<(int_sw64_vcpyss V256LOpnd:$RA, V256LOpnd:$RB), ++ (VCPYS V256LOpnd:$RA, V256LOpnd:$RB)>; ++def : Pat<(int_sw64_vcpyses V256LOpnd:$RA, V256LOpnd:$RB), ++ (VCPYSE V256LOpnd:$RA, V256LOpnd:$RB)>; ++def : Pat<(int_sw64_vcpysns V256LOpnd:$RA, V256LOpnd:$RB), ++ 
(VCPYSN V256LOpnd:$RA, V256LOpnd:$RB)>; ++ ++multiclass SIMD_FMA funcS, bits<6> funcD, ++ string instr_asm, SDPatternOperator OpNode> { ++def S : FForm4V<0x1B, funcS>, ++ SIMD_4RR_SAME; ++def D : FForm4V<0x1B, funcD>, ++ SIMD_4RR_SAME; ++} ++ ++defm VMA : SIMD_FMA<0x00, 0x01, "vma", fma>; ++defm VMS : SIMD_FMA<0x02, 0x03, "vms", ++ ThridOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; ++defm VNMA : SIMD_FMA<0x04, 0x05, "vnma", ++ ThridOpFrag<(fma (fneg node:$LHS), node:$MHS, node:$RHS)> >; ++defm VNMS : SIMD_FMA<0x06, 0x07, "vnms", ++ ThridOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >; ++ ++multiclass SIMD_FLOAT_SEL func, string instr_asm, ++ SDPatternOperator OpNode> { ++def "" : FForm4V<0x1B, func>, ++ SIMD_4RR_SAME; ++ ++def : Vector_3OP_SameReg_Pat(NAME)>; ++} ++ ++defm VFSELEQ : SIMD_FLOAT_SEL<0x10, "vfseleq", vfcmoveq>; ++defm VFSELLT : SIMD_FLOAT_SEL<0x12, "vfsellt", vfcmovlt>; ++defm VFSELLE : SIMD_FLOAT_SEL<0x13, "vfselle", vfcmovle>; ++ ++// 简单运算指令格式: 寄存器格式 ++class SIMD_3RV_DESC_BASE { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA, ROB:$RB); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); ++// list Pattern = [(set ROC:$RC, (OpNode ROA:$RA, ROB:$RB))]; ++ InstrItinClass Itinerary = itin; ++ ++ string Constraints = "@earlyclobber $RC"; ++} ++ ++class SIMD_3RVV_DESC_BASE { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA, ROB:$RB); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); ++// list Pattern = [(set ROC:$RC, (OpNode ROA:$RA, ROB:$RB))]; ++ InstrItinClass Itinerary = itin; ++} ++ ++class SIMD_3RV_TY_DESC_BASE { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA, ROB:$RB); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); ++// list Pattern = [(set ROC:$RC, (OpNode (TyNode ROA:$RA), (TyNode ROB:$RB)))]; ++ InstrItinClass Itinerary = itin; ++ ++ string Constraints = "@earlyclobber $RC"; ++} ++ ++class SIMD_VFCMPS_DESC_BASE { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA, ROB:$RB); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); ++// list Pattern = [(set ROC:$RC, (OpNode ROA:$RA, ROB:$RB))]; ++ InstrItinClass Itinerary = itin; ++ ++ string Constraints = "@earlyclobber $RC"; ++} ++ ++class SIMD_2RV_R_DESC_BASE { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA, ROB:$RB); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); ++// list Pattern = [(set ROC:$RC, (OpNode ROA:$RA, ROB:$RB))]; ++ InstrItinClass Itinerary = itin; ++} ++ ++class SIMD_VSETGE_DESC_BASE { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA, ROB:$RB); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); ++// list Pattern = [(set ROC:$RC, (OpNode (v8i32 ROA:$RA), ROB:$RB))]; ++ ++ bit usesCustomInserter = 1; ++ InstrItinClass Itinerary = itin; ++} ++ ++class SIMD_VSQRT_DESC_BASE { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROB:$RB); ++ string AsmString = !strconcat(instr_asm, "\t$RB, $RC"); ++// list Pattern = [(set ROC:$RC, (OpNode ROB:$RB))]; ++ InstrItinClass Itinerary = itin; ++ ++ string Constraints = "@earlyclobber $RC"; ++} ++ ++class SIMD_POPCNT_DESC_BASE { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROB:$RB); ++ string AsmString = !strconcat(instr_asm, "\t$RB, $RC"); ++// list Pattern = [(set ROC:$RC, (OpNode (TyNode ROB:$RB)))]; ++ InstrItinClass Itinerary = itin; ++ ++ bit usesCustomInserter = 1; 
++} ++ ++class SIMD_REDUCE_DESC_BASE { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RC"); ++// list Pattern = [(set ROC:$RC, (OpNode (TyNode ROA:$RA)))]; ++ InstrItinClass Itinerary = itin; ++ ++ bit usesCustomInserter = 1; // 6A should be extend. ++// string Constraints = "@earlyclobber $RC"; ++} ++ ++// 简单运算指令格式: 立即数格式 ++class SIMD_I8_DESC_BASE { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA, SplatImm.OpClass:$imm); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $imm, $RC"); ++// list Pattern = [(set ROC:$RC, (OpNode ROA:$RA, SplatImm:$imm))]; ++ InstrItinClass Itinerary = itin; ++} ++ ++ ++// 浮点复核运算指令格式 寄存器格式 ++class SIMD_4RV_DESC_BASE { ++ dag OutOperandList = (outs ROD:$RD); ++ dag InOperandList = (ins ROA:$RA, ROB:$RB, ROC:$RC); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC, $RD"); ++ ++// list Pattern = [(set ROD:$RD, (OpNode ROA:$RA, ROB:$RB, ROC:$RC))]; ++ InstrItinClass Itinerary = itin; ++ ++ string Constraints = "@earlyclobber $RD"; ++} ++ ++class SIMD_4RV_DESC_SEL { ++ dag OutOperandList = (outs ROD:$RD); ++ dag InOperandList = (ins ROA:$RA, ROB:$RB, ROC:$RC); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC, $RD"); ++ ++ list Pattern = [(set (Vt ROD:$RD), (OpNode (Vt ROA:$RA), (Vt ROB:$RB), (Vt ROC:$RC)))]; ++} ++ ++class SIMD_4RV_DESC_VNMSS { ++ dag OutOperandList = (outs ROD:$RD); ++ dag InOperandList = (ins ROA:$RA, ROB:$RB, ROC:$RC); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC, $RD"); ++ ++// list Pattern = [(set ROD:$RD, (fneg (fma ROA:$RA, ROB:$RB, ++// (fneg ROC:$RC))))]; ++ ++ InstrItinClass Itinerary = itin; ++ ++ string Constraints = "@earlyclobber $RD"; ++} ++ ++ ++class SIMD_SELECT_DESC_BASE { ++ dag OutOperandList = (outs ROD:$RD); ++ dag InOperandList = (ins ROC:$RC, ROB:$RB, ROA:$RA); ++ string AsmString = !strconcat(instr_asm, "\t$RC, $RB, $RA, $RD"); ++ ++// list Pattern = [(set ROD:$RD, ++// (OpNode ROA:$RA, ROB:$RB, ROC:$RC))]; ++ InstrItinClass Itinerary = itin; ++} ++ ++ ++ ++class SIMD_VSETGE_I_DESC_BASE { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA, ImmOp:$imm); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $imm, $RC"); ++ //list Pattern = [(set ROC:$RC, (OpNode (VecTy ROA:$RA), Imm:$imm))]; ++ ++ bit usesCustomInserter = 1; ++ InstrItinClass Itinerary = itin; ++} ++ ++// Since we canonicalize buildvectors to v16i8, all vnots "-1" operands will be ++// of that type. 
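The fragment defined just below exists because the selection DAG has no dedicated vector NOT node: ~x reaches instruction selection as an xor with an all-ones splat, and the and-not/or-not/eqv instructions defined earlier (vbicw, vornotw, veqvw) are matched from the same building blocks. A small illustration in plain C++ (vector_size extension; not part of the patch), showing the source shapes involved:

    #include <stdint.h>

    typedef int32_t v8i32 __attribute__((vector_size(32)));

    // ~a is represented as (xor a, all-ones); that xor-with-(-1) shape is what a
    // vnot-style PatFrag matches.
    v8i32 vec_not(v8i32 a) { return ~a; }

    // a & ~b is the and-not idiom that can be folded into a single vbicw once the
    // xor with -1 has been recognised as a NOT.
    v8i32 vec_andnot(v8i32 a, v8i32 b) { return a & ~b; }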
++def vnot_sw64 : PatFrag<(ops node:$in), ++ (xor node:$in, (bitconvert (v8i32 immAllOnesV)))>; ++ ++class SIMD_VBIC_DESC_BASE { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA, ROB:$RB); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); ++ ++// list Pattern = [(set ROC:$RC, (OpNode (v8i32 ROA:$RA), (OpNode1 (v8i32 ROB:$RB))))]; ++ InstrItinClass Itinerary = itin; ++} ++ ++class SIMD_VORNOT_DESC_BASE { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA, ROB:$RB); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); ++ ++// list Pattern = [(set ROC:$RC, (OpNode (v8i32 ROA:$RA), (v8i32 ROB:$RB)))]; ++ InstrItinClass Itinerary = itin; ++} ++ ++class SIMD_COPY_DESC_BASE { ++ dag OutOperandList = (outs ROB:$RB); ++ dag InOperandList = (ins ROA:$RA); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB"); ++// list Pattern = [(set ROB:$RB, (TyNode (OpNode ROA:$RA)))]; ++ ++ bit usesCustomInserter = Num; // 6A should be extend. ++ InstrItinClass Itinerary = itin; ++} ++ ++ ++class SIMD_COPYF_DESC_BASE { ++ dag OutOperandList = (outs ROB:$RB); ++ dag InOperandList = (ins ROA:$RA); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB"); ++ list Pattern = []; ++ InstrItinClass Itinerary = itin; ++} ++ ++ ++class SIMD_COPYF_PSEUDO_BASE : ++ SIMDPseudo<(outs RCWD:$wd), (ins RCWS:$fs), ++ [(set RCWD:$wd, (VT (OpNode RCWS:$fs)))]> { ++ let usesCustomInserter = 1; ++} ++ ++ ++class SIMD_VSHIFT_DESC_BASE { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA, F4RCOpnd:$RB); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); ++// list Pattern = [(set ROC:$RC, (TyNode (OpNode ROA:$RA, F4RCOpnd:$RB)))]; ++ InstrItinClass Itinerary = itin; ++ ++ //string Constraints = "@earlyclobber $RC"; ++} ++ ++class SIMD_VINSECTL_DESC_BASE { ++ dag OutOperandList = (outs ROD:$RD); ++ dag InOperandList = (ins ROA:$RA, ROB:$RB); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RD"); ++// list Pattern = [(set ROD:$RD, (OpNode ROA:$RA, ROB:$RB))]; ++ InstrItinClass Itinerary = itin; ++ ++ //string Constraints = "@earlyclobber $RC"; ++} ++ ++class SIMD_INSERT_DESC_BASE { ++ dag OutOperandList = (outs ROD:$RD); ++ dag InOperandList = (ins ROA:$RA, ROD:$RB, ImmOp:$Imm); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $Imm, $RD"); ++// list Pattern = [(set ROD:$RD, (OpNode ROD:$RB, ROA:$RA, Imm:$Imm))]; ++ InstrItinClass Itinerary = itin; ++ ++ bit usesCustomInserter = Num; ++// string Constraints = "@earlyclobber $RD"; ++} ++ ++class SIMD_EXTRACT_DESC_BASE { ++ dag OutOperandList = (outs ROD:$RD); ++ dag InOperandList = (ins ROA:$RA, ImmOp:$Imm); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $Imm, $RD"); ++// list Pattern = [(set ROD:$RD, (OpNode (VecTy ROA:$RA), Imm:$Imm))]; ++ ++ bit usesCustomInserter = Num; ++ InstrItinClass Itinerary = itin; ++} ++ ++class SIMD_MIX_DESC_BASE { ++ ++ dag OutOperandList = (outs ROD:$RD); ++ dag InOperandList = (ins ROA:$RA, ROB:$RB, ROC:$RC); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC, $RD"); ++// list Pattern = [(set ROD:$RD, (OpNode ROA:$RA, ROB:$RB, ++// ROC:$RC))]; ++ ++ bit usesCustomInserter = Num; ++ InstrItinClass Itinerary = itin; ++} ++ ++class VADDWC_DESC : SIMD_3RVV_DESC_BASE<"vaddw", addv8i32, V256LOpnd>, IsCommutable; ++ ++class SIMD_2RV_SRi_DESC_BASE { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA, ImmOp:$Imm); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $Imm, $RC"); ++// list 
Pattern = [(set ROC:$RC, (OpNode ROA:$RA, Imm:$Imm))]; ++ InstrItinClass Itinerary = itin; ++// string Constraints = "@earlyclobber $RC"; ++} ++class VSUMW_DESC : SIMD_REDUCE_DESC_BASE<"vsumw", vecreduce_add, v8i32, GPRCOpnd, V256LOpnd>; ++class VSUML_DESC : SIMD_REDUCE_DESC_BASE<"vsuml", vecreduce_add, v4i64, GPRCOpnd, V256LOpnd>; ++ ++class VADDWC_ENC : FPFormV<0x1A, 0x00>; ++ ++class VSUMW_ENC : FPFormV_2RV<0x1A, 0x47>; ++class VSUML_ENC : FPFormV_2RV<0x1A, 0x48>; ++ ++//--------------------------- Instruction defs ----------------------------------------// ++ ++class SIMD_VMAX_VMINfunc, string instr_asm, SDPatternOperator OpNode, ++ ValueType vt, RegisterOperand RO> ++ : FPFormV<0x1A, func>, SIMD_3RR_SAME, IsCommutable; ++ ++def VMAXB : SIMD_VMAX_VMIN<0x1E, "vmaxb", vmax, v32i8, V256LOpnd>; ++def VMINB : SIMD_VMAX_VMIN<0x1F, "vminb", vmin, v32i8, V256LOpnd>; ++def VMAXH : SIMD_VMAX_VMIN<0x50, "vmaxh", vmax, v16i16, V256LOpnd>; ++def VMINH : SIMD_VMAX_VMIN<0x51, "vminh", vmin, v16i16, V256LOpnd>; ++def VMAXW : SIMD_VMAX_VMIN<0x52, "vmaxw", vmax, v8i32, V256LOpnd>; ++def VMINW : SIMD_VMAX_VMIN<0x53, "vminw", vmin, v8i32, V256LOpnd>; ++def VMAXL : SIMD_VMAX_VMIN<0x54, "vmaxl", vmax, v4i64, V256LOpnd>; ++def VMINL : SIMD_VMAX_VMIN<0x55, "vminl", vmin, v4i64, V256LOpnd>; ++ ++def VUMAXB : SIMD_VMAX_VMIN<0x56, "vumaxb", vumax, v32i8, V256LOpnd>; ++def VUMINB : SIMD_VMAX_VMIN<0x57, "vuminb", vumin, v32i8, V256LOpnd>; ++def VUMAXH : SIMD_VMAX_VMIN<0x58, "vumaxh", vumax, v16i16, V256LOpnd>; ++def VUMINH : SIMD_VMAX_VMIN<0x59, "vuminh", vumin, v16i16, V256LOpnd>; ++def VUMAXW : SIMD_VMAX_VMIN<0x5A, "vumaxw", vumax, v8i32, V256LOpnd>; ++def VUMINW : SIMD_VMAX_VMIN<0x5B, "vuminw", vumin, v8i32, V256LOpnd>; ++def VUMAXL : SIMD_VMAX_VMIN<0x5C, "vumaxl", vumax, v4i64, V256LOpnd>; ++def VUMINL : SIMD_VMAX_VMIN<0x5D, "vuminl", vumin, v4i64, V256LOpnd>; ++ ++def VMAXS : SIMD_VMAX_VMIN<0xAC, "vmaxs", vmaxf, v4f32, V256LOpnd>; ++def VMINS : SIMD_VMAX_VMIN<0xAD, "vmins", vminf, v4f32, V256LOpnd>; ++def VMAXD : SIMD_VMAX_VMIN<0xAE, "vmaxd", vmaxf, v4f64, V256LOpnd>; ++def VMIND : SIMD_VMAX_VMIN<0xAF, "vmind", vminf, v4f64, V256LOpnd>; ++ ++ ++// For VSELXX pattern match with imm operand ++multiclass SIMD_VSELXX Opcode, bits<6>func, ++ string instr_asm, SDPatternOperator OpNode, ValueType Vt, ++ Operand immtype, RegisterOperand RO, ++ SDPatternOperator IOp = null_frag, ++ ComplexPattern cpx = ComplexImmPat> { ++ ++ def rr : FForm4V, SIMD_4RV_DESC_SEL; ++ ++ def ri : FForm4_VSELi, SIMD_4RI_BASE; ++ ++ def : Pat<(Vt (OpNode (Vt RO:$RA), (Vt RO:$RB), (Vt (Sw64VBroadCast (i64 cpx:$Imm))))), ++ (!cast(NAME # ri) (Vt RO:$RA), (Vt RO:$RB), $Imm)>; ++} ++ ++defm VSELEQW : SIMD_VSELXX<0x1B, 0x18, "vseleqw", vseleqw, v8i32, u5imm, V256LOpnd>; ++defm VSELLBCW : SIMD_VSELXX<0x1B, 0x19, "vsellbcw", vsellbcw, v8i32, u5imm, V256LOpnd>; ++defm VSELLTW : SIMD_VSELXX<0x1B, 0x1A, "vselltw", vselltw, v8i32, u5imm, V256LOpnd>; ++defm VSELLEW : SIMD_VSELXX<0x1B, 0x1B, "vsellew", vsellew, v8i32, u5imm, V256LOpnd>; ++ ++class SIMD_ARITH_FLOAT Opcode, bits<8>func, ++ string instr_asm, SDPatternOperator OpNode, ++ ValueType Vt, RegisterOperand RO> : ++ FPFormV, SIMD_3RR_SAME; ++ ++def VADDS : SIMD_ARITH_FLOAT<0x1A, 0x80, "vadds", fadd, v4f32, V256LOpnd>; ++def VADDD : SIMD_ARITH_FLOAT<0x1A, 0x81, "vaddd", fadd, v4f64, V256LOpnd>; ++def VSUBS : SIMD_ARITH_FLOAT<0x1A, 0x82, "vsubs", fsub, v4f32, V256LOpnd>; ++def VSUBD : SIMD_ARITH_FLOAT<0x1A, 0x83, "vsubd", fsub, v4f64, V256LOpnd>; ++def VMULS : SIMD_ARITH_FLOAT<0x1A, 0x84, "vmuls", 
fmul, v4f32, V256LOpnd>; ++def VMULD : SIMD_ARITH_FLOAT<0x1A, 0x85, "vmuld", fmul, v4f64, V256LOpnd>; ++def VDIVS : SIMD_ARITH_FLOAT<0x1A, 0x86, "vdivs", fdiv, v4f32, V256LOpnd>; ++def VDIVD : SIMD_ARITH_FLOAT<0x1A, 0x87, "vdivd", fdiv, v4f64, V256LOpnd>; ++ ++ ++def vsqrt_sw : SDNode<"Sw64ISD::VSQRT", SDT_VSQRT>; ++ ++class SIMD_VSQRT { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROB:$RB); ++ string AsmString = !strconcat(instr_asm, "\t$RB, $RC"); ++ list Pattern = [(set (Vt ROC:$RC), (OpNode (Vt ROB:$RB)))]; ++} ++ ++def VSQRTS : FPFormV_2RV1<0x1A, 0x88>, SIMD_VSQRT<"vsqrts", vsqrt_sw, v4f32, V256LOpnd>; ++def VSQRTD : FPFormV_2RV1<0x1A, 0x89>, SIMD_VSQRT<"vsqrtd", vsqrt_sw, v4f64, V256LOpnd>; ++ ++def Sw64VFREC : SDNode<"Sw64ISD::VFREC", SDT_ZVecFREC>; ++ ++def VFRECS : FPFormV_2RV1<0x1A, 0xAA>, SIMD_VSQRT<"vfrecs", Sw64VFREC, v4f32, V256LOpnd>; ++def VFRECD : FPFormV_2RV1<0x1A, 0xAB>, SIMD_VSQRT<"vfrecd", Sw64VFREC, v4f64, V256LOpnd>; ++ ++class SIMD_VSUMF { ++ dag OutOperandList = (outs ROC:$RC); ++ dag InOperandList = (ins ROA:$RA); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RC"); ++ list Pattern = [(set (TyC ROC:$RC), (OpNode (TyA ROA:$RA)))]; ++} ++ ++def VFCVTSD : FPFormV_2RV<0x1A, 0x95>, SIMD_VSUMF<"vfcvtsd", Sw64VFCVTSD, v4f64, v4f32, V256LOpnd, V256LOpnd>; ++def VFCVTDS : FPFormV_2RV<0x1A, 0x96>, SIMD_VSUMF<"vfcvtds", Sw64VFCVTDS, v4f32, v4f64, V256LOpnd, V256LOpnd>; ++def VFCVTLS : FPFormV_2RV<0x1A, 0x99>, SIMD_VSUMF<"vfcvtls", Sw64VFCVTLS, v4f32, v4i64, V256LOpnd, V256LOpnd>; ++def VFCVTLD : FPFormV_2RV<0x1A, 0x9A>, SIMD_VSUMF<"vfcvtld", Sw64VFCVTLD, v4f64, v4i64, V256LOpnd, V256LOpnd>; ++ ++class SIMD_FCVTSH_DESC_BASE { ++ dag OutOperandList = (outs ROD:$RD); ++ dag InOperandList = (ins ROD:$RA, ROD:$RB, ImmOp:$Imm); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $Imm, $RD"); ++ list Pattern = [(set (v4f64 ROD:$RD), (OpNode (v4f32 ROD:$RA), (v4f32 ROD:$RB), Imm:$Imm))]; ++} ++ ++class SIMD_FCVTHS_DESC_BASE { ++ dag OutOperandList = (outs ROD:$RD); ++ dag InOperandList = (ins ROD:$RA, ImmOp:$Imm); ++ string AsmString = !strconcat(instr_asm, "\t$RA, $Imm, $RD"); ++ list Pattern = [(set (v4f32 ROD:$RD), (OpNode (v4f64 ROD:$RA), Imm:$Imm))]; ++} ++ ++def VFCVTSH : FForm4LV<0x1B, 0x35>, SIMD_FCVTSH_DESC_BASE<"vfcvtsh", Sw64VFCVTSH, uimm5, immZExt5Ptr, V256LOpnd>; ++def VFCVTHS : FForm4LV1<0x1B, 0x36>, SIMD_FCVTHS_DESC_BASE<"vfcvths", Sw64VFCVTHS, uimm5, immZExt5Ptr, V256LOpnd>; ++ ++def VFCVTDL : FPFormV_2RV<0x1A, 0x9B>, SIMD_VSUMF<"vfcvtdl", Sw64VFCVTDL, v4i64, v4f32, V256LOpnd, V256LOpnd>; ++def VFCVTDLG : FPFormV_2RV<0x1A, 0x9C>, SIMD_VSUMF<"vfcvtdl_g", Sw64VFCVTDLG, v4i64, v4f32, V256LOpnd, V256LOpnd>; ++def VFCVTDLP : FPFormV_2RV<0x1A, 0x9D>, SIMD_VSUMF<"vfcvtdl_p", Sw64VFCVTDLP, v4i64, v4f32, V256LOpnd, V256LOpnd>; ++def VFCVTDLZ : FPFormV_2RV<0x1A, 0x9E>, SIMD_VSUMF<"vfcvtdl_z", Sw64VFCVTDLZ, v4i64, v4f32, V256LOpnd, V256LOpnd>; ++def VFCVTDLN : FPFormV_2RV<0x1A, 0x9F>, SIMD_VSUMF<"vfcvtdl_n", Sw64VFCVTDLN, v4i64, v4f32, V256LOpnd, V256LOpnd>; ++ ++def VFRIS : FPFormV_2RV1<0x1A, 0xA0>, SIMD_VSQRT<"vfris", Sw64VFRIS, v4f32, V256LOpnd>; ++def VFRISG : FPFormV_2RV1<0x1A, 0xA1>, SIMD_VSQRT<"vfris_g", Sw64VFRISG, v4f32, V256LOpnd>; ++def VFRISP : FPFormV_2RV1<0x1A, 0xA2>, SIMD_VSQRT<"vfris_p", Sw64VFRISP, v4f32, V256LOpnd>; ++def VFRISZ : FPFormV_2RV1<0x1A, 0xA3>, SIMD_VSQRT<"vfris_z", Sw64VFRISZ, v4f32, V256LOpnd>; ++def VFRISN : FPFormV_2RV1<0x1A, 0xA4>, SIMD_VSQRT<"vfris_n", Sw64VFRISN, v4f32, V256LOpnd>; ++def VFRID : 
FPFormV_2RV1<0x1A, 0xA5>, SIMD_VSQRT<"vfrid", Sw64VFRID, v4f64, V256LOpnd>; ++def VFRIDG : FPFormV_2RV1<0x1A, 0xA6>, SIMD_VSQRT<"vfrid_g", Sw64VFRIDG, v4f64, V256LOpnd>; ++def VFRIDP : FPFormV_2RV1<0x1A, 0xA7>, SIMD_VSQRT<"vfrid_p", Sw64VFRIDP, v4f64, V256LOpnd>; ++def VFRIDZ : FPFormV_2RV1<0x1A, 0xA8>, SIMD_VSQRT<"vfrid_z", Sw64VFRIDZ, v4f64, V256LOpnd>; ++def VFRIDN : FPFormV_2RV1<0x1A, 0xA9>, SIMD_VSQRT<"vfrid_n", Sw64VFRIDN, v4f64, V256LOpnd>; ++ ++def vsumf : SDNode<"Sw64ISD::VSUMF", SDT_VSUMF>; ++ ++def VSUMS : FPFormV_2RV<0x1A, 0x93>, SIMD_VSUMF<"vsums", vsumf, f32, v4f32, F4RCOpnd, V256LOpnd>; ++def VSUMD : FPFormV_2RV<0x1A, 0x94>, SIMD_VSUMF<"vsumd", vsumf, f64, v4f64, F8RCOpnd, V256LOpnd>; ++ ++// Patterns. ++class SIMDPat pred = [HasSIMD]> : ++ Pat, Requires; ++ ++ ++//class SIMDBitconvertPat preds = [HasSIMD]> : ++// SIMDPat<(DstVT (bitconvert SrcVT:$src)), ++// (COPY_TO_REGCLASS SrcVT:$src, DstRC), preds>; ++// ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++// ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++// ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++// ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++// ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++// ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++//def : SIMDBitconvertPat; ++ ++ ++// ------------------------ ++//class Sw64Pat : Pat, PredicateControl; ++class Sw64Pat : Pat; ++ ++// TODO: Add support for FPOpFusion::Standard ++def AllowFPOpFusion : Predicate<"TM.Options.AllowFPOpFusion ==" ++ " FPOpFusion::Fast">; ++ ++class ASE_SIMD { ++ list ASEPredicate = [HasSIMD]; ++} ++ ++ ++class FPOP_FUSION_FAST { ++ list AdditionalPredicates = [AllowFPOpFusion]; ++} ++ ++ ++// Additional VNMSX patterns: -a*b + c == -(a*b - c) ++multiclass Vecotr_fma_pat { ++def : Vector_3OP_SameReg_Pat(Inst#S)>; ++ ++def : Vector_3OP_SameReg_Pat(Inst#D)>; ++} ++ ++defm : Vecotr_fma_pat, "VNMA">; ++defm : Vecotr_fma_pat, "VNMA">; ++ ++def : Pat<(int_sw64_vnmsd V256LOpnd:$RA, V256LOpnd:$RB, V256LOpnd:$RC), ++ (VNMSD $RA, $RB, $RC)>; ++ ++def : Pat<(fneg v4f64:$RA), (VCPYSN $RA, $RA)>; ++def : Pat<(fneg v4f32:$RA), (VCPYSN $RA, $RA)>; ++ ++def :Pat<(v4f32 (fadd (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB))), ++ (VADDD V256LOpnd:$RA, V256LOpnd:$RB)>; ++ ++class bitconvert_pat ++ : Pat<(dstTy (bitconvert (srcTy V256LOpnd:$RA))), (dstTy V256LOpnd:$RA)>; ++ ++def : bitconvert_pat; ++def : bitconvert_pat; ++def : bitconvert_pat; ++def : bitconvert_pat; ++ ++def : bitconvert_pat; ++def : bitconvert_pat; ++def : bitconvert_pat; ++def : bitconvert_pat; ++ ++def : bitconvert_pat; ++def : bitconvert_pat; ++def : bitconvert_pat; ++def : bitconvert_pat; ++ ++def : bitconvert_pat; ++def : bitconvert_pat; ++def : bitconvert_pat; ++def : bitconvert_pat; ++ ++def : bitconvert_pat; ++def : bitconvert_pat; ++def : bitconvert_pat; ++def : bitconvert_pat; ++ ++//def : Pat<(fma (fneg v4f64:$RA), v4f64:$RB, v4f64:$RC), ++// (VNMSD $RA, $RB, $RC)>; ++//def : Pat<(fma v4f64:$RA, (fneg 
v4f64:$RB), v4f64:$RC), ++// (VNMSD $RA, $RB, $RC)>; ++// ++//def : Pat<(fma (fneg v4f32:$RA), v4f32:$RB, v4f32:$RC), ++// (VNMSS $RA, $RB, $RC)>; ++//def : Pat<(fma v4f32:$RA, (fneg v4f32:$RB), v4f32:$RC), ++// (VNMSS $RA, $RB, $RC)>; ++// ++//def : Pat<(int_sw64_vnmsd v4f64:$RA, v4f64:$RB, v4f64:$RC), ++// (VNMSD $RA, $RB, $RC)>; +diff --git a/llvm/lib/Target/Sw64/Sw64LLRP.cpp b/llvm/lib/Target/Sw64/Sw64LLRP.cpp +new file mode 100644 +index 000000000..81af6ed42 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64LLRP.cpp +@@ -0,0 +1,476 @@ ++//===-- Sw64LLRP.cpp - Sw64 Load Load Replay Trap elimination pass. -- --===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// Here we check for potential replay traps introduced by the spiller ++// We also align some branch targets if we can do so for free. ++// ++//===----------------------------------------------------------------------===// ++ ++#define DEBUG_TYPE "sw_64-nops" ++#include "MCTargetDesc/Sw64BaseInfo.h" ++#include "Sw64.h" ++#include "Sw64FrameLowering.h" ++#include "Sw64Subtarget.h" ++#include "llvm/ADT/SetOperations.h" ++#include "llvm/ADT/Statistic.h" ++#include "llvm/CodeGen/MachineFunctionPass.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/TargetInstrInfo.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Target/TargetMachine.h" ++ ++using namespace llvm; ++cl::opt Sw64Mieee("mieee", cl::desc("Support the IEEE754"), ++ cl::init(true)); ++ ++cl::opt Sw64DeleteNop("sw64-delete-nop", cl::desc("Delete NOP"), ++ cl::init(true)); ++ ++STATISTIC(nopintro, "Number of nops inserted"); ++STATISTIC(nopalign, "Number of nops inserted for alignment"); ++namespace llvm { ++cl::opt AlignAll("sw_64-align-all", cl::Hidden, ++ cl::desc("Align all blocks")); ++ ++struct Sw64LLRPPass : public MachineFunctionPass { ++ /// Target machine description which we query for reg. names, data ++ /// layout, etc. ++ /// ++ Sw64TargetMachine &TM; ++ ++ static char ID; ++ Sw64LLRPPass(Sw64TargetMachine &tm) : MachineFunctionPass(ID), TM(tm) {} ++ ++ // virtual const char *getPassName() const { ++ StringRef getPassName() const { return "Sw64 NOP inserter"; } ++ ++ bool runOnMachineFunction(MachineFunction &F) { ++ const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo(); ++ bool flag = false; // hasJSR ? 
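++ // 'flag' marks that the previous instruction was a call (JSR or PseudoCallIndirect); when it is set, the loop below inserts a MOVaddrPCGp right after the call, using the return-address register R26, so that the global pointer is recomputed before the following instruction.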
++ bool Changed = false; ++ MachineInstr *prev[3] = {0, 0, 0}; ++ unsigned count = 0; ++ ++ DebugLoc dl; ++ const Sw64Subtarget &Subtarget = F.getSubtarget(); ++ int curgpdist = Subtarget.getCurgpdist(); ++ ++ SmallVector Ops; ++ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++ ++FI) { ++ MachineBasicBlock &MBB = *FI; ++ for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); ++ MII != MIE;) { ++ MachineInstr *MI = &*MII; ++ ++MII; ++ if (MII == MIE) ++ break; ++ MachineInstr *MINext = &*MII; ++ if (MINext->getOpcode() == Sw64::FILLCS || ++ MINext->getOpcode() == Sw64::FILLDE) { ++ if (MI->getOpcode() == Sw64::LDA && ++ (MI->getOperand(1).getImm() == MINext->getOperand(0).getImm())) { ++ bool isRead = false; ++ for (MachineBasicBlock::iterator M1 = MII; M1 != MIE;) { ++ MachineInstr *Mtest = &*M1; ++ if (Mtest->getOpcode() == Sw64::LDA || ++ Mtest->getOpcode() == Sw64::LDAH || ++ Mtest->getOpcode() == Sw64::LDL || ++ Mtest->getOpcode() == Sw64::LDW || ++ Mtest->getOpcode() == Sw64::LDHU || ++ Mtest->getOpcode() == Sw64::LDBU) { ++ if (Mtest->getOperand(0).getReg() == ++ MI->getOperand(0).getReg() && ++ !isRead) { ++ Ops.push_back(MI); ++ break; ++ } ++ } ++ if (Mtest->getOpcode() == Sw64::STL || ++ Mtest->getOpcode() == Sw64::STW || ++ Mtest->getOpcode() == Sw64::STH || ++ Mtest->getOpcode() == Sw64::STB) { ++ if (Mtest->getOperand(2).getReg() == ++ MI->getOperand(0).getReg() || ++ Mtest->getOperand(0).getReg() == ++ MI->getOperand(0).getReg()) { ++ isRead = true; ++ } ++ } ++ ++M1; ++ } ++ } ++ } ++ } ++ for (auto *PrefMI : Ops) ++ PrefMI->eraseFromParent(); ++ Ops.clear(); ++ } ++ ++ // Remove all duplicate prefetch instr ++ SmallVector FILL; ++ int Dul; ++ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++ ++FI) { ++ MachineBasicBlock &MBB = *FI; ++ for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); ++ MII != MIE;) { ++ MachineInstr *MI = &*MII; ++ ++MII; ++ Dul = 1; ++ if (MII == MIE) ++ break; ++ if (MI->getOpcode() == Sw64::FILLCS || ++ MI->getOpcode() == Sw64::FILLCS_E || ++ MI->getOpcode() == Sw64::FILLDE || ++ MI->getOpcode() == Sw64::FILLDE_E || ++ MI->getOpcode() == Sw64::S_FILLDE || ++ MI->getOpcode() == Sw64::S_FILLCS) { ++ if (!FILL.empty()) { ++ for (auto *PrefMI : FILL) { ++ if (PrefMI->getOperand(1).getReg() == ++ MI->getOperand(1).getReg()) { ++ Dul = 2; ++ break; ++ } ++ } ++ } ++ if (Dul == 1) { ++ for (MachineBasicBlock::iterator M1 = MII; M1 != MIE;) { ++ MachineInstr *Mtest = &*M1; ++ if (Mtest->getOpcode() == Sw64::FILLCS || ++ Mtest->getOpcode() == Sw64::FILLCS_E || ++ Mtest->getOpcode() == Sw64::FILLDE || ++ Mtest->getOpcode() == Sw64::FILLDE_E || ++ Mtest->getOpcode() == Sw64::S_FILLCS || ++ Mtest->getOpcode() == Sw64::S_FILLDE) { ++ if (Mtest->getOperand(1).getReg() == ++ MI->getOperand(1).getReg()) { ++ FILL.push_back(Mtest); ++ } ++ } ++ ++M1; ++ } ++ } ++ } ++ } ++ if (!FILL.empty()) { ++ for (auto *PrefMI1 : FILL) ++ PrefMI1->eraseFromParent(); ++ } ++ FILL.clear(); ++ } ++ ++ // If read and write, use fillde ++ int N = 0; ++ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE;) { ++ MachineBasicBlock &MBB = *FI; ++ ++FI; ++ for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); ++ MII != MIE;) { ++ MachineInstr *MI = &*MII; ++ ++MII; ++ if (MII == MIE) ++ break; ++ if (MI->getOpcode() == Sw64::FILLCS || ++ MI->getOpcode() == Sw64::S_FILLCS) { ++ for (MachineBasicBlock::iterator M1 = MII; M1 != MIE;) { ++ MachineInstr *Mtest = &*M1; ++ if 
(Mtest->getOpcode() == Sw64::LDA || ++ Mtest->getOpcode() == Sw64::LDAH || ++ Mtest->getOpcode() == Sw64::LDL || ++ Mtest->getOpcode() == Sw64::LDW || ++ Mtest->getOpcode() == Sw64::LDHU || ++ Mtest->getOpcode() == Sw64::LDBU) { ++ if (Mtest->getOperand(0).getReg() == MI->getOperand(1).getReg()) { ++ N = 1; ++ } ++ } ++ ++M1; ++ } ++ if (FI == FE) ++ break; ++ MachineBasicBlock &MBB1 = *FI; ++ for (MachineBasicBlock::iterator MII1 = MBB1.begin(), ++ MIE1 = MBB1.end(); ++ MII1 != MIE1;) { ++ MachineInstr *MI1 = &*MII1; ++ if (MI1->getOpcode() == Sw64::STL || ++ MI1->getOpcode() == Sw64::STW || ++ MI1->getOpcode() == Sw64::STB || ++ MI1->getOpcode() == Sw64::STH) { ++ if (MI1->getOperand(2).getReg() == MI->getOperand(1).getReg() && ++ N == 0) { ++ if (MI->getOpcode() == Sw64::FILLCS) ++ MI->setDesc(TII->get(Sw64::FILLDE)); ++ if (MI->getOpcode() == Sw64::S_FILLCS) ++ MI->setDesc(TII->get(Sw64::S_FILLDE)); ++ N = 0; ++ } ++ } ++ ++MII1; ++ } ++ } ++ } ++ } ++ ++ const TargetRegisterInfo *TRI = F.getSubtarget().getRegisterInfo(); ++ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++ ++FI) { ++ MachineBasicBlock &MBB = *FI; ++ for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); ++ MII != MIE;) { ++ MachineInstr *MI = &*MII; ++ ++MII; ++ if (MII == MIE) ++ break; ++ if (MI->getOpcode() == Sw64::FILLCS || ++ MI->getOpcode() == Sw64::FILLDE) { ++ int N = 0; ++ int isDul = 0; ++ for (MachineBasicBlock::iterator MIT = MII; MIT != MIE;) { ++ MachineInstr *MITT = &*MIT; ++ if (MITT->readsRegister(MI->getOperand(1).getReg(), TRI)) { ++ N++; ++ } ++ if (MITT->getOpcode() == Sw64::FILLCS || ++ MITT->getOpcode() == Sw64::FILLDE || ++ MITT->getOpcode() == Sw64::FILLCS_E || ++ MITT->getOpcode() == Sw64::FILLDE_E) ++ isDul++; ++ ++MIT; ++ } ++ if (N == 1 && isDul > 0) { ++ if (MI->getOpcode() == Sw64::FILLCS) ++ MI->setDesc(TII->get(Sw64::FILLCS_E)); ++ if (MI->getOpcode() == Sw64::FILLDE) { ++ MI->setDesc(TII->get(Sw64::FILLDE_E)); ++ } ++ } ++ } ++ } ++ } ++ ++ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++ ++FI) { ++ MachineBasicBlock &MBB = *FI; ++ for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); ++ MII != MIE;) { ++ MachineInstr *MI = &*MII; ++ if (MI->getOpcode() == Sw64::FILLCS || ++ MI->getOpcode() == Sw64::S_FILLCS) { ++ for (MachineBasicBlock::iterator M1 = MII; M1 != MIE;) { ++ MachineInstr *Mtest = &*M1; ++ if (Mtest->getOpcode() == Sw64::STL || ++ Mtest->getOpcode() == Sw64::STW || ++ Mtest->getOpcode() == Sw64::STH || ++ Mtest->getOpcode() == Sw64::STB) { ++ if (Mtest->getOperand(2).getReg() == MI->getOperand(1).getReg()) { ++ if (MI->getOpcode() == Sw64::FILLCS) ++ MI->setDesc(TII->get(Sw64::FILLDE)); ++ if (MI->getOpcode() == Sw64::S_FILLCS) ++ MI->setDesc(TII->get(Sw64::S_FILLDE)); ++ } ++ } ++ ++M1; ++ } ++ } ++ ++MII; ++ } ++ } ++ ++ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++ ++FI) { ++ MachineBasicBlock &MBB = *FI; ++ ++ int count = 0; ++ bool isLable = 0; ++ if (MBB.getBasicBlock() && MBB.getBasicBlock()->isLandingPad()) { ++ MachineBasicBlock::iterator MBBI = MBB.begin(); ++ for (MBBI = MBB.begin(); MBBI != MBB.end(); ++MBBI, ++count) { ++ if (count == 0 && MBBI->isLabel()) ++ isLable = true; ++ if (count == 1 && isLable) { ++ BuildMI(MBB, MBBI, dl, TII->get(Sw64::MOVaddrPCGp)) ++ .addGlobalAddress(&(F.getFunction())) ++ .addImm(++curgpdist) ++ .addReg(Sw64::R26); ++ isLable = false; ++ } ++ } ++ if (count == 1 && isLable) { ++ BuildMI(MBB, MBBI, dl, 
TII->get(Sw64::MOVaddrPCGp)) ++ .addGlobalAddress(&(F.getFunction())) ++ .addImm(++curgpdist) ++ .addReg(Sw64::R26); ++ isLable = false; ++ } ++ } ++ ++ MachineBasicBlock::iterator I; ++ for (I = MBB.begin(); I != MBB.end(); ++I) { ++ if (flag) { ++ BuildMI(MBB, I, dl, TII->get(Sw64::MOVaddrPCGp)) ++ .addGlobalAddress(&(F.getFunction())) ++ .addImm(++curgpdist) ++ .addReg(Sw64::R26); ++ if (Sw64Mieee) { ++ if (!Sw64DeleteNop) ++ BuildMI(MBB, I, dl, TII->get(Sw64::NOP)); ++ } ++ flag = false; ++ } ++ if (I->getOpcode() == Sw64::JSR || ++ I->getOpcode() == Sw64::PseudoCallIndirect) { ++ dl = MBB.findDebugLoc(I); ++ if (Sw64Mieee) { ++ if (!Sw64DeleteNop) ++ BuildMI(MBB, I, dl, TII->get(Sw64::NOP)); ++ } ++ flag = true; ++ } ++ } ++ if (flag) { ++ BuildMI(MBB, I, dl, TII->get(Sw64::MOVaddrPCGp)) ++ .addGlobalAddress(&(F.getFunction())) ++ .addImm(++curgpdist) ++ .addReg(Sw64::R26); ++ if (Sw64Mieee) { ++ if (!Sw64DeleteNop) ++ BuildMI(MBB, I, dl, TII->get(Sw64::NOP)); ++ } ++ flag = false; ++ } ++ } ++ ++ if (!Sw64DeleteNop) { ++ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++ ++FI) { ++ MachineBasicBlock &MBB = *FI; ++ bool ub = false; ++ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) { ++ if (count % 4 == 0) ++ prev[0] = prev[1] = prev[2] = 0; // Slots cleared at fetch boundary ++ ++count; ++ MachineInstr *MI = &(*I); ++ I++; ++ switch (MI->getOpcode()) { ++ case Sw64::LDL: ++ case Sw64::LDW: ++ case Sw64::LDHU: ++ case Sw64::LDBU: ++ case Sw64::LDD: ++ case Sw64::LDS: ++ case Sw64::STL: ++ case Sw64::STW: ++ case Sw64::STH: ++ case Sw64::STB: ++ case Sw64::STD: ++ case Sw64::STS: ++ dl = MBB.findDebugLoc(MI); ++ if (MI->getOperand(2).getReg() == Sw64::R30) { ++ if (prev[0] && ++ prev[0]->getOperand(2).getReg() == ++ MI->getOperand(2).getReg() && ++ prev[0]->getOperand(1).getImm() == ++ MI->getOperand(1).getImm()) { ++ prev[0] = prev[1]; ++ prev[1] = prev[2]; ++ prev[2] = 0; ++ BuildMI(MBB, MI, dl, TII->get(Sw64::BISr), Sw64::R31) ++ .addReg(Sw64::R31) ++ .addReg(Sw64::R31); ++ Changed = true; ++ nopintro += 1; ++ count += 1; ++ } else if (prev[1] && ++ prev[1]->getOperand(2).getReg() == ++ MI->getOperand(2).getReg() && ++ prev[1]->getOperand(1).getImm() == ++ MI->getOperand(1).getImm()) { ++ prev[0] = prev[2]; ++ prev[1] = prev[2] = 0; ++ BuildMI(MBB, MI, dl, TII->get(Sw64::BISr), Sw64::R31) ++ .addReg(Sw64::R31) ++ .addReg(Sw64::R31); ++ BuildMI(MBB, MI, dl, TII->get(Sw64::BISr), Sw64::R31) ++ .addReg(Sw64::R31) ++ .addReg(Sw64::R31); ++ Changed = true; ++ nopintro += 2; ++ count += 2; ++ } else if (prev[2] && ++ prev[2]->getOperand(2).getReg() == ++ MI->getOperand(2).getReg() && ++ prev[2]->getOperand(1).getImm() == ++ MI->getOperand(1).getImm()) { ++ prev[0] = prev[1] = prev[2] = 0; ++ BuildMI(MBB, MI, dl, TII->get(Sw64::BISr), Sw64::R31) ++ .addReg(Sw64::R31) ++ .addReg(Sw64::R31); ++ BuildMI(MBB, MI, dl, TII->get(Sw64::BISr), Sw64::R31) ++ .addReg(Sw64::R31) ++ .addReg(Sw64::R31); ++ BuildMI(MBB, MI, dl, TII->get(Sw64::BISr), Sw64::R31) ++ .addReg(Sw64::R31) ++ .addReg(Sw64::R31); ++ Changed = true; ++ nopintro += 3; ++ count += 3; ++ } ++ prev[0] = prev[1]; ++ prev[1] = prev[2]; ++ prev[2] = MI; ++ break; ++ } ++ prev[0] = prev[1]; ++ prev[1] = prev[2]; ++ prev[2] = 0; ++ break; ++ case Sw64::ALTENT: ++ case Sw64::MEMLABEL: ++ case Sw64::PCLABEL: ++ --count; ++ break; ++ case Sw64::BR: ++ case Sw64::PseudoBR: ++ case Sw64::JMP: ++ ub = true; ++ // fall through ++ default: ++ prev[0] = prev[1]; ++ prev[1] = prev[2]; ++ prev[2] = 0; ++ 
break; ++ } ++ } ++ if (ub || AlignAll) { ++ // we can align stuff for free at this point ++ while (count % 4) { ++ BuildMI(MBB, MBB.end(), dl, TII->get(Sw64::BISr), Sw64::R31) ++ .addReg(Sw64::R31) ++ .addReg(Sw64::R31); ++ ++count; ++ ++nopalign; ++ prev[0] = prev[1]; ++ prev[1] = prev[2]; ++ prev[2] = 0; ++ } ++ } ++ } ++ } ++ return Changed; ++ } ++}; ++char Sw64LLRPPass::ID = 0; ++} // namespace llvm ++ ++FunctionPass *llvm::createSw64LLRPPass(Sw64TargetMachine &tm) { ++ return new Sw64LLRPPass(tm); ++} +diff --git a/llvm/lib/Target/Sw64/Sw64MCInstLower.cpp b/llvm/lib/Target/Sw64/Sw64MCInstLower.cpp +new file mode 100644 +index 000000000..dc9935b59 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64MCInstLower.cpp +@@ -0,0 +1,282 @@ ++//===-- Sw64MCInstLower.cpp - Convert Sw64 MachineInstr to MCInst -------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++/// ++/// \file ++/// This file contains code to lower Sw64 MachineInstrs to their ++/// corresponding MCInst records. ++/// ++//===----------------------------------------------------------------------===// ++#include "Sw64MCInstLower.h" ++#include "MCTargetDesc/Sw64BaseInfo.h" ++#include "MCTargetDesc/Sw64MCExpr.h" ++#include "Sw64.h" ++#include "llvm/CodeGen/AsmPrinter.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/IR/Mangler.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCInstrDesc.h" ++ ++using namespace llvm; ++ ++#include "Sw64GenInstrInfo.inc" ++ ++namespace llvm { ++struct Sw64InstrTable { ++ MCInstrDesc Insts[1000]; ++}; ++extern const Sw64InstrTable Sw64Descs; ++} // namespace llvm ++ ++Sw64MCInstLower::Sw64MCInstLower(class AsmPrinter &asmprinter) ++ : Printer(asmprinter) {} ++ ++void Sw64MCInstLower::Initialize(MCContext *C) { Ctx = C; } ++ ++static bool lowerLitUseMOp(const MachineOperand &MO, ++ Sw64MCExpr::Sw64ExprKind &Kind) { ++ Sw64MCExpr::Sw64ExprKind TargetKind = Sw64MCExpr::MEK_None; ++ unsigned flags = MO.getTargetFlags(); ++ if (flags & Sw64II::MO_LITERAL && flags & Sw64II::MO_LITERAL_BASE) { ++ TargetKind = Sw64MCExpr::MEK_LITUSE_BASE; ++ } else if (flags & Sw64II::MO_HINT && flags & Sw64II::MO_LITUSE) { ++ TargetKind = Sw64MCExpr::MEK_LITUSE_JSRDIRECT; ++ } else ++ return false; ++ ++ Kind = TargetKind; ++ return true; ++} ++ ++MCOperand Sw64MCInstLower::LowerSymbolOperand(const MachineOperand &MO, ++ MachineOperandType MOTy, ++ unsigned Offset) const { ++ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; ++ Sw64MCExpr::Sw64ExprKind TargetKind = Sw64MCExpr::MEK_None; ++ const MCSymbol *Symbol; ++ ++ switch (MO.getTargetFlags()) { ++ default: ++ if (lowerLitUseMOp(MO, TargetKind)) ++ break; ++ llvm_unreachable("Invalid target flag!"); ++ case Sw64II::MO_NO_FLAG: ++ TargetKind = Sw64MCExpr::MEK_None; ++ break; ++ case Sw64II::MO_GPDISP_HI: ++ TargetKind = Sw64MCExpr::MEK_GPDISP_HI16; ++ break; ++ case Sw64II::MO_GPDISP_LO: ++ TargetKind = Sw64MCExpr::MEK_GPDISP_LO16; ++ break; ++ case Sw64II::MO_GPREL_HI: ++ TargetKind = Sw64MCExpr::MEK_GPREL_HI16; ++ break; ++ case Sw64II::MO_GPREL_LO: ++ TargetKind = Sw64MCExpr::MEK_GPREL_LO16; ++ break; ++ case Sw64II::MO_ABS_LO: ++ case Sw64II::MO_LITERAL: ++ TargetKind = 
Sw64MCExpr::MEK_ELF_LITERAL; ++ break; ++ case Sw64II::MO_LITERAL_GOT: ++ TargetKind = Sw64MCExpr::MEK_ELF_LITERAL_GOT; ++ break; ++ case Sw64II::MO_TPREL_HI: ++ TargetKind = Sw64MCExpr::MEK_TPREL_HI16; ++ break; ++ case Sw64II::MO_TPREL_LO: ++ TargetKind = Sw64MCExpr::MEK_TPREL_LO16; ++ break; ++ case Sw64II::MO_TLSGD: ++ TargetKind = Sw64MCExpr::MEK_TLSGD; ++ break; ++ case Sw64II::MO_TLSLDM: ++ TargetKind = Sw64MCExpr::MEK_TLSLDM; ++ break; ++ case Sw64II::MO_GOTTPREL: ++ TargetKind = Sw64MCExpr::MEK_GOTTPREL16; ++ break; ++ case Sw64II::MO_DTPREL_HI: ++ TargetKind = Sw64MCExpr::MEK_DTPREL_HI16; ++ break; ++ case Sw64II::MO_DTPREL_LO: ++ TargetKind = Sw64MCExpr::MEK_DTPREL_LO16; ++ break; ++ case Sw64II::MO_HINT: ++ TargetKind = Sw64MCExpr::MEK_HINT; ++ } ++ ++ switch (MOTy) { ++ case MachineOperand::MO_MachineBasicBlock: ++ Symbol = MO.getMBB()->getSymbol(); ++ break; ++ case MachineOperand::MO_GlobalAddress: ++ Symbol = Printer.getSymbol(MO.getGlobal()); ++ Offset += MO.getOffset(); ++ break; ++ case MachineOperand::MO_BlockAddress: ++ Symbol = Printer.GetBlockAddressSymbol(MO.getBlockAddress()); ++ Offset += MO.getOffset(); ++ break; ++ case MachineOperand::MO_ExternalSymbol: ++ Symbol = Printer.GetExternalSymbolSymbol(MO.getSymbolName()); ++ Offset += MO.getOffset(); ++ break; ++ case MachineOperand::MO_JumpTableIndex: ++ Symbol = Printer.GetJTISymbol(MO.getIndex()); ++ break; ++ case MachineOperand::MO_ConstantPoolIndex: ++ Symbol = Printer.GetCPISymbol(MO.getIndex()); ++ Offset += MO.getOffset(); ++ break; ++ default: ++ llvm_unreachable(""); ++ } ++ ++ const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, *Ctx); ++ ++ if (Offset) { ++ // Assume offset is never negative. ++ assert(Offset > 0); ++ ++ Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, *Ctx), ++ *Ctx); ++ } ++ ++ if (TargetKind != Sw64MCExpr::MEK_None) ++ Expr = Sw64MCExpr::create(TargetKind, Expr, *Ctx); ++ ++ return MCOperand::createExpr(Expr); ++} ++ ++MCOperand Sw64MCInstLower::LowerOperand(const MachineOperand &MO, ++ unsigned offset) const { ++ MachineOperandType MOTy = MO.getType(); ++ ++ switch (MOTy) { ++ default: ++ llvm_unreachable("unknown operand type"); ++ case MachineOperand::MO_Register: ++ // Ignore all implicit register operands. 
++ if (MO.isImplicit()) ++ break; ++ return MCOperand::createReg(MO.getReg()); ++ case MachineOperand::MO_Immediate: ++ return MCOperand::createImm(MO.getImm() + offset); ++ case MachineOperand::MO_MachineBasicBlock: ++ case MachineOperand::MO_GlobalAddress: ++ case MachineOperand::MO_ExternalSymbol: ++ case MachineOperand::MO_JumpTableIndex: ++ case MachineOperand::MO_ConstantPoolIndex: ++ case MachineOperand::MO_BlockAddress: ++ return LowerSymbolOperand(MO, MOTy, offset); ++ case MachineOperand::MO_RegisterMask: ++ break; ++ } ++ ++ return MCOperand(); ++} ++ ++void Sw64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { ++ OutMI.setOpcode(MI->getOpcode()); ++ ++ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { ++ const MachineOperand &MO = MI->getOperand(i); ++ MCOperand MCOp = LowerOperand(MO); ++ ++ if (MCOp.isValid()) ++ OutMI.addOperand(MCOp); ++ } ++} ++ ++static MCOperand lowerSymbolOperand(const MachineOperand &MO, ++ MachineOperandType MOTy, unsigned Offset, ++ const AsmPrinter &AP) { ++ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; ++ Sw64MCExpr::Sw64ExprKind TargetKind = Sw64MCExpr::MEK_None; ++ const MCSymbol *Symbol; ++ MCContext &Ctx = AP.OutContext; ++ ++ switch (MOTy) { ++ case MachineOperand::MO_MachineBasicBlock: ++ Symbol = MO.getMBB()->getSymbol(); ++ break; ++ case MachineOperand::MO_GlobalAddress: ++ Symbol = AP.getSymbol(MO.getGlobal()); ++ Offset += MO.getOffset(); ++ break; ++ case MachineOperand::MO_BlockAddress: ++ Symbol = AP.GetBlockAddressSymbol(MO.getBlockAddress()); ++ Offset += MO.getOffset(); ++ break; ++ case MachineOperand::MO_ExternalSymbol: ++ Symbol = AP.GetExternalSymbolSymbol(MO.getSymbolName()); ++ Offset += MO.getOffset(); ++ break; ++ case MachineOperand::MO_JumpTableIndex: ++ Symbol = AP.GetJTISymbol(MO.getIndex()); ++ break; ++ case MachineOperand::MO_ConstantPoolIndex: ++ Symbol = AP.GetCPISymbol(MO.getIndex()); ++ Offset += MO.getOffset(); ++ break; ++ default: ++ llvm_unreachable(""); ++ } ++ ++ const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, Ctx); ++ ++ if (Offset) { ++ // Assume offset is never negative. ++ assert(Offset > 0); ++ ++ Expr = ++ MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx); ++ } ++ ++ if (TargetKind != Sw64MCExpr::MEK_None) ++ Expr = Sw64MCExpr::create(TargetKind, Expr, Ctx); ++ ++ return MCOperand::createExpr(Expr); ++} ++ ++bool llvm::LowerSw64MachineOperandToMCOperand(const MachineOperand &MO, ++ MCOperand &MCOp, ++ const AsmPrinter &AP) { ++ switch (MO.getType()) { ++ default: ++ report_fatal_error("LowerSw64MachineInstrToMCInst: unknown operand type"); ++ case MachineOperand::MO_Register: ++ // Ignore all implicit register operands. ++ if (MO.isImplicit()) ++ return false; ++ MCOp = MCOperand::createReg(MO.getReg()); ++ break; ++ case MachineOperand::MO_RegisterMask: ++ // Regmasks are like implicit defs. 
++ return false; ++ case MachineOperand::MO_Immediate: ++ MCOp = MCOperand::createImm(MO.getImm()); ++ break; ++ return false; ++ case MachineOperand::MO_MachineBasicBlock: ++ case MachineOperand::MO_GlobalAddress: ++ case MachineOperand::MO_ExternalSymbol: ++ case MachineOperand::MO_JumpTableIndex: ++ case MachineOperand::MO_ConstantPoolIndex: ++ case MachineOperand::MO_BlockAddress: ++ MCOp = lowerSymbolOperand(MO, MO.getType(), 0, AP); ++ return false; ++ } ++ return true; ++} +diff --git a/llvm/lib/Target/Sw64/Sw64MCInstLower.h b/llvm/lib/Target/Sw64/Sw64MCInstLower.h +new file mode 100644 +index 000000000..07454f3e6 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64MCInstLower.h +@@ -0,0 +1,44 @@ ++//===-- Sw64MCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*--===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_SW64_SW64MCINSTLOWER_H ++#define LLVM_LIB_TARGET_SW64_SW64MCINSTLOWER_H ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/Support/Compiler.h" ++ ++namespace llvm { ++class MCContext; ++class MCInst; ++class MCOperand; ++class MachineInstr; ++class MachineFunction; ++class Mangler; ++class AsmPrinter; ++ ++typedef MachineOperand::MachineOperandType MachineOperandType; ++/// This class is used to lower an MachineInstr into an MCInst. ++class LLVM_LIBRARY_VISIBILITY Sw64MCInstLower { ++ MCContext *Ctx; ++ AsmPrinter &Printer; ++ ++public: ++ Sw64MCInstLower(class AsmPrinter &asmprinter); ++ void Initialize(MCContext *C); ++ void Lower(const MachineInstr *MI, MCInst &OutMI) const; ++ MCOperand LowerOperand(const MachineOperand &MO, unsigned offset = 0) const; ++ ++ void lowerMemory(const MachineInstr *MI, MCInst &OutMI) const; ++ ++private: ++ MCOperand LowerSymbolOperand(const MachineOperand &MO, ++ MachineOperandType MOTy, unsigned Offset) const; ++}; ++} // namespace llvm ++ ++#endif +diff --git a/llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.cpp b/llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.cpp +new file mode 100644 +index 000000000..54a53e2bc +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.cpp +@@ -0,0 +1,33 @@ ++//===-- Sw64MachineFunctionInfo.cpp - Sw64 machine function info --------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "Sw64MachineFunctionInfo.h" ++#include "Sw64InstrInfo.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/IR/Function.h" ++ ++using namespace llvm; ++ ++void Sw64MachineFunctionInfo::anchor() {} ++ ++bool Sw64MachineFunctionInfo::isLargeFrame(const MachineFunction &MF) const { ++ if (CachedEStackSize == -1) { ++ CachedEStackSize = MF.getFrameInfo().estimateStackSize(MF); ++ } ++ // isLargeFrame() is used when deciding if spill slots should be added to ++ // allow eliminateFrameIndex() to scavenge registers. ++ // This is only required when there is no FP and offsets are greater than ++ // ~256KB (~64Kwords). Thus only for code run on the emulator! ++ // ++ // The arbitrary value of 0xf000 allows frames of up to ~240KB before spill ++ // slots are added for the use of eliminateFrameIndex() register scavenging. 
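++ // (The figures above assume word-sized (4-byte) units: 0xf000 is 61440 words,
++ // i.e. ~240KB, and the ~64Kword (256KB) offset limit minus that leaves roughly
++ // 16KB of headroom.)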
++ // For frames less than 240KB, it is assumed that there will be less than ++ // 16KB of function arguments. ++ return CachedEStackSize > 0xf000; ++} +diff --git a/llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.h b/llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.h +new file mode 100644 +index 000000000..dd14c2d0d +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.h +@@ -0,0 +1,69 @@ ++//===- Sw64MachineFunctionInfo.h - Sw64 machine function info -*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file declares Sw64-specific per-machine-function information. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_SW64_SW64MACHINEFUNCTIONINFO_H ++#define LLVM_LIB_TARGET_SW64_SW64MACHINEFUNCTIONINFO_H ++ ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include ++#include ++#include ++ ++namespace llvm { ++ ++/// Sw64MachineFunctionInfo - This class is derived from MachineFunction private ++/// Sw64 target-specific information for each MachineFunction. ++class Sw64MachineFunctionInfo : public MachineFunctionInfo { ++private: ++ /// GlobalBaseReg - keeps track of the virtual register initialized for ++ /// use as the global base register. This is used for PIC in some PIC ++ /// relocation models. ++ unsigned GlobalBaseReg; ++ ++ /// GlobalRetAddr = keeps track of the virtual register initialized for ++ /// the return address value. ++ unsigned GlobalRetAddr; ++ ++ /// VarArgsOffset - What is the offset to the first vaarg ++ int VarArgsOffset; ++ /// VarArgsBase - What is the base FrameIndex ++ int VarArgsBase; ++ ++ virtual void anchor(); ++ mutable int CachedEStackSize = -1; ++ ++public: ++ Sw64MachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) ++ : GlobalBaseReg(0), GlobalRetAddr(0), VarArgsOffset(0), VarArgsBase(0) {} ++ ++ //~Sw64MachineFunctionInfo() override; ++ ++ bool globalBaseRegSet() const; ++ unsigned getGlobalBaseReg(MachineFunction &MF) const { return GlobalBaseReg; } ++ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } ++ ++ bool globalRetAddrSet() const; ++ void setGlobalRetAddr(unsigned Reg) { GlobalRetAddr = Reg; } ++ unsigned getGlobalRetAddr(MachineFunction &MF) const { return GlobalRetAddr; } ++ ++ int getVarArgsOffset() const { return VarArgsOffset; } ++ void setVarArgsOffset(int Offset) { VarArgsOffset = Offset; } ++ ++ int getVarArgsBase() const { return VarArgsBase; } ++ void setVarArgsBase(int Base) { VarArgsBase = Base; } ++ bool isLargeFrame(const MachineFunction &MF) const; ++}; ++} // end namespace llvm ++#endif // LLVM_LIB_TARGET_SW64_SW64MACHINEFUNCTIONINFO_H +diff --git a/llvm/lib/Target/Sw64/Sw64MacroFusion.cpp b/llvm/lib/Target/Sw64/Sw64MacroFusion.cpp +new file mode 100644 +index 000000000..46926285c +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64MacroFusion.cpp +@@ -0,0 +1,65 @@ ++//===- Sw64MacroFusion.cpp - Sw64 Macro Fusion ----------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file This file contains the Sw64 implementation of the DAG scheduling
++/// mutation to pair instructions back to back.
++//
++//===----------------------------------------------------------------------===//
++
++#include "Sw64MacroFusion.h"
++#include "Sw64Subtarget.h"
++#include "llvm/CodeGen/MacroFusion.h"
++#include "llvm/CodeGen/TargetInstrInfo.h"
++
++using namespace llvm;
++
++/// CMPxx followed by BEQ/BNE
++static bool isCmpBqPair(const MachineInstr *FirstMI,
++                        const MachineInstr &SecondMI) {
++  if (SecondMI.getOpcode() != Sw64::BEQ && SecondMI.getOpcode() != Sw64::BNE)
++    return false;
++
++  // Assume the 1st instr to be a wildcard if it is unspecified.
++  if (FirstMI == nullptr)
++    return true;
++
++  switch (FirstMI->getOpcode()) {
++  case Sw64::CMPEQr:
++  case Sw64::CMPEQi:
++  case Sw64::CMPLTr:
++  case Sw64::CMPLTi:
++  case Sw64::CMPLEr:
++  case Sw64::CMPLEi:
++  case Sw64::CMPULTr:
++  case Sw64::CMPULTi:
++  case Sw64::CMPULEr:
++  case Sw64::CMPULEi:
++    return true;
++  }
++
++  return false;
++}
++
++/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
++/// together. Given SecondMI, when FirstMI is unspecified, then check if
++/// SecondMI may be part of a fused pair at all.
++static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
++                                   const TargetSubtargetInfo &TSI,
++                                   const MachineInstr *FirstMI,
++                                   const MachineInstr &SecondMI) {
++  const Sw64Subtarget &ST = static_cast<const Sw64Subtarget &>(TSI);
++
++  if (ST.hasCore4() && isCmpBqPair(FirstMI, SecondMI))
++    return true;
++
++  return false;
++}
++
++std::unique_ptr<ScheduleDAGMutation> llvm::createSw64MacroFusionDAGMutation() {
++  return createMacroFusionDAGMutation(shouldScheduleAdjacent);
++}
+diff --git a/llvm/lib/Target/Sw64/Sw64MacroFusion.h b/llvm/lib/Target/Sw64/Sw64MacroFusion.h
+new file mode 100644
+index 000000000..8c07a4f13
+--- /dev/null
++++ b/llvm/lib/Target/Sw64/Sw64MacroFusion.h
+@@ -0,0 +1,28 @@
++//===- Sw64MacroFusion.h - Sw64 Macro Fusion ------------------------===//
++//
++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
++// See https://llvm.org/LICENSE.txt for license information.
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file This file contains the Sw64 definition of the DAG scheduling
++/// mutation to pair instructions back to back.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef LLVM_LIB_TARGET_SW64_SW64MACROFUSION_H
++#define LLVM_LIB_TARGET_SW64_SW64MACROFUSION_H
++
++#include "llvm/CodeGen/MachineScheduler.h"
++
++namespace llvm {
++
++/// Note that you have to add:
++/// DAG.addMutation(createSw64MacroFusionDAGMutation());
++/// to Sw64PassConfig::createMachineScheduler() to have an effect.
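++///
++/// A minimal sketch of that hook (illustrative only, not the actual
++/// implementation in this patch; it simply follows the usual TargetPassConfig
++/// pattern using createGenericSchedLive):
++///   ScheduleDAGInstrs *
++///   Sw64PassConfig::createMachineScheduler(MachineSchedContext *C) const {
++///     ScheduleDAGMILive *DAG = createGenericSchedLive(C);
++///     DAG->addMutation(createSw64MacroFusionDAGMutation());
++///     return DAG;
++///   }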
++std::unique_ptr<ScheduleDAGMutation> createSw64MacroFusionDAGMutation();
++
++} // namespace llvm
++
++#endif // LLVM_LIB_TARGET_SW64_SW64MACROFUSION_H
+diff --git a/llvm/lib/Target/Sw64/Sw64OptionRecord.h b/llvm/lib/Target/Sw64/Sw64OptionRecord.h
+new file mode 100644
+index 000000000..c629dc6bf
+--- /dev/null
++++ b/llvm/lib/Target/Sw64/Sw64OptionRecord.h
+@@ -0,0 +1,68 @@
++//===- Sw64OptionRecord.h - Abstraction for storing information -*- C++ -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// Sw64OptionRecord - Abstraction for storing arbitrary information in
++// ELF files. Arbitrary information (e.g. register usage) can be stored in Sw64
++// specific ELF sections like .Sw64.options. Specific records should subclass
++// Sw64OptionRecord and provide an implementation to EmitSw64OptionRecord which
++// basically just dumps the information into an ELF section. More information
++// about .Sw64.option can be found in the SysV ABI and the 64-bit ELF Object
++// specification.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef LLVM_LIB_TARGET_SW64_SW64OPTIONRECORD_H
++#define LLVM_LIB_TARGET_SW64_SW64OPTIONRECORD_H
++
++#include "MCTargetDesc/Sw64MCTargetDesc.h"
++#include "llvm/MC/MCContext.h"
++#include "llvm/MC/MCRegisterInfo.h"
++#include
++
++namespace llvm {
++
++class Sw64ELFStreamer;
++
++class Sw64OptionRecord {
++public:
++  virtual ~Sw64OptionRecord() = default;
++
++  virtual void EmitSw64OptionRecord() = 0;
++};
++
++class Sw64RegInfoRecord : public Sw64OptionRecord {
++public:
++  Sw64RegInfoRecord(Sw64ELFStreamer *S, MCContext &Context)
++      : Streamer(S), Context(Context) {
++
++    const MCRegisterInfo *TRI = Context.getRegisterInfo();
++    GPRCRegClass = &(TRI->getRegClass(Sw64::GPRCRegClassID));
++    F4RCRegClass = &(TRI->getRegClass(Sw64::F4RCRegClassID));
++    F8RCRegClass = &(TRI->getRegClass(Sw64::F8RCRegClassID));
++    V256LRegClass = &(TRI->getRegClass(Sw64::V256LRegClassID));
++  }
++
++  ~Sw64RegInfoRecord() override = default;
++
++  void EmitSw64OptionRecord() override;
++  void SetPhysRegUsed(unsigned Reg, const MCRegisterInfo *MCRegInfo);
++
++private:
++  Sw64ELFStreamer *Streamer;
++  MCContext &Context;
++  const MCRegisterClass *GPRCRegClass;
++  const MCRegisterClass *F4RCRegClass;
++  const MCRegisterClass *F8RCRegClass;
++  const MCRegisterClass *V256LRegClass;
++};
++
++} // end namespace llvm
++
++// const MCRegisterClass *COP3RegClass;
++#endif // LLVM_LIB_TARGET_SW64_SW64OPTIONRECORD_H
+diff --git a/llvm/lib/Target/Sw64/Sw64PreLegalizerCombiner.cpp b/llvm/lib/Target/Sw64/Sw64PreLegalizerCombiner.cpp
+new file mode 100644
+index 000000000..5790ce81f
+--- /dev/null
++++ b/llvm/lib/Target/Sw64/Sw64PreLegalizerCombiner.cpp
+@@ -0,0 +1,96 @@
++//=== lib/CodeGen/GlobalISel/Sw64PreLegalizerCombiner.cpp --------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This pass does combining of machine instructions at the generic MI level,
++// before the legalizer.
++//
++//===----------------------------------------------------------------------===//
++
++#include "Sw64TargetMachine.h"
++#include "llvm/CodeGen/GlobalISel/Combiner.h"
++#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
++#include "llvm/CodeGen/MachineFunctionPass.h"
++#include "llvm/CodeGen/TargetPassConfig.h"
++#include "llvm/InitializePasses.h"
++#include "llvm/Support/Debug.h"
++
++#define DEBUG_TYPE "sw_64-prelegalizer-combiner"
++
++using namespace llvm;
++
++namespace {
++class Sw64PreLegalizerCombinerInfo : public CombinerInfo {
++public:
++  Sw64PreLegalizerCombinerInfo()
++      : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
++                     /*LegalizerInfo*/ nullptr, /*EnableOpt*/ false,
++                     /*EnableOptSize*/ false, /*EnableMinSize*/ false) {}
++
++  virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
++                       MachineIRBuilder &B) const override;
++};
++
++bool Sw64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
++                                           MachineInstr &MI,
++                                           MachineIRBuilder &B) const {
++  return false;
++}
++
++// Pass boilerplate
++// ================
++
++class Sw64PreLegalizerCombiner : public MachineFunctionPass {
++public:
++  static char ID;
++
++  Sw64PreLegalizerCombiner();
++
++  StringRef getPassName() const override { return "Sw64PreLegalizerCombiner"; }
++
++  bool runOnMachineFunction(MachineFunction &MF) override;
++
++  void getAnalysisUsage(AnalysisUsage &AU) const override;
++};
++} // end anonymous namespace
++
++void Sw64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
++  AU.addRequired<TargetPassConfig>();
++  AU.setPreservesCFG();
++  getSelectionDAGFallbackAnalysisUsage(AU);
++  MachineFunctionPass::getAnalysisUsage(AU);
++}
++
++Sw64PreLegalizerCombiner::Sw64PreLegalizerCombiner() : MachineFunctionPass(ID) {
++  initializeSw64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
++}
++
++bool Sw64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
++  if (MF.getProperties().hasProperty(
++          MachineFunctionProperties::Property::FailedISel))
++    return false;
++  auto *TPC = &getAnalysis<TargetPassConfig>();
++  Sw64PreLegalizerCombinerInfo PCInfo;
++  Combiner C(PCInfo, TPC);
++  return C.combineMachineInstrs(MF, nullptr);
++}
++
++char Sw64PreLegalizerCombiner::ID = 0;
++INITIALIZE_PASS_BEGIN(Sw64PreLegalizerCombiner, DEBUG_TYPE,
++                      "Combine Sw64 machine instrs before legalization", false,
++                      false)
++INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
++INITIALIZE_PASS_END(Sw64PreLegalizerCombiner, DEBUG_TYPE,
++                    "Combine Sw64 machine instrs before legalization", false,
++                    false)
++
++namespace llvm {
++FunctionPass *createSw64PreLegalizeCombiner() {
++  return new Sw64PreLegalizerCombiner();
++}
++} // end namespace llvm
+diff --git a/llvm/lib/Target/Sw64/Sw64RegisterInfo.cpp b/llvm/lib/Target/Sw64/Sw64RegisterInfo.cpp
+new file mode 100644
+index 000000000..ea2bd0735
+--- /dev/null
++++ b/llvm/lib/Target/Sw64/Sw64RegisterInfo.cpp
+@@ -0,0 +1,300 @@
++//===-- Sw64RegisterInfo.cpp - Sw64 Register Information ----------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This file contains the Sw64 implementation of the MRegisterInfo class.
++// ++//===----------------------------------------------------------------------===// ++ ++#include "Sw64RegisterInfo.h" ++#include "MCTargetDesc/Sw64ABIInfo.h" ++#include "Sw64.h" ++#include "Sw64InstrInfo.h" ++#include "Sw64MachineFunctionInfo.h" ++#include "Sw64Subtarget.h" ++#include "llvm/ADT/BitVector.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineModuleInfo.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/RegisterScavenging.h" ++#include "llvm/CodeGen/TargetFrameLowering.h" ++#include "llvm/IR/Function.h" ++#include "llvm/IR/Type.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/Target/TargetMachine.h" ++#include "llvm/Target/TargetOptions.h" ++#include "llvm/TargetParser/Triple.h" ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "sw_64-reg-info" ++ ++#define GET_REGINFO_TARGET_DESC ++#include "Sw64GenRegisterInfo.inc" ++ ++static cl::opt EnableOptReg("enable-sw64-opt-reg", ++ cl::desc("Enalbe R15/R28 reg alloc on SW64"), ++ cl::init(true), cl::Hidden); ++ ++Sw64RegisterInfo::Sw64RegisterInfo() : Sw64GenRegisterInfo(Sw64::R26) {} ++ ++// helper functions ++static long getUpper16(long l) { ++ long y = l / Sw64::IMM_MULT; ++ if (l % Sw64::IMM_MULT > Sw64::IMM_HIGH) ++ ++y; ++ return y; ++} ++ ++static long getLower16(long l) { ++ long h = getUpper16(l); ++ return l - h * Sw64::IMM_MULT; ++} ++ ++const uint16_t * ++Sw64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { ++ ++ return CSR_F64_SaveList; ++} ++ ++BitVector Sw64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { ++ BitVector Reserved(getNumRegs()); ++ const Sw64FrameLowering *TFI = getFrameLowering(MF); ++ if (EnableOptReg) { ++ if (TFI->hasFP(MF)) ++ Reserved.set(Sw64::R15); ++ } else { ++ Reserved.set(Sw64::R15); ++ Reserved.set(Sw64::R28); ++ } ++ Reserved.set(Sw64::R29); ++ Reserved.set(Sw64::R30); ++ Reserved.set(Sw64::R31); ++ Reserved.set(Sw64::F31); ++ Reserved.set(Sw64::V31); ++ for (size_t i = 0; i < Sw64::GPRCRegClass.getNumRegs(); ++i) { ++ if (MF.getSubtarget().isRegisterReserved(i)) { ++ StringRef RegName("$" + std::to_string(i)); ++ Reserved.set( ++ MF.getSubtarget().getTargetLowering()->MatchRegName( ++ RegName)); ++ } ++ } ++ ++ // hasBP ++ if (hasStackRealignment(MF) && MF.getFrameInfo().hasVarSizedObjects()) ++ Reserved.set(Sw64::R14); ++ ++ return Reserved; ++} ++ ++const u_int32_t * ++Sw64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, ++ CallingConv::ID) const { ++ return CSR_F64_RegMask; ++} ++ ++const TargetRegisterClass * ++Sw64RegisterInfo::getPointerRegClass(const MachineFunction &MF, ++ unsigned Kind) const { ++ // Sw64ABIInfo ABI = MF.getSubtarget().getABI(); ++ Sw64PtrClass PtrClassKind = static_cast(Kind); ++ ++ switch (PtrClassKind) { ++ case Sw64PtrClass::Default: ++ return &Sw64::GPRCRegClass; ++ case Sw64PtrClass::StackPointer: ++ return &Sw64::SP64RegClass; ++ case Sw64PtrClass::GlobalPointer: ++ return &Sw64::GP64RegClass; ++ } ++ ++ llvm_unreachable("Unknown pointer kind"); ++} ++ ++bool Sw64RegisterInfo::requiresRegisterScavenging( ++ const MachineFunction &MF) const { ++ return true; ++} ++bool Sw64RegisterInfo::requiresFrameIndexScavenging( ++ const MachineFunction &MF) const { ++ return true; ++} ++bool 
Sw64RegisterInfo::trackLivenessAfterRegAlloc(
++    const MachineFunction &MF) const {
++  return true;
++}
++
++bool Sw64RegisterInfo::useFPForScavengingIndex(
++    const MachineFunction &MF) const {
++  return false;
++}
++
++void Sw64RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
++                                   unsigned OpNo, int FrameIndex,
++                                   uint64_t StackSize, int64_t SPOffset) const {
++  MachineInstr &MI = *II;
++  MachineBasicBlock &MBB = *MI.getParent();
++  MachineFunction &MF = *MI.getParent()->getParent();
++  MachineFrameInfo &MFI = MF.getFrameInfo();
++
++  const Sw64InstrInfo &TII =
++      *static_cast<const Sw64InstrInfo *>(MF.getSubtarget().getInstrInfo());
++  const Sw64RegisterInfo *RegInfo = static_cast<const Sw64RegisterInfo *>(
++      MF.getSubtarget().getRegisterInfo());
++
++  unsigned i = OpNo;
++  int MinCSFI = 0;
++  int MaxCSFI = -1;
++
++  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
++  if (CSI.size()) {
++    MinCSFI = CSI[0].getFrameIdx();
++    MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
++  }
++
++  // The following stack frame objects are always referenced relative to $sp:
++  // 1. Outgoing arguments.
++  // 2. Pointer to dynamically allocated stack space.
++  // 3. Locations for callee-saved registers.
++  // Everything else is referenced relative to whatever register
++  // getFrameRegister() returns.
++  unsigned FrameReg;
++
++  if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)
++    FrameReg = Sw64::R30;
++  else if (RegInfo->hasStackRealignment(MF)) {
++    if (MFI.hasVarSizedObjects() && !MFI.isFixedObjectIndex(FrameIndex))
++      FrameReg = Sw64::R14;
++    else if (MFI.isFixedObjectIndex(FrameIndex))
++      FrameReg = getFrameRegister(MF);
++    else
++      FrameReg = Sw64::R30;
++  } else
++    FrameReg = getFrameRegister(MF);
++
++  // Calculate final offset.
++  // - There is no need to change the offset if the frame object is one of the
++  //   following: an outgoing argument, pointer to a dynamically allocated
++  //   stack space or a $gp restore location,
++  // - If the frame object is any of the following, its offset must be adjusted
++  //   by adding the size of the stack:
++  //   incoming argument, callee-saved register location or local variable.
++  int64_t Offset = SPOffset + (int64_t)StackSize;
++  const MCInstrDesc &MCID = TII.get(MI.getOpcode());
++  if (MI.getNumOperands() > 2 && MI.getOperand(2).isImm()) {
++    if (MCID.mayLoad() || MCID.mayStore())
++      Offset += MI.getOperand(2).getImm();
++  }
++
++  if (MI.getOperand(1).isImm())
++    Offset += MI.getOperand(1).getImm();
++
++  if (MI.isDebugValue())
++    MI.getOperand(i + 1).ChangeToRegister(FrameReg, false);
++  else
++    MI.getOperand(2).ChangeToRegister(FrameReg, false);
++
++  LLVM_DEBUG(errs() << "Offset : " << Offset << "\n"
++                    << "<--------->\n");
++
++  // Now add the frame object offset to the offset from the virtual frame index.
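++  // Worked example (assuming IMM_MULT is 65536 and IMM_HIGH/IMM_LOW are the
++  // 16-bit signed bounds 32767/-32768, which is what getUpper16/getLower16
++  // above rely on): for Offset = 40000, getUpper16 returns 1 and getLower16
++  // returns -25536, so the large-offset path below emits "LDAH vreg, 1(FrameReg)"
++  // and rewrites the original memory access to use displacement -25536 off vreg;
++  // 1 * 65536 + (-25536) == 40000.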
++ if (Offset > Sw64::IMM_HIGH || Offset < Sw64::IMM_LOW) { ++ LLVM_DEBUG(errs() << "Unconditionally using R28 for evil purposes Offset: " ++ << Offset << "\n"); ++ // so in this case, we need to use a temporary register, and move the ++ // original inst off the SP/FP ++ // fix up the old: ++ MachineInstr *nMI; ++ bool FrameRegIsKilled = false; ++ // insert the new ++ Register vreg = MF.getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); ++ if (MI.getOperand(1).getTargetFlags() == 15) { ++ nMI = BuildMI(MF, MI.getDebugLoc(), TII.get(Sw64::LDAH), vreg) ++ .addImm(getUpper16(Offset)) ++ .addReg(FrameReg); ++ FrameRegIsKilled = true; ++ } else { ++ nMI = BuildMI(MF, MI.getDebugLoc(), TII.get(Sw64::LDAH), vreg) ++ .addImm(getUpper16(Offset)) ++ .addReg(FrameReg); ++ FrameRegIsKilled = true; ++ } ++ ++ MBB.insert(II, nMI); ++ MI.getOperand(2).ChangeToRegister(vreg, false, false, FrameRegIsKilled); ++ MI.getOperand(1).ChangeToImmediate(getLower16(Offset)); ++ } else { ++ if (MI.isDebugValue()) ++ MI.getOperand(i + 1).ChangeToImmediate(Offset); ++ else ++ MI.getOperand(1).ChangeToImmediate(Offset); ++ } ++} ++ ++// FrameIndex represent objects inside a abstract stack. ++// We must replace FrameIndex with an stack/frame pointer ++// direct reference. ++bool Sw64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, ++ int SPAdj, unsigned FIOperandNum, ++ RegScavenger *RS) const { ++ MachineInstr &MI = *II; ++ MachineFunction &MF = *MI.getParent()->getParent(); ++ ++ LLVM_DEBUG(errs() << "\nFunction : " << MF.getName() << "\n"; ++ errs() << "<--------->\n" ++ << MI); ++ ++ int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); ++ uint64_t stackSize = MF.getFrameInfo().getStackSize(); ++ int64_t spOffset = MF.getFrameInfo().getObjectOffset(FrameIndex); ++ ++ LLVM_DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n" ++ << "spOffset : " << spOffset << "\n" ++ << "stackSize : " << stackSize << "\n" ++ << "alignment : " ++ << DebugStr(MF.getFrameInfo().getObjectAlign(FrameIndex)) ++ << "\n"); ++ ++ eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset); ++ return false; ++} ++ ++Register Sw64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { ++ const Sw64FrameLowering *TFI = getFrameLowering(MF); ++ ++ return TFI->hasFP(MF) ? Sw64::R15 : Sw64::R30; ++} ++// bool Sw64RegisterInfo::isConstantPhysReg(MCRegister physreg) const { ++// return physreg == Sw64::R31 || physreg == Sw64::F31 || physreg == ++// Sw64::V31; ++// } ++unsigned Sw64RegisterInfo::getEHExceptionRegister() const { ++ llvm_unreachable("What is the exception register"); ++ return 0; ++} ++ ++unsigned Sw64RegisterInfo::getEHHandlerRegister() const { ++ llvm_unreachable("What is the exception handler register"); ++ return 0; ++} ++ ++std::string Sw64RegisterInfo::getPrettyName(unsigned reg) { ++ std::string s("#reg_#-#"); ++ return s; ++} ++ ++bool Sw64RegisterInfo::needsFrameMoves(const MachineFunction &MF) { ++ return MF.getMMI().hasDebugInfo() || MF.getFunction().needsUnwindTableEntry(); ++} +diff --git a/llvm/lib/Target/Sw64/Sw64RegisterInfo.h b/llvm/lib/Target/Sw64/Sw64RegisterInfo.h +new file mode 100644 +index 000000000..c1dba2b67 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64RegisterInfo.h +@@ -0,0 +1,81 @@ ++//===-- Sw64RegisterInfo.h - Sw64 Register Information Impl ---*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the Sw64 implementation of the MRegisterInfo class. ++// ++//===----------------------------------------------------------------------===// ++#ifndef LLVM_LIB_TARGET_SW64_SW64REGISTERINFO_H ++#define LLVM_LIB_TARGET_SW64_SW64REGISTERINFO_H ++ ++#include "Sw64.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++ ++#define GET_REGINFO_HEADER ++#include "Sw64GenRegisterInfo.inc" ++ ++namespace llvm { ++ ++class TargetInstrInfo; ++class TargetRegisterClass; ++ ++class Sw64RegisterInfo : public Sw64GenRegisterInfo { ++public: ++ Sw64RegisterInfo(); ++ enum class Sw64PtrClass { ++ /// The default register class for integer values. ++ Default = 0, ++ /// The stack pointer only. ++ StackPointer = 1, ++ /// The global pointer only. ++ GlobalPointer = 2, ++ }; ++ ++ /// Code Generation virtual methods... ++ ++ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; ++ ++ BitVector getReservedRegs(const MachineFunction &MF) const override; ++ ++ // Eliminate virtual register which Prologue/Epilogue generate. ++ bool requiresRegisterScavenging(const MachineFunction &MF) const override; ++ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; ++ bool useFPForScavengingIndex(const MachineFunction &MF) const override; ++ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; ++ ++ /// Code Generation virtual methods... ++ const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, ++ unsigned Kind) const override; ++ ++ bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, ++ unsigned FIOperandNum, ++ RegScavenger *RS = nullptr) const override; ++ ++ // Debug information queries. ++ Register getFrameRegister(const MachineFunction &MF) const override; ++ ++ const u_int32_t *getCallPreservedMask(const MachineFunction &MF, ++ CallingConv::ID) const override; ++ // LLVM16 mov isConstantPhysReg into MachineRegisterInfo.h ++ // bool isConstantPhysReg(MCRegister phyreg)const override; ++ ++ //! Return whether to emit frame moves ++ static bool needsFrameMoves(const MachineFunction &MF); ++ // Exception handling queries. ++ unsigned getEHExceptionRegister() const; ++ unsigned getEHHandlerRegister() const; ++ ++ static std::string getPrettyName(unsigned reg); ++ ++private: ++ void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, ++ int FrameIndex, uint64_t StackSize, int64_t SPOffset) const; ++}; ++ ++} // end namespace llvm ++#endif +diff --git a/llvm/lib/Target/Sw64/Sw64RegisterInfo.td b/llvm/lib/Target/Sw64/Sw64RegisterInfo.td +new file mode 100644 +index 000000000..62faa8209 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64RegisterInfo.td +@@ -0,0 +1,312 @@ ++//===- Sw64RegisterInfo.td - The Sw64 Register File ------*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file describes the Sw64 register set. 
++// ++//===----------------------------------------------------------------------===// ++let Namespace = "Sw64" in { ++def sub_32: SubRegIndex<32>; ++} //Namespace Sw64 ++ ++// For register encoding ++class Sw64Reg Enc, string n, list alt= []> : Register { ++ let HWEncoding = Enc; ++ let Namespace = "Sw64"; ++ let AltNames = alt; ++} ++ ++class Sw64RegWithSubRegs Enc, string n, list subregs> ++ : RegisterWithSubRegs { ++ let HWEncoding = Enc; ++ let Namespace = "Sw64"; ++} ++ ++// GPR - One of the 32 32-bit general-purpose registers ++class Sw64GPR Enc, string n, list alt= []> : Sw64Reg; ++// FPR - One of the 32 64-bit floating-point registers ++class Sw64FPR Enc, string n, list subregs = []> ++ : Sw64RegWithSubRegs; ++ ++// VEC - One of the 32 256-bit vector registers ++class Sw64VEC Enc, string n, list subregs> ++ : Sw64RegWithSubRegs { ++ let SubRegIndices = [sub_32]; ++} ++ ++class Unallocatable { ++ bit isAllocatable = 0; ++} ++ ++let Namespace = "Sw64" in { ++ ++// General-purpose registers ++def R0 : Sw64GPR< 0, "$0">, DwarfRegNum<[0]>; ++def R1 : Sw64GPR< 1, "$1">, DwarfRegNum<[1]>; ++def R2 : Sw64GPR< 2, "$2">, DwarfRegNum<[2]>; ++def R3 : Sw64GPR< 3, "$3">, DwarfRegNum<[3]>; ++def R4 : Sw64GPR< 4, "$4">, DwarfRegNum<[4]>; ++def R5 : Sw64GPR< 5, "$5">, DwarfRegNum<[5]>; ++def R6 : Sw64GPR< 6, "$6">, DwarfRegNum<[6]>; ++def R7 : Sw64GPR< 7, "$7">, DwarfRegNum<[7]>; ++def R8 : Sw64GPR< 8, "$8">, DwarfRegNum<[8]>; ++def R9 : Sw64GPR< 9, "$9">, DwarfRegNum<[9]>; ++def R10 : Sw64GPR< 10, "$10">, DwarfRegNum<[10]>; ++def R11 : Sw64GPR< 11, "$11">, DwarfRegNum<[11]>; ++def R12 : Sw64GPR< 12, "$12">, DwarfRegNum<[12]>; ++def R13 : Sw64GPR< 13, "$13">, DwarfRegNum<[13]>; ++def R14 : Sw64GPR< 14, "$14">, DwarfRegNum<[14]>; ++def R15 : Sw64GPR< 15, "$15", ["$fp"]>, DwarfRegNum<[15]>; ++def R16 : Sw64GPR< 16, "$16">, DwarfRegNum<[16]>; ++def R17 : Sw64GPR< 17, "$17">, DwarfRegNum<[17]>; ++def R18 : Sw64GPR< 18, "$18">, DwarfRegNum<[18]>; ++def R19 : Sw64GPR< 19, "$19">, DwarfRegNum<[19]>; ++def R20 : Sw64GPR< 20, "$20">, DwarfRegNum<[20]>; ++def R21 : Sw64GPR< 21, "$21">, DwarfRegNum<[21]>; ++def R22 : Sw64GPR< 22, "$22">, DwarfRegNum<[22]>; ++def R23 : Sw64GPR< 23, "$23">, DwarfRegNum<[23]>; ++def R24 : Sw64GPR< 24, "$24">, DwarfRegNum<[24]>; ++def R25 : Sw64GPR< 25, "$25">, DwarfRegNum<[25]>; ++def R26 : Sw64GPR< 26, "$26", ["$ra"]>, DwarfRegNum<[26]>; ++def R27 : Sw64GPR< 27, "$27", ["$pv"]>, DwarfRegNum<[27]>; ++def R28 : Sw64GPR< 28, "$28", ["$at"]>, DwarfRegNum<[28]>; ++def R29 : Sw64GPR< 29, "$29", ["$gp"]>, DwarfRegNum<[29]>; ++def R30 : Sw64GPR< 30, "$30", ["$sp"]>, DwarfRegNum<[30]>; ++def R31 : Sw64GPR< 31, "$31", ["$zero"]>, DwarfRegNum<[31]>; ++ ++// Floating-point registers ++def F0 : Sw64FPR< 0, "$f0">, DwarfRegNum<[32]>; ++def F1 : Sw64FPR< 1, "$f1">, DwarfRegNum<[33]>; ++def F2 : Sw64FPR< 2, "$f2">, DwarfRegNum<[34]>; ++def F3 : Sw64FPR< 3, "$f3">, DwarfRegNum<[35]>; ++def F4 : Sw64FPR< 4, "$f4">, DwarfRegNum<[36]>; ++def F5 : Sw64FPR< 5, "$f5">, DwarfRegNum<[37]>; ++def F6 : Sw64FPR< 6, "$f6">, DwarfRegNum<[38]>; ++def F7 : Sw64FPR< 7, "$f7">, DwarfRegNum<[39]>; ++def F8 : Sw64FPR< 8, "$f8">, DwarfRegNum<[40]>; ++def F9 : Sw64FPR< 9, "$f9">, DwarfRegNum<[41]>; ++def F10 : Sw64FPR< 10, "$f10">, DwarfRegNum<[42]>; ++def F11 : Sw64FPR< 11, "$f11">, DwarfRegNum<[43]>; ++def F12 : Sw64FPR< 12, "$f12">, DwarfRegNum<[44]>; ++def F13 : Sw64FPR< 13, "$f13">, DwarfRegNum<[45]>; ++def F14 : Sw64FPR< 14, "$f14">, DwarfRegNum<[46]>; ++def F15 : Sw64FPR< 15, "$f15">, DwarfRegNum<[47]>; 
++def F16 : Sw64FPR< 16, "$f16">, DwarfRegNum<[48]>; ++def F17 : Sw64FPR< 17, "$f17">, DwarfRegNum<[49]>; ++def F18 : Sw64FPR< 18, "$f18">, DwarfRegNum<[50]>; ++def F19 : Sw64FPR< 19, "$f19">, DwarfRegNum<[51]>; ++def F20 : Sw64FPR< 20, "$f20">, DwarfRegNum<[52]>; ++def F21 : Sw64FPR< 21, "$f21">, DwarfRegNum<[53]>; ++def F22 : Sw64FPR< 22, "$f22">, DwarfRegNum<[54]>; ++def F23 : Sw64FPR< 23, "$f23">, DwarfRegNum<[55]>; ++def F24 : Sw64FPR< 24, "$f24">, DwarfRegNum<[56]>; ++def F25 : Sw64FPR< 25, "$f25">, DwarfRegNum<[57]>; ++def F26 : Sw64FPR< 26, "$f26">, DwarfRegNum<[58]>; ++def F27 : Sw64FPR< 27, "$f27">, DwarfRegNum<[59]>; ++def F28 : Sw64FPR< 28, "$f28">, DwarfRegNum<[60]>; ++def F29 : Sw64FPR< 29, "$f29">, DwarfRegNum<[61]>; ++def F30 : Sw64FPR< 30, "$f30">, DwarfRegNum<[62]>; ++def F31 : Sw64FPR< 31, "$f31">, DwarfRegNum<[63]>; ++ ++// Floating-point registers ++let SubRegIndices = [sub_32] in { ++def Q0 : Sw64FPR< 0, "$f0", [F0]>, DwarfRegNum<[32]>; ++def Q1 : Sw64FPR< 1, "$f1", [F1]>, DwarfRegNum<[33]>; ++def Q2 : Sw64FPR< 2, "$f2", [F2]>, DwarfRegNum<[34]>; ++def Q3 : Sw64FPR< 3, "$f3", [F3]>, DwarfRegNum<[35]>; ++def Q4 : Sw64FPR< 4, "$f4", [F4]>, DwarfRegNum<[36]>; ++def Q5 : Sw64FPR< 5, "$f5", [F5]>, DwarfRegNum<[37]>; ++def Q6 : Sw64FPR< 6, "$f6", [F6]>, DwarfRegNum<[38]>; ++def Q7 : Sw64FPR< 7, "$f7", [F7]>, DwarfRegNum<[39]>; ++def Q8 : Sw64FPR< 8, "$f8", [F8]>, DwarfRegNum<[40]>; ++def Q9 : Sw64FPR< 9, "$f9", [F9]>, DwarfRegNum<[41]>; ++def Q10 : Sw64FPR< 10, "$f10", [F10]>, DwarfRegNum<[42]>; ++def Q11 : Sw64FPR< 11, "$f11", [F11]>, DwarfRegNum<[43]>; ++def Q12 : Sw64FPR< 12, "$f12", [F12]>, DwarfRegNum<[44]>; ++def Q13 : Sw64FPR< 13, "$f13", [F13]>, DwarfRegNum<[45]>; ++def Q14 : Sw64FPR< 14, "$f14", [F14]>, DwarfRegNum<[46]>; ++def Q15 : Sw64FPR< 15, "$f15", [F15]>, DwarfRegNum<[47]>; ++def Q16 : Sw64FPR< 16, "$f16", [F16]>, DwarfRegNum<[48]>; ++def Q17 : Sw64FPR< 17, "$f17", [F17]>, DwarfRegNum<[49]>; ++def Q18 : Sw64FPR< 18, "$f18", [F18]>, DwarfRegNum<[50]>; ++def Q19 : Sw64FPR< 19, "$f19", [F19]>, DwarfRegNum<[51]>; ++def Q20 : Sw64FPR< 20, "$f20", [F20]>, DwarfRegNum<[52]>; ++def Q21 : Sw64FPR< 21, "$f21", [F21]>, DwarfRegNum<[53]>; ++def Q22 : Sw64FPR< 22, "$f22", [F22]>, DwarfRegNum<[54]>; ++def Q23 : Sw64FPR< 23, "$f23", [F23]>, DwarfRegNum<[55]>; ++def Q24 : Sw64FPR< 24, "$f24", [F24]>, DwarfRegNum<[56]>; ++def Q25 : Sw64FPR< 25, "$f25", [F25]>, DwarfRegNum<[57]>; ++def Q26 : Sw64FPR< 26, "$f26", [F26]>, DwarfRegNum<[58]>; ++def Q27 : Sw64FPR< 27, "$f27", [F27]>, DwarfRegNum<[59]>; ++def Q28 : Sw64FPR< 28, "$f28", [F28]>, DwarfRegNum<[60]>; ++def Q29 : Sw64FPR< 29, "$f29", [F29]>, DwarfRegNum<[61]>; ++def Q30 : Sw64FPR< 30, "$f30", [F30]>, DwarfRegNum<[62]>; ++def Q31 : Sw64FPR< 31, "$f31", [F31]>, DwarfRegNum<[63]>; ++} ++ ++// Vector registers ++def V0 : Sw64VEC< 0, "$f0", [Q0]>, DwarfRegNum<[32]>; ++def V1 : Sw64VEC< 1, "$f1", [Q1]>, DwarfRegNum<[33]>; ++def V2 : Sw64VEC< 2, "$f2", [Q2]>, DwarfRegNum<[34]>; ++def V3 : Sw64VEC< 3, "$f3", [Q3]>, DwarfRegNum<[35]>; ++def V4 : Sw64VEC< 4, "$f4", [Q4]>, DwarfRegNum<[36]>; ++def V5 : Sw64VEC< 5, "$f5", [Q5]>, DwarfRegNum<[37]>; ++def V6 : Sw64VEC< 6, "$f6", [Q6]>, DwarfRegNum<[38]>; ++def V7 : Sw64VEC< 7, "$f7", [Q7]>, DwarfRegNum<[39]>; ++def V8 : Sw64VEC< 8, "$f8", [Q8]>, DwarfRegNum<[40]>; ++def V9 : Sw64VEC< 9, "$f9", [Q9]>, DwarfRegNum<[41]>; ++def V10 : Sw64VEC< 10, "$f10", [Q10]>, DwarfRegNum<[42]>; ++def V11 : Sw64VEC< 11, "$f11", [Q11]>, DwarfRegNum<[43]>; ++def V12 : Sw64VEC< 12, "$f12", [Q12]>, 
DwarfRegNum<[44]>; ++def V13 : Sw64VEC< 13, "$f13", [Q13]>, DwarfRegNum<[45]>; ++def V14 : Sw64VEC< 14, "$f14", [Q14]>, DwarfRegNum<[46]>; ++def V15 : Sw64VEC< 15, "$f15", [Q15]>, DwarfRegNum<[47]>; ++def V16 : Sw64VEC< 16, "$f16", [Q16]>, DwarfRegNum<[48]>; ++def V17 : Sw64VEC< 17, "$f17", [Q17]>, DwarfRegNum<[49]>; ++def V18 : Sw64VEC< 18, "$f18", [Q18]>, DwarfRegNum<[50]>; ++def V19 : Sw64VEC< 19, "$f19", [Q19]>, DwarfRegNum<[51]>; ++def V20 : Sw64VEC< 20, "$f20", [Q20]>, DwarfRegNum<[52]>; ++def V21 : Sw64VEC< 21, "$f21", [Q21]>, DwarfRegNum<[53]>; ++def V22 : Sw64VEC< 22, "$f22", [Q22]>, DwarfRegNum<[54]>; ++def V23 : Sw64VEC< 23, "$f23", [Q23]>, DwarfRegNum<[55]>; ++def V24 : Sw64VEC< 24, "$f24", [Q24]>, DwarfRegNum<[56]>; ++def V25 : Sw64VEC< 25, "$f25", [Q25]>, DwarfRegNum<[57]>; ++def V26 : Sw64VEC< 26, "$f26", [Q26]>, DwarfRegNum<[58]>; ++def V27 : Sw64VEC< 27, "$f27", [Q27]>, DwarfRegNum<[59]>; ++def V28 : Sw64VEC< 28, "$f28", [Q28]>, DwarfRegNum<[60]>; ++def V29 : Sw64VEC< 29, "$f29", [Q29]>, DwarfRegNum<[61]>; ++def V30 : Sw64VEC< 30, "$f30", [Q30]>, DwarfRegNum<[62]>; ++def V31 : Sw64VEC< 31, "$f31", [Q31]>, DwarfRegNum<[63]>; ++ ++} // Namespace Sw64 ++ ++/// Register classes ++def GPRC : RegisterClass<"Sw64", [i64], 64, (add ++ // Volatile ++ R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22, ++ R23, R24, R25, R28, ++ //Special meaning, but volatile ++ R27, //procedure address ++ R26, //return address ++ R29, //global offset table address ++ // Non-volatile ++ R9, R10, R11, R12, R13, R14, ++ // Don't allocate 15, 30, 31 ++ R15, R30, R31)>; ++ ++def F4RC : RegisterClass<"Sw64", [f32], 64, (add F0, F1, ++ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, ++ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, ++ // Saved: ++ F2, F3, F4, F5, F6, F7, F8, F9, ++ F31)>; //zero ++ ++def F8RC : RegisterClass<"Sw64", [f64], 64, (add F4RC)>; ++ ++// lowest 64bits part for simd vector ++def FPRC : RegisterClass<"Sw64", [i64, f64], 64, (sequence "Q%u", 0, 31)>; ++ ++//def FPRC_lo : RegisterClass<"Sw64", [f32, i32], 64, (add F0, F1, ++// F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, ++// F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, ++// // Saved: ++// F2, F3, F4, F5, F6, F7, F8, F9, ++// F31)>; //zero ++ ++def FPRC_lo : RegisterClass<"Sw64", [i32, f32], 64, (trunc F4RC, 32)>; ++ ++// Stack pointer and global pointer classes for instructions that are limited ++// to a single register. ++def SP64 : RegisterClass<"Sw64", [i64], 64, (add R30)>, Unallocatable; ++def GP64 : RegisterClass<"Sw64", [i64], 64, (add R29)>, Unallocatable; ++ ++def FP30 : RegisterClass<"Sw64", [f32], 64, (add F30)>, Unallocatable; ++def FD30 : RegisterClass<"Sw64", [f64], 64, (add F30)>, Unallocatable; ++// Register Operands. 
++ ++class Sw64AsmRegOperand : AsmOperandClass { ++ let ParserMethod = "parseAnyRegister"; ++} ++ ++def GPRCAsmOperand : Sw64AsmRegOperand { ++ let Name = "Reg"; ++} ++ ++def F4RCAsmOperand : Sw64AsmRegOperand { ++ let Name = "F4RCAsmReg"; ++ let PredicateMethod = "isFGRAsmReg"; ++} ++ ++def F8RCAsmOperand : Sw64AsmRegOperand { ++ let Name = "F8RCAsmReg"; ++ let PredicateMethod = "isFGRAsmReg"; ++} ++ ++def V256AsmOperand : Sw64AsmRegOperand { ++ let Name = "V256AsmReg"; ++} ++ ++def V256B : RegisterClass<"Sw64", [v32i8], 256, (add (sequence "V%u", 0, 31))>; ++def V256H : RegisterClass<"Sw64", [v16i16], 256, (add (sequence "V%u", 0, 31))>; ++def V256W : RegisterClass<"Sw64", [v4f32], 256, (add (sequence "V%u", 0, 31))>; ++ ++def V256L : RegisterClass<"Sw64", [v32i8, v16i16, v8i32, v4i64, v4f64, v4f32], 256, ++ (add (sequence "V%u", 0, 31))>; ++ ++def V256all : RegisterClass<"Sw64", [v32i8, v16i16, v8i32, v4i64, v4f32 ,v4f64], ++ 256, (add (sequence "V%u", 0, 31))>; ++ ++// adding a special class for floating selection ++def V256Floating : RegisterClass<"Sw64", [v4f32, v4f64], ++ 256, (add (sequence "V%u", 0, 31))>; ++def V256E64 : RegisterClass<"Sw64", [v4i64, v4f32, v4f64], ++ 256, (add (sequence "V%u", 0, 31))>; ++ ++def GPRCOpnd : RegisterOperand { ++ let ParserMatchClass = GPRCAsmOperand; ++} ++ ++def F4RCOpnd : RegisterOperand { ++ let ParserMatchClass = F4RCAsmOperand; ++} ++ ++def F8RCOpnd : RegisterOperand { ++ let ParserMatchClass = F8RCAsmOperand; ++} ++ ++def FPRCOpnd : RegisterOperand { ++ let ParserMatchClass = F8RCAsmOperand; ++} ++ ++def FPRCloOpnd : RegisterOperand { ++ let ParserMatchClass = F8RCAsmOperand; ++} ++ ++def V256BOpnd : RegisterOperand { ++ let ParserMatchClass = V256AsmOperand; ++} ++ ++def V256HOpnd : RegisterOperand { ++ let ParserMatchClass = V256AsmOperand; ++} ++ ++def V256WOpnd : RegisterOperand { ++ let ParserMatchClass = V256AsmOperand; ++} ++ ++def V256LOpnd : RegisterOperand { ++ let ParserMatchClass = V256AsmOperand; ++} ++ ++def V256ALOpnd : RegisterOperand { ++ let ParserMatchClass = V256AsmOperand; ++} ++ ++def V256FOpnd : RegisterOperand { ++ let ParserMatchClass = V256AsmOperand; ++} +diff --git a/llvm/lib/Target/Sw64/Sw64Relocations.h b/llvm/lib/Target/Sw64/Sw64Relocations.h +new file mode 100644 +index 000000000..b32f148d7 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64Relocations.h +@@ -0,0 +1,30 @@ ++//===- Sw64Relocations.h - Sw64 Code Relocations --------------*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the Sw64 target-specific relocation types. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef Sw64RELOCATIONS_H ++#define Sw64RELOCATIONS_H ++ ++#include "llvm/CodeGen/MachineRelocation.h" ++ ++namespace llvm { ++namespace Sw64 { ++enum RelocationType { ++ reloc_literal, ++ reloc_gprellow, ++ reloc_gprelhigh, ++ reloc_gpdist, ++ reloc_bsr ++}; ++} ++} // namespace llvm ++#endif +diff --git a/llvm/lib/Target/Sw64/Sw64SchedCore3.td b/llvm/lib/Target/Sw64/Sw64SchedCore3.td +new file mode 100644 +index 000000000..63a87c841 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64SchedCore3.td +@@ -0,0 +1,233 @@ ++//===- Sw64Schedule.td - Sw64 Scheduling Definitions -----*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++// Core3 processor architecture mannual ++def GenericSw64Model : SchedMachineModel { ++ // Core 3 has 4-way decode and 7-way dispatch, ++ // in a cycle, can maxinum dispatch 3-way to ALU, 2-way to AGU, 2-way to FPU, ++ // so set the dispatch width to 4 is optitional. ++ let IssueWidth = 4; ++ let LoadLatency = 4; // Optimistic load latency ++ let LoopMicroOpBufferSize = 16; //InsnQueue has 16 entry ++ let MispredictPenalty = 13; // Fetch + Decode/Rename/Dispatch + Branch ++ let CompleteModel = false; ++ let MicroOpBufferSize = 72; // ROB size ++ ++ let PostRAScheduler = 1; ++} ++// let CompleteModel = 1; ++//def Core3SchedModel : GenericSw64Model; ++ ++let SchedModel = GenericSw64Model in { ++ // chapter 2.2.1 ++ // 3 pipeline ALU ++ def C3PortALU0 : ProcResource<1>; ++ def C3PortALU1 : ProcResource<1>; ++ def C3PortALU2 : ProcResource<1>; ++// def C3ScheduleQI : ProcResource<3> { ++// let BufferSize = 0; // ++// } ++ ++// def C3ScheduleQF : ProcResource<2> { ++// let BufferSize = 12; // ++// } ++// def C3INTQ : ProcResource<7> { ++// let BufferSize = 36; ++// } ++ ++ // 2.2.1 ++ // alu0 has ADD MUL, alu1 has BR/CSR BOP/SHT ++ // alu2 has CNT BOP/SHT ADD/SEL ++ def C3PortALU : ProcResGroup<[C3PortALU0, C3PortALU1, C3PortALU2]> { ++ let BufferSize = 32; ++ } ++ def C3PortALU01 : ProcResGroup<[C3PortALU0, C3PortALU1]>; ++ def C3PortALU12 : ProcResGroup<[C3PortALU1, C3PortALU2]>; ++ ++ //def C3Multiplier : ProcResource<1>; ++ // 2 pipeline Alu Mem ++ // 2.2.3 ++ // Core3a interger has two AGU Unit ++ // 2 LSU Unit deel with all load/store ++ def C3LSU : ProcResource<2>; ++ ++ def C3PortAGU0 : ProcResource<1>; ++ def C3PortAGU1 : ProcResource<1>; ++ ++ def C3PortAGU01 : ProcResGroup<[C3PortAGU0, C3PortAGU1]>; ++ ++ let Super = C3LSU in ++ def C3Load : ProcResource<2> { ++ let BufferSize = 32; ++ } ++ ++ def C3LoadQueue : LoadQueue; ++ ++ let Super = C3LSU in ++ def C3Store : ProcResource<1> { ++ let BufferSize = 16; ++ } ++ ++ def C3StoreQueue : StoreQueue; ++ ++ // 2 pipeline FPU-SIMD ++ def C3PortFPU0 : ProcResource<1>; ++ def C3PortFPU1 : ProcResource<1>; ++ ++ ++ def C3PortFPU : ProcResGroup<[C3PortFPU0, C3PortFPU1]>; ++ ++ def C3GprRF: RegisterFile<105, [GPRC], [1]>; ++ ++ def C3FpuRF: RegisterFile<95, [F4RC, F8RC], [1]>; ++ ++ def C3RCU : RetireControlUnit<72, 4>; ++ ++ class C3WriteRes ExePorts, ++ int Lat, list Res = [], int UOps = 1> : ++ WriteRes { ++ let Latency = Lat; ++ let ResourceCycles = Res; ++ //!if(!empty(Res), [1, 1], !listconcat([1], Res)); ++ let NumMicroOps = UOps; ++ } ++ ++ class C3LSWriteRes ExePorts, ++ int Lat, list Res = [], int 
UOps = 1> : ++ WriteRes { ++ let Latency = !add(Lat, 1); ++ let ResourceCycles = !if(!empty(Res), [1, 1], !listconcat([1], Res)); ++ //!if(!empty(Res), [1, 1, 1], !listconcat([1, 1], Res)); ++ let NumMicroOps = UOps; ++ } ++ ++ ++def : C3WriteRes ; ++def : C3WriteRes ; ++def : C3WriteRes ; ++def : C3WriteRes ; ++def : C3WriteRes ; ++def : C3WriteRes ; ++def : C3WriteRes ; //nop do not execute in backend ++def : C3WriteRes ; ++def : C3WriteRes ; ++def : C3WriteRes ; ++def : C3WriteRes ; ++def : C3WriteRes ; ++ ++def : C3WriteRes ; ++def : C3WriteRes ; ++ ++def : C3WriteRes ; ++ ++def : C3WriteRes ; ++ ++def : C3LSWriteRes ; ++ ++def : C3WriteRes ; ++ ++def : C3WriteRes ; ++ ++def : C3WriteRes ; ++ ++def : C3WriteRes ; ++ ++def : C3LSWriteRes; ++def : C3LSWriteRes; ++ ++def : C3LSWriteRes; ++def : C3LSWriteRes; ++ ++def : C3WriteRes; ++def : C3WriteRes; ++ ++def : C3WriteRes; ++def : C3WriteRes; ++ ++def : C3WriteRes; ++def : C3WriteRes; ++ ++def : InstRW<[WriteIALU], (instrs COPY)>; ++ ++def : InstRW<[WriteBR], (instrs BR, BEQ, BGE, ++ BGT, BLBC, BLBS, BLE, BLT, BNE, BSR)>; ++ ++def : InstRW<[WriteBR], (instrs SYS_CALL)>; ++def : InstRW<[WriteBR], (instrs JMP, JSR, RET)>; ++//def : InstRW<[WriteBR], (instregex "^B(EQ|GE|GT|LE|LT|NE)$")>; ++def : InstRW<[WriteFBR], (instregex "^FB(EQ|GE|GT|LE|LT|NE)$")>; ++ ++def : InstRW<[WriteLD], (instregex "^(S_FILL|E_FILL)(CS|DE)$")>; ++def : InstRW<[WriteLD], (instregex "^FILL(CS|DE|CS_E|DE_E)$")>; ++ ++def : InstRW<[WriteLD], (instregex "^LD(L|W|HU|BU)$")>; ++def : InstRW<[WriteFLD], (instregex "^LD(S|D)$")>; ++ ++def : InstRW<[WriteST], (instregex "^ST(L|W|H|B)$")>; ++def : InstRW<[WriteFST], (instregex "^ST(S|D)$")>; ++ ++def : InstRW<[WriteImm], (instregex "^LDAH*$")>; ++ ++def : InstRW<[WriteIALU], (instregex "^(ADD|SUB|S(4|8)(ADD|SUB))(L|Q)(r|i)$")>; ++def : InstRW<[WriteIMul], (instregex "^(MUL)(L|Q)(r|i)$")>; ++ ++def : InstRW<[WriteCNT], (instrs CTLZ, CTPOP, CTTZ)>; ++ ++def : InstRW<[WriteBOP], (instrs ZAPr, ZAPi, ZAPNOTr, ZAPNOTi, SEXTB, SEXTH)>; ++ ++def : InstRW<[WriteIALU], (instregex "^CMP(EQ|LE|LT|ULE|ULT|BGE)(r|i)*$")>; ++def : InstRW<[WriteFPU64], (instregex "^CMP(TEQ|TLE|TLT|TUN)$")>; ++ ++def : InstRW<[WriteIALU], (instregex "^(AND|BIC|BIS|ORNOT|XOR|EQV)(r|i)*$")>; ++ ++def : InstRW<[WriteSHT], (instregex "^(SL|SRA|SRL)(r|i)*$")>; ++def : InstRW<[WriteIMul], (instrs UMULHi, UMULHr)>; ++ ++ ++def : InstRW<[WriteSEL], (instregex "^SEL(EQ|NE|LE|LT|GT|GE|LBC|LBS)(r|i)*$")>; ++ ++def : InstRW<[WriteBOP], (instregex "^EXT(BL|WL|LL|LW|HB|HH|HW|HL)(r|i)*$")>; ++ ++ ++def : InstRW<[WriteBOP], (instregex "^MASKL[BHLW](r|i)*$")>; ++def : InstRW<[WriteBOP], (instregex "^MASKH[BHLW](r|i)*$")>; ++def : InstRW<[WriteBOP], (instregex "^INSL[BHLW](r|i)*$")>; ++def : InstRW<[WriteBOP], (instregex "^INSH[BHLW](r|i)*$")>; ++ ++def : InstRW<[WriteFPU32], (instregex "^(ADD|SUB|MUL)(S|D)*$")>; ++def : InstRW<[WriteFPU32], (instregex "^CPY(S|SE|SN)(S|D)*$")>; ++def : InstRW<[WriteFPU64], (instregex "^SETFPEC(0|1|2|3)*$")>; ++def : InstRW<[WriteImm], (instrs NOP)>; ++ ++def : InstRW<[WriteFCvtF64ToF32], (instrs FCVTLW, FCVTWL)>; ++def : InstRW<[WriteFCvtF64ToI64], (instrs CVTQS, CVTQT)>; ++def : InstRW<[WriteFCvtF64ToI64], (instrs CVTTQ, FCTTDL, FCTTDL_G, FCTTDL_P, FCTTDL_N)>; ++def : InstRW<[WriteFCvtF64ToF32], (instrs CVTST, CVTTS)>; ++ ++def : InstRW<[WriteFPU32], (instregex "^(F|FN)M(A|S)S$")>; ++def : InstRW<[WriteFPU64], (instregex "^(F|FN)M(A|S)D$")>; ++ ++def : InstRW<[WriteFSEL], (instregex "^FSEL(EQ|GE|GT|LE|LT|NE)S$")>; ++def : 
InstRW<[WriteFSEL], (instregex "^FSEL(EQ|GE|GT|LE|LT|NE)D$")>; ++ ++def : InstRW<[WriteFSqrt32], (instrs SQRTSS)>; ++def : InstRW<[WriteFSqrt64], (instrs SQRTSD)>; ++ ++def : InstRW<[WriteFDiv32], (instrs DIVS)>; ++def : InstRW<[WriteFDiv64], (instrs DIVD)>; ++ ++ ++def : InstRW<[WriteFPS], (instrs FTOIS, FTOIT, ITOFS, ITOFT)>; ++ ++def : InstRW<[WriteLD], (instrs LDL_L, LDQ_L)>; ++def : InstRW<[WriteST], (instrs STL_C, STQ_C)>; ++ ++def : InstRW<[WriteIALU], (instrs RCID, RPCC)>; ++def : InstRW<[WriteFPS], (instrs WFPCR, RFPCR)>; ++} +diff --git a/llvm/lib/Target/Sw64/Sw64SchedCore3SIMD.td b/llvm/lib/Target/Sw64/Sw64SchedCore3SIMD.td +new file mode 100644 +index 000000000..505704ce1 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64SchedCore3SIMD.td +@@ -0,0 +1,101 @@ ++//===- Sw64Schedule.td - Sw64 Scheduling Definitions -----*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++let SchedModel = GenericSw64Model in { ++ ++def : WriteRes { let Latency = 4; } ++def : WriteRes { let Latency = 4; } ++def : WriteRes { let Latency = 2; } ++def : WriteRes { let Latency = 2; } ++def : WriteRes { let Latency = 3; } ++def : WriteRes { let Latency = 2; } ++def : WriteRes { let Latency = 3; } ++def : WriteRes { let Latency = 2; } ++def : WriteRes { let Latency = 2; } ++def : WriteRes { let Latency = 6; } ++def : WriteRes { let Latency = 17; } ++def : WriteRes { let Latency = 1; } ++def : WriteRes { let Latency = 2; } ++def : WriteRes { let Latency = 1; } ++def : WriteRes { let Latency = 2; } ++def : WriteRes { let Latency = 2; } ++def : WriteRes { let Latency = 3; } ++def : WriteRes { let Latency = 4; } ++def : WriteRes { let Latency = 3; } ++ ++def : InstRW<[WriteFLDS], (instregex "^(VLD)(W|S|D)(E)$")>; ++def : InstRW<[WriteFLDS], (instregex "^(VLD)(S|D)$")>; ++ ++//def : InstRW<[WriteFSTDS], (instregex "^(VST)(S|DD|DW|DL)$")>; ++def : InstRW<[WriteFLDS], (instregex "^(VLD)(W|S|D)(U)$")>; ++def : InstRW<[WriteFSTDS], (instregex "^(VST)(W|S|D)(U)$")>; ++def : InstRW<[WriteFSTDS], (instregex "^(VST)(WU|SU|DU)(L|H)$")>; ++ ++// FIXME: Change the latency ++//def : InstRW<[WriteVEADD], (instrs VBICW, VXORW, VANDW, VEQVW, VORNOTW, VBISW)>; ++ ++def : InstRW<[WriteFLDS], (instrs VLDDNC)>; ++def : InstRW<[WriteFSTDS], (instrs VSTDNC)>; ++ ++//def : InstRW<[WriteVEADD], (instrs VADDW, VSUBW, VADDL, VSUBL, VADDWi, VSUBWi, VADDLi, VSUBLi)>; ++//def : InstRW<[WriteVEADD], (instregex "^(VCMP)(GE|EQ|LE|LT|ULE|ULT)(W)$")>; ++// ++//def : InstRW<[WriteVESHT2], (instregex "^(VSLL|VSRL|VSRA|VROL)(W|B|H|L)$")>; ++//def : InstRW<[WriteVESHT3], (instrs SLLOW, SRLOW, SRAOW)>; ++// ++//def : InstRW<[WriteVECNT2], (instrs CTLZOW)>; ++//def : InstRW<[WriteVECNT3], (instrs CTPOPOW)>; ++// ++//def : InstRW<[WriteVFREC], (instrs VFRECS,VFRECD)>; ++// ++//def : InstRW<[WriteVEADD], (instregex "^(VUC)(ADD|SUB)(W|H|B)$")>; ++//def : InstRW<[WriteVEADD], (instregex "^(VCMP)(GE|EQ|LE|LT|ULE|ULT)(Wi)$")>; ++//def : InstRW<[WriteVEADD], (instregex "^(VCMPU)(GT|EQ)(B|Bi)$")>; ++// ++//def : InstRW<[WriteVEADD], (instregex "^(V|VU)(MAX|MIN)(B|H|W|L)$")>; ++// ++//def : InstRW<[WriteVFCT], (instregex "^(VFCVT)(SD|DS|SH|HS|LS|LD|DL|DLG|DLP|DLZ|DLN)$")>; ++// ++//def : InstRW<[WriteVFRIS], (instregex "^(VFRI)(S|D)$")>; ++//def : InstRW<[WriteVFRIS], (instregex "^(VFRI)(S|D)(G|P|Z|N)$")>; ++// ++//def : 
InstRW<[WriteFMA6], (instregex "^(V)(MAX|MIN)(S|D)$")>; ++// ++//def : InstRW<[WriteVESHT2], (instrs VSLLD, VSRLD)>; ++//def : InstRW<[WriteVESHT2], (instregex "^(VSLL|VSRL|VSRA|VROL)(W|B|H|L)(i)$")>; ++//def : InstRW<[WriteVESHT3], (instrs SLLOWi, SRLOWi, SRAOWi)>; ++// ++//def : InstRW<[WriteVEADD], (instregex "^(VUC)(ADD|SUB)(Wi|Hi|Bi)$")>; ++//def : InstRW<[WriteVESEL], (instregex "^(VSEL)(EQ|LBC|LT|LE)(W|Wi)$")>; ++ ++def : InstRW<[WriteFMA6], (instregex "^(V)(ADD|SUB|MUL)(S|D)$")>; ++//def : InstRW<[WriteFMA17], (instrs VDIVS, VDIVD, VSQTTS, VSQRTD)>; ++ ++def : InstRW<[WriteFMA6], (instregex "^(VFCMP)(EQ|LE|LT|UN)$")>; ++def : InstRW<[WriteFMA2], (instregex "^(VCPY)(S|SE|SN)$")>; ++def : InstRW<[WriteFMA2], (instregex "^(V)(M|NM)(A|S)(S|D)$")>; ++ ++//def : InstRW<[WriteFMA6], (instrs VADDDC, VADDSC)>; ++ ++def : InstRW<[WriteFMA2], (instregex "^(VFSEL)(EQ|LT|LE)$")>; ++def : InstRW<[WriteVPM1], (instregex "^(V)(INS|EXT|CPY)(W|FS|FD)$")>; ++ ++//def : InstRW<[WriteVPM1], (instregex "^(VINS)(B|H)$")>; ++def : InstRW<[WriteVPM1], (instregex "^(VINSECTL)(H|W|L|B)$")>; ++//def : InstRW<[WriteVPM1], (instrs VSHFQ, VSHFQB, VCPYB, VCPYH)>; ++ ++// FIXME: Change the latency ++//def : InstRW<[WriteVPM1], (instrs VCPYFS_PSEUDO, VCPYFD_PSEUDO)>; ++ ++def : InstRW<[WriteVCON1], (instregex "^(VCON)(W|S|D)$")>; ++def : InstRW<[WriteVCON2], (instrs VSHFW)>; ++ ++//def : InstRW<[WriteVSUM], (instregex "^(VSUM)(W|L|S|D)$")>; ++def : InstRW<[WriteVPM2], (instrs VLOGZZ)>; ++} +diff --git a/llvm/lib/Target/Sw64/Sw64SchedCore4.td b/llvm/lib/Target/Sw64/Sw64SchedCore4.td +new file mode 100644 +index 000000000..9628187d1 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64SchedCore4.td +@@ -0,0 +1,77 @@ ++//===- Sw64Schedule.td - Sw64 Scheduling Definitions -----*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
++// ++//===----------------------------------------------------------------------===// ++ ++let SchedModel = GenericSw64Model in { ++ // 3 pipeline ALU ++ ++def : WriteRes { let Latency = 1; } ++ ++// FIXME: the latency of div and rem ++//def : WriteRes { let Latency = 4; } ++//def : WriteRes { let Latency = 4; } ++def : WriteRes { let Latency = 4; } ++def : WriteRes { let Latency = 4; } ++ ++def : WriteRes{ let Latency = 2; } ++def : WriteRes{ let Latency = 2; } ++ ++def : WriteRes{ let Latency = 3; } ++ ++def : WriteRes{ let Latency = 2; } ++def : WriteRes{ let Latency = 2; } ++def : WriteRes{ let Latency = 2; } ++def : WriteRes{ let Latency = 2; } ++def : WriteRes{ let Latency = 2; } ++ ++def : InstRW<[WriteLD], (instrs LDL_A, LDW_A, LDHU_A, LDBU_A)>; ++def : InstRW<[WriteFLD], (instrs LDS_A, LDD_A)>; ++ ++def : InstRW<[WriteST], (instrs STL_A, STW_A, STH_A, STB_A)>; ++def : InstRW<[WriteFST], (instrs STS_A, STD_A)>; ++ ++def : InstRW<[WriteIDiv], (instregex "^(DIV)(L|Q)$")>; ++def : InstRW<[WriteIDiv], (instregex "^(UDIV)(L|Q)$")>; ++def : InstRW<[WriteIRem], (instregex "^(REM)(L|Q)$")>; ++def : InstRW<[WriteIRem], (instregex "^(UREM)(L|Q)$")>; ++def : InstRW<[WriteJmp], (instrs ADDPI, ADDPIS)>; ++def : InstRW<[WriteImm], (instregex "^(C|S)(BT)(r|i)$")>; ++ ++def : InstRW<[WriteIALU], (instrs REVBH, REVBW, REVBL)>; ++ ++def : InstRW<[WriteIALU], (instregex "^(SLLW|SRAW|SRLW|ROLW|ROLL)(r|i)*$")>; ++ ++def : InstRW<[WriteCrc], (instregex "^(CRC32C)(B|H|W|L)*$")>; ++def : InstRW<[WriteCrc], (instregex "^(CRC32)(B|H|W|L)*$")>; ++ ++def : InstRW<[WriteFCvtF64ToI64], (instrs CMOVDL, CMOVDL_G, CMOVDL_P, CMOVDL_Z, CMOVDL_N)>; ++def : InstRW<[WriteFCvtF64ToI64], (instrs CMOVDLU, CMOVDLU_G, CMOVDLU_P, CMOVDLU_Z, CMOVDLU_N)>; ++def : InstRW<[WriteFCvtF64ToI32], (instrs CMOVDWU, CMOVDWU_G, CMOVDWU_P, CMOVDWU_Z, CMOVDWU_N)>; ++def : InstRW<[WriteFCvtF64ToI32], (instrs CMOVDW, CMOVDW_G, CMOVDW_P, CMOVDW_Z, CMOVDW_N)>; ++def : InstRW<[WriteFCvtF64ToI32], (instrs FCVTHS, FCVTSH)>; ++ ++def : InstRW<[WriteFCvtI64ToF32], (instrs CMOVLS, CMOVULS)>; ++def : InstRW<[WriteFCvtI32ToF32], (instrs CMOVWS, CMOVUWS)>; ++def : InstRW<[WriteFCvtI64ToF64], (instrs CMOVLD, CMOVULD)>; ++def : InstRW<[WriteFCvtI32ToF64], (instrs CMOVWD, CMOVUWD)>; ++ ++def : InstRW<[WriteFCvtF64ToF64], (instrs FRID, FRID_G, FRID_P, FRID_Z, FRID_N)>; ++def : InstRW<[WriteFCvtF32ToF32], (instrs FRIS, FRIS_G, FRIS_P, FRIS_Z, FRIS_N)>; ++ ++def : InstRW<[WriteFREC], (instrs FRECS)>; ++def : InstRW<[WriteFREC], (instrs FRECD)>; ++ ++def : InstRW<[WriteST], (instrs CASW, CASL)>; ++ ++def : InstRW<[WriteLD], (instrs DPFHR, DPFHW)>; ++ ++def : InstRW<[WriteCSR], (instrs CSRR, CSRW)>; ++def : InstRW<[WriteCSR], (instrs CSRWS, CSRWC)>; ++ ++def : InstRW<[WriteJmp], (instrs LBR)>; ++} +diff --git a/llvm/lib/Target/Sw64/Sw64Schedule.td b/llvm/lib/Target/Sw64/Sw64Schedule.td +new file mode 100644 +index 000000000..c5fc7739f +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64Schedule.td +@@ -0,0 +1,80 @@ ++ ++/// Define scheduler resources associated with def operands. 
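The SchedWrite classes declared next are the abstract write types that the Core3/Core4 WriteRes records above bind to concrete ports and latencies. As a hedged illustration (not part of the patch), the resolved numbers can be observed from C++ through TargetSchedModel:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetSchedule.h"

// Sketch only: SchedModel is assumed to have been initialized for the current
// Sw64 subtarget via TargetSchedModel::init(); MI is any machine instruction.
static unsigned resolvedLatency(const llvm::TargetSchedModel &SchedModel,
                                const llvm::MachineInstr &MI) {
  // Resolves the SchedWrite attached to MI against the active machine model,
  // e.g. a Core4 DIVL reports the latency the Core4 model above gives WriteIDiv.
  return SchedModel.computeInstrLatency(&MI);
}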
++def WriteIALU : SchedWrite; // 32 or 64-bit integer ALU operations ++def WriteSHT : SchedWrite; // 32 or 64-bit integer ALU operations ++def WriteIMul : SchedWrite; // 32-bit or 64-bit multiply ++def WriteIDiv : SchedWrite; // 32-bit or 64-bit divided ++def WriteIRem : SchedWrite; // 32-bit or 64-bit remainder ++def WriteImm : SchedWrite; // 32-bit multiply on RV64I ++def WriteCrc : SchedWrite; ++def WriteJmp : SchedWrite; // Jump ++def WriteBR : SchedWrite; // Jump ++def WriteFBR : SchedWrite; // float Jump ++def WriteNop : SchedWrite; ++def WriteLD : SchedWrite; // Load double-word ++def WriteFLD : SchedWrite; // Load double-word ++def WriteFREC : SchedWrite; ++def WriteCSR : SchedWrite; // CSR instructions ++def WriteST : SchedWrite; // Store byte ++def WriteFST : SchedWrite; // Store byte ++def WriteCNT : SchedWrite; //Atomic memory operation word size ++def WriteSEL : SchedWrite; // bytes operate and selection operate ++def WriteFPS : SchedWrite; //Atomic memory operation double word size ++def WriteBOP : SchedWrite; //Atomic memory operation double word size ++def WriteAtomicSTW : SchedWrite; // Atomic store word ++def WriteAtomicSTD : SchedWrite; // Atomic store double word ++def WriteFPU32 : SchedWrite; // FP 32-bit computation ++def WriteFPU64 : SchedWrite; // FP 64-bit computation ++def WriteFMul32 : SchedWrite; // 32-bit floating point multiply ++def WriteFMulAdd32 : SchedWrite; // 32-bit floating point multiply add ++def WriteFMulSub32 : SchedWrite; // 32-bit floating point multiply sub ++def WriteFMul64 : SchedWrite; // 64-bit floating point multiply ++def WriteFMulAdd64 : SchedWrite; // 64-bit floating point multiply add ++def WriteFMulSub64 : SchedWrite; // 64-bit floating point multiply sub ++def WriteFDiv32 : SchedWrite; // 32-bit floating point divide ++def WriteFDiv64 : SchedWrite; // 64-bit floating point divide ++def WriteFSqrt32 : SchedWrite; // 32-bit floating point sqrt ++def WriteFSqrt64 : SchedWrite; // 64-bit floating point sqrt ++def WriteFSEL : SchedWrite; // float selection operate ++def WriteNOP : SchedWrite; // float selection operate ++def WriteFCvtF32ToI32 : SchedWrite; ++def WriteFCvtF32ToI64 : SchedWrite; ++def WriteFCvtF64ToI32 : SchedWrite; ++def WriteFCvtF64ToI64 : SchedWrite; ++def WriteFCvtI32ToF32 : SchedWrite; ++def WriteFCvtI32ToF64 : SchedWrite; ++def WriteFCvtI64ToF32 : SchedWrite; ++def WriteFCvtI64ToF64 : SchedWrite; ++def WriteFMovF32ToI32 : SchedWrite; ++def WriteFMovI32ToF32 : SchedWrite; ++def WriteFMovF64ToI64 : SchedWrite; ++def WriteFMovI64ToF64 : SchedWrite; ++def WriteFCvtF32ToF64 : SchedWrite; ++def WriteFCvtF64ToF32 : SchedWrite; ++def WriteFCvtF64ToF64 : SchedWrite; ++def WriteFCvtF32ToF32 : SchedWrite; ++ ++ ++def WriteAdrLD : WriteSequence<[WriteImm, WriteLD]>; ++def WriteAdrAdr : WriteSequence<[WriteImm, WriteImm]>; ++ ++ ++def WriteFLDS : SchedWrite; ++def WriteFSTDS : SchedWrite; ++def WriteVEADD : SchedWrite; ++def WriteVESHT2 : SchedWrite; ++def WriteVESHT3 : SchedWrite; ++def WriteVECNT2 : SchedWrite; ++def WriteVECNT3 : SchedWrite; ++def WriteVESEL : SchedWrite; ++def WriteFMA2 : SchedWrite; ++def WriteFMA6 : SchedWrite; ++def WriteFMA17 : SchedWrite; ++def WriteVPM1 : SchedWrite; ++def WriteVPM2 : SchedWrite; ++def WriteVCON1 : SchedWrite; ++def WriteVCON2 : SchedWrite; ++def WriteVSUM : SchedWrite; ++def WriteVFREC : SchedWrite; ++def WriteVFCT : SchedWrite; ++def WriteVFRIS : SchedWrite; +diff --git a/llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.cpp b/llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.cpp +new file mode 100644 
+index 000000000..aefaadeb9 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.cpp +@@ -0,0 +1,54 @@ ++//===-- Sw64SelectionDAGInfo.cpp - Sw64 SelectionDAG Info ---------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements the Sw64SelectionDAGInfo class. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "Sw64TargetMachine.h" ++using namespace llvm; ++ ++#define DEBUG_TYPE "sw_64-selectiondag-info" ++ ++SDValue Sw64SelectionDAGInfo::EmitTargetCodeForMemcpy( ++ SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, ++ SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, ++ MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { ++ unsigned SizeBitWidth = Size.getValueSizeInBits(); ++ // Call __memcpy_4 if the src, dst and size are all 4 byte aligned. ++ if (!AlwaysInline && Alignment >= Align(4) && ++ DAG.MaskedValueIsZero(Size, APInt(SizeBitWidth, 3))) { ++ const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering(); ++ TargetLowering::ArgListTy Args; ++ TargetLowering::ArgListEntry Entry; ++ Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); ++ Entry.Node = Dst; ++ Args.push_back(Entry); ++ Entry.Node = Src; ++ Args.push_back(Entry); ++ Entry.Node = Size; ++ Args.push_back(Entry); ++ ++ TargetLowering::CallLoweringInfo CLI(DAG); ++ CLI.setDebugLoc(dl) ++ .setChain(Chain) ++ .setLibCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), ++ Type::getVoidTy(*DAG.getContext()), ++ DAG.getExternalSymbol( ++ "memcpy", TLI.getPointerTy(DAG.getDataLayout())), ++ std::move(Args)) ++ .setDiscardResult(); ++ ++ std::pair CallResult = TLI.LowerCallTo(CLI); ++ return CallResult.second; ++ } ++ ++ // Otherwise have the target-independent code call memcpy. ++ return SDValue(); ++} +diff --git a/llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.h b/llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.h +new file mode 100644 +index 000000000..1d242766f +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.h +@@ -0,0 +1,34 @@ ++//===-- Sw64SelectionDAGInfo.h - Sw64 SelectionDAG Info -------*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the Sw64 subclass for SelectionDAGTargetInfo. 
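A minimal sketch (not part of the patch) of the guard EmitTargetCodeForMemcpy applies above: the libcall path is only taken when the copy is not forced inline, the alignment is at least 4 bytes, and the length is a provable multiple of 4 (the real hook proves this with DAG.MaskedValueIsZero(Size, APInt(SizeBitWidth, 3))).

#include <cstdint>

// Hypothetical helper mirroring the condition above for a statically known size.
static bool takesMemcpyLibcallPath(bool AlwaysInline, uint64_t AlignInBytes,
                                   uint64_t KnownSize) {
  return !AlwaysInline && AlignInBytes >= 4 && (KnownSize & 3) == 0;
}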
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_SW64_SW64SELECTIONDAGINFO_H ++#define LLVM_LIB_TARGET_SW64_SW64SELECTIONDAGINFO_H ++ ++#include "llvm/CodeGen/SelectionDAGTargetInfo.h" ++ ++namespace llvm { ++ ++class Sw64TargetMachine; ++ ++class Sw64SelectionDAGInfo : public SelectionDAGTargetInfo { ++public: ++ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, ++ SDValue Chain, SDValue Op1, SDValue Op2, ++ SDValue Op3, Align Alignment, bool isVolatile, ++ bool AlwaysInline, ++ MachinePointerInfo DstPtrInfo, ++ MachinePointerInfo SrcPtrInfo) const override; ++}; ++} // namespace llvm ++ ++#endif +diff --git a/llvm/lib/Target/Sw64/Sw64Subtarget.cpp b/llvm/lib/Target/Sw64/Sw64Subtarget.cpp +new file mode 100644 +index 000000000..f450d97ea +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64Subtarget.cpp +@@ -0,0 +1,116 @@ ++//===-- Sw64Subtarget.cpp - Sw64 Subtarget Information ------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements the Sw64 specific subclass of TargetSubtargetInfo. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "Sw64Subtarget.h" ++#include "Sw64.h" ++#include "llvm/ADT/BitVector.h" ++#include "llvm/CodeGen/MachineScheduler.h" ++#include "llvm/MC/TargetRegistry.h" ++using namespace llvm; ++ ++#define DEBUG_TYPE "sw_64-subtarget" ++ ++#define GET_SUBTARGETINFO_TARGET_DESC ++#define GET_SUBTARGETINFO_CTOR ++#include "Sw64GenSubtargetInfo.inc" ++ ++static cl::opt Sw64IntArith("sw-int-divmod", cl::init(true), ++ cl::desc("Enable sw64 core4 integer" ++ "arithmetic instructions")); ++ ++static cl::opt Sw64IntShift("sw-shift-word", cl::init(false), ++ cl::desc("Enable sw64 core4 integer" ++ "shift instructions")); ++ ++static cl::opt Sw64ByteInst("sw-rev", cl::init(false), ++ cl::desc("Enable sw64 core4 byte" ++ "manipulation instructions")); ++ ++static cl::opt Sw64FloatArith("sw-recip", cl::init(true), ++ cl::desc("Enable sw64 core4 float" ++ "arithmetic instructions")); ++ ++static cl::opt Sw64FloatRound("sw-fprnd", cl::init(false), ++ cl::desc("Enable sw64 core4 float" ++ "round instructions")); ++ ++static cl::opt Sw64FloatCmov("sw-cmov", cl::init(true), ++ cl::desc("Enable sw64 core4 float" ++ "cmov instructions")); ++ ++static cl::opt Sw64PostInc("sw-auto-inc-dec", cl::init(false), ++ cl::desc("Enable sw64 core4 post-inc" ++ "load and store instructions")); ++ ++static cl::opt ++ Sw64CasInst("sw-use-cas", cl::init(true), ++ cl::desc("Enable sw64 core4 cas instructions")); ++ ++static cl::opt ++ Sw64CrcInst("sw-crc32", cl::init(false), ++ cl::desc("Enable sw64 core4 crc32 instructions")); ++ ++static cl::opt Sw64SCbtInst("sw-sbt-cbt", cl::init(false), ++ cl::desc("Enable sw64 core4 integer" ++ "sbt and cbt instructions")); ++ ++static cl::opt ++ Sw64WmembInst("sw-wmemb", cl::init(false), ++ cl::desc("Enable sw64 core4 wmemb instructions")); ++cl::opt HasSIMD("msimd", cl::desc("Support the SIMD"), cl::init(false)); ++ ++static cl::opt Sw64InstMullShiftAddSub("sw64-inst-mull-shiftaddsub", ++ cl::init(true), ++ cl::desc("Inst mull optmize to" ++ "shift with add or sub")); ++ ++static cl::opt Sw64InstExt("sw64-ext-opt", cl::init(false), ++ cl::desc("Optimize zext and sext")); ++ ++static cl::opt 
Sw64InstMemset("sw64-inst-memset", cl::init(true), ++ cl::desc("Delete part of call memset")); ++ ++void Sw64Subtarget::anchor() {} ++ ++Sw64Subtarget &Sw64Subtarget::initializeSubtargetDependencies(const Triple &TT, ++ StringRef CPU, ++ StringRef FS) { ++ std::string CPUName = std::string(CPU); ++ std::string TuneCPUName = std::string(CPU); ++ ParseSubtargetFeatures(CPUName, /*TuneCPU*/ TuneCPUName, FS); ++ return *this; ++} ++ ++Sw64Subtarget::Sw64Subtarget(const Triple &TT, const std::string &CPU, ++ const std::string &FS, const TargetMachine &TM) ++ : Sw64GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), InstrInfo(), ++ Sw64OptMul(Sw64InstMullShiftAddSub), Sw64OptMemset(Sw64InstMemset), ++ Sw64OptExt(Sw64InstExt), ++ ReserveRegister(Sw64::GPRCRegClass.getNumRegs() + ++ Sw64::F4RCRegClass.getNumRegs() + 1), ++ Sw64EnableIntAri(Sw64IntArith), Sw64EnableIntShift(Sw64IntShift), ++ Sw64EnableByteInst(Sw64ByteInst), Sw64EnableFloatAri(Sw64FloatArith), ++ Sw64EnableFloatRound(Sw64FloatRound), Sw64EnableFloatCmov(Sw64FloatCmov), ++ Sw64EnablePostInc(Sw64PostInc), Sw64EnableCasInst(Sw64CasInst), ++ Sw64EnableCrcInst(Sw64CrcInst), Sw64EnableSCbtInst(Sw64SCbtInst), ++ Sw64EnableWmembInst(Sw64WmembInst), ++ FrameLowering(initializeSubtargetDependencies(TT, CPU, FS)), ++ TLInfo(TM, *this), TSInfo(), curgpdist(0) {} ++ ++void Sw64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, ++ unsigned NumRegionInstrs) const { ++ Policy.OnlyBottomUp = false; ++ // Spilling is generally expensive on Sw64, so always enable ++ // register-pressure tracking. ++ Policy.ShouldTrackPressure = true; ++} +diff --git a/llvm/lib/Target/Sw64/Sw64Subtarget.h b/llvm/lib/Target/Sw64/Sw64Subtarget.h +new file mode 100644 +index 000000000..5f69ee377 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64Subtarget.h +@@ -0,0 +1,164 @@ ++//===-- Sw64Subtarget.h - Define Subtarget for the Sw64 -------*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file declares the Sw64 specific subclass of TargetSubtargetInfo. 
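The command-line toggles defined above (e.g. -sw-crc32, -sw-use-cas) surface through accessors such as enableCrcInst() and hasCore4() in the header that follows. A hedged sketch (not part of the patch) of how a backend pass might gate selection on them, assuming ST is the active Sw64Subtarget:

#include "Sw64Subtarget.h"

// Sketch only: gate Core4 CRC32 selection on both the architecture level and
// the -sw-crc32 option wired up above.
static bool canUseCrc32(const llvm::Sw64Subtarget &ST) {
  return ST.hasCore4() && ST.enableCrcInst();
}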
++// ++//===----------------------------------------------------------------------===// ++#ifndef LLVM_LIB_TARGET_SW64_SW64SUBTARGET_H ++#define LLVM_LIB_TARGET_SW64_SW64SUBTARGET_H ++ ++#include "Sw64FrameLowering.h" ++#include "Sw64ISelLowering.h" ++#include "Sw64InstrInfo.h" ++#include "Sw64SelectionDAGInfo.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Target/TargetMachine.h" ++#include ++using namespace llvm; ++extern cl::opt Sw64Mieee; ++extern cl::opt Sw64DeleteNop; ++ ++extern cl::opt HasSIMD; ++ ++#define GET_SUBTARGETINFO_HEADER ++#include "Sw64GenSubtargetInfo.inc" ++ ++namespace llvm { ++class StringRef; ++ ++class Sw64Subtarget : public Sw64GenSubtargetInfo { ++ virtual void anchor(); ++ ++ enum Sw64ArchEnum { sw64 = 0, swTarch, core3b, core4 }; ++ ++ bool isCore3b; ++ bool isCore4; ++ bool relax; ++ bool Ev; ++ ++ bool Sw64OptMul; ++ ++ bool Sw64OptMemset; ++ ++ bool Sw64OptExt; ++ ++ bool Sw64EnableIntAri; ++ bool Sw64EnableIntShift; ++ bool Sw64EnableByteInst; ++ bool Sw64EnableFloatAri; ++ bool Sw64EnableFloatRound; ++ bool Sw64EnableFloatCmov; ++ bool Sw64EnablePostInc; ++ bool Sw64EnableCasInst; ++ bool Sw64EnableCrcInst; ++ bool Sw64EnableSCbtInst; ++ bool Sw64EnableWmembInst; ++ bool Misaligned256StoreIsSlow = false; ++ uint8_t MaxInterleaveFactor = 2; ++ unsigned WideningBaseCost = 0; ++ ++ Sw64InstrInfo InstrInfo; ++ // ReserveRegister[i] - #i is not available as a general purpose register. ++ BitVector ReserveRegister; ++ Sw64FrameLowering FrameLowering; ++ Sw64TargetLowering TLInfo; ++ Sw64SelectionDAGInfo TSInfo; ++ ++ bool HasCT; ++ bool Is64Bit = true; ++ ++ Sw64ArchEnum Sw64ArchVersion; ++ ++public: ++ mutable int curgpdist; ++ /// This constructor initializes the data members to match that ++ /// of the specified triple. ++ /// ++ Sw64Subtarget &initializeSubtargetDependencies(const Triple &TT, ++ StringRef CPU, StringRef FS); ++ ++ Sw64Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, ++ const TargetMachine &TM); ++ ++ bool hasMieee() const { return Sw64Mieee; } ++ bool hasDeleteNop() const { return Sw64DeleteNop; } ++ ++ int &getCurgpdist() const { return curgpdist; } ++ void setCurgpdist(int &count) { curgpdist = count; } ++ bool hasSIMD() const { return HasSIMD; } ++ ++ unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } ++ unsigned getWideningBaseCost() const { return WideningBaseCost; } ++ bool isMisaligned256StoreSlow() const { return Misaligned256StoreIsSlow; } ++ ++ /// ParseSubtargetFeatures - Parses features string setting specified ++ /// subtarget options. Definition of function is auto generated by tblgen. 
++ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); ++ ++ const Sw64InstrInfo *getInstrInfo() const override { return &InstrInfo; } ++ const Sw64FrameLowering *getFrameLowering() const override { ++ return &FrameLowering; ++ } ++ const Sw64TargetLowering *getTargetLowering() const override { ++ return &TLInfo; ++ } ++ const Sw64SelectionDAGInfo *getSelectionDAGInfo() const override { ++ return &TSInfo; ++ } ++ ++ const Sw64RegisterInfo *getRegisterInfo() const override { ++ return &InstrInfo.getRegisterInfo(); ++ } ++ ++ AntiDepBreakMode getAntiDepBreakMode() const override { ++ return TargetSubtargetInfo::ANTIDEP_CRITICAL; ++ } ++ ++ // TODO: enable PostRAscheduler for test ++ bool enablePostRAScheduler() const { return true; } ++ ++ bool enableMachineScheduler() const { return true; } ++ ++ bool is64Bit() const { return true; } ++ bool hasCore3b() const { return Sw64ArchVersion == core3b; } ++#ifdef SW64_DEFAULT_ARCH_CORE3 ++ bool hasCore4() const { return Sw64ArchVersion == core4; } ++#else ++ bool hasCore4() const { return true; } ++#endif ++ bool enRelax() const { return relax; } ++ bool hasEv() const { return Ev; } ++ bool hasCT() const { return HasCT; } ++ bool isRegisterReserved(size_t i) const { return ReserveRegister[i]; } ++ ++ bool enOptMul() const { return Sw64OptMul; } ++ ++ bool enOptMemset() const { return Sw64OptMemset; } ++ ++ bool enOptExt() const { return Sw64OptExt; } ++ ++ bool enableIntAri() const { return Sw64EnableIntAri; } ++ bool enableIntShift() const { return Sw64EnableIntShift; } ++ bool enableByteInst() const { return Sw64EnableByteInst; } ++ bool enableFloatAri() const { return Sw64EnableFloatAri; } ++ bool enableFloatRound() const { return Sw64EnableFloatRound; } ++ bool enableFloatCmov() const { return Sw64EnableFloatCmov; } ++ bool enablePostInc() const { return Sw64EnablePostInc; } ++ bool enableCasInst() const { return Sw64EnableCasInst; } ++ bool enableCrcInst() const { return Sw64EnableCrcInst; } ++ bool enableSCbtInst() const { return Sw64EnableSCbtInst; } ++ bool enableWmembInst() const { return Sw64EnableWmembInst; } ++ ++ void overrideSchedPolicy(MachineSchedPolicy &Policy, ++ unsigned NumRegionInstrs) const; ++}; ++} // namespace llvm ++#endif +diff --git a/llvm/lib/Target/Sw64/Sw64TargetMachine.cpp b/llvm/lib/Target/Sw64/Sw64TargetMachine.cpp +new file mode 100644 +index 000000000..b74f12c0f +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64TargetMachine.cpp +@@ -0,0 +1,194 @@ ++//===-- Sw64TargetMachine.cpp - Define TargetMachine for Sw64 -----------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
++// ++//===----------------------------------------------------------------------===// ++// ++// ++//===----------------------------------------------------------------------===// ++ ++#include "Sw64TargetMachine.h" ++#include "MCTargetDesc/Sw64MCTargetDesc.h" ++#include "Sw64.h" ++#include "Sw64MachineFunctionInfo.h" ++#include "Sw64MacroFusion.h" ++#include "Sw64TargetObjectFile.h" ++#include "Sw64TargetTransformInfo.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/Analysis/TargetTransformInfo.h" ++#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" ++#include "llvm/CodeGen/MachineScheduler.h" ++#include "llvm/CodeGen/Passes.h" ++#include "llvm/CodeGen/TargetPassConfig.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/CodeGen.h" ++#include "llvm/Transforms/Scalar.h" ++#include ++ ++using namespace llvm; ++ ++static cl::opt EnableMCR("sw_64-enable-mcr", ++ cl::desc("Enable the machine combiner pass"), ++ cl::init(true), cl::Hidden); ++ ++static cl::opt ++ EnablePrefetch("enable-sw64-prefetching", ++ cl::desc("Enable software prefetching on SW64"), ++ cl::init(true), cl::Hidden); ++ ++cl::opt FS_LOAD("fastload", ++ cl::desc("Enable fast/load optimize(developing)"), ++ cl::init(false), cl::Hidden); ++ ++static Reloc::Model getEffectiveRelocModel(const Triple &TT, ++ std::optional RM) { ++ if (!RM) ++ return Reloc::Static; ++ return *RM; ++} ++ ++static CodeModel::Model ++getEffectiveSw64CodeModel(std::optional CM) { ++ if (CM) { ++ if (*CM != CodeModel::Small && *CM != CodeModel::Medium && ++ *CM != CodeModel::Large) ++ report_fatal_error( ++ "Target only supports CodeModel Small, Medium or Large"); ++ return *CM; ++ } ++ return CodeModel::Small; ++} ++ ++/// Create an ILP32 architecture model ++/// ++Sw64TargetMachine::Sw64TargetMachine(const Target &T, const Triple &TT, ++ StringRef CPU, StringRef FS, ++ const TargetOptions &Options, ++ std::optional RM, ++ std::optional CM, ++ CodeGenOpt::Level OL, bool JIT) ++ : LLVMTargetMachine( ++ T, ++ "e-m:e-p:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n64-S128-v256:256", ++ TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), ++ getEffectiveSw64CodeModel(CM), OL), ++ TLOF(std::make_unique()), ++ ABI(Sw64ABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)), ++ Subtarget(TT, std::string(CPU), std::string(FS), *this) { ++ initAsmInfo(); ++} ++ ++Sw64TargetMachine::~Sw64TargetMachine() = default; ++ ++namespace { ++ ++/// Sw64 Code Generator Pass Configuration Options. ++class Sw64PassConfig : public TargetPassConfig { ++public: ++ Sw64PassConfig(Sw64TargetMachine &TM, PassManagerBase &PM) ++ : TargetPassConfig(TM, PM) { ++ if (TM.getOptLevel() != CodeGenOpt::None) ++ substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); ++ } ++ ++ Sw64TargetMachine &getSw64TargetMachine() const { ++ return getTM(); ++ } ++ ScheduleDAGInstrs * ++ createMachineScheduler(MachineSchedContext *C) const override { ++ ScheduleDAGMILive *DAG = createGenericSchedLive(C); ++ DAG->addMutation(createSw64MacroFusionDAGMutation()); ++ return DAG; ++ } ++ ++ ScheduleDAGInstrs * ++ createPostMachineScheduler(MachineSchedContext *C) const override { ++ ScheduleDAGMI *DAG = createGenericSchedPostRA(C); ++ DAG->addMutation(createSw64MacroFusionDAGMutation()); ++ return DAG; ++ } ++ ++ void addIRPasses() override; ++ bool addILPOpts() override; ++ bool addInstSelector() override; ++ void addPreSched2() override; ++ void addPreEmitPass() override; ++ void addPreRegAlloc() override; ++ void addPreLegalizeMachineIR() override; ++ // for Inst Selector. 
++ bool addGlobalInstructionSelect() override; ++}; ++ ++} // end anonymous namespace ++ ++TargetPassConfig *Sw64TargetMachine::createPassConfig(PassManagerBase &PM) { ++ return new Sw64PassConfig(*this, PM); ++} ++ ++void Sw64PassConfig::addIRPasses() { ++ addPass(createAtomicExpandPass()); ++ ++ if (EnablePrefetch) ++ addPass(createLoopDataPrefetchPass()); ++ ++ TargetPassConfig::addIRPasses(); ++} ++ ++void Sw64PassConfig::addPreLegalizeMachineIR() { ++ addPass(createSw64PreLegalizeCombiner()); ++} ++ ++void Sw64PassConfig::addPreSched2() { addPass(createSw64ExpandPseudo2Pass()); } ++ ++bool Sw64PassConfig::addInstSelector() { ++ addPass(createSw64ISelDag(getSw64TargetMachine(), getOptLevel())); ++ return false; ++} ++ ++void Sw64PassConfig::addPreRegAlloc() { ++ addPass(createSw64IEEEConstraintPass()); ++} ++ ++void Sw64PassConfig::addPreEmitPass() { ++ addPass(createSw64BranchSelection()); ++ addPass(createSw64LLRPPass(getSw64TargetMachine())); ++ addPass(createSw64ExpandPseudoPass()); ++} ++ ++bool Sw64PassConfig::addILPOpts() { ++ ++ if (EnableMCR) ++ addPass(&MachineCombinerID); ++ ++ return true; ++} ++ ++bool Sw64PassConfig::addGlobalInstructionSelect() { ++ addPass(new InstructionSelect()); ++ return false; ++} ++ ++// Force static initialization. ++extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSw64Target() { ++ RegisterTargetMachine X(getTheSw64Target()); ++ ++ PassRegistry *PR = PassRegistry::getPassRegistry(); ++ initializeSw64BranchSelectionPass(*PR); ++ initializeSw64PreLegalizerCombinerPass(*PR); ++ initializeSw64DAGToDAGISelPass(*PR); ++} ++ ++TargetTransformInfo ++Sw64TargetMachine::getTargetTransformInfo(const Function &F) const { ++ return TargetTransformInfo(Sw64TTIImpl(this, F)); ++} ++ ++MachineFunctionInfo *Sw64TargetMachine::createMachineFunctionInfo( ++ BumpPtrAllocator &Allocator, const Function &F, ++ const TargetSubtargetInfo *STI) const { ++ return Sw64MachineFunctionInfo::create(Allocator, F, ++ STI); ++} +diff --git a/llvm/lib/Target/Sw64/Sw64TargetMachine.h b/llvm/lib/Target/Sw64/Sw64TargetMachine.h +new file mode 100644 +index 000000000..40e34b131 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64TargetMachine.h +@@ -0,0 +1,61 @@ ++//===-- Sw64TargetMachine.h - Define TargetMachine for Sw64 ---*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file declares the Sw64 specific subclass of TargetMachine. 
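A hedged usage sketch (not part of the patch): once LLVMInitializeSw64Target() above has run, the target is reachable through the normal registry API. The triple string used here is an assumption, not confirmed by this excerpt.

#include "llvm/MC/TargetRegistry.h"
#include <string>

// Sketch only: lookupTarget resolves whatever the registration calls above
// made visible to the registry.
static const llvm::Target *findSw64Target(std::string &Error) {
  return llvm::TargetRegistry::lookupTarget("sw_64-unknown-linux-gnu", Error);
}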
++// ++//===----------------------------------------------------------------------===// ++#ifndef LLVM_LIB_TARGET_SW64_SW64TARGETMACHINE_H ++#define LLVM_LIB_TARGET_SW64_SW64TARGETMACHINE_H ++ ++#include "MCTargetDesc/Sw64ABIInfo.h" ++#include "Sw64Subtarget.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/Analysis/TargetTransformInfo.h" ++#include "llvm/Support/CodeGen.h" ++#include "llvm/Target/TargetMachine.h" ++#include ++#include ++ ++namespace llvm { ++ ++class Sw64TargetMachine : public LLVMTargetMachine { ++ std::unique_ptr TLOF; ++ Sw64ABIInfo ABI; ++ Sw64Subtarget Subtarget; ++ ++public: ++ Sw64TargetMachine(const Target &T, const Triple &TT, StringRef CPU, ++ StringRef FS, const TargetOptions &Options, ++ std::optional RM, ++ std::optional CM, CodeGenOpt::Level OL, ++ bool JIT); ++ ~Sw64TargetMachine() override; ++ ++ const Sw64ABIInfo &getABI() const { return ABI; } ++ const Sw64Subtarget *getSubtargetImpl() const { return &Subtarget; } ++ const Sw64Subtarget *getSubtargetImpl(const Function &) const override { ++ return &Subtarget; ++ } ++ ++ MachineFunctionInfo * ++ createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, ++ const TargetSubtargetInfo *STI) const override; ++ ++ // Pass Pipeline Configuration ++ TargetPassConfig *createPassConfig(PassManagerBase &PM) override; ++ ++ TargetTransformInfo getTargetTransformInfo(const Function &F) const override; ++ ++ TargetLoweringObjectFile *getObjFileLowering() const override { ++ return TLOF.get(); ++ } ++}; ++ ++} // end namespace llvm ++#endif +diff --git a/llvm/lib/Target/Sw64/Sw64TargetObjectFile.cpp b/llvm/lib/Target/Sw64/Sw64TargetObjectFile.cpp +new file mode 100644 +index 000000000..a858b5fef +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64TargetObjectFile.cpp +@@ -0,0 +1,121 @@ ++//===-- Sw64TargetObjectFile.cpp - Sw64 object files --------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "Sw64TargetObjectFile.h" ++#include "Sw64Subtarget.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCSectionELF.h" ++#include "llvm/Target/TargetMachine.h" ++ ++using namespace llvm; ++void Sw64TargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { ++ TargetLoweringObjectFileELF::Initialize(Ctx, TM); ++ InitializeELF(TM.Options.UseInitArray); ++ ++ SmallDataSection = getContext().getELFSection( ++ ".sdata", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); ++ SmallBSSSection = getContext().getELFSection(".sbss", ELF::SHT_NOBITS, ++ ELF::SHF_WRITE | ELF::SHF_ALLOC); ++ // TextSection - see MObjectFileInfo.cpp ++ // StaticCtorSection - see MObjectFileInfo.cpp ++ // StaticDtorSection - see MObjectFileInfo.cpp ++} ++// A address must be loaded from a small section if its size is less than the ++// small section size threshold. Data in this section could be addressed by ++// using gp_rel operator. ++bool Sw64TargetObjectFile::isInSmallSection(uint64_t Size) const { ++ // gcc has traditionally not treated zero-sized objects as small data, so this ++ // is effectively part of the ABI. ++ return Size > 0 && Size <= SSThreshold; ++} ++ ++// Return true if this global address should be placed into small data/bss ++// section. 
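// (Summary of the decision order in the body below: an explicit .sdata/.sbss
// section attribute always wins; any other explicit section, external
// declarations and common-linkage globals, and unsized types are rejected;
// only then is the size threshold from isInSmallSection() consulted.)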
++bool Sw64TargetObjectFile::isGlobalInSmallSection( ++ const GlobalObject *GO, const TargetMachine &TM) const { ++ // Only global variables, not functions. ++ const GlobalVariable *GVA = dyn_cast(GO); ++ if (!GVA) ++ return false; ++ ++ // If the variable has an explicit section, it is placed in that section. ++ if (GVA->hasSection()) { ++ StringRef Section = GVA->getSection(); ++ ++ // Explicitly placing any variable in the small data section overrides ++ // the global -G value. ++ if (Section == ".sdata" || Section == ".sbss") ++ return true; ++ ++ // Otherwise reject putting the variable to small section if it has an ++ // explicit section name. ++ return false; ++ } ++ ++ if (((GVA->hasExternalLinkage() && GVA->isDeclaration()) || ++ GVA->hasCommonLinkage())) ++ return false; ++ ++ Type *Ty = GVA->getValueType(); ++ // It is possible that the type of the global is unsized, i.e. a declaration ++ // of a extern struct. In this case don't presume it is in the small data ++ // section. This happens e.g. when building the FreeBSD kernel. ++ if (!Ty->isSized()) ++ return false; ++ ++ return isInSmallSection( ++ GVA->getParent()->getDataLayout().getTypeAllocSize(Ty)); ++} ++ ++MCSection *Sw64TargetObjectFile::SelectSectionForGlobal( ++ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { ++ // Handle Small Section classification here. ++ if (Kind.isBSS() && isGlobalInSmallSection(GO, TM)) ++ return SmallBSSSection; ++ if (Kind.isData() && isGlobalInSmallSection(GO, TM)) ++ return SmallDataSection; ++ if (Kind.isReadOnly()) ++ return GO->hasLocalLinkage() ? ReadOnlySection : DataRelROSection; ++ ++ // Otherwise, we work the same as ELF. ++ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM); ++} ++ ++void Sw64TargetObjectFile::getModuleMetadata(Module &M) { ++ SmallVector ModuleFlags; ++ M.getModuleFlagsMetadata(ModuleFlags); ++ ++ for (const auto &MFE : ModuleFlags) { ++ StringRef Key = MFE.Key->getString(); ++ if (Key == "SmallDataLimit") { ++ SSThreshold = mdconst::extract(MFE.Val)->getZExtValue(); ++ break; ++ } ++ } ++} ++ ++/// Return true if this constant should be placed into small data section. ++bool Sw64TargetObjectFile::isConstantInSmallSection(const DataLayout &DL, ++ const Constant *CN) const { ++ return isInSmallSection(DL.getTypeAllocSize(CN->getType())); ++} ++ ++MCSection *Sw64TargetObjectFile::getSectionForConstant(const DataLayout &DL, ++ SectionKind Kind, ++ const Constant *C, ++ Align &Alignment) const { ++ if (isConstantInSmallSection(DL, C)) ++ return SmallDataSection; ++ ++ // Otherwise, we work the same as ELF. ++ return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C, ++ Alignment); ++} +diff --git a/llvm/lib/Target/Sw64/Sw64TargetObjectFile.h b/llvm/lib/Target/Sw64/Sw64TargetObjectFile.h +new file mode 100644 +index 000000000..0bae78a81 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64TargetObjectFile.h +@@ -0,0 +1,49 @@ ++//===-- Sw64TargetObjectFile.h - Sw64 Object Info -------------*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
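A small sketch (not part of the patch) of how the "SmallDataLimit" module flag consumed by getModuleMetadata() above could be produced on the IR side; the value 16 is an arbitrary example threshold.

#include "llvm/IR/Module.h"

// Sketch only: globals of at most 16 bytes would then be eligible for .sdata/.sbss.
static void setSmallDataLimit(llvm::Module &M) {
  M.addModuleFlag(llvm::Module::Error, "SmallDataLimit", 16);
}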
++// ++//===----------------------------------------------------------------------===// ++#ifndef LLVM_LIB_TARGET_SW64_SW64TARGETOBJECTFILE_H ++#define LLVM_LIB_TARGET_SW64_SW64TARGETOBJECTFILE_H ++ ++#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" ++ ++namespace llvm { ++ ++static const unsigned CodeModelLargeSize = 256; ++ ++class Sw64TargetObjectFile : public TargetLoweringObjectFileELF { ++ MCSection *BSSSectionLarge; ++ MCSection *DataSectionLarge; ++ MCSection *ReadOnlySectionLarge; ++ MCSection *DataRelROSectionLarge; ++ MCSection *SmallDataSection; ++ MCSection *SmallBSSSection; ++ unsigned SSThreshold = 8; ++ ++public: ++ void Initialize(MCContext &Ctx, const TargetMachine &TM) override; ++ /// Return true if this global address should be placed into small data/bss ++ /// section. ++ bool isGlobalInSmallSection(const GlobalObject *GO, ++ const TargetMachine &TM) const; ++ ++ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind, ++ const TargetMachine &TM) const override; ++ ++ /// Return true if this constant should be placed into small data section. ++ bool isConstantInSmallSection(const DataLayout &DL, const Constant *CN) const; ++ ++ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, ++ const Constant *C, ++ Align &Alignment) const override; ++ ++ void getModuleMetadata(Module &M) override; ++ ++ bool isInSmallSection(uint64_t Size) const; ++}; ++} // end namespace llvm ++#endif +diff --git a/llvm/lib/Target/Sw64/Sw64TargetStreamer.h b/llvm/lib/Target/Sw64/Sw64TargetStreamer.h +new file mode 100644 +index 000000000..884c03d97 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64TargetStreamer.h +@@ -0,0 +1,150 @@ ++//===-- Sw64TargetStreamer.h - Sw64 Target Streamer ------------*- C++ -*--===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_SW64_SW64TARGETSTREAMER_H ++#define LLVM_LIB_TARGET_SW64_SW64TARGETSTREAMER_H ++ ++#include "MCTargetDesc/Sw64ABIFlagsSection.h" ++#include "MCTargetDesc/Sw64ABIInfo.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/MC/MCELFStreamer.h" ++#include "llvm/MC/MCInstrInfo.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MCStreamer.h" ++#include "llvm/Support/FormattedStream.h" ++#include ++ ++namespace llvm { ++ ++struct Sw64ABIFlagsSection; ++ ++class Sw64TargetStreamer : public MCTargetStreamer { ++public: ++ Sw64TargetStreamer(MCStreamer &S); ++ ++ virtual void setPic(bool Value) {} ++ ++ virtual void emitDirectiveSetReorder(); ++ virtual void emitDirectiveSetNoReorder(); ++ virtual void emitDirectiveSetMacro(); ++ virtual void emitDirectiveSetNoMacro(); ++ virtual void emitDirectiveSetAt(); ++ virtual void emitDirectiveSetNoAt(); ++ virtual void emitDirectiveEnd(StringRef Name); ++ ++ virtual void emitDirectiveEnt(const MCSymbol &Symbol); ++ virtual void emitDirectiveNaN2008(); ++ virtual void emitDirectiveNaNLegacy(); ++ virtual void emitDirectiveInsn(); ++ virtual void emitDirectiveSetCore3b(); ++ virtual void emitDirectiveSetCore4(); ++ virtual void emitFrame(unsigned StackReg, unsigned StackSize, ++ unsigned ReturnReg); ++ virtual void emitDirectiveSetArch(StringRef Arch); ++ ++ void prettyPrintAsm(MCInstPrinter &InstPrinter, uint64_t Address, ++ const MCInst &Inst, const MCSubtargetInfo &STI, ++ raw_ostream &OS) override; ++ ++ void emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI); ++ ++ void forbidModuleDirective() { ModuleDirectiveAllowed = false; } ++ void reallowModuleDirective() { ModuleDirectiveAllowed = true; } ++ bool isModuleDirectiveAllowed() { return ModuleDirectiveAllowed; } ++ ++ // This method enables template classes to set internal abi flags ++ // structure values. 
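// Note: the "predicate library" P may be any object that exposes getABI()
// together with the getters Sw64ABIFlagsSection::setAllFromPredicates()
// expects; the streamer only copies ABI and flag state out of it, so callers
// are not tied to one concrete type.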
++ template ++ void updateABIInfo(const PredicateLibrary &P) { ++ ABI = P.getABI(); ++ ABIFlagsSection.setAllFromPredicates(P); ++ } ++ ++ Sw64ABIFlagsSection &getABIFlagsSection() { return ABIFlagsSection; } ++ const Sw64ABIInfo &getABI() const { ++ assert(ABI && "ABI hasn't been set!"); ++ return *ABI; ++ } ++ ++protected: ++ std::optional ABI; ++ Sw64ABIFlagsSection ABIFlagsSection; ++ ++ bool GPRInfoSet; ++ unsigned GPRBitMask; ++ int GPROffset; ++ ++ bool FPRInfoSet; ++ unsigned FPRBitMask; ++ int FPROffset; ++ ++ bool FrameInfoSet; ++ int FrameOffset; ++ unsigned FrameReg; ++ unsigned ReturnReg; ++ ++private: ++ bool ModuleDirectiveAllowed; ++}; ++ ++// This part is for ascii assembly output ++class Sw64TargetAsmStreamer : public Sw64TargetStreamer { ++ formatted_raw_ostream &OS; ++ ++public: ++ Sw64TargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); ++ ++ void emitDirectiveSetReorder() override; ++ void emitDirectiveSetNoReorder() override; ++ void emitDirectiveSetMacro() override; ++ void emitDirectiveSetNoMacro() override; ++ void emitDirectiveSetAt() override; ++ void emitDirectiveSetNoAt() override; ++ void emitDirectiveEnd(StringRef Name) override; ++ ++ void emitDirectiveEnt(const MCSymbol &Symbol) override; ++ void emitDirectiveNaN2008() override; ++ void emitDirectiveNaNLegacy() override; ++ void emitDirectiveInsn() override; ++ void emitFrame(unsigned StackReg, unsigned StackSize, ++ unsigned ReturnReg) override; ++ void emitDirectiveSetCore3b() override; ++ void emitDirectiveSetCore4() override; ++ ++ void emitDirectiveSetArch(StringRef Arch) override; ++}; ++ ++// This part is for ELF object output ++class Sw64TargetELFStreamer : public Sw64TargetStreamer { ++ bool MicroSw64Enabled; ++ const MCSubtargetInfo &STI; ++ bool Pic; ++ ++public: ++ MCELFStreamer &getStreamer(); ++ Sw64TargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI); ++ ++ void setPic(bool Value) override { Pic = Value; } ++ ++ void emitLabel(MCSymbol *Symbol) override; ++ void finish() override; ++ ++ void emitDirectiveSetNoReorder() override; ++ ++ void emitDirectiveEnt(const MCSymbol &Symbol) override; ++ void emitDirectiveNaN2008() override; ++ void emitDirectiveNaNLegacy() override; ++ void emitDirectiveInsn() override; ++ void emitFrame(unsigned StackReg, unsigned StackSize, ++ unsigned ReturnReg) override; ++ ++ void emitSw64AbiFlags(); ++}; ++} // namespace llvm ++#endif +diff --git a/llvm/lib/Target/Sw64/Sw64TargetTransformInfo.cpp b/llvm/lib/Target/Sw64/Sw64TargetTransformInfo.cpp +new file mode 100644 +index 000000000..5fc0011ec +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64TargetTransformInfo.cpp +@@ -0,0 +1,804 @@ ++//===-- Sw64TargetTransformInfo.cpp - Sw64-specific TTI -------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements a TargetTransformInfo analysis pass specific to the ++// Sw64 target machine. It uses the target's detailed information to provide ++// more precise answers to certain TTI queries, while letting the target ++// independent and default TTI implementations handle the rest. 
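A hedged sketch (not part of the patch) of how a caller on the asm-printer side might drive the directive hooks declared above; the directive spellings in the comments are inferred from the method names, not confirmed by this excerpt.

#include "Sw64TargetStreamer.h"
#include "llvm/MC/MCSymbol.h"

static void beginTextFunction(llvm::Sw64TargetStreamer &TS,
                              const llvm::MCSymbol &Sym) {
  TS.emitDirectiveSetNoReorder(); // presumably ".set noreorder"
  TS.emitDirectiveEnt(Sym);       // presumably ".ent <symbol>"
}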
++// ++//===----------------------------------------------------------------------===// ++ ++#include "Sw64TargetTransformInfo.h" ++#include "llvm/Analysis/TargetTransformInfo.h" ++#include "llvm/CodeGen/BasicTTIImpl.h" ++#include "llvm/CodeGen/CostTable.h" ++#include "llvm/CodeGen/TargetLowering.h" ++#include "llvm/IR/IntrinsicInst.h" ++#include "llvm/Support/Debug.h" ++using namespace llvm; ++ ++#define DEBUG_TYPE "sw64tti" ++ ++//===----------------------------------------------------------------------===// ++// ++// Sw64 cost model. ++// ++//===----------------------------------------------------------------------===// ++ ++InstructionCost Sw64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, ++ TTI::TargetCostKind CostKind) { ++ assert(Ty->isIntegerTy()); ++ ++ unsigned BitSize = Ty->getPrimitiveSizeInBits(); ++ // There is no cost model for constants with a bit size of 0. Return TCC_Free ++ // here, so that constant hoisting will ignore this constant. ++ if (BitSize == 0) ++ return TTI::TCC_Free; ++ // No cost model for operations on integers larger than 64 bit implemented ++ // yet. ++ if (BitSize > 64) ++ return TTI::TCC_Free; ++ ++ if (Imm == 0) ++ return TTI::TCC_Free; ++ ++ if (Imm.getBitWidth() <= 64) { ++ // Constants loaded via lgfi. ++ if (isInt<32>(Imm.getSExtValue())) ++ return TTI::TCC_Basic; ++ // Constants loaded via llilf. ++ if (isUInt<32>(Imm.getZExtValue())) ++ return TTI::TCC_Basic; ++ // Constants loaded via llihf: ++ if ((Imm.getZExtValue() & 0xffffffff) == 0) ++ return TTI::TCC_Basic; ++ ++ return 2 * TTI::TCC_Basic; ++ } ++ ++ return 4 * TTI::TCC_Basic; ++} ++ ++InstructionCost Sw64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, ++ const APInt &Imm, Type *Ty, ++ TTI::TargetCostKind CostKind, ++ Instruction *Inst) { ++ assert(Ty->isIntegerTy()); ++ ++ unsigned BitSize = Ty->getPrimitiveSizeInBits(); ++ // There is no cost model for constants with a bit size of 0. Return TCC_Free ++ // here, so that constant hoisting will ignore this constant. ++ if (BitSize == 0) ++ return TTI::TCC_Free; ++ // No cost model for operations on integers larger than 64 bit implemented ++ // yet. ++ if (BitSize > 64) ++ return TTI::TCC_Free; ++ ++ switch (Opcode) { ++ default: ++ return TTI::TCC_Free; ++ case Instruction::GetElementPtr: ++ // Always hoist the base address of a GetElementPtr. This prevents the ++ // creation of new constants for every base constant that gets constant ++ // folded with the offset. ++ if (Idx == 0) ++ return 2 * TTI::TCC_Basic; ++ return TTI::TCC_Free; ++ case Instruction::Store: ++ return TTI::TCC_Basic; ++ case Instruction::ICmp: ++ case Instruction::Add: ++ case Instruction::Sub: ++ if (Idx == 1 && Imm.getBitWidth() <= 64) { ++ // We use algfi/slgfi to add/subtract 32-bit unsigned immediates. ++ if (isUInt<32>(Imm.getZExtValue())) ++ return TTI::TCC_Free; ++ // Or their negation, by swapping addition vs. subtraction. ++ if (isUInt<32>(-Imm.getSExtValue())) ++ return TTI::TCC_Free; ++ } ++ break; ++ case Instruction::Mul: ++ case Instruction::Or: ++ case Instruction::Xor: ++ if (Idx == 1 && Imm.getBitWidth() <= 64) { ++ // Masks supported by oilf/xilf. ++ if (isUInt<32>(Imm.getZExtValue())) ++ return TTI::TCC_Free; ++ // Masks supported by oihf/xihf. ++ if ((Imm.getZExtValue() & 0xffffffff) == 0) ++ return TTI::TCC_Free; ++ } ++ break; ++ case Instruction::And: ++ if (Idx == 1 && Imm.getBitWidth() <= 64) { ++ // Any 32-bit AND operation can by implemented via nilf. 
++ if (BitSize <= 32) ++ return TTI::TCC_Free; ++ // 64-bit masks supported by nilf. ++ if (isUInt<32>(~Imm.getZExtValue())) ++ return TTI::TCC_Free; ++ // 64-bit masks supported by nilh. ++ if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff) ++ return TTI::TCC_Free; ++ // Some 64-bit AND operations can be implemented via risbg. ++ // const Sw64InstrInfo *TII = ST->getInstrInfo(); ++ // unsigned Start, End; ++ // if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End)) ++ // return TTI::TCC_Free; ++ } ++ break; ++ case Instruction::Shl: ++ case Instruction::LShr: ++ case Instruction::AShr: ++ case Instruction::UDiv: ++ case Instruction::SDiv: ++ case Instruction::URem: ++ case Instruction::SRem: ++ case Instruction::Trunc: ++ case Instruction::ZExt: ++ case Instruction::SExt: ++ case Instruction::IntToPtr: ++ case Instruction::PtrToInt: ++ case Instruction::BitCast: ++ case Instruction::PHI: ++ case Instruction::Call: ++ case Instruction::Select: ++ case Instruction::Ret: ++ case Instruction::Load: ++ break; ++ } ++ ++ return Sw64TTIImpl::getIntImmCost(Imm, Ty, CostKind); ++} ++ ++InstructionCost Sw64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, ++ unsigned Idx, const APInt &Imm, ++ Type *Ty, ++ TTI::TargetCostKind CostKind) { ++ assert(Ty->isIntegerTy()); ++ ++ unsigned BitSize = Ty->getPrimitiveSizeInBits(); ++ // There is no cost model for constants with a bit size of 0. Return TCC_Free ++ // here, so that constant hoisting will ignore this constant. ++ if (BitSize == 0) ++ return TTI::TCC_Free; ++ // No cost model for operations on integers larger than 64 bit implemented ++ // yet. ++ if (BitSize > 64) ++ return TTI::TCC_Free; ++ ++ switch (IID) { ++ default: ++ return TTI::TCC_Free; ++ case Intrinsic::sadd_with_overflow: ++ case Intrinsic::uadd_with_overflow: ++ case Intrinsic::ssub_with_overflow: ++ case Intrinsic::usub_with_overflow: ++ // These get expanded to include a normal addition/subtraction. ++ if (Idx == 1 && Imm.getBitWidth() <= 64) { ++ if (isUInt<32>(Imm.getZExtValue())) ++ return TTI::TCC_Free; ++ if (isUInt<32>(-Imm.getSExtValue())) ++ return TTI::TCC_Free; ++ } ++ break; ++ case Intrinsic::smul_with_overflow: ++ case Intrinsic::umul_with_overflow: ++ // These get expanded to include a normal multiplication. ++ if (Idx == 1 && Imm.getBitWidth() <= 64) { ++ if (isInt<32>(Imm.getSExtValue())) ++ return TTI::TCC_Free; ++ } ++ break; ++ case Intrinsic::experimental_stackmap: ++ if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) ++ return TTI::TCC_Free; ++ break; ++ case Intrinsic::experimental_patchpoint_void: ++ case Intrinsic::experimental_patchpoint_i64: ++ if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) ++ return TTI::TCC_Free; ++ break; ++ } ++ return Sw64TTIImpl::getIntImmCost(Imm, Ty, CostKind); ++} ++ ++bool Sw64TTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1, ++ const TargetTransformInfo::LSRCost &C2) { ++ // check instruction count (first), and don't care about ++ // ImmCost, since offsets are checked explicitly. 
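// (The std::tie comparison below is lexicographic: Insns is the primary key,
// and NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost and SetupCost
// only break ties, in that order; ImmCost never participates.)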
++ return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, ++ C1.NumBaseAdds, C1.ScaleCost, C1.SetupCost) < ++ std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, ++ C2.NumBaseAdds, C2.ScaleCost, C2.SetupCost); ++} ++ ++unsigned Sw64TTIImpl::getNumberOfRegisters(bool Vector) { ++ if (Vector) { ++ return 0; ++ } ++ return 12; ++} ++ ++bool Sw64TTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) { ++ EVT VT = TLI->getValueType(DL, DataType); ++ return (VT.isScalarInteger() && TLI->isTypeLegal(VT)); ++} ++ ++void Sw64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, ++ TTI::UnrollingPreferences &UP, ++ OptimizationRemarkEmitter *ORE) { ++ // Find out if L contains a call, what the machine instruction count ++ // estimate is, and how many stores there are. ++ bool HasCall = false; ++ InstructionCost NumStores = 0; ++ for (auto &BB : L->blocks()) ++ for (auto &I : *BB) { ++ if (isa(&I) || isa(&I)) { ++ if (const Function *F = cast(I).getCalledFunction()) { ++ if (isLoweredToCall(F)) ++ HasCall = true; ++ if (F->getIntrinsicID() == Intrinsic::memcpy || ++ F->getIntrinsicID() == Intrinsic::memset) ++ NumStores++; ++ } else { // indirect call. ++ HasCall = true; ++ } ++ } ++ if (isa(&I)) { ++ Type *MemAccessTy = I.getOperand(0)->getType(); ++ NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, ++ std::nullopt, 0, TTI::TCK_RecipThroughput); ++ } ++ } ++ ++ // The processor will run out of store tags if too many stores ++ // are fed into it too quickly. Therefore make sure there are not ++ // too many stores in the resulting unrolled loop. ++ unsigned const NumStoresVal = *NumStores.getValue(); ++ unsigned const Max = (NumStoresVal ? (12 / NumStoresVal) : UINT_MAX); ++ ++ if (HasCall) { ++ // Only allow full unrolling if loop has any calls. ++ UP.FullUnrollMaxCount = Max; ++ UP.MaxCount = 1; ++ return; ++ } ++ ++ UP.MaxCount = Max; ++ if (UP.MaxCount <= 1) ++ return; ++ ++ // Allow partial and runtime trip count unrolling. ++ UP.Partial = UP.Runtime = true; ++ ++ UP.PartialThreshold = 75; ++ if (L->getLoopDepth() > 1) ++ UP.PartialThreshold *= 2; ++ ++ UP.DefaultUnrollRuntimeCount = 4; ++ ++ // Allow expensive instructions in the pre-header of the loop. ++ UP.AllowExpensiveTripCount = true; ++ UP.UnrollAndJam = true; ++ ++ UP.Force = true; ++} ++ ++// Return the bit size for the scalar type or vector element ++// type. getScalarSizeInBits() returns 0 for a pointer type. ++static unsigned getScalarSizeInBits(Type *Ty) { ++ unsigned Size = (Ty->isPtrOrPtrVectorTy() ? 64U : Ty->getScalarSizeInBits()); ++ assert(Size > 0 && "Element must have non-zero size."); ++ return Size; ++} ++ ++// getNumberOfParts() calls getTypeLegalizationCost() which splits the vector ++// type until it is legal. This would e.g. return 4 for <6 x i64>, instead of ++// 3. ++static unsigned getNumVectorRegs(Type *Ty) { return 0; } ++ ++unsigned Sw64TTIImpl::getMaxInterleaveFactor(ElementCount VF) { ++ return ST->getMaxInterleaveFactor(); ++} ++ ++TypeSize Sw64TTIImpl::getRegisterBitWidth(bool Vector) const { ++ // if (Vector) ++ // return TypeSize::getFixed(256); ++ ++ return TypeSize::getFixed(64); ++} ++ ++unsigned Sw64TTIImpl::getCFInstrCost(unsigned Opcode, ++ TTI::TargetCostKind CostKind, ++ const Instruction *I) { ++ if (CostKind != TTI::TCK_RecipThroughput) ++ return Opcode == Instruction::PHI ? 0 : 1; ++ assert(CostKind == TTI::TCK_RecipThroughput && "unexpected CostKind"); ++ // Branches are assumed to be predicted. 
++ return 0; ++} ++ ++bool Sw64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode, ++ ArrayRef Args) { ++ ++ // A helper that returns a vector type from the given type. The number of ++ // elements in type Ty determine the vector width. ++ auto toVectorTy = [&](Type *ArgTy) { ++ return FixedVectorType::get(ArgTy->getScalarType(), ++ cast(DstTy)->getNumElements()); ++ }; ++ ++ // Exit early if DstTy is not a vector type whose elements are at least ++ // 16-bits wide. ++ if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16) ++ return false; ++ ++ // Determine if the operation has a widening variant. We consider both the ++ // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the ++ // instructions. ++ // ++ // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we ++ // verify that their extending operands are eliminated during code ++ // generation. ++ switch (Opcode) { ++ case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2). ++ case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2). ++ break; ++ default: ++ return false; ++ } ++ ++ // To be a widening instruction (either the "wide" or "long" versions), the ++ // second operand must be a sign- or zero extend having a single user. We ++ // only consider extends having a single user because they may otherwise not ++ // be eliminated. ++ if (Args.size() != 2 || ++ (!isa(Args[1]) && !isa(Args[1])) || ++ !Args[1]->hasOneUse()) ++ return false; ++ auto *Extend = cast(Args[1]); ++ ++ // Legalize the destination type and ensure it can be used in a widening ++ // operation. ++ auto DstTyL = getTypeLegalizationCost(DstTy); ++ unsigned DstElTySize = DstTyL.second.getScalarSizeInBits(); ++ if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits()) ++ return false; ++ ++ // Legalize the source type and ensure it can be used in a widening ++ // operation. ++ auto *SrcTy = toVectorTy(Extend->getSrcTy()); ++ auto SrcTyL = getTypeLegalizationCost(SrcTy); ++ unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits(); ++ if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits()) ++ return false; ++ ++ // Get the total number of vector elements in the legalized types. ++ InstructionCost NumDstEls = ++ DstTyL.first * DstTyL.second.getVectorMinNumElements(); ++ InstructionCost NumSrcEls = ++ SrcTyL.first * SrcTyL.second.getVectorMinNumElements(); ++ ++ // Return true if the legalized types have the same number of vector elements ++ // and the destination element type size is twice that of the source type. ++ return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize; ++} ++ ++InstructionCost Sw64TTIImpl::getArithmeticInstrCost( ++ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, ++ TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, ++ ArrayRef Args, const Instruction *CxtI) { ++ // TODO: Handle more cost kinds. ++ if (CostKind != TTI::TCK_RecipThroughput) ++ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, ++ Args, CxtI); ++ ++ // Legalize the type. ++ std::pair LT = getTypeLegalizationCost(Ty); ++ ++ // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.), ++ // add in the widening overhead specified by the sub-target. Since the ++ // extends feeding widening instructions are performed automatically, they ++ // aren't present in the generated code and have a zero cost. 
By adding a ++ // widening overhead here, we attach the total cost of the combined operation ++ // to the widening instruction. ++ InstructionCost Cost = 0; ++ if (isWideningInstruction(Ty, Opcode, Args)) ++ Cost += ST->getWideningBaseCost(); ++ ++ int ISD = TLI->InstructionOpcodeToISD(Opcode); ++ ++ switch (ISD) { ++ default: ++ return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, ++ Op2Info); ++ case ISD::SDIV: ++ if (Op2Info.isConstant() && Op2Info.isUniform() && Op2Info.isPowerOf2()) { ++ // On Sw64, scalar signed division by constants power-of-two are ++ // normally expanded to the sequence ADD + CMP + SELECT + SRA. ++ // The OperandValue properties many not be same as that of previous ++ // operation; conservatively assume OP_None. ++ Cost += ++ getArithmeticInstrCost(Instruction::Add, Ty, CostKind, ++ Op1Info.getNoProps(), Op2Info.getNoProps()); ++ Cost += ++ getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, ++ Op1Info.getNoProps(), Op2Info.getNoProps()); ++ Cost += ++ getArithmeticInstrCost(Instruction::Select, Ty, CostKind, ++ Op1Info.getNoProps(), Op2Info.getNoProps()); ++ Cost += ++ getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, ++ Op1Info.getNoProps(), Op2Info.getNoProps()); ++ return Cost; ++ } ++ [[fallthrough]]; ++ case ISD::UDIV: ++ if (Op2Info.isConstant() && Op2Info.isUniform()) { ++ auto VT = TLI->getValueType(DL, Ty); ++ if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) { ++ // Vector signed division by constant are expanded to the ++ // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division ++ // to MULHS + SUB + SRL + ADD + SRL. ++ InstructionCost MulCost = ++ getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, ++ Op1Info.getNoProps(), Op2Info.getNoProps()); ++ InstructionCost AddCost = ++ getArithmeticInstrCost(Instruction::Add, Ty, CostKind, ++ Op1Info.getNoProps(), Op2Info.getNoProps()); ++ InstructionCost ShrCost = ++ getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, ++ Op1Info.getNoProps(), Op2Info.getNoProps()); ++ return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1; ++ } ++ } ++ ++ Cost += ++ BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info); ++ if (Ty->isVectorTy()) { ++ // On Sw64, vector divisions are not supported natively and are ++ // expanded into scalar divisions of each pair of elements. ++ Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, CostKind, ++ Op1Info, Op2Info); ++ Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, CostKind, ++ Op1Info, Op2Info); ++ // TODO: if one of the arguments is scalar, then it's not necessary to ++ // double the cost of handling the vector elements. ++ Cost += Cost; ++ } ++ return Cost; ++ ++ case ISD::ADD: ++ case ISD::MUL: ++ case ISD::XOR: ++ case ISD::OR: ++ case ISD::AND: ++ // These nodes are marked as 'custom' for combining purposes only. ++ // We know that they are legal. See LowerAdd in ISelLowering. ++ return (Cost + 1) * LT.first; ++ ++ case ISD::FADD: ++ // These nodes are marked as 'custom' just to lower them to SVE. ++ // We know said lowering will incur no additional cost. 
++  if (isa<FixedVectorType>(Ty) && !Ty->getScalarType()->isFP128Ty())
++    return (Cost + 2) * LT.first;
++
++  return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
++                                              Op2Info);
++  }
++}
++InstructionCost Sw64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
++                                            VectorType *Tp, ArrayRef<int> Mask,
++                                            TTI::TargetCostKind CostKind,
++                                            int Index, VectorType *SubTp,
++                                            ArrayRef<const Value *> Args) {
++  Kind = improveShuffleKindFromMask(Kind, Mask);
++  return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
++}
++// Return the log2 difference of the element sizes of the two vector types.
++static unsigned getElSizeLog2Diff(Type *Ty0, Type *Ty1) {
++  unsigned Bits0 = Ty0->getScalarSizeInBits();
++  unsigned Bits1 = Ty1->getScalarSizeInBits();
++
++  if (Bits1 > Bits0)
++    return (Log2_32(Bits1) - Log2_32(Bits0));
++
++  return (Log2_32(Bits0) - Log2_32(Bits1));
++}
++
++// Return the number of instructions needed to truncate SrcTy to DstTy.
++unsigned Sw64TTIImpl::getVectorTruncCost(Type *SrcTy, Type *DstTy) { return 1; }
++
++// Return the cost of converting a vector bitmask produced by a compare
++// (SrcTy), to the type of the select or extend instruction (DstTy).
++unsigned Sw64TTIImpl::getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy) {
++  assert(SrcTy->isVectorTy() && DstTy->isVectorTy() &&
++         "Should only be called with vector types.");
++
++  unsigned PackCost = 0;
++  unsigned SrcScalarBits = SrcTy->getScalarSizeInBits();
++  unsigned DstScalarBits = DstTy->getScalarSizeInBits();
++  unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
++  if (SrcScalarBits > DstScalarBits)
++    // The bitmask will be truncated.
++    PackCost = getVectorTruncCost(SrcTy, DstTy);
++  else if (SrcScalarBits < DstScalarBits) {
++    unsigned DstNumParts = getNumVectorRegs(DstTy);
++    // Each vector select needs its part of the bitmask unpacked.
++    PackCost = Log2Diff * DstNumParts;
++    // Extra cost for moving part of mask before unpacking.
++    PackCost += DstNumParts - 1;
++  }
++
++  return PackCost;
++}
++
++// Return the type of the compared operands. This is needed to compute the
++// cost for a Select / ZExt or SExt instruction.
++static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
++  Type *OpTy = nullptr;
++  if (CmpInst *CI = dyn_cast<CmpInst>(I->getOperand(0)))
++    OpTy = CI->getOperand(0)->getType();
++  else if (Instruction *LogicI = dyn_cast<Instruction>(I->getOperand(0)))
++    if (LogicI->getNumOperands() == 2)
++      if (CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0)))
++        if (isa<CmpInst>(LogicI->getOperand(1)))
++          OpTy = CI0->getOperand(0)->getType();
++
++  return OpTy;
++}
++
++unsigned Sw64TTIImpl::getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
++                                                    const Instruction *I) {
++  unsigned Cost = 0;
++  return Cost;
++}
++
++InstructionCost Sw64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
++                                              Type *Src,
++                                              TTI::CastContextHint CCH,
++                                              TTI::TargetCostKind CostKind,
++                                              const Instruction *I) {
++  // FIXME: Can the logic below also be used for these cost kinds?
++  if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) {
++    auto BaseCost = BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
++    return BaseCost == 0 ? BaseCost : 1;
++  }
++
++  unsigned DstScalarBits = Dst->getScalarSizeInBits();
++  unsigned SrcScalarBits = Src->getScalarSizeInBits();
++
++  if (!Src->isVectorTy()) {
++    assert(!Dst->isVectorTy());
++
++    if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
++      if (SrcScalarBits >= 32 ||
++          (I != nullptr && isa<LoadInst>(I->getOperand(0))))
++        return 1;
++      return SrcScalarBits > 1 ?
2 /*i8/i16 extend*/ : 5 /*branch seq.*/; ++ } ++ ++ if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && ++ Src->isIntegerTy(1)) { ++ ++ // This should be extension of a compare i1 result, which is done with ++ // ipm and a varying sequence of instructions. ++ unsigned Cost = 0; ++ if (Opcode == Instruction::SExt) ++ Cost = (DstScalarBits < 64 ? 3 : 4); ++ if (Opcode == Instruction::ZExt) ++ Cost = 3; ++ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr); ++ if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy()) ++ // If operands of an fp-type was compared, this costs +1. ++ Cost++; ++ return Cost; ++ } ++ } ++ ++ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); ++} ++ ++// Scalar i8 / i16 operations will typically be made after first extending ++// the operands to i32. ++static unsigned getOperandsExtensionCost(const Instruction *I) { ++ unsigned ExtCost = 0; ++ for (Value *Op : I->operands()) ++ // A load of i8 or i16 sign/zero extends to i32. ++ if (!isa(Op) && !isa(Op)) ++ ExtCost++; ++ ++ return ExtCost; ++} ++ ++InstructionCost Sw64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, ++ Type *CondTy, ++ CmpInst::Predicate VecPred, ++ TTI::TargetCostKind CostKind, ++ const Instruction *I) { ++ if (CostKind != TTI::TCK_RecipThroughput) ++ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind); ++ ++ if (!ValTy->isVectorTy()) { ++ switch (Opcode) { ++ case Instruction::ICmp: { ++ // A loaded value compared with 0 with multiple users becomes Load and ++ // Test. The load is then not foldable, so return 0 cost for the ICmp. ++ unsigned ScalarBits = ValTy->getScalarSizeInBits(); ++ if (I != nullptr && ScalarBits >= 32) ++ if (LoadInst *Ld = dyn_cast(I->getOperand(0))) ++ if (const ConstantInt *C = dyn_cast(I->getOperand(1))) ++ if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() && ++ C->isZero()) ++ return 0; ++ ++ unsigned Cost = 1; ++ if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16) ++ Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2); ++ return Cost; ++ } ++ case Instruction::Select: ++ if (ValTy->isFloatingPointTy()) ++ return 4; // No load on condition for FP - costs a conditional jump. ++ return 1; // Load On Condition / Select Register. ++ } ++ } ++ ++ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind); ++} ++ ++// Check if a load may be folded as a memory operand in its user. ++bool Sw64TTIImpl::isFoldableLoad(const LoadInst *Ld, ++ const Instruction *&FoldedValue) { ++ if (!Ld->hasOneUse()) ++ return false; ++ FoldedValue = Ld; ++ const Instruction *UserI = cast(*Ld->user_begin()); ++ unsigned LoadedBits = getScalarSizeInBits(Ld->getType()); ++ unsigned TruncBits = 0; ++ unsigned SExtBits = 0; ++ unsigned ZExtBits = 0; ++ if (UserI->hasOneUse()) { ++ unsigned UserBits = UserI->getType()->getScalarSizeInBits(); ++ if (isa(UserI)) ++ TruncBits = UserBits; ++ else if (isa(UserI)) ++ SExtBits = UserBits; ++ else if (isa(UserI)) ++ ZExtBits = UserBits; ++ } ++ if (TruncBits || SExtBits || ZExtBits) { ++ FoldedValue = UserI; ++ UserI = cast(*UserI->user_begin()); ++ // Load (single use) -> trunc/extend (single use) -> UserI ++ } ++ if ((UserI->getOpcode() == Instruction::Sub || ++ UserI->getOpcode() == Instruction::SDiv || ++ UserI->getOpcode() == Instruction::UDiv) && ++ UserI->getOperand(1) != FoldedValue) ++ return false; // Not commutative, only RHS foldable. 
++ // LoadOrTruncBits holds the number of effectively loaded bits, but 0 if an ++ // extension was made of the load. ++ unsigned LoadOrTruncBits = ++ ((SExtBits || ZExtBits) ? 0 : (TruncBits ? TruncBits : LoadedBits)); ++ switch (UserI->getOpcode()) { ++ case Instruction::Add: // SE: 16->32, 16/32->64, z14:16->64. ZE: 32->64 ++ case Instruction::Sub: ++ case Instruction::ICmp: ++ if (LoadedBits == 32 && ZExtBits == 64) ++ return true; ++ LLVM_FALLTHROUGH; ++ case Instruction::Mul: // SE: 16->32, 32->64, z14:16->64 ++ if (UserI->getOpcode() != Instruction::ICmp) { ++ if (LoadedBits == 16 && SExtBits == 32) ++ return true; ++ if (LoadOrTruncBits == 16) ++ return true; ++ } ++ LLVM_FALLTHROUGH; ++ case Instruction::SDiv: // SE: 32->64 ++ if (LoadedBits == 32 && SExtBits == 64) ++ return true; ++ LLVM_FALLTHROUGH; ++ case Instruction::UDiv: ++ case Instruction::And: ++ case Instruction::Or: ++ case Instruction::Xor: ++ // This also makes sense for float operations, but disabled for now due ++ // to regressions. ++ // case Instruction::FCmp: ++ // case Instruction::FAdd: ++ // case Instruction::FSub: ++ // case Instruction::FMul: ++ // case Instruction::FDiv: ++ ++ // All possible extensions of memory checked above. ++ ++ // Comparison between memory and immediate. ++ if (UserI->getOpcode() == Instruction::ICmp) ++ if (ConstantInt *CI = dyn_cast(UserI->getOperand(1))) ++ if (CI->getValue().isIntN(16)) ++ return true; ++ return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64); ++ break; ++ } ++ return false; ++} ++ ++static bool isBswapIntrinsicCall(const Value *V) { ++ if (const Instruction *I = dyn_cast(V)) ++ if (auto *CI = dyn_cast(I)) ++ if (auto *F = CI->getCalledFunction()) ++ if (F->getIntrinsicID() == Intrinsic::bswap) ++ return true; ++ return false; ++} ++ ++InstructionCost Sw64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, ++ MaybeAlign Alignment, ++ unsigned AddressSpace, ++ TTI::TargetCostKind CostKind, ++ TTI::OperandValueInfo OpInfo, ++ const Instruction *I) { ++ assert(!Ty->isVoidTy() && "Invalid type"); ++ ++ // TODO: Handle other cost kinds. ++ if (CostKind != TTI::TCK_RecipThroughput) ++ return 1; ++ ++ // Type legalization can't handle structs ++ if (TLI->getValueType(DL, Ty, true) == MVT::Other) ++ return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace, ++ CostKind); ++ ++ auto LT = getTypeLegalizationCost(Ty); ++ ++ if (ST->isMisaligned256StoreSlow() && Opcode == Instruction::Store && ++ LT.second.is256BitVector() && (!Alignment || *Alignment < Align(32))) { ++ // Unaligned stores are extremely inefficient. We don't split all ++ // unaligned 128-bit stores because the negative impact that has shown in ++ // practice on inlined block copy code. ++ // We make such stores expensive so that we will only vectorize if there ++ // are 6 other instructions getting vectorized. ++ const int AmortizationCost = 6; ++ ++ return LT.first * 2 * AmortizationCost; ++ } ++ ++ if (Ty->isVectorTy() && ++ cast(Ty)->getElementType()->isIntegerTy(8)) { ++ unsigned ProfitableNumElements; ++ if (Opcode == Instruction::Store) ++ // We use a custom trunc store lowering so v.4b should be profitable. ++ ProfitableNumElements = 4; ++ else ++ // We scalarize the loads because there is not v.4b register and we ++ // have to promote the elements to v.2. 
++      ProfitableNumElements = 8;
++
++    if (cast<FixedVectorType>(Ty)->getNumElements() < ProfitableNumElements) {
++      unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
++      unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
++      // We generate 2 instructions per vector element.
++      return NumVectorizableInstsToAmortize * NumVecElts * 2;
++    }
++  }
++  return LT.first;
++}
++
++TargetTransformInfo::PopcntSupportKind
++Sw64TTIImpl::getPopcntSupport(unsigned TyWidth) {
++  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
++  // Sw64 only supports 64-bit popcount.
++  if (TyWidth == 32 || TyWidth == 64)
++    return TTI::PSK_FastHardware;
++  return TTI::PSK_Software;
++}
+diff --git a/llvm/lib/Target/Sw64/Sw64TargetTransformInfo.h b/llvm/lib/Target/Sw64/Sw64TargetTransformInfo.h
+new file mode 100644
+index 000000000..cd1b8f2f2
+--- /dev/null
++++ b/llvm/lib/Target/Sw64/Sw64TargetTransformInfo.h
+@@ -0,0 +1,137 @@
++//===-- Sw64TargetTransformInfo.h - Sw64 specific TTI ---------*- C++ -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++/// \file
++/// This file defines a TargetTransformInfo::Concept conforming object specific
++/// to the Sw64 target machine. It uses the target's detailed information to
++/// provide more precise answers to certain TTI queries, while letting the
++/// target independent and default TTI implementations handle the rest.
++///
++//===----------------------------------------------------------------------===//
++
++#ifndef LLVM_LIB_TARGET_SW64_SW64TARGETTRANSFORMINFO_H
++#define LLVM_LIB_TARGET_SW64_SW64TARGETTRANSFORMINFO_H
++
++#include "Sw64.h"
++#include "Sw64TargetMachine.h"
++#include "llvm/Analysis/TargetTransformInfo.h"
++#include "llvm/CodeGen/BasicTTIImpl.h"
++#include "llvm/CodeGen/TargetLowering.h"
++
++namespace llvm {
++
++class Sw64TTIImpl : public BasicTTIImplBase<Sw64TTIImpl> {
++  typedef BasicTTIImplBase<Sw64TTIImpl> BaseT;
++  typedef TargetTransformInfo TTI;
++  friend BaseT;
++
++  const Sw64Subtarget *ST;
++  const Sw64TargetLowering *TLI;
++
++  const Sw64Subtarget *getST() const { return ST; }
++  const Sw64TargetLowering *getTLI() const { return TLI; }
++
++  unsigned const LIBCALL_COST = 30;
++
++  bool isWideningInstruction(Type *Ty, unsigned Opcode,
++                             ArrayRef<const Value *> Args);
++
++public:
++  explicit Sw64TTIImpl(const Sw64TargetMachine *TM, const Function &F)
++      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
++        TLI(ST->getTargetLowering()) {}
++
++  unsigned getNumberOfRegisters(unsigned ClassID) const {
++    bool Vector = (ClassID == 1);
++    if (Vector) {
++      if (ST->hasSIMD())
++        return 32;
++      return 0;
++    }
++    return 32;
++  }
++
++  unsigned getMaxInterleaveFactor(ElementCount VF);
++  bool enableInterleavedAccessVectorization() { return true; }
++  TypeSize getRegisterBitWidth(bool Vector) const;
++
++  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
++                          const Instruction *I);
++
++  InstructionCost getMemoryOpCost(
++      unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace,
++      TTI::TargetCostKind CostKind,
++      TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
++      const Instruction *I = nullptr);
++  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
++                               TTI::UnrollingPreferences &UP,
++                               OptimizationRemarkEmitter *ORE);
++
++  InstructionCost getArithmeticInstrCost(
++      unsigned Opcode, Type *Ty,
TTI::TargetCostKind CostKind, ++ TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, ++ TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, ++ ArrayRef Args = ArrayRef(), ++ const Instruction *CxtI = nullptr); ++ ++ InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, ++ TTI::TargetCostKind CostKind); ++ ++ InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, ++ const APInt &Imm, Type *Ty, ++ TTI::TargetCostKind CostKind, ++ Instruction *Inst = nullptr); ++ InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, ++ const APInt &Imm, Type *Ty, ++ TTI::TargetCostKind CostKind); ++ ++ bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, ++ const TargetTransformInfo::LSRCost &C2); ++ ++ unsigned getNumberOfRegisters(bool Vector); ++ ++ unsigned getCacheLineSize() const override { return 128; } ++ unsigned getPrefetchDistance() const override { return 524; } ++ unsigned getMinPrefetchStride(unsigned NumMemAccesses, ++ unsigned NumStridedMemAccesses, ++ unsigned NumPrefetches, ++ bool HasCall) const override { ++ return 1; ++ } ++ ++ bool hasDivRemOp(Type *DataType, bool IsSigned); ++ bool prefersVectorizedAddressing() { return false; } ++ bool LSRWithInstrQueries() { return true; } ++ bool supportsEfficientVectorElementLoadStore() { return true; } ++ ++ InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ++ ArrayRef Mask, ++ TTI::TargetCostKind CostKind, int Index, ++ VectorType *SubTp, ++ ArrayRef Args = std::nullopt); ++ unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy); ++ unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy); ++ unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst, ++ const Instruction *I); ++ InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, ++ TTI::CastContextHint CCH, ++ TTI::TargetCostKind CostKind, ++ const Instruction *I = nullptr); ++ InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, ++ CmpInst::Predicate VecPred, ++ TTI::TargetCostKind CostKind, ++ const Instruction *I = nullptr); ++ bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue); ++ ++ TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth); ++ /// @} ++}; ++ ++} // end namespace llvm ++ ++#endif +diff --git a/llvm/lib/Target/Sw64/Sw64VectorVarDefine.td b/llvm/lib/Target/Sw64/Sw64VectorVarDefine.td +new file mode 100644 +index 000000000..f62d0bd6f +--- /dev/null ++++ b/llvm/lib/Target/Sw64/Sw64VectorVarDefine.td +@@ -0,0 +1,319 @@ ++//===- Sw64InstrInfo.td - The Sw64 Instruction Set -------*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// Sw64 Operand, Complex Patterns and Transformations Definitions. 
++//===----------------------------------------------------------------------===// ++ ++class ConstantSImmAsmOperandClass Supers = [], ++ int Offset = 0> : AsmOperandClass { ++ let Name = "ConstantSImm" # Bits # "_" # Offset; ++ let RenderMethod = "addConstantSImmOperands<" # Bits # ", " # Offset # ">"; ++ let PredicateMethod = "isConstantSImm<" # Bits # ", " # Offset # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "SImm" # Bits # "_" # Offset; ++} ++ ++class ConstantUImmAsmOperandClass Supers = [], ++ int Offset = 0> : AsmOperandClass { ++ let Name = "ConstantUImm" # Bits # "_" # Offset; ++ let RenderMethod = "addConstantUImmOperands<" # Bits # ", " # Offset # ">"; ++ let PredicateMethod = "isConstantUImm<" # Bits # ", " # Offset # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "UImm" # Bits # "_" # Offset; ++} ++ ++def ConstantUImm7Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "UImm7Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<7, 2>"; ++// let SuperClasses = [ConstantSImm9AsmOperandClass]; ++// let DiagnosticType = "SImm7_Lsl2"; ++} ++ ++def ConstantSImm8AsmOperandClass ++ : ConstantSImmAsmOperandClass<8, [ConstantUImm7Lsl2AsmOperandClass]>; ++ ++def ConstantUImm8AsmOperandClass ++ : ConstantUImmAsmOperandClass<8, [ConstantUImm7Lsl2AsmOperandClass]>; ++ ++foreach I = {8} in ++ def vsplat_simm # I : Operand { ++ let ParserMatchClass = ++ !cast("ConstantSImm" # I # "AsmOperandClass"); ++ } ++ ++foreach I = {8} in ++ def vsplat_uimm # I : Operand { ++ let ParserMatchClass = ++ !cast("ConstantUImm" # I # "AsmOperandClass"); ++ } ++ ++// Generic case - only to support certain assembly pseudo instructions. ++class UImmAnyAsmOperandClass Supers = []> ++ : AsmOperandClass { ++ let Name = "ImmAny"; ++ let RenderMethod = "addConstantUImmOperands<32>"; ++ let PredicateMethod = "isSImm<" # Bits # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "ImmAny"; ++} ++ ++class SImmAsmOperandClass Supers = []> ++ : AsmOperandClass { ++ let Name = "SImm" # Bits; ++ let RenderMethod = "addSImmOperands<" # Bits # ">"; ++ let PredicateMethod = "isSImm<" # Bits # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "SImm" # Bits; ++} ++ ++class UImmAsmOperandClass Supers = []> ++ : AsmOperandClass { ++ let Name = "UImm" # Bits; ++ let RenderMethod = "addUImmOperands<" # Bits # ">"; ++ let PredicateMethod = "isUImm<" # Bits # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "UImm" # Bits; ++} ++ ++def UImm32CoercedAsmOperandClass : UImmAnyAsmOperandClass<33, []> { ++ let Name = "UImm32_Coerced"; ++ let DiagnosticType = "UImm32_Coerced"; ++} ++ ++def SImm32RelaxedAsmOperandClass ++ : SImmAsmOperandClass<32, [UImm32CoercedAsmOperandClass]> { ++ let Name = "SImm32_Relaxed"; ++ let PredicateMethod = "isAnyImm<33>"; ++ let DiagnosticType = "SImm32_Relaxed"; ++} ++ ++def SImm32AsmOperandClass ++ : SImmAsmOperandClass<32, [SImm32RelaxedAsmOperandClass]>; ++def ConstantUImm26AsmOperandClass ++ : ConstantUImmAsmOperandClass<26, [SImm32AsmOperandClass]>; ++def ConstantUImm20AsmOperandClass ++ : ConstantUImmAsmOperandClass<20, [ConstantUImm26AsmOperandClass]>; ++ ++def UImm16RelaxedAsmOperandClass ++ : UImmAsmOperandClass<16, [ConstantUImm20AsmOperandClass]> { ++ let Name = "UImm16_Relaxed"; ++ let PredicateMethod = "isAnyImm<16>"; ++ let DiagnosticType = "UImm16_Relaxed"; ++} ++ ++// FIXME: One of these should probably have UImm16AsmOperandClass as the ++// superclass instead of UImm16RelaxedasmOPerandClass. 
++def UImm16AsmOperandClass ++ : UImmAsmOperandClass<16, [UImm16RelaxedAsmOperandClass]>; ++def SImm16RelaxedAsmOperandClass ++ : SImmAsmOperandClass<16, [UImm16RelaxedAsmOperandClass]> { ++ let Name = "SImm16_Relaxed"; ++ let PredicateMethod = "isAnyImm<16>"; ++ let DiagnosticType = "SImm16_Relaxed"; ++} ++ ++def SImm16AsmOperandClass ++ : SImmAsmOperandClass<16, [SImm16RelaxedAsmOperandClass]>; ++ ++def ConstantSImm10Lsl3AsmOperandClass : AsmOperandClass { ++ let Name = "SImm10Lsl3"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<10, 3>"; ++ let SuperClasses = [SImm16AsmOperandClass]; ++ let DiagnosticType = "SImm10_Lsl3"; ++} ++ ++def Sw64MemAsmOperand : AsmOperandClass { ++ let Name = "Mem"; ++ let ParserMethod = "parseMemOperand"; ++} ++ ++foreach I = {16, 32} in ++ def simm # I : Operand { ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<" # I # ">"; ++ let ParserMatchClass = !cast("SImm" # I # "AsmOperandClass"); ++ } ++ ++foreach I = {1, 2, 3} in ++ def Sw64MemSimm16Lsl # I # AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm16_" # I; ++ let SuperClasses = [Sw64MemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<10, " # I # ">"; ++ let DiagnosticType = "MemSImm10Lsl" # I; ++ } ++ ++class mem_generic : Operand { ++ let PrintMethod = "printMemOperand"; ++ let MIOperandInfo = (ops ptr_rc, simm16); ++ let EncoderMethod = "getMemEncoding"; ++ let ParserMatchClass = Sw64MemAsmOperand; ++ let OperandType = "OPERAND_MEMORY"; ++} ++ ++def ConstantSImm10Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "SImm10Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<10, 2>"; ++ let SuperClasses = [ConstantSImm10Lsl3AsmOperandClass]; ++ let DiagnosticType = "SImm10_Lsl2"; ++} ++ ++foreach I = {2, 3} in ++ def simm16_ # I : Operand { ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<10, " # I # ">"; ++ let ParserMatchClass = ++ !cast("ConstantSImm10Lsl" # I # "AsmOperandClass"); ++ } ++ ++ def mem_simm16 : mem_generic { ++ let MIOperandInfo = (ops ptr_rc, !cast("simm16_2")); ++ let EncoderMethod = "getMemEncoding<2>"; ++ let ParserMatchClass = ++ !cast("Sw64MemSimm16Lsl2AsmOperand"); ++ } ++ ++ def mem_simm12 : mem_generic { ++ let MIOperandInfo = (ops ptr_rc, !cast("simm16_3")); ++ let EncoderMethod = "getMemEncoding<3>"; ++ let ParserMatchClass = ++ !cast("Sw64MemSimm16Lsl3AsmOperand"); ++ } ++ ++class ConstantUImmRangeAsmOperandClass Supers = []> ++ : AsmOperandClass { ++ let Name = "ConstantUImmRange" # Bottom # "_" # Top; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isConstantUImmRange<" # Bottom # ", " # Top # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "UImmRange" # Bottom # "_" # Top; ++} ++ ++def ConstantSImm19Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "SImm19Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<19, 2>"; ++ let SuperClasses = [ConstantUImm20AsmOperandClass]; ++ let DiagnosticType = "SImm19_Lsl2"; ++} ++ ++def ConstantSImm11AsmOperandClass ++ : ConstantSImmAsmOperandClass<11, [ConstantSImm10Lsl2AsmOperandClass]>; ++def ConstantSImm10Lsl1AsmOperandClass : AsmOperandClass { ++ let Name = "SImm10Lsl1"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<10, 1>"; ++ let SuperClasses = [ConstantSImm11AsmOperandClass]; ++ let DiagnosticType = "SImm10_Lsl1"; ++} ++ ++def ConstantUImm10AsmOperandClass ++ 
: ConstantUImmAsmOperandClass<10, [ConstantSImm10Lsl1AsmOperandClass]>; ++def ConstantSImm10AsmOperandClass ++ : ConstantSImmAsmOperandClass<10, [ConstantUImm10AsmOperandClass]>; ++def ConstantSImm9AsmOperandClass ++ : ConstantSImmAsmOperandClass<9, [ConstantSImm10AsmOperandClass]>; ++def ConstantSImm7Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "SImm7Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<7, 2>"; ++ let SuperClasses = [ConstantSImm9AsmOperandClass]; ++ let DiagnosticType = "SImm7_Lsl2"; ++} ++ ++def ConstantUImm7Sub1AsmOperandClass ++ : ConstantUImmAsmOperandClass<7, [ConstantUImm8AsmOperandClass], -1> { ++ // Specify the names since the -1 offset causes invalid identifiers otherwise. ++ let Name = "UImm7_N1"; ++ let DiagnosticType = "UImm7_N1"; ++} ++def ConstantUImm7AsmOperandClass ++ : ConstantUImmAsmOperandClass<7, [ConstantUImm7Sub1AsmOperandClass]>; ++def ConstantUImm6Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "UImm6Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledUImm<6, 2>"; ++ let SuperClasses = [ConstantUImm7AsmOperandClass]; ++ let DiagnosticType = "UImm6_Lsl2"; ++} ++ ++def ConstantUImm6AsmOperandClass ++ : ConstantUImmAsmOperandClass<6, [ConstantUImm6Lsl2AsmOperandClass]>; ++def ConstantSImm6AsmOperandClass ++ : ConstantSImmAsmOperandClass<6, [ConstantUImm6AsmOperandClass]>; ++ ++def ConstantUImm5Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "UImm5Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledUImm<5, 2>"; ++ let SuperClasses = [ConstantSImm6AsmOperandClass]; ++ let DiagnosticType = "UImm5_Lsl2"; ++} ++def ConstantUImm5_Range2_64AsmOperandClass ++ : ConstantUImmRangeAsmOperandClass<2, 64, [ConstantUImm5Lsl2AsmOperandClass]>; ++def ConstantUImm5Plus33AsmOperandClass ++ : ConstantUImmAsmOperandClass<5, [ConstantUImm5_Range2_64AsmOperandClass], ++ 33>; ++def ConstantUImm5ReportUImm6AsmOperandClass ++ : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus33AsmOperandClass]> { ++ let Name = "ConstantUImm5_0_Report_UImm6"; ++ let DiagnosticType = "UImm5_0_Report_UImm6"; ++} ++def ConstantUImm5Plus32AsmOperandClass ++ : ConstantUImmAsmOperandClass< ++ 5, [ConstantUImm5ReportUImm6AsmOperandClass], 32>; ++def ConstantUImm5Plus32NormalizeAsmOperandClass ++ : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus32AsmOperandClass], 32> { ++ let Name = "ConstantUImm5_32_Norm"; ++ // We must also subtract 32 when we render the operand. 
++ let RenderMethod = "addConstantUImmOperands<5, 32, -32>"; ++} ++ ++def ConstantUImm5Plus1ReportUImm6AsmOperandClass ++ : ConstantUImmAsmOperandClass< ++ 5, [ConstantUImm5Plus32NormalizeAsmOperandClass], 1>{ ++ let Name = "ConstantUImm5_Plus1_Report_UImm6"; ++} ++ ++def ConstantUImm5Plus1AsmOperandClass ++ : ConstantUImmAsmOperandClass< ++ 5, [ConstantUImm5Plus1ReportUImm6AsmOperandClass], 1>; ++def ConstantUImm5AsmOperandClass ++ : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus1AsmOperandClass]>; ++def ConstantSImm5AsmOperandClass ++ : ConstantSImmAsmOperandClass<5, [ConstantUImm5AsmOperandClass]>; ++def ConstantUImm4AsmOperandClass ++ : ConstantUImmAsmOperandClass<4, [ConstantSImm5AsmOperandClass]>; ++def ConstantSImm4AsmOperandClass ++ : ConstantSImmAsmOperandClass<4, [ConstantUImm4AsmOperandClass]>; ++def ConstantUImm3AsmOperandClass ++ : ConstantUImmAsmOperandClass<3, [ConstantSImm4AsmOperandClass]>; ++def ConstantUImm2Plus1AsmOperandClass ++ : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass], 1>; ++def ConstantUImm2AsmOperandClass ++ : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass]>; ++def ConstantUImm1AsmOperandClass ++ : ConstantUImmAsmOperandClass<1, [ConstantUImm2AsmOperandClass]>; ++ ++// Unsigned Operands ++foreach I = {1, 2, 3, 4, 5, 6, 7, 8, 10, 20, 26} in ++ def uimm # I : Operand { ++ let PrintMethod = "printUImm<" # I # ">"; ++ let ParserMatchClass = ++ !cast("ConstantUImm" # I # "AsmOperandClass"); ++ } ++ ++foreach I = {1, 2, 3, 4} in ++ def uimm # I # _ptr : Operand { ++ let PrintMethod = "printUImm<" # I # ">"; ++ let ParserMatchClass = ++ !cast("ConstantUImm" # I # "AsmOperandClass"); ++ } +diff --git a/llvm/lib/Target/Sw64/TargetInfo/CMakeLists.txt b/llvm/lib/Target/Sw64/TargetInfo/CMakeLists.txt +new file mode 100644 +index 000000000..cf9ad9220 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/TargetInfo/CMakeLists.txt +@@ -0,0 +1,10 @@ ++add_llvm_component_library(LLVMSw64Info ++ Sw64TargetInfo.cpp ++ ++ LINK_COMPONENTS ++ Support ++ ++ ADD_TO_COMPONENT ++ Sw64 ++ ++ ) +diff --git a/llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.cpp b/llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.cpp +new file mode 100644 +index 000000000..02a741e72 +--- /dev/null ++++ b/llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.cpp +@@ -0,0 +1,24 @@ ++//===-- Sw64TargetInfo.cpp - Sw64 Target Implementation -----------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
++//
++//===----------------------------------------------------------------------===//
++
++#include "TargetInfo/Sw64TargetInfo.h"
++#include "llvm/IR/Module.h"
++#include "llvm/MC/TargetRegistry.h"
++using namespace llvm;
++
++Target &llvm::getTheSw64Target() {
++  static Target TheSw64Target;
++  return TheSw64Target;
++}
++
++extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSw64TargetInfo() {
++  RegisterTarget<Triple::sw_64, /*HasJIT=*/true>
++      X(getTheSw64Target(), "sw_64", "Sw64", "Sw64");
++}
+diff --git a/llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.h b/llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.h
+new file mode 100644
+index 000000000..723121a75
+--- /dev/null
++++ b/llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.h
+@@ -0,0 +1,12 @@
++#ifndef LLVM_LIB_TARGET_SW_64_TARGETINFO_SW64TARGETINFO_H
++#define LLVM_LIB_TARGET_SW_64_TARGETINFO_SW64TARGETINFO_H
++
++namespace llvm {
++
++class Target;
++
++Target &getTheSw64Target();
++
++} // namespace llvm
++
++#endif // LLVM_LIB_TARGET_SW_64_TARGETINFO_SW64TARGETINFO_H
+diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
+index c2de2293d..a6fb32a54 100644
+--- a/llvm/lib/TargetParser/Host.cpp
++++ b/llvm/lib/TargetParser/Host.cpp
+@@ -483,6 +483,10 @@ StringRef sys::detail::getHostCPUNameForBPF() {
+ #endif
+ }
+ 
++StringRef sys::detail::getHostCPUNameForSW64(StringRef ProcCpuinfoContent) {
++  return "sw_64";
++}
++
+ #if defined(__i386__) || defined(_M_IX86) || \
+     defined(__x86_64__) || defined(_M_X64)
+ 
+@@ -1457,6 +1461,12 @@ StringRef sys::getHostCPUName() {
+     return "generic";
+   }
+ }
++#elif defined(__linux__) && defined(__sw_64__)
++StringRef sys::getHostCPUName() {
++  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
++  StringRef Content = P ? P->getBuffer() : "";
++  return detail::getHostCPUNameForSW64(Content);
++}
+ #elif defined(__loongarch__)
+ StringRef sys::getHostCPUName() {
+   // Use processor id to detect cpu name.
+diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp +index a3d6a06af..8c322622d 100644 +--- a/llvm/lib/TargetParser/Triple.cpp ++++ b/llvm/lib/TargetParser/Triple.cpp +@@ -72,6 +72,7 @@ StringRef Triple::getArchTypeName(ArchType Kind) { + case spir: return "spir"; + case spirv32: return "spirv32"; + case spirv64: return "spirv64"; ++ case sw_64: return "sw_64"; + case systemz: return "s390x"; + case tce: return "tce"; + case tcele: return "tcele"; +@@ -131,6 +132,8 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) { + case sparcel: + case sparc: return "sparc"; + ++ case sw_64: return "sw64"; ++ + case systemz: return "s390"; + + case x86: +@@ -361,6 +364,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { + .Case("sparc", sparc) + .Case("sparcel", sparcel) + .Case("sparcv9", sparcv9) ++ .Case("sw_64", sw_64) + .Case("s390x", systemz) + .Case("systemz", systemz) + .Case("tce", tce) +@@ -508,6 +512,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { + .Case("sparc", Triple::sparc) + .Case("sparcel", Triple::sparcel) + .Cases("sparcv9", "sparc64", Triple::sparcv9) ++ .Cases("sw", "sw_64", "sw6a", "sw6b", "sw4d", "sw8a", Triple::sw_64) + .Case("tce", Triple::tce) + .Case("tcele", Triple::tcele) + .Case("xcore", Triple::xcore) +@@ -683,6 +688,17 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) { + (SubArchName.endswith("r6el") || SubArchName.endswith("r6"))) + return Triple::MipsSubArch_r6; + ++ if (SubArchName.startswith("sw")) { ++ if (SubArchName.endswith("6a")) ++ return Triple::Sw64SubArch_6a; ++ else if (SubArchName.endswith("6b")) ++ return Triple::Sw64SubArch_6b; ++ else if (SubArchName.endswith("4d")) ++ return Triple::Sw64SubArch_4d; ++ else if (SubArchName.endswith("8a")) ++ return Triple::Sw64SubArch_8a; ++ } ++ + if (SubArchName == "powerpcspe") + return Triple::PPCSubArch_spe; + +@@ -846,6 +862,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { + case Triple::sparcv9: + case Triple::spir64: + case Triple::spir: ++ case Triple::sw_64: + case Triple::tce: + case Triple::tcele: + case Triple::thumbeb: +@@ -1458,6 +1475,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { + case llvm::Triple::ve: + case llvm::Triple::wasm64: + case llvm::Triple::x86_64: ++ case llvm::Triple::sw_64: + return 64; + } + llvm_unreachable("Invalid architecture value"); +@@ -1486,6 +1504,7 @@ Triple Triple::get32BitArchVariant() const { + case Triple::msp430: + case Triple::systemz: + case Triple::ve: ++ case Triple::sw_64: + T.setArch(UnknownArch); + break; + +@@ -1601,6 +1620,7 @@ Triple Triple::get64BitArchVariant() const { + case Triple::ve: + case Triple::wasm64: + case Triple::x86_64: ++ case Triple::sw_64: + // Already 64-bit. 
+ break; + +@@ -1668,6 +1688,7 @@ Triple Triple::getBigEndianArchVariant() const { + case Triple::spir: + case Triple::spirv32: + case Triple::spirv64: ++ case Triple::sw_64: + case Triple::wasm32: + case Triple::wasm64: + case Triple::x86: +@@ -1777,6 +1798,7 @@ bool Triple::isLittleEndian() const { + case Triple::spir: + case Triple::spirv32: + case Triple::spirv64: ++ case Triple::sw_64: + case Triple::tcele: + case Triple::thumb: + case Triple::ve: +diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +index f4bf6db56..51565c82a 100644 +--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp ++++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +@@ -117,6 +117,7 @@ static const uint64_t kNetBSD_ShadowOffset64 = 1ULL << 46; + static const uint64_t kNetBSDKasan_ShadowOffset64 = 0xdfff900000000000; + static const uint64_t kPS_ShadowOffset64 = 1ULL << 40; + static const uint64_t kWindowsShadowOffset32 = 3ULL << 28; ++static const uint64_t kSW64_ShadowOffset64 = 1ULL << 49; + static const uint64_t kEmscriptenShadowOffset = 0; + + // The shadow memory space is dynamically allocated. +@@ -498,6 +499,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, + bool IsFuchsia = TargetTriple.isOSFuchsia(); + bool IsEmscripten = TargetTriple.isOSEmscripten(); + bool IsAMDGPU = TargetTriple.isAMDGPU(); ++ bool IsSW64 = TargetTriple.isSw64(); + + ShadowMapping Mapping; + +@@ -571,6 +573,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, + else if (IsAMDGPU) + Mapping.Offset = (kSmallX86_64ShadowOffsetBase & + (kSmallX86_64ShadowOffsetAlignMask << Mapping.Scale)); ++ else if (IsSW64) ++ Mapping.Offset = kSW64_ShadowOffset64; + else + Mapping.Offset = kDefaultShadowOffset64; + } +diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +index 362fd6e41..f656a21b5 100644 +--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp ++++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +@@ -447,6 +447,14 @@ static const MemoryMapParams Linux_LoongArch64_MemoryMapParams = { + 0x100000000000, // OriginBase + }; + ++// sw_64 Linux ++static const MemoryMapParams Linux_SW64_MemoryMapParams = { ++ 0, // AndMask (not used) ++ 0x1000000000000ULL, // XorMask ++ 0, // ShadowBase (not used) ++ 0x4000000000000ULL, // OriginBase ++}; ++ + // aarch64 FreeBSD + static const MemoryMapParams FreeBSD_AArch64_MemoryMapParams = { + 0x1800000000000, // AndMask +@@ -509,6 +517,11 @@ static const PlatformMemoryMapParams Linux_LoongArch_MemoryMapParams = { + &Linux_LoongArch64_MemoryMapParams, + }; + ++static const PlatformMemoryMapParams Linux_Sw64_MemoryMapParams = { ++ nullptr, ++ &Linux_SW64_MemoryMapParams, ++}; ++ + static const PlatformMemoryMapParams FreeBSD_ARM_MemoryMapParams = { + nullptr, + &FreeBSD_AArch64_MemoryMapParams, +@@ -555,6 +568,7 @@ private: + friend struct VarArgAArch64Helper; + friend struct VarArgPowerPC64Helper; + friend struct VarArgSystemZHelper; ++ friend struct VarArgSw64Helper; + + void initializeModule(Module &M); + void initializeCallbacks(Module &M, const TargetLibraryInfo &TLI); +@@ -1033,6 +1047,9 @@ void MemorySanitizer::initializeModule(Module &M) { + case Triple::loongarch64: + MapParams = Linux_LoongArch_MemoryMapParams.bits64; + break; ++ case Triple::sw_64: ++ MapParams = Linux_Sw64_MemoryMapParams.bits64; ++ break; + default: + 
report_fatal_error("unsupported architecture");
+  }
+@@ -5332,6 +5349,170 @@ struct VarArgAArch64Helper : public VarArgHelper {
+   }
+ };
+ 
++/// Sw64-specific implementation of VarArgHelper.
++struct VarArgSw64Helper : public VarArgHelper {
++  static const unsigned kSw64GrArgSize = 64;
++  static const unsigned kSw64VrArgSize = 64;
++  static const unsigned Sw64GrBegOffset = 0;
++  static const unsigned Sw64GrEndOffset = kSw64GrArgSize;
++  // Make VR space aligned to 16 bytes.
++  static const unsigned Sw64VrBegOffset = Sw64GrEndOffset;
++  static const unsigned Sw64VrEndOffset = Sw64VrBegOffset + kSw64VrArgSize;
++  static const unsigned Sw64VAEndOffset = Sw64VrEndOffset;
++
++  Function &F;
++  MemorySanitizer &MS;
++  MemorySanitizerVisitor &MSV;
++  Value *VAArgTLSCopy = nullptr;
++  Value *VAArgOverflowSize = nullptr;
++
++  SmallVector<CallInst *, 16> VAStartInstrumentationList;
++
++  enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
++
++  VarArgSw64Helper(Function &F, MemorySanitizer &MS,
++                   MemorySanitizerVisitor &MSV)
++      : F(F), MS(MS), MSV(MSV) {}
++
++  ArgKind classifyArgument(Value *arg) {
++    Type *T = arg->getType();
++    if (T->isFPOrFPVectorTy())
++      return AK_FloatingPoint;
++    if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64) ||
++        (T->isPointerTy()))
++      return AK_GeneralPurpose;
++    return AK_Memory;
++  }
++
++  // The instrumentation stores the argument shadow in a non ABI-specific
++  // format because it does not know which argument is named.
++  // The first seven GR registers are saved in the first 56 bytes of the
++  // va_arg TLS array, followed by the first 8 FP/SIMD registers, and then
++  // the remaining arguments.
++  // Using a constant offset within the va_arg TLS array allows fast copy
++  // in the finalize instrumentation.
++  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
++    unsigned VAArgOffset = 0;
++    const DataLayout &DL = F.getParent()->getDataLayout();
++    for (auto ArgIt = CB.arg_begin() + CB.getFunctionType()->getNumParams(),
++              End = CB.arg_end();
++         ArgIt != End; ++ArgIt) {
++      Triple TargetTriple(F.getParent()->getTargetTriple());
++      Value *A = *ArgIt;
++      Value *Base;
++      uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
++      if (TargetTriple.getArch() == Triple::mips64) {
++        // Adjusting the shadow for argument with size < 8 to match the
++        // placement of bits in big endian system
++        if (ArgSize < 8)
++          VAArgOffset += (8 - ArgSize);
++      }
++      Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize);
++      VAArgOffset += ArgSize;
++      VAArgOffset = alignTo(VAArgOffset, 8);
++      if (!Base)
++        continue;
++      IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
++    }
++
++    Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
++    // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
++    // a new class member i.e. it is the total size of all VarArgs.
++    IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
++  }
++
++  /// Compute the shadow address for a given va_arg.
++  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
++                                   unsigned ArgOffset, unsigned ArgSize) {
++    // Make sure we don't overflow __msan_va_arg_tls.
++ if (ArgOffset + ArgSize > kParamTLSSize) ++ return nullptr; ++ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy); ++ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); ++ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0), ++ "_msarg"); ++ } ++ ++ void visitVAStartInst(VAStartInst &I) override { ++ IRBuilder<> IRB(&I); ++ VAStartInstrumentationList.push_back(&I); ++ Value *VAListTag = I.getArgOperand(0); ++ Value *ShadowPtr, *OriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( ++ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); ++ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), ++ /* size */ 16, Alignment, false); ++ } ++ ++ void visitVACopyInst(VACopyInst &I) override { ++ IRBuilder<> IRB(&I); ++ VAStartInstrumentationList.push_back(&I); ++ Value *VAListTag = I.getArgOperand(0); ++ Value *ShadowPtr, *OriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( ++ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); ++ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), ++ /* size */ 16, Alignment, false); ++ } ++ ++ // Retrieve a va_list field of 'void*' size. ++ Value *getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) { ++ Value *SaveAreaPtrPtr = IRB.CreateIntToPtr( ++ IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), ++ ConstantInt::get(MS.IntptrTy, offset)), ++ Type::getInt64PtrTy(*MS.C)); ++ return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr); ++ } ++ ++ // Retrieve a va_list field of 'int' size. ++ Value *getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) { ++ Value *SaveAreaPtr = IRB.CreateIntToPtr( ++ IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), ++ ConstantInt::get(MS.IntptrTy, offset)), ++ Type::getInt32PtrTy(*MS.C)); ++ Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr); ++ return IRB.CreateSExt(SaveArea32, MS.IntptrTy); ++ } ++ ++ void finalizeInstrumentation() override { ++ assert(!VAArgOverflowSize && !VAArgTLSCopy && ++ "finalizeInstrumentation called twice"); ++ IRBuilder<> IRB(MSV.FnPrologueEnd); ++ VAArgOverflowSize = ++ IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS); ++ Value *CopySize = ++ IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0), VAArgOverflowSize); ++ ++ if (!VAStartInstrumentationList.empty()) { ++ // If there is a va_start in this function, make a backup copy of ++ // va_arg_tls somewhere in the function entry block. ++ VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); ++ IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize); ++ } ++ ++ // Instrument va_start, copy va_list shadow from the backup copy of ++ // the TLS contents. ++ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) { ++ CallInst *OrigInst = VAStartInstrumentationList[i]; ++ IRBuilder<> IRB(OrigInst->getNextNode()); ++ ++ Value *VAListTag = OrigInst->getArgOperand(0); ++ ++ // Read the stack pointer from the va_list. 
++ Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0); ++ Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = ++ MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(), ++ Align(8), /*isStore*/ true); ++ IRB.CreateMemCpy(StackSaveAreaPtr, Align(8), VAArgTLSCopy, Align(8), ++ CopySize); ++ } ++ } ++}; ++ + /// PowerPC64-specific implementation of VarArgHelper. + struct VarArgPowerPC64Helper : public VarArgHelper { + Function &F; +diff --git a/llvm/test/ExecutionEngine/MCJIT/eh-lg-pic.ll b/llvm/test/ExecutionEngine/MCJIT/eh-lg-pic.ll +index 80c90cbf5..4aea4f945 100644 +--- a/llvm/test/ExecutionEngine/MCJIT/eh-lg-pic.ll ++++ b/llvm/test/ExecutionEngine/MCJIT/eh-lg-pic.ll +@@ -1,7 +1,7 @@ + ; REQUIRES: cxx-shared-library + ; RUN: %lli -jit-kind=mcjit -relocation-model=pic -code-model=large %s + ; XFAIL: target={{.*-(cygwin|windows-msvc|windows-gnu)}} +-; XFAIL: target={{(mips|mipsel)-.*}}, target={{(i686|i386).*}}, target={{(aarch64|arm).*}} ++; XFAIL: target={{(mips|mipsel)-.*}}, target={{(i686|i386).*}}, target={{(aarch64|arm).*}}, target={{(sw_64).*}} + declare ptr @__cxa_allocate_exception(i64) + declare void @__cxa_throw(ptr, ptr, ptr) + declare i32 @__gxx_personality_v0(...) +diff --git a/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg b/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg +index b6874dd86..4af68aa23 100644 +--- a/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg ++++ b/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg +@@ -7,6 +7,7 @@ if ( + | ("Mips" in targets) + | ("PowerPC" in targets) + | ("SystemZ" in targets) ++ | ("Sw64" in targets) + ): + config.unsupported = False + else: +@@ -25,6 +26,7 @@ if root.host_arch not in [ + "PowerPC", + "ppc64", + "ppc64le", ++ "sw_64", + "SystemZ", + ]: + config.unsupported = True +diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/lit.local.cfg b/llvm/test/ExecutionEngine/MCJIT/remote/lit.local.cfg +index 5095d98a5..d584f3aa6 100644 +--- a/llvm/test/ExecutionEngine/MCJIT/remote/lit.local.cfg ++++ b/llvm/test/ExecutionEngine/MCJIT/remote/lit.local.cfg +@@ -1,6 +1,10 @@ + if "armv4" in config.root.target_triple or "armv5" in config.root.target_triple: + config.unsupported = True + ++# Remote MCJIT is not supported on sw_64 now. 
++if 'sw_64' in config.root.target_triple: ++ config.unsupported = True ++ + # This is temporary, until Remote MCJIT works on ARM + # See http://llvm.org/bugs/show_bug.cgi?id=18057 + # if 'armv7' in config.root.target_triple: +diff --git a/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg b/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg +index cbd7c5440..4503e5fbf 100644 +--- a/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg ++++ b/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg +@@ -10,6 +10,7 @@ if config.root.host_arch not in [ + "mips64", + "mips64el", + "loongarch64", ++ "sw_64", + ]: + config.unsupported = True + +diff --git a/llvm/test/tools/llvm-reduce/file-output-type.test b/llvm/test/tools/llvm-reduce/file-output-type.test +index 93b9ca4ac..b6a4347af 100644 +--- a/llvm/test/tools/llvm-reduce/file-output-type.test ++++ b/llvm/test/tools/llvm-reduce/file-output-type.test +@@ -1,4 +1,5 @@ + # REQUIRES: default_triple ++# XFAIL: target={{(sw_64).*}} + # RUN: rm -rf %t.dir && mkdir %t.dir && cd %t.dir + + # RUN: llvm-as -o test-output-format.bc %p/Inputs/test-output-format.ll +-- +2.33.0 + diff --git a/llvm.spec b/llvm.spec index 0bb6d58b44823bda3e27d08ca19c84be8432c62d..db6598d05d5ae42cec80f508649ae0ec676df9ac 100644 --- a/llvm.spec +++ b/llvm.spec @@ -1,4 +1,4 @@ -%define anolis_release 5 +%define anolis_release 6 %global toolchain clang @@ -150,6 +150,9 @@ Patch79: 0079-LoongArch-Support-la664-100068.patch Patch80: 0080-LoongArch-Remove-experimental-auto-vec-feature.-1000.patch Patch81: 0081-LoongArch-Delete-R_LARCH_DELETE-and-R_LARCH_CFA-relo.patch +# Patches for Sw64 +Patch82: 0082-Sw64-Add-Sw64-target-support-for-llvm.patch + BuildRequires: gcc gcc-c++ clang cmake ninja-build zlib-devel libffi-devel BuildRequires: libxml2-devel ncurses-devel python3-psutil python3-sphinx BuildRequires: python3-recommonmark python3-yaml @@ -575,6 +578,9 @@ fi %endif %changelog +* Fri May 16 2025 swcompiler - 17.0.6-6 +- Add Sw64 support for llvm + * Sun Apr 27 2025 Shangtong Guo - 17.0.6-5 - add support for riscv64 build
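
Editor's note (not part of the patch or spec): the Sw64TargetInfo.cpp added above registers the backend with the TargetRegistry under the name "sw_64", and the Triple.cpp changes teach LLVM to parse sw_64 triples. A quick way to confirm the registration after installing the patched llvm package is a small standalone program such as the sketch below. It is illustrative only, and it assumes the program is compiled and linked against the patched LLVM 17 libraries (for example via `llvm-config --cxxflags --ldflags --libs`); the file name check_sw64_target.cpp is made up for the example.

// check_sw64_target.cpp -- illustrative smoke test, not part of the patch.
// Build (assumption): clang++ check_sw64_target.cpp \
//   $(llvm-config --cxxflags --ldflags --libs) -o check_sw64_target
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

// Provided by the patched LLVMSw64Info library (Sw64TargetInfo.cpp); normally
// reached through LLVMInitializeAllTargetInfos() once Sw64 is built in.
extern "C" void LLVMInitializeSw64TargetInfo();

int main() {
  // Register the Sw64 target info so the registry knows about "sw_64".
  LLVMInitializeSw64TargetInfo();

  std::string Error;
  const llvm::Target *T =
      llvm::TargetRegistry::lookupTarget("sw_64-unknown-linux-gnu", Error);
  if (!T) {
    llvm::errs() << "sw_64 target not registered: " << Error << "\n";
    return 1;
  }
  llvm::outs() << "found target '" << T->getName() << "': "
               << T->getShortDescription() << "\n";
  return 0;
}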