diff --git a/0001-Support-LoongArch.patch b/0001-Support-LoongArch.patch new file mode 100644 index 0000000000000000000000000000000000000000..e36cfc20b7d19c7e78cb100bad598993a3126b04 --- /dev/null +++ b/0001-Support-LoongArch.patch @@ -0,0 +1,773 @@ +diff --git a/openmp/README.rst b/openmp/README.rst +index 0e4916f44..0daeb3389 100644 +--- a/openmp/README.rst ++++ b/openmp/README.rst +@@ -141,7 +141,7 @@ Options for all Libraries + Options for ``libomp`` + ---------------------- + +-**LIBOMP_ARCH** = ``aarch64|arm|i386|loongarch64|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv64|s390x`` ++**LIBOMP_ARCH** = ``aarch64|arm|i386|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv64|s390x|loongarch64`` + The default value for this option is chosen based on probing the compiler for + architecture macros (e.g., is ``__x86_64__`` predefined by compiler?). + +@@ -198,7 +198,7 @@ Optional Features + **LIBOMP_OMPT_SUPPORT** = ``ON|OFF`` + Include support for the OpenMP Tools Interface (OMPT). + This option is supported and ``ON`` by default for x86, x86_64, AArch64, +- PPC64, RISCV64, LoongArch64, and s390x on Linux* and macOS*. ++ PPC64, RISCV64, loongarch64 and s390x on Linux* and macOS*. + This option is ``OFF`` if this feature is not supported for the platform. 
+ + **LIBOMP_OMPT_OPTIONAL** = ``ON|OFF`` +diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt +index 041b60efa..0c3d642b8 100644 +--- a/openmp/runtime/CMakeLists.txt ++++ b/openmp/runtime/CMakeLists.txt +@@ -30,7 +30,7 @@ if(${OPENMP_STANDALONE_BUILD}) + # If adding a new architecture, take a look at cmake/LibompGetArchitecture.cmake + libomp_get_architecture(LIBOMP_DETECTED_ARCH) + set(LIBOMP_ARCH ${LIBOMP_DETECTED_ARCH} CACHE STRING +- "The architecture to build for (x86_64/i386/arm/ppc/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64/loongarch64/ve/s390x/wasm32).") ++ "The architecture to build for (x86_64/i386/arm/ppc/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64/ve/s390x/wasm32/loongarch64).") + # Should assertions be enabled? They are on by default. + set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL + "enable assertions?") +@@ -63,14 +63,14 @@ else() # Part of LLVM build + set(LIBOMP_ARCH arm) + elseif(LIBOMP_NATIVE_ARCH MATCHES "riscv64") + set(LIBOMP_ARCH riscv64) +- elseif(LIBOMP_NATIVE_ARCH MATCHES "loongarch64") +- set(LIBOMP_ARCH loongarch64) + elseif(LIBOMP_NATIVE_ARCH MATCHES "ve") + set(LIBOMP_ARCH ve) + elseif(LIBOMP_NATIVE_ARCH MATCHES "s390x") + set(LIBOMP_ARCH s390x) + elseif(LIBOMP_NATIVE_ARCH MATCHES "wasm") + set(LIBOMP_ARCH wasm32) ++ elseif(LIBOMP_NATIVE_ARCH MATCHES "loongarch64") ++ set(LIBOMP_ARCH loongarch64) + else() + # last ditch effort + libomp_get_architecture(LIBOMP_ARCH) +@@ -91,7 +91,7 @@ if(LIBOMP_ARCH STREQUAL "aarch64") + endif() + endif() + +-libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64 loongarch64 ve s390x wasm32) ++libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64 ve s390x wasm32 loongarch64) + + set(LIBOMP_LIB_TYPE normal CACHE STRING + "Performance,Profiling,Stubs library (normal/profile/stubs)") +@@ -175,11 +175,11 @@ set(MIC FALSE) + set(MIPS64 FALSE) + 
set(MIPS FALSE) + set(RISCV64 FALSE) +-set(LOONGARCH64 FALSE) + set(VE FALSE) + set(S390X FALSE) + set(WASM FALSE) + set(PPC FALSE) ++set(LoongArch64 FALSE) + if("${LIBOMP_ARCH}" STREQUAL "i386" OR "${LIBOMP_ARCH}" STREQUAL "32") # IA-32 architecture + set(IA32 TRUE) + elseif("${LIBOMP_ARCH}" STREQUAL "x86_64" OR "${LIBOMP_ARCH}" STREQUAL "32e") # Intel(R) 64 architecture +@@ -206,14 +206,14 @@ elseif("${LIBOMP_ARCH}" STREQUAL "mips64") # MIPS64 architecture + set(MIPS64 TRUE) + elseif("${LIBOMP_ARCH}" STREQUAL "riscv64") # RISCV64 architecture + set(RISCV64 TRUE) +-elseif("${LIBOMP_ARCH}" STREQUAL "loongarch64") # LoongArch64 architecture +- set(LOONGARCH64 TRUE) + elseif("${LIBOMP_ARCH}" STREQUAL "ve") # VE architecture + set(VE TRUE) + elseif("${LIBOMP_ARCH}" STREQUAL "s390x") # S390x (Z) architecture + set(S390X TRUE) + elseif("${LIBOMP_ARCH}" STREQUAL "wasm32") # WebAssembly architecture + set(WASM TRUE) ++elseif("${LIBOMP_ARCH}" STREQUAL "loongarch64") # LoongArch64 architecture ++ set(LoongArch64 TRUE) + endif() + + # Set some flags based on build_type +diff --git a/openmp/runtime/cmake/LibompGetArchitecture.cmake b/openmp/runtime/cmake/LibompGetArchitecture.cmake +index d7f81870f..8667b1154 100644 +--- a/openmp/runtime/cmake/LibompGetArchitecture.cmake ++++ b/openmp/runtime/cmake/LibompGetArchitecture.cmake +@@ -49,14 +49,14 @@ function(libomp_get_architecture return_arch) + #error ARCHITECTURE=mips + #elif defined(__riscv) && __riscv_xlen == 64 + #error ARCHITECTURE=riscv64 +- #elif defined(__loongarch__) && __loongarch_grlen == 64 +- #error ARCHITECTURE=loongarch64 + #elif defined(__ve__) + #error ARCHITECTURE=ve + #elif defined(__s390x__) + #error ARCHITECTURE=s390x + #elif defined(__wasm32__) + #error ARCHITECTURE=wasm32 ++ #elif defined(__loongarch__) && defined(__loongarch64) ++ #error ARCHITECTURE=loongarch64 + #else + #error ARCHITECTURE=UnknownArchitecture + #endif +diff --git a/openmp/runtime/cmake/LibompMicroTests.cmake 
b/openmp/runtime/cmake/LibompMicroTests.cmake +index e8cc218af..a15b330b5 100644 +--- a/openmp/runtime/cmake/LibompMicroTests.cmake ++++ b/openmp/runtime/cmake/LibompMicroTests.cmake +@@ -214,10 +214,10 @@ else() + elseif(${RISCV64}) + libomp_append(libomp_expected_library_deps libc.so.6) + libomp_append(libomp_expected_library_deps ld.so.1) +- elseif(${LOONGARCH64}) ++ elseif(${S390X}) + libomp_append(libomp_expected_library_deps libc.so.6) + libomp_append(libomp_expected_library_deps ld.so.1) +- elseif(${S390X}) ++ elseif(${LoongArch64}) + libomp_append(libomp_expected_library_deps libc.so.6) + libomp_append(libomp_expected_library_deps ld.so.1) + endif() +diff --git a/openmp/runtime/cmake/LibompUtils.cmake b/openmp/runtime/cmake/LibompUtils.cmake +index 139eabb45..4f20fe6da 100644 +--- a/openmp/runtime/cmake/LibompUtils.cmake ++++ b/openmp/runtime/cmake/LibompUtils.cmake +@@ -109,12 +109,12 @@ function(libomp_get_legal_arch return_arch_string) + set(${return_arch_string} "MIPS64" PARENT_SCOPE) + elseif(${RISCV64}) + set(${return_arch_string} "RISCV64" PARENT_SCOPE) +- elseif(${LOONGARCH64}) +- set(${return_arch_string} "LOONGARCH64" PARENT_SCOPE) + elseif(${VE}) + set(${return_arch_string} "VE" PARENT_SCOPE) + elseif(${S390X}) + set(${return_arch_string} "S390X" PARENT_SCOPE) ++ elseif(${LoongArch64}) ++ set(${return_arch_string} "LoongArch64" PARENT_SCOPE) + else() + set(${return_arch_string} "${LIBOMP_ARCH}" PARENT_SCOPE) + libomp_warning_say("libomp_get_legal_arch(): Warning: Unknown architecture: Using ${LIBOMP_ARCH}") +diff --git a/openmp/runtime/cmake/config-ix.cmake b/openmp/runtime/cmake/config-ix.cmake +index 76f471a44..2372f9ce5 100644 +--- a/openmp/runtime/cmake/config-ix.cmake ++++ b/openmp/runtime/cmake/config-ix.cmake +@@ -330,7 +330,6 @@ else() + (LIBOMP_ARCH STREQUAL ppc64le) OR + (LIBOMP_ARCH STREQUAL ppc64) OR + (LIBOMP_ARCH STREQUAL riscv64) OR +- (LIBOMP_ARCH STREQUAL loongarch64) OR + (LIBOMP_ARCH STREQUAL s390x)) + AND # OS supported? 
+ ((WIN32 AND LIBOMP_HAVE_PSAPI) OR APPLE OR +diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp +index 1ac541fbc..c7236ee73 100644 +--- a/openmp/runtime/src/kmp_affinity.cpp ++++ b/openmp/runtime/src/kmp_affinity.cpp +@@ -3138,16 +3138,6 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line, + } + (*line)++; + +-#if KMP_ARCH_LOONGARCH64 +- // The parsing logic of /proc/cpuinfo in this function highly depends on +- // the blank lines between each processor info block. But on LoongArch a +- // blank line exists before the first processor info block (i.e. after the +- // "system type" line). This blank line was added because the "system +- // type" line is unrelated to any of the CPUs. We must skip this line so +- // that the original logic works on LoongArch. +- if (*buf == '\n' && *line == 2) +- continue; +-#endif + #if KMP_ARCH_S390X + // s390x /proc/cpuinfo starts with a variable number of lines containing + // the overall system information. Skip them. +diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h +index 1fb70491a..b0ee0a742 100644 +--- a/openmp/runtime/src/kmp_affinity.h ++++ b/openmp/runtime/src/kmp_affinity.h +@@ -264,7 +264,7 @@ public: + #elif __NR_sched_getaffinity != 5196 + #error Wrong code for getaffinity system call. + #endif /* __NR_sched_getaffinity */ +-#elif KMP_ARCH_LOONGARCH64 ++#elif KMP_ARCH_RISCV64 + #ifndef __NR_sched_setaffinity + #define __NR_sched_setaffinity 122 + #elif __NR_sched_setaffinity != 122 +@@ -275,7 +275,7 @@ public: + #elif __NR_sched_getaffinity != 123 + #error Wrong code for getaffinity system call. 
+ #endif /* __NR_sched_getaffinity */ +-#elif KMP_ARCH_RISCV64 ++#elif KMP_ARCH_LOONGARCH64 + #ifndef __NR_sched_setaffinity + #define __NR_sched_setaffinity 122 + #elif __NR_sched_setaffinity != 122 +diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h +index 9cd0aefae..709eadd36 100644 +--- a/openmp/runtime/src/kmp_os.h ++++ b/openmp/runtime/src/kmp_os.h +@@ -181,8 +181,8 @@ typedef unsigned long long kmp_uint64; + KMP_ARCH_PPC + #define KMP_SIZE_T_SPEC KMP_UINT32_SPEC + #elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ +- KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ +- KMP_ARCH_VE || KMP_ARCH_S390X ++ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_VE || \ ++ KMP_ARCH_S390X || KMP_ARCH_LOONGARCH64 + #define KMP_SIZE_T_SPEC KMP_UINT64_SPEC + #else + #error "Can't determine size_t printf format specifier." +@@ -1049,8 +1049,8 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); + #endif /* KMP_OS_WINDOWS */ + + #if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \ +- KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ +- KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC ++ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_VE || \ ++ KMP_ARCH_S390X || KMP_ARCH_PPC || KMP_ARCH_LOONGARCH64 + #if KMP_OS_WINDOWS + #undef KMP_MB + #define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst) +diff --git a/openmp/runtime/src/kmp_platform.h b/openmp/runtime/src/kmp_platform.h +index c06f46db2..0c8b399d8 100644 +--- a/openmp/runtime/src/kmp_platform.h ++++ b/openmp/runtime/src/kmp_platform.h +@@ -112,9 +112,9 @@ + #define KMP_ARCH_MIPS 0 + #define KMP_ARCH_MIPS64 0 + #define KMP_ARCH_RISCV64 0 +-#define KMP_ARCH_LOONGARCH64 0 + #define KMP_ARCH_VE 0 + #define KMP_ARCH_S390X 0 ++#define KMP_ARCH_LOONGARCH64 0 + + #if KMP_OS_WINDOWS + #if defined(_M_AMD64) || defined(__x86_64) +@@ -171,15 +171,15 @@ + #elif defined __riscv && __riscv_xlen == 64 + #undef 
KMP_ARCH_RISCV64 + #define KMP_ARCH_RISCV64 1 +-#elif defined __loongarch__ && __loongarch_grlen == 64 +-#undef KMP_ARCH_LOONGARCH64 +-#define KMP_ARCH_LOONGARCH64 1 + #elif defined __ve__ + #undef KMP_ARCH_VE + #define KMP_ARCH_VE 1 + #elif defined __s390x__ + #undef KMP_ARCH_S390X + #define KMP_ARCH_S390X 1 ++#elif defined __loongarch__ && defined __loongarch64 ++#undef KMP_ARCH_LOONGARCH64 ++#define KMP_ARCH_LOONGARCH64 1 + #endif + #endif + +@@ -253,8 +253,8 @@ + // TODO: Fixme - This is clever, but really fugly + #if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \ + KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + \ +- KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64 + KMP_ARCH_VE + \ +- KMP_ARCH_S390X + KMP_ARCH_WASM + KMP_ARCH_PPC) ++ KMP_ARCH_RISCV64 + KMP_ARCH_VE + KMP_ARCH_S390X + \ ++ KMP_ARCH_WASM + KMP_ARCH_PPC + KMP_ARCH_LOONGARCH64) + #error Unknown or unsupported architecture + #endif + +diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp +index fc5e8405a..0d83bf071 100644 +--- a/openmp/runtime/src/kmp_runtime.cpp ++++ b/openmp/runtime/src/kmp_runtime.cpp +@@ -8896,8 +8896,8 @@ __kmp_determine_reduction_method( + int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; + + #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ +- KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ +- KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM ++ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_VE || \ ++ KMP_ARCH_S390X || KMP_ARCH_WASM || KMP_ARCH_LOONGARCH64 + + #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ + KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD || \ +diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h +index bd3fd9b43..7d3ab79aa 100644 +--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h ++++ 
b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h +@@ -170,6 +170,10 @@ + #define ITT_ARCH_S390X 8 + #endif /* ITT_ARCH_S390X */ + ++#ifndef ITT_ARCH_LOONGARCH64 ++#define ITT_ARCH_LOONGARCH64 7 ++#endif /* ITT_ARCH_LOONGARCH64 */ ++ + #ifndef ITT_ARCH + #if defined _M_IX86 || defined __i386__ + #define ITT_ARCH ITT_ARCH_IA32 +@@ -187,6 +191,8 @@ + #define ITT_ARCH ITT_ARCH_VE + #elif defined __s390x__ + #define ITT_ARCH ITT_ARCH_S390X ++#elif defined __loongarch__ && defined __loongarch64 ++#define ITT_ARCH ITT_ARCH_LOONGARCH64 + #endif + #endif + +diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S +index 14987c298..f264280e3 100644 +--- a/openmp/runtime/src/z_Linux_asm.S ++++ b/openmp/runtime/src/z_Linux_asm.S +@@ -1902,164 +1902,6 @@ __kmp_invoke_microtask: + + #endif /* KMP_ARCH_RISCV64 */ + +-#if KMP_ARCH_LOONGARCH64 +- +-//------------------------------------------------------------------------ +-// +-// typedef void (*microtask_t)(int *gtid, int *tid, ...); +-// +-// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, +-// void *p_argv[] +-// #if OMPT_SUPPORT +-// , +-// void **exit_frame_ptr +-// #endif +-// ) { +-// #if OMPT_SUPPORT +-// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); +-// #endif +-// +-// (*pkfn)(>id, &tid, argv[0], ...); +-// +-// return 1; +-// } +-// +-// Parameters: +-// a0: pkfn +-// a1: gtid +-// a2: tid +-// a3: argc +-// a4: p_argv +-// a5: exit_frame_ptr +-// +-// Locals: +-// __gtid: gtid param pushed on stack so can pass >id to pkfn +-// __tid: tid param pushed on stack so can pass &tid to pkfn +-// +-// Temp registers: +-// +-// t0: used to calculate the dynamic stack size / used to hold pkfn address +-// t1: used as temporary for stack placement calculation +-// t2: used as temporary for stack arguments +-// t3: used as temporary for number of remaining pkfn parms +-// t4: used to traverse p_argv array +-// +-// return: a0 (always 1/TRUE) +-// +- +-// -- Begin 
__kmp_invoke_microtask +-// mark_begin; +- .text +- .globl __kmp_invoke_microtask +- .p2align 2 +- .type __kmp_invoke_microtask,@function +-__kmp_invoke_microtask: +- .cfi_startproc +- +- // First, save ra and fp +- addi.d $sp, $sp, -16 +- st.d $ra, $sp, 8 +- st.d $fp, $sp, 0 +- addi.d $fp, $sp, 16 +- .cfi_def_cfa 22, 0 +- .cfi_offset 1, -8 +- .cfi_offset 22, -16 +- +- // Compute the dynamic stack size: +- // +- // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by +- // reference +- // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' +- // function by register. Given that we have 8 of such registers (a[0-7]) +- // and two + 'argc' arguments (consider >id and &tid), we need to +- // reserve max(0, argc - 6)*8 extra bytes +- // +- // The total number of bytes is then max(0, argc - 6)*8 + 8 +- +- addi.d $t0, $a3, -6 +- slt $t1, $t0, $zero +- masknez $t0, $t0, $t1 +- addi.d $t0, $t0, 1 +- slli.d $t0, $t0, 3 +- sub.d $sp, $sp, $t0 +- +- // Align the stack to 16 bytes +- bstrins.d $sp, $zero, 3, 0 +- +- move $t0, $a0 +- move $t3, $a3 +- move $t4, $a4 +- +-#if OMPT_SUPPORT +- // Save frame pointer into exit_frame +- st.d $fp, $a5, 0 +-#endif +- +- // Prepare arguments for the pkfn function (first 8 using a0-a7 registers) +- +- st.w $a1, $fp, -20 +- st.w $a2, $fp, -24 +- +- addi.d $a0, $fp, -20 +- addi.d $a1, $fp, -24 +- +- beqz $t3, .L_kmp_3 +- ld.d $a2, $t4, 0 +- +- addi.d $t3, $t3, -1 +- beqz $t3, .L_kmp_3 +- ld.d $a3, $t4, 8 +- +- addi.d $t3, $t3, -1 +- beqz $t3, .L_kmp_3 +- ld.d $a4, $t4, 16 +- +- addi.d $t3, $t3, -1 +- beqz $t3, .L_kmp_3 +- ld.d $a5, $t4, 24 +- +- addi.d $t3, $t3, -1 +- beqz $t3, .L_kmp_3 +- ld.d $a6, $t4, 32 +- +- addi.d $t3, $t3, -1 +- beqz $t3, .L_kmp_3 +- ld.d $a7, $t4, 40 +- +- // Prepare any additional argument passed through the stack +- addi.d $t4, $t4, 48 +- move $t1, $sp +- b .L_kmp_2 +-.L_kmp_1: +- ld.d $t2, $t4, 0 +- st.d $t2, $t1, 0 +- addi.d $t4, $t4, 8 +- addi.d $t1, $t1, 8 +-.L_kmp_2: +- 
addi.d $t3, $t3, -1 +- bnez $t3, .L_kmp_1 +- +-.L_kmp_3: +- // Call pkfn function +- jirl $ra, $t0, 0 +- +- // Restore stack and return +- +- addi.d $a0, $zero, 1 +- +- addi.d $sp, $fp, -16 +- ld.d $fp, $sp, 0 +- ld.d $ra, $sp, 8 +- addi.d $sp, $sp, 16 +- jr $ra +-.Lfunc_end0: +- .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask +- .cfi_endproc +- +-// -- End __kmp_invoke_microtask +- +-#endif /* KMP_ARCH_LOONGARCH64 */ +- + #if KMP_ARCH_VE + + //------------------------------------------------------------------------ +@@ -2405,6 +2247,157 @@ __kmp_invoke_microtask: + + #endif /* KMP_ARCH_S390X */ + ++#if KMP_ARCH_LOONGARCH64 ++ ++//------------------------------------------------------------------------ ++// ++// typedef void (*microtask_t)( int *gtid, int *tid, ... ); ++// ++// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, ++// void *p_argv[] ++// #if OMPT_SUPPORT ++// , ++// void **exit_frame_ptr ++// #endif ++// ) { ++// #if OMPT_SUPPORT ++// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); ++// #endif ++// (*pkfn)( & gtid, & tid, argv[0], ... ); ++// ++// return 1; ++// } ++// ++// parameters: ++// a0: pkfn ++// a1: gtid ++// a2: tid ++// a3: argc ++// a4: p_argv ++// a5: exit_frame_ptr ++// ++// Temp. 
registers: ++// ++// t0: used to calculate the dynamic stack size / used to hold pkfn address ++// t1: used as temporary for stack placement calculation ++// t2: used as temporary for stack arguments ++// t3: used as temporary for number of remaining pkfn parms ++// t4: used to traverse p_argv array ++// ++// return: a0 (always 1/TRUE) ++// ++ ++// -- Begin __kmp_invoke_microtask ++// mark_begin; ++ .text ++ .globl __kmp_invoke_microtask ++ .p2align 3 ++ .type __kmp_invoke_microtask,@function ++__kmp_invoke_microtask: ++ ++ // First, save ra and fp ++ addi.d $sp, $sp, -16 ++ st.d $ra, $sp, 8 ++ st.d $fp, $sp, 0 ++ addi.d $fp, $sp, 16 ++ ++ // Compute the dynamic stack size: ++ // ++ // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by ++ // reference ++ // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' ++ // function by register. Given that we have 8 of such registers (a[0-7]) ++ // and two + 'argc' arguments (consider &gtid and &tid), we need to ++ // reserve max(0, argc - 6)*8 extra bytes ++ // ++ // The total number of bytes is then max(0, argc - 6)*8 + 8 ++ ++ // Compute max(0, argc - 6) using the following bithack: ++ // max(0, x) = x - (x & (x >> 63)), where x := argc - 6 ++ // Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax ++ addi.d $t0, $a3, -6 ++ srai.d $t1, $t0, 63 ++ and $t1, $t0, $t1 ++ sub.d $t0, $t0, $t1 ++ ++ addi.d $t0, $t0, 1 ++ ++ slli.d $t0, $t0, 3 // t0: total number of bytes for stack storing arguments.
++ sub.d $sp, $sp, $t0 ++ bstrins.d $sp, $zero, 3, 0 // Align the stack to 16 bytes before calling pkfn ++ move $t0, $a0 ++ move $t3, $a3 ++ move $t4, $a4 ++ ++#if OMPT_SUPPORT ++ // Save frame pointer into exit_frame ++ st.d $fp, $a5, 0 ++#endif ++ ++ // Prepare arguments for the pkfn function (first 8 using $a0-$a7 registers) ++ ++ st.w $a1, $fp, -20 // gtid ++ st.w $a2, $fp, -24 // tid ++ ++ addi.d $a0, $fp, -20 // &gtid ++ addi.d $a1, $fp, -24 // &tid ++ ++ beqz $t3, .L_kmp_3 ++ ld.d $a2, $t4, 0 // argv[0] ++ ++ addi.d $t3, $t3, -1 ++ beqz $t3, .L_kmp_3 ++ ld.d $a3, $t4, 8 // argv[1] ++ ++ addi.d $t3, $t3, -1 ++ beqz $t3, .L_kmp_3 ++ ld.d $a4, $t4, 16 // argv[2] ++ ++ addi.d $t3, $t3, -1 ++ beqz $t3, .L_kmp_3 ++ ld.d $a5, $t4, 24 // argv[3] ++ ++ addi.d $t3, $t3, -1 ++ beqz $t3, .L_kmp_3 ++ ld.d $a6, $t4, 32 // argv[4] ++ ++ addi.d $t3, $t3, -1 ++ beqz $t3, .L_kmp_3 ++ ld.d $a7, $t4, 40 // argv[5] ++ ++ // Prepare any additional argument passed through the stack ++ addi.d $t4, $t4, 48 ++ move $t1, $sp ++ b .L_kmp_2 ++.L_kmp_1: ++ ld.d $t2, $t4, 0 ++ st.d $t2, $t1, 0 ++ addi.d $t4, $t4, 8 ++ addi.d $t1, $t1, 8 ++.L_kmp_2: ++ addi.d $t3, $t3, -1 ++ bnez $t3, .L_kmp_1 ++ ++.L_kmp_3: ++ // Call pkfn function ++ jirl $ra, $t0, 0 ++ ++ // Restore stack and return ++ ++ addi.d $a0, $zero, 1 ++ ++ addi.d $sp, $fp, -16 ++ ld.d $fp, $sp, 0 ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++ jr $ra ++.Lfunc_end0: ++ .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask ++ ++// -- End __kmp_invoke_microtask ++ ++#endif /* KMP_ARCH_LOONGARCH64 */ ++ + #if KMP_ARCH_ARM || KMP_ARCH_MIPS + .data + COMMON .gomp_critical_user_, 32, 3 +@@ -2419,8 +2412,7 @@ __kmp_unnamed_critical_addr: + #endif /* KMP_ARCH_ARM */ + + #if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || \ +- KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE || \ +- KMP_ARCH_S390X ++ KMP_ARCH_RISCV64 || KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_LOONGARCH64 + #ifndef KMP_PREFIX_UNDERSCORE + # define KMP_PREFIX_UNDERSCORE(x) x + #endif +@@ -2435,8 +2427,8 @@
KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): + .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8 + #endif + #endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || +- KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE || +- KMP_ARCH_S390X */ ++ KMP_ARCH_RISCV64 || KMP_ARCH_VE || ++ KMP_ARCH_S390X || KMP_ARCH_LOONGARCH64 */ + + #if KMP_OS_LINUX + # if KMP_ARCH_ARM || KMP_ARCH_AARCH64 +diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp +index b9ff96873..96cbe1790 100644 +--- a/openmp/runtime/src/z_Linux_util.cpp ++++ b/openmp/runtime/src/z_Linux_util.cpp +@@ -2521,8 +2521,8 @@ finish: // Clean up and exit. + + #if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \ + ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \ +- KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ +- KMP_ARCH_ARM || KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC_XCOFF) ++ KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_ARM || \ ++ KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC_XCOFF || KMP_ARCH_LOONGARCH64) + + // we really only need the case with 1 argument, because CLANG always build + // a struct of pointers to shared variables referenced in the outlined function +diff --git a/openmp/runtime/test/ompt/callback.h b/openmp/runtime/test/ompt/callback.h +index 05b2006a2..c888b217e 100644 +--- a/openmp/runtime/test/ompt/callback.h ++++ b/openmp/runtime/test/ompt/callback.h +@@ -217,15 +217,6 @@ ompt_label_##id: + printf("%" PRIu64 ": current_address=%p or %p\n", \ + ompt_get_thread_data()->value, ((char *)addr) - 8, ((char *)addr) - 12) + #endif +-#elif KMP_ARCH_LOONGARCH64 +-// On LoongArch64 the NOP instruction is 4 bytes long, can be followed by +-// inserted jump instruction (another 4 bytes long). And an additional jump +-// instruction may appear (adding 4 more bytes) when the NOP is referenced +-// elsewhere (ie. another branch). 
+-#define print_possible_return_addresses(addr) \ +- printf("%" PRIu64 ": current_address=%p or %p or %p\n", \ +- ompt_get_thread_data()->value, ((char *)addr) - 4, \ +- ((char *)addr) - 8, ((char *)addr) - 12) + #elif KMP_ARCH_VE + // On VE the NOP instruction is 8 byte long. In addition, the compiler inserts + // a ??? instruction for non-void runtime functions which is ? bytes long. +diff --git a/openmp/runtime/tools/lib/Platform.pm b/openmp/runtime/tools/lib/Platform.pm +index 6efd932da..ca44d52d1 100644 +--- a/openmp/runtime/tools/lib/Platform.pm ++++ b/openmp/runtime/tools/lib/Platform.pm +@@ -63,10 +63,10 @@ sub canon_arch($) { + $arch = "mips"; + } elsif ( $arch =~ m{\Ariscv64} ) { + $arch = "riscv64"; +- } elsif ( $arch =~ m{\Aloongarch64} ) { +- $arch = "loongarch64"; + } elsif ( $arch =~ m{\As390x} ) { + $arch = "s390x"; ++ } elsif ( $arch =~ m{\Aloongarch64} ) { ++ $arch = "loongarch64"; + } else { + $arch = undef; + }; # if +@@ -97,11 +97,11 @@ sub canon_mic_arch($) { + "32e" => "Intel(R) 64", + "arm" => "ARM", + "aarch64" => "AArch64", +- "loongarch64" => "LoongArch64", + "mic" => "Intel(R) Many Integrated Core Architecture", + "mips" => "MIPS", + "mips64" => "MIPS64", + "riscv64" => "RISC-V (64-bit)", ++ "loongarch64" => "LoongArch64", + ); + + sub legal_arch($) { +@@ -124,6 +124,7 @@ sub canon_mic_arch($) { + "mic" => "intel64", + "mips" => "mips", + "mips64" => "MIPS64", ++ "loongarch64" => "loongarch64", + ); + + sub arch_opt($) { +@@ -230,10 +231,10 @@ sub target_options() { + $_host_arch = "mips"; + } elsif ( $hardware_platform eq "riscv64" ) { + $_host_arch = "riscv64"; +- } elsif ( $hardware_platform eq "loongarch64" ) { +- $_host_arch = "loongarch64"; + } elsif ( $hardware_platform eq "s390x" ) { + $_host_arch = "s390x"; ++ } elsif ( $hardware_platform eq "loongarch64" ) { ++ $_host_arch = "loongarch64"; + } else { + die "Unsupported host hardware platform: \"$hardware_platform\"; stopped"; + }; # if +@@ -423,7 +424,7 @@ the script assumes 
host architecture is target one. + + Input string is an architecture name to canonize. The function recognizes many variants, for example: + C<32e>, C, C, etc. Returned string is a canonized architecture name, +-one of: C<32>, C<32e>, C<64>, C, C, C, C, C, C, C, C, C, or C is input string is not recognized. ++one of: C<32>, C<32e>, C<64>, C, C, C, C, C, C, C, C, C or C is input string is not recognized. + + =item B + +diff --git a/openmp/runtime/tools/lib/Uname.pm b/openmp/runtime/tools/lib/Uname.pm +index 9dde444d5..db1d93302 100644 +--- a/openmp/runtime/tools/lib/Uname.pm ++++ b/openmp/runtime/tools/lib/Uname.pm +@@ -158,10 +158,10 @@ if ( 0 ) { + $values{ hardware_platform } = "mips"; + } elsif ( $values{ machine } =~ m{\Ariscv64\z} ) { + $values{ hardware_platform } = "riscv64"; +- } elsif ( $values{ machine } =~ m{\Aloongarch64\z} ) { +- $values{ hardware_platform } = "loongarch64"; + } elsif ( $values{ machine } =~ m{\As390x\z} ) { + $values{ hardware_platform } = "s390x"; ++ } elsif ( $values{ machine } =~ m{\Aloongarch64\z} ) { ++ $values{ hardware_platform } = "loongarch64"; + } else { + die "Unsupported machine (\"$values{ machine }\") returned by POSIX::uname(); stopped"; + }; # if diff --git a/libomp.spec b/libomp.spec index b812378d88e07f72883323c07cb3d70d2e682f2a..8e0db6f2e3917c6b95e242c0f3a5e1779e6343df 100644 --- a/libomp.spec +++ b/libomp.spec @@ -1,3 +1,4 @@ +%define anolis_release .0.1 %bcond_with snapshot_build %if %{with snapshot_build} @@ -35,7 +36,7 @@ Name: libomp Version: %{libomp_version}%{?rc_ver:~rc%{rc_ver}}%{?llvm_snapshot_version_suffix:~%{llvm_snapshot_version_suffix}} -Release: 1%{?dist} +Release: 1%{anolis_release}%{?dist} Summary: OpenMP runtime for clang License: NCSA @@ -49,6 +50,8 @@ Source1: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{libomp Source2: release-keys.asc %endif +Patch1: 0001-Support-LoongArch.patch + BuildRequires: clang >= %{maj_ver} # For clang-offload-packager BuildRequires: 
clang-tools-extra @@ -97,6 +100,10 @@ OpenMP header files. %cmake -GNinja \ -DLIBOMP_INSTALL_ALIASES=OFF \ +%ifarch loongarch64 + -DCMAKE_C_COMPILER=clang \ + -DCMAKE_CXX_COMPILER=clang++ \ +%endif -DCMAKE_MODULE_PATH=%{_datadir}/llvm/cmake/Modules \ -DLLVM_DIR=%{_libdir}/cmake/llvm \ -DCMAKE_INSTALL_INCLUDEDIR=%{_prefix}/lib/clang/%{maj_ver}/include \ @@ -125,14 +132,16 @@ rm -rf %{buildroot}%{_libdir}/libarcher_static.a %files %license LICENSE.TXT %{_libdir}/libomp.so +%ifnarch loongarch64 %{_libdir}/libompd.so %ifnarch %{arm} %{_libdir}/libarcher.so %endif +%endif %ifnarch %{ix86} %{arm} # libomptarget is not supported on 32-bit systems. # s390x does not support the offloading plugins. -%ifnarch s390x +%ifnarch s390x loongarch64 %{_libdir}/libomptarget.rtl.amdgpu.so.%{so_suffix} %{_libdir}/libomptarget.rtl.cuda.so.%{so_suffix} %{_libdir}/libomptarget.rtl.%{libomp_arch}.so.%{so_suffix} @@ -143,7 +152,7 @@ rm -rf %{buildroot}%{_libdir}/libarcher_static.a %files devel %{_prefix}/lib/clang/%{maj_ver}/include/omp.h %{_prefix}/lib/clang/%{maj_ver}/include/ompx.h -%ifnarch %{arm} +%ifnarch %{arm} loongarch64 %{_prefix}/lib/clang/%{maj_ver}/include/omp-tools.h %{_prefix}/lib/clang/%{maj_ver}/include/ompt.h %{_prefix}/lib/clang/%{maj_ver}/include/ompt-multiplex.h @@ -152,7 +161,7 @@ rm -rf %{buildroot}%{_libdir}/libarcher_static.a %ifnarch %{ix86} %{arm} # libomptarget is not supported on 32-bit systems. # s390x does not support the offloading plugins. -%ifnarch s390x +%ifnarch s390x loongarch64 %{_libdir}/libomptarget.rtl.amdgpu.so %{_libdir}/libomptarget.rtl.cuda.so %{_libdir}/libomptarget.rtl.%{libomp_arch}.so @@ -164,6 +173,9 @@ rm -rf %{buildroot}%{_libdir}/libarcher_static.a %endif %changelog +* Thu Dec 05 2024 Chen Li - 18.1.8-1.0.1 +- Add support for LoongArch + * Tue Jul 09 2024 Tom Stellard - 18.1.8-1 - 18.1.8 Release