From 6e8ffa746f17ed969b81d8abfc9e467dd7ff95bc Mon Sep 17 00:00:00 2001 From: ticat_fp Date: Tue, 23 Apr 2024 11:25:55 +0800 Subject: [PATCH] LoongArch: add 3a6000 support Signed-off-by: ticat_fp --- LoongArch-Add-LA664-support.patch | 332 +++++++ ...ternal-error-running-gcc-march-nativ.patch | 106 ++ ...x-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch | 907 ++++++++++++++++++ LoongArch-Use-finer-grained-DBAR-hints.patch | 137 +++ gcc.spec | 15 +- 5 files changed, 1496 insertions(+), 1 deletion(-) create mode 100644 LoongArch-Add-LA664-support.patch create mode 100644 LoongArch-Fix-internal-error-running-gcc-march-nativ.patch create mode 100644 LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch create mode 100644 LoongArch-Use-finer-grained-DBAR-hints.patch diff --git a/LoongArch-Add-LA664-support.patch b/LoongArch-Add-LA664-support.patch new file mode 100644 index 0000000..8e2674d --- /dev/null +++ b/LoongArch-Add-LA664-support.patch @@ -0,0 +1,332 @@ +From c68463abbab98aa7f5a9b91e71ed6f6834c723df Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Thu, 16 Nov 2023 20:43:53 +0800 +Subject: [PATCH] LoongArch: Add LA664 support. + +Define ISA_BASE_LA64V110, which represents the base instruction set defined in LoongArch1.1. +Support the configure setting --with-arch =la664, and support -march=la664,-mtune=la664. + +gcc/ChangeLog: + + * config.gcc: Support LA664. + * config/loongarch/genopts/loongarch-strings: Likewise. + * config/loongarch/genopts/loongarch.opt.in: Likewise. + * config/loongarch/loongarch-cpu.cc (fill_native_cpu_config): Likewise. + * config/loongarch/loongarch-def.c: Likewise. + * config/loongarch/loongarch-def.h (N_ISA_BASE_TYPES): Likewise. + (ISA_BASE_LA64V110): Define macro. + (N_ARCH_TYPES): Update value. + (N_TUNE_TYPES): Update value. + (CPU_LA664): New macro. + * config/loongarch/loongarch-opts.cc (isa_default_abi): Likewise. + (isa_base_compat_p): Likewise. + * config/loongarch/loongarch-opts.h (TARGET_64BIT): This parameter is enabled + when la_target.isa.base is equal to ISA_BASE_LA64V100 or ISA_BASE_LA64V110. + (TARGET_uARCH_LA664): Define macro. + * config/loongarch/loongarch-str.h (STR_CPU_LA664): Likewise. + * config/loongarch/loongarch.cc (loongarch_cpu_sched_reassociation_width): + Add LA664 support. + * config/loongarch/loongarch.opt: Regenerate. + +Signed-off-by: ticat_fp +--- + gcc/config.gcc | 10 ++++----- + .../loongarch/genopts/loongarch-strings | 1 + + gcc/config/loongarch/genopts/loongarch.opt.in | 3 +++ + gcc/config/loongarch/loongarch-cpu.cc | 4 ++++ + gcc/config/loongarch/loongarch-def.c | 21 +++++++++++++++++++ + gcc/config/loongarch/loongarch-def.h | 8 ++++--- + gcc/config/loongarch/loongarch-opts.cc | 8 +++---- + gcc/config/loongarch/loongarch-opts.h | 4 +++- + gcc/config/loongarch/loongarch-str.h | 1 + + gcc/config/loongarch/loongarch.cc | 1 + + gcc/config/loongarch/loongarch.opt | 3 +++ + 11 files changed, 51 insertions(+), 13 deletions(-) + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 6d51bd93f3f..b88591b6fd8 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -5039,7 +5039,7 @@ case "${target}" in + + # Perform initial sanity checks on --with-* options. + case ${with_arch} in +- "" | abi-default | loongarch64 | la464) ;; # OK, append here. ++ "" | abi-default | loongarch64 | la[46]64) ;; # OK, append here. + native) + if test x${host} != x${target}; then + echo "--with-arch=native is illegal for cross-compiler." 
1>&2 +@@ -5088,7 +5088,7 @@ case "${target}" in + case ${abi_base}/${abi_ext} in + lp64*/base) + # architectures that support lp64* ABI +- arch_pattern="native|abi-default|loongarch64|la464" ++ arch_pattern="native|abi-default|loongarch64|la[46]64" + # default architecture for lp64* ABI + arch_default="abi-default" + ;; +@@ -5163,7 +5163,7 @@ case "${target}" in + # Check default with_tune configuration using with_arch. + case ${with_arch} in + loongarch64) +- tune_pattern="native|abi-default|loongarch64|la464" ++ tune_pattern="native|abi-default|loongarch64|la[46]64" + ;; + *) + # By default, $with_tune == $with_arch +@@ -5219,7 +5219,7 @@ case "${target}" in + # Fixed: use the default gcc configuration for all multilib + # builds by default. + with_multilib_default="" ;; +- arch,native|arch,loongarch64|arch,la464) # OK, append here. ++ arch,native|arch,loongarch64|arch,la[46]64) # OK, append here. + with_multilib_default="/march=${component}" ;; + arch,*) + with_multilib_default="/march=abi-default" +@@ -5307,7 +5307,7 @@ case "${target}" in + if test x${parse_state} = x"arch"; then + # -march option + case ${component} in +- native | abi-default | loongarch64 | la464) # OK, append here. ++ native | abi-default | loongarch64 | la[46]64) # OK, append here. + # Append -march spec for each multilib variant. + loongarch_multilib_list_make="${loongarch_multilib_list_make}/march=${component}" + parse_state="opts" +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index 8e412f7536e..7bc4824007e 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -26,6 +26,7 @@ STR_CPU_NATIVE native + STR_CPU_ABI_DEFAULT abi-default + STR_CPU_LOONGARCH64 loongarch64 + STR_CPU_LA464 la464 ++STR_CPU_LA664 la664 + + # Base architecture + STR_ISA_BASE_LA64V100 la64 +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 158701d327a..00b4733d75b 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -107,6 +107,9 @@ Enum(cpu_type) String(@@STR_CPU_LOONGARCH64@@) Value(CPU_LOONGARCH64) + EnumValue + Enum(cpu_type) String(@@STR_CPU_LA464@@) Value(CPU_LA464) + ++EnumValue ++Enum(cpu_type) String(@@STR_CPU_LA664@@) Value(CPU_LA664) ++ + m@@OPTSTR_ARCH@@= + Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) + -m@@OPTSTR_ARCH@@=PROCESSOR Generate code for the given PROCESSOR ISA. +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index 7a2866f60f9..f3a13414143 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -106,6 +106,10 @@ fill_native_cpu_config (struct loongarch_target *tgt) + native_cpu_type = CPU_LA464; + break; + ++ case 0x0014d000: /* LA664 */ ++ native_cpu_type = CPU_LA664; ++ break; ++ + default: + /* Unknown PRID. 
*/ + if (tune_native_p) +diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c +index 430ef8b2d95..067629141b6 100644 +--- a/gcc/config/loongarch/loongarch-def.c ++++ b/gcc/config/loongarch/loongarch-def.c +@@ -28,6 +28,7 @@ loongarch_cpu_strings[N_TUNE_TYPES] = { + [CPU_ABI_DEFAULT] = STR_CPU_ABI_DEFAULT, + [CPU_LOONGARCH64] = STR_CPU_LOONGARCH64, + [CPU_LA464] = STR_CPU_LA464, ++ [CPU_LA664] = STR_CPU_LA664, + }; + + struct loongarch_isa +@@ -42,6 +43,11 @@ loongarch_cpu_default_isa[N_ARCH_TYPES] = { + .fpu = ISA_EXT_FPU64, + .simd = ISA_EXT_SIMD_LASX, + }, ++ [CPU_LA664] = { ++ .base = ISA_BASE_LA64V110, ++ .fpu = ISA_EXT_FPU64, ++ .simd = ISA_EXT_SIMD_LASX, ++ }, + }; + + struct loongarch_cache +@@ -58,6 +64,12 @@ loongarch_cpu_cache[N_TUNE_TYPES] = { + .l2d_size = 256, + .simultaneous_prefetches = 4, + }, ++ [CPU_LA664] = { ++ .l1d_line_size = 64, ++ .l1d_size = 64, ++ .l2d_size = 256, ++ .simultaneous_prefetches = 4, ++ }, + }; + + struct loongarch_align +@@ -70,6 +82,10 @@ loongarch_cpu_align[N_TUNE_TYPES] = { + .function = "32", + .label = "16", + }, ++ [CPU_LA664] = { ++ .function = "32", ++ .label = "16", ++ }, + }; + + +@@ -104,6 +120,9 @@ loongarch_cpu_rtx_cost_data[N_TUNE_TYPES] = { + [CPU_LA464] = { + DEFAULT_COSTS + }, ++ [CPU_LA664] = { ++ DEFAULT_COSTS ++ }, + }; + + /* RTX costs to use when optimizing for size. */ +@@ -127,6 +146,7 @@ loongarch_cpu_issue_rate[N_TUNE_TYPES] = { + [CPU_NATIVE] = 4, + [CPU_LOONGARCH64] = 4, + [CPU_LA464] = 4, ++ [CPU_LA664] = 6, + }; + + int +@@ -134,6 +154,7 @@ loongarch_cpu_multipass_dfa_lookahead[N_TUNE_TYPES] = { + [CPU_NATIVE] = 4, + [CPU_LOONGARCH64] = 4, + [CPU_LA464] = 4, ++ [CPU_LA664] = 6, + }; + + /* Wiring string definitions from loongarch-str.h to global arrays +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 6e2a6987910..db497f3ffe2 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -55,7 +55,8 @@ extern "C" { + /* enum isa_base */ + extern const char* loongarch_isa_base_strings[]; + #define ISA_BASE_LA64V100 0 +-#define N_ISA_BASE_TYPES 1 ++#define ISA_BASE_LA64V110 1 ++#define N_ISA_BASE_TYPES 2 + + /* enum isa_ext_* */ + extern const char* loongarch_isa_ext_strings[]; +@@ -141,8 +142,9 @@ struct loongarch_target + #define CPU_ABI_DEFAULT 1 + #define CPU_LOONGARCH64 2 + #define CPU_LA464 3 +-#define N_ARCH_TYPES 4 +-#define N_TUNE_TYPES 4 ++#define CPU_LA664 4 ++#define N_ARCH_TYPES 5 ++#define N_TUNE_TYPES 5 + + /* parallel tables. 
*/ + extern const char* loongarch_cpu_strings[]; +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index e5921189a06..67a59152a01 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -552,17 +552,17 @@ isa_default_abi (const struct loongarch_isa *isa) + switch (isa->fpu) + { + case ISA_EXT_FPU64: +- if (isa->base == ISA_BASE_LA64V100) ++ if (isa->base >= ISA_BASE_LA64V100) + abi.base = ABI_BASE_LP64D; + break; + + case ISA_EXT_FPU32: +- if (isa->base == ISA_BASE_LA64V100) ++ if (isa->base >= ISA_BASE_LA64V100) + abi.base = ABI_BASE_LP64F; + break; + + case ISA_EXT_NONE: +- if (isa->base == ISA_BASE_LA64V100) ++ if (isa->base >= ISA_BASE_LA64V100) + abi.base = ABI_BASE_LP64S; + break; + +@@ -582,7 +582,7 @@ isa_base_compat_p (const struct loongarch_isa *set1, + switch (set2->base) + { + case ISA_BASE_LA64V100: +- return (set1->base == ISA_BASE_LA64V100); ++ return (set1->base >= ISA_BASE_LA64V100); + + default: + gcc_unreachable (); +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 6dd309aad96..0e1b3e528a1 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -76,7 +76,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + #define TARGET_DOUBLE_FLOAT (la_target.isa.fpu == ISA_EXT_FPU64) + #define TARGET_DOUBLE_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64D) + +-#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100) ++#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100 \ ++ || la_target.isa.base == ISA_BASE_LA64V110) + #define TARGET_ABI_LP64 (la_target.abi.base == ABI_BASE_LP64D \ + || la_target.abi.base == ABI_BASE_LP64F \ + || la_target.abi.base == ABI_BASE_LP64S) +@@ -88,6 +89,7 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + + /* TARGET_ macros for use in *.md template conditionals */ + #define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464) ++#define TARGET_uARCH_LA664 (la_target.cpu_tune == CPU_LA664) + + /* Note: optimize_size may vary across functions, + while -m[no]-memcpy imposes a global constraint. */ +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index 072558c28f1..fc4f41bfc1e 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see + #define STR_CPU_ABI_DEFAULT "abi-default" + #define STR_CPU_LOONGARCH64 "loongarch64" + #define STR_CPU_LA464 "la464" ++#define STR_CPU_LA664 "la664" + + #define STR_ISA_BASE_LA64V100 "la64" + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 22ca24a1878..4cd509f11c6 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -10177,6 +10177,7 @@ loongarch_cpu_sched_reassociation_width (struct loongarch_target *target, + { + case CPU_LOONGARCH64: + case CPU_LA464: ++ case CPU_LA664: + /* Vector part. 
*/ + if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode)) + { +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index a5988411fbb..7f129e53ba5 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -114,6 +114,9 @@ Enum(cpu_type) String(loongarch64) Value(CPU_LOONGARCH64) + EnumValue + Enum(cpu_type) String(la464) Value(CPU_LA464) + ++EnumValue ++Enum(cpu_type) String(la664) Value(CPU_LA664) ++ + march= + Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) + -march=PROCESSOR Generate code for the given PROCESSOR ISA. +-- +2.33.0 + diff --git a/LoongArch-Fix-internal-error-running-gcc-march-nativ.patch b/LoongArch-Fix-internal-error-running-gcc-march-nativ.patch new file mode 100644 index 0000000..84c3b91 --- /dev/null +++ b/LoongArch-Fix-internal-error-running-gcc-march-nativ.patch @@ -0,0 +1,106 @@ +From 56752a6bbfb3d3501d0899b23020c3e2eb58882c Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 17 Nov 2023 20:44:17 +0800 +Subject: [PATCH] LoongArch: Fix internal error running "gcc -march=native" on + LA664 + +On LA664, the PRID preset is ISA_BASE_LA64V110 but the base architecture +is guessed ISA_BASE_LA64V100. This causes a warning to be outputed: + + cc1: warning: base architecture 'la64' differs from PRID preset '?' + +But we've not set the "?" above in loongarch_isa_base_strings, thus it's +a nullptr and then an ICE is triggered. + +Add ISA_BASE_LA64V110 to genopts and initialize +loongarch_isa_base_strings[ISA_BASE_LA64V110] correctly to fix the ICE. +The warning itself will be fixed later. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch-strings: + (STR_ISA_BASE_LA64V110): Add. + * config/loongarch/genopts/loongarch.opt.in: + (ISA_BASE_LA64V110): Add. + * config/loongarch/loongarch-def.c + (loongarch_isa_base_strings): Initialize [ISA_BASE_LA64V110] + to STR_ISA_BASE_LA64V110. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch-str.h: Regenerate. 
+ +Signed-off-by: ticat_fp +--- + gcc/config/loongarch/genopts/loongarch-strings | 1 + + gcc/config/loongarch/genopts/loongarch.opt.in | 3 +++ + gcc/config/loongarch/loongarch-def.c | 1 + + gcc/config/loongarch/loongarch-str.h | 1 + + gcc/config/loongarch/loongarch.opt | 3 +++ + 5 files changed, 9 insertions(+) + +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index 7bc4824007e..b2070c83ed0 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -30,6 +30,7 @@ STR_CPU_LA664 la664 + + # Base architecture + STR_ISA_BASE_LA64V100 la64 ++STR_ISA_BASE_LA64V110 la64v1.1 + + # -mfpu + OPTSTR_ISA_EXT_FPU fpu +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 00b4733d75b..b274b3fb21e 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -32,6 +32,9 @@ Basic ISAs of LoongArch: + EnumValue + Enum(isa_base) String(@@STR_ISA_BASE_LA64V100@@) Value(ISA_BASE_LA64V100) + ++EnumValue ++Enum(isa_base) String(@@STR_ISA_BASE_LA64V110@@) Value(ISA_BASE_LA64V110) ++ + ;; ISA extensions / adjustments + Enum + Name(isa_ext_fpu) Type(int) +diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c +index 067629141b6..f22d488acb2 100644 +--- a/gcc/config/loongarch/loongarch-def.c ++++ b/gcc/config/loongarch/loongarch-def.c +@@ -165,6 +165,7 @@ loongarch_cpu_multipass_dfa_lookahead[N_TUNE_TYPES] = { + const char* + loongarch_isa_base_strings[N_ISA_BASE_TYPES] = { + [ISA_BASE_LA64V100] = STR_ISA_BASE_LA64V100, ++ [ISA_BASE_LA64V110] = STR_ISA_BASE_LA64V110, + }; + + const char* +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index fc4f41bfc1e..114dbc692d7 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see + #define STR_CPU_LA664 "la664" + + #define STR_ISA_BASE_LA64V100 "la64" ++#define STR_ISA_BASE_LA64V110 "la64v1.1" + + #define OPTSTR_ISA_EXT_FPU "fpu" + #define STR_NONE "none" +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 7f129e53ba5..350ca30d232 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -39,6 +39,9 @@ Basic ISAs of LoongArch: + EnumValue + Enum(isa_base) String(la64) Value(ISA_BASE_LA64V100) + ++EnumValue ++Enum(isa_base) String(la64v1.1) Value(ISA_BASE_LA64V110) ++ + ;; ISA extensions / adjustments + Enum + Name(isa_ext_fpu) Type(int) +-- +2.33.0 + diff --git a/LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch b/LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch new file mode 100644 index 0000000..e0bcc7c --- /dev/null +++ b/LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch @@ -0,0 +1,907 @@ +From 40366b89e9c8e727af70ecf7007cba6c51e4b7d2 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Wed, 29 Nov 2023 11:16:59 +0800 +Subject: [PATCH] LoongArch: Fix lsx-vshuf.c and lasx-xvshuf_b.c tests fail on + LA664 [PR112611] + +For [x]vshuf instructions, if the index value in the selector exceeds 63, it triggers +undefined behavior on LA464, but not on LA664. To ensure compatibility of these two +tests on both LA464 and LA664, we have modified both tests to ensure that the index +value in the selector does not exceed 63. 
+ +gcc/testsuite/ChangeLog: + + PR target/112611 + * gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c: Sure index less than 64. + * gcc.target/loongarch/vector/lsx/lsx-vshuf.c: Ditto. + +Signed-off-by: ticat_fp +--- + .../loongarch/vector/lasx/lasx-xvshuf_b.c | 343 ++++++------------ + .../loongarch/vector/lsx/lsx-vshuf.c | 162 +++------ + 2 files changed, 164 insertions(+), 341 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c +index d8a29dbd225..b8ab387118a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c +@@ -43,9 +43,9 @@ main () + *((unsigned long *)&__m256i_op1[1]) = 0xfffffefefffffefe; + *((unsigned long *)&__m256i_op1[0]) = 0xfffffefefffffefe; + *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[2]) = 0xfffffff8fffffff8; ++ *((unsigned long *)&__m256i_op2[2]) = 0x3f3f3f383f3f3f38; + *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[0]) = 0xfffffff8fc000000; ++ *((unsigned long *)&__m256i_op2[0]) = 0x3f3f3f383c000000; + *((unsigned long *)&__m256i_result[3]) = 0xfafafafafafafafa; + *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[1]) = 0xfefefefefefefefe; +@@ -137,33 +137,14 @@ main () + *((unsigned long *)&__m256i_op1[2]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op1[0]) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_op2[3]) = 0x0000ffffffffffff; +- *((unsigned long *)&__m256i_op2[2]) = 0x0000ffff0000ffff; +- *((unsigned long *)&__m256i_op2[1]) = 0x0000ffffffffffff; +- *((unsigned long *)&__m256i_op2[0]) = 0x0000ffff0000ffff; ++ *((unsigned long *)&__m256i_op2[3]) = 0x0000111111111111; ++ *((unsigned long *)&__m256i_op2[2]) = 0x0000222200002222; ++ *((unsigned long *)&__m256i_op2[1]) = 0x0000111111111111; ++ *((unsigned long *)&__m256i_op2[0]) = 0x0000222200002222; + *((unsigned long *)&__m256i_result[3]) = 0xffff000000000000; +- *((unsigned long *)&__m256i_result[2]) = 0xffff0000ffff0000; ++ *((unsigned long *)&__m256i_result[2]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_result[1]) = 0xffff000000000000; +- *((unsigned long *)&__m256i_result[0]) = 0xffff0000ffff0000; +- __m256i_out = __lasx_xvshuf_b (__m256i_op0, __m256i_op1, __m256i_op2); +- ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); +- +- *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[3]) = 0x000000000000ffff; +- *((unsigned long *)&__m256i_op2[2]) = 0x000000000000ffff; +- *((unsigned long *)&__m256i_op2[1]) = 0x000000000000ffff; +- *((unsigned long *)&__m256i_op2[0]) = 0x000000000000ffff; +- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[0]) = 
0x0000000000000000; ++ *((unsigned long *)&__m256i_result[0]) = 0xffffffffffffffff; + __m256i_out = __lasx_xvshuf_b (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +@@ -176,7 +157,7 @@ main () + *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[2]) = 0x0000000000077fff; ++ *((unsigned long *)&__m256i_op2[2]) = 0x0000000000032f1f; + *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[3]) = 0xffffffffffffffff; +@@ -186,9 +167,9 @@ main () + __m256i_out = __lasx_xvshuf_b (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op0[3]) = 0xfffffffffffffefe; +- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000101; +- *((unsigned long *)&__m256i_op0[1]) = 0xfffffffffffffefe; ++ *((unsigned long *)&__m256i_op0[3]) = 0x0011001100110011; ++ *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000001; ++ *((unsigned long *)&__m256i_op0[1]) = 0x0011001100110011; + *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000101; + *((unsigned long *)&__m256i_op1[3]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op1[2]) = 0x67eee33567eee435; +@@ -198,35 +179,16 @@ main () + *((unsigned long *)&__m256i_op2[2]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op2[1]) = 0x00000000ffffffff; + *((unsigned long *)&__m256i_op2[0]) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result[3]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_result[2]) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result[1]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_result[0]) = 0xffffffffffffffff; + __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_op0[3]) = 0x0022002200000000; + *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; +- __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); +- ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); +- +- *((unsigned long *)&__m256i_op0[3]) = 0xffffffff80000000; +- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[1]) = 0xffffffff80000000; ++ *((unsigned long 
*)&__m256i_op0[1]) = 0x001f001f00000000; + *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op1[3]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op1[2]) = 0xffffffffffffffff; +@@ -243,10 +205,10 @@ main () + __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op0[3]) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_op0[2]) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_op0[1]) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_op0[0]) = 0xffffffffffffffff; ++ *((unsigned long *)&__m256i_op0[3]) = 0x0011001100110011; ++ *((unsigned long *)&__m256i_op0[2]) = 0x0011001100110011; ++ *((unsigned long *)&__m256i_op0[1]) = 0x0011001100110011; ++ *((unsigned long *)&__m256i_op0[0]) = 0x0011001100110011; + *((unsigned long *)&__m256i_op1[3]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op1[2]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op1[1]) = 0xffffffffffffffff; +@@ -255,17 +217,17 @@ main () + *((unsigned long *)&__m256i_op2[2]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op2[1]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op2[0]) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result[3]) = 0xffffffffffffffff; ++ *((unsigned long *)&__m256i_result[2]) = 0xffffffffffffffff; ++ *((unsigned long *)&__m256i_result[1]) = 0xffffffffffffffff; ++ *((unsigned long *)&__m256i_result[0]) = 0xffffffffffffffff; + __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op0[3]) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_op0[2]) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_op0[1]) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_op0[0]) = 0xffffffffffffffff; ++ *((unsigned long *)&__m256i_op0[3]) = 0x003f003f003f003f; ++ *((unsigned long *)&__m256i_op0[2]) = 0x003f003f003f003f; ++ *((unsigned long *)&__m256i_op0[1]) = 0x003f003f003f003f; ++ *((unsigned long *)&__m256i_op0[0]) = 0x003f003f003f003f; + *((unsigned long *)&__m256i_op1[3]) = 0xefdfefdf00000000; + *((unsigned long *)&__m256i_op1[2]) = 0xefdfefdfefdfefdf; + *((unsigned long *)&__m256i_op1[1]) = 0xefdfefdf00000000; +@@ -274,36 +236,17 @@ main () + *((unsigned long *)&__m256i_op2[2]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op2[1]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op2[0]) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; +- __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); +- ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); +- +- *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; +- *((unsigned 
long *)&__m256i_op1[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result[3]) = 0xefdfefdfefdfefdf; ++ *((unsigned long *)&__m256i_result[2]) = 0xefdfefdfefdfefdf; ++ *((unsigned long *)&__m256i_result[1]) = 0xefdfefdfefdfefdf; ++ *((unsigned long *)&__m256i_result[0]) = 0xefdfefdfefdfefdf; + __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op0[3]) = 0x7575ffff75757595; +- *((unsigned long *)&__m256i_op0[2]) = 0x7575ffff7575f575; +- *((unsigned long *)&__m256i_op0[1]) = 0x7575ffff75757595; +- *((unsigned long *)&__m256i_op0[0]) = 0x7575ffff7575f575; ++ *((unsigned long *)&__m256i_op0[3]) = 0x0035000000350005; ++ *((unsigned long *)&__m256i_op0[2]) = 0x0035000000350015; ++ *((unsigned long *)&__m256i_op0[1]) = 0x0035000000350025; ++ *((unsigned long *)&__m256i_op0[0]) = 0x0035000000350035; + *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000003; + *((unsigned long *)&__m256i_op1[2]) = 0x0000000000010001; + *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000003; +@@ -312,10 +255,10 @@ main () + *((unsigned long *)&__m256i_op2[2]) = 0x7575757575757575; + *((unsigned long *)&__m256i_op2[1]) = 0x7575757575757575; + *((unsigned long *)&__m256i_op2[0]) = 0x7575757575757575; +- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result[3]) = 0x7575757575757575; ++ *((unsigned long *)&__m256i_result[2]) = 0x7575757575757575; ++ *((unsigned long *)&__m256i_result[1]) = 0x7575757575757575; ++ *((unsigned long *)&__m256i_result[0]) = 0x7575757575757575; + __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +@@ -357,29 +300,10 @@ main () + __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; +- *((unsigned long 
*)&__m256i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; +- __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); +- ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); +- +- *((unsigned long *)&__m256i_op0[3]) = 0x000000000000fffe; +- *((unsigned long *)&__m256i_op0[2]) = 0x00000000000000f0; +- *((unsigned long *)&__m256i_op0[1]) = 0x000000000000fffe; +- *((unsigned long *)&__m256i_op0[0]) = 0x00000000000000f0; ++ *((unsigned long *)&__m256i_op0[3]) = 0x000000000000003e; ++ *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000010; ++ *((unsigned long *)&__m256i_op0[1]) = 0x000000000000003e; ++ *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000010; + *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; +@@ -389,16 +313,16 @@ main () + *((unsigned long *)&__m256i_op2[1]) = 0x8000000000000000; + *((unsigned long *)&__m256i_op2[0]) = 0x000000ffff88ff88; + *((unsigned long *)&__m256i_result[3]) = 0xff88ff88ff880000; +- *((unsigned long *)&__m256i_result[2]) = 0xff88ff88ff880000; ++ *((unsigned long *)&__m256i_result[2]) = 0xff88ff88ff88ff88; + *((unsigned long *)&__m256i_result[1]) = 0xff88ff88ff880000; +- *((unsigned long *)&__m256i_result[0]) = 0xff88ff88ff880000; ++ *((unsigned long *)&__m256i_result[0]) = 0xff88ff88ff88ff88; + __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op0[3]) = 0x000000010000ffe1; +- *((unsigned long *)&__m256i_op0[2]) = 0x0000000101001e18; +- *((unsigned long *)&__m256i_op0[1]) = 0x000000010000ffe1; +- *((unsigned long *)&__m256i_op0[0]) = 0x0000000101001e18; ++ *((unsigned long *)&__m256i_op0[3]) = 0x0000000100000011; ++ *((unsigned long *)&__m256i_op0[2]) = 0x0000000100000018; ++ *((unsigned long *)&__m256i_op0[1]) = 0x0000000100000001; ++ *((unsigned long *)&__m256i_op0[0]) = 0x0000000100000008; + *((unsigned long *)&__m256i_op1[3]) = 0x98111cca98111cca; + *((unsigned long *)&__m256i_op1[2]) = 0x98111cca98111cca; + *((unsigned long *)&__m256i_op1[1]) = 0x98111cca98111cca; +@@ -407,17 +331,17 @@ main () + *((unsigned long *)&__m256i_op2[2]) = 0x0000000101001e18; + *((unsigned long *)&__m256i_op2[1]) = 0x000000010000ffe1; + *((unsigned long *)&__m256i_op2[0]) = 0x0000000101001e18; +- *((unsigned long *)&__m256i_result[3]) = 0x0000000100000000; ++ *((unsigned long *)&__m256i_result[3]) = 0x0000000100000001; + *((unsigned long *)&__m256i_result[2]) = 0x0000000101001e18; +- *((unsigned long *)&__m256i_result[1]) = 0x0000000100000000; ++ *((unsigned long *)&__m256i_result[1]) = 0x0000000100000001; + *((unsigned long *)&__m256i_result[0]) = 0x0000000101001e18; + __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op0[3]) = 0x0001000100010001; +- *((unsigned long *)&__m256i_op0[2]) = 0x80008000b3e8fef1; +- *((unsigned long *)&__m256i_op0[1]) = 0x0001000100010001; +- *((unsigned long *)&__m256i_op0[0]) = 0x80008000802ea100; ++ *((unsigned long *)&__m256i_op0[3]) = 0x000000010000001a; ++ *((unsigned long *)&__m256i_op0[2]) = 0x0000001100000001; ++ *((unsigned long *)&__m256i_op0[1]) = 0x0000002100000010; ++ *((unsigned long *)&__m256i_op0[0]) = 0x000000310000001f; + *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op1[2]) = 
0x0000000000000000; + *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; +@@ -426,17 +350,17 @@ main () + *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000002; + *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000001; + *((unsigned long *)&__m256i_op2[0]) = 0x00000000012e2110; +- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result[3]) = 0x0000000000000001; + *((unsigned long *)&__m256i_result[2]) = 0x0000000200000000; +- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[0]) = 0x012e2110012e2110; ++ *((unsigned long *)&__m256i_result[1]) = 0x00000000012e2110; ++ *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; + __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op0[3]) = 0x0000000082a54290; +- *((unsigned long *)&__m256i_op0[2]) = 0x00000000028aa700; +- *((unsigned long *)&__m256i_op0[1]) = 0x0000000082a54290; +- *((unsigned long *)&__m256i_op0[0]) = 0x0000000002a54287; ++ *((unsigned long *)&__m256i_op0[3]) = 0x0000002f00000000; ++ *((unsigned long *)&__m256i_op0[2]) = 0x0000001a00000000; ++ *((unsigned long *)&__m256i_op0[1]) = 0x000000010000001c; ++ *((unsigned long *)&__m256i_op0[0]) = 0x0000000e0000000c; + *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op1[2]) = 0x00000000002a542a; + *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; +@@ -447,8 +371,8 @@ main () + *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result[1]) = 0x00000000002a542a; ++ *((unsigned long *)&__m256i_result[0]) = 0x00000000002a542a; + __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +@@ -471,10 +395,10 @@ main () + __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op0[3]) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_op0[2]) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_op0[1]) = 0xffffffffffffffff; +- *((unsigned long *)&__m256i_op0[0]) = 0xffffffffffffffff; ++ *((unsigned long *)&__m256i_op0[3]) = 0x0000000100000031; ++ *((unsigned long *)&__m256i_op0[2]) = 0x0000000100000031; ++ *((unsigned long *)&__m256i_op0[1]) = 0x0000000100000031; ++ *((unsigned long *)&__m256i_op0[0]) = 0x0000000100000031; + *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; +@@ -490,10 +414,10 @@ main () + __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op0[3]) = 0x0001000100010001; +- *((unsigned long *)&__m256i_op0[2]) = 0x0001000100010001; +- *((unsigned long *)&__m256i_op0[1]) = 0x0001000100010001; +- *((unsigned long *)&__m256i_op0[0]) = 0x0001000100010001; ++ *((unsigned long *)&__m256i_op0[3]) = 0x0000000200000001; ++ *((unsigned long *)&__m256i_op0[2]) = 0x0000000400000003; ++ *((unsigned long *)&__m256i_op0[1]) = 0x0000000600000005; 
++ *((unsigned long *)&__m256i_op0[0]) = 0x0000000800000007; + *((unsigned long *)&__m256i_op1[3]) = 0x000000007fc00000; + *((unsigned long *)&__m256i_op1[2]) = 0x000000007fc00000; + *((unsigned long *)&__m256i_op1[1]) = 0x000000007fc00000; +@@ -503,7 +427,7 @@ main () + *((unsigned long *)&__m256i_op2[1]) = 0xdfffffffdfffffff; + *((unsigned long *)&__m256i_op2[0]) = 0x8000000080000000; + *((unsigned long *)&__m256i_result[3]) = 0x8000000080000000; +- *((unsigned long *)&__m256i_result[2]) = 0x8000000080000000; ++ *((unsigned long *)&__m256i_result[2]) = 0x7fc00000dfffffff; + *((unsigned long *)&__m256i_result[1]) = 0x8000000080000000; + *((unsigned long *)&__m256i_result[0]) = 0x8000000080000000; + __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); +@@ -529,9 +453,9 @@ main () + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + + *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[2]) = 0x0001000104000200; ++ *((unsigned long *)&__m256i_op0[2]) = 0x0000002000000030; + *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[0]) = 0x0001000104000200; ++ *((unsigned long *)&__m256i_op0[0]) = 0x0000001000000000; + *((unsigned long *)&__m256i_op1[3]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op1[2]) = 0xffff0000ffff0000; + *((unsigned long *)&__m256i_op1[1]) = 0xffffffffffffffff; +@@ -585,10 +509,10 @@ main () + __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op0[3]) = 0x0000fffffe01fe52; +- *((unsigned long *)&__m256i_op0[2]) = 0x00000000ff01ff02; +- *((unsigned long *)&__m256i_op0[1]) = 0x0000fffffe01fe52; +- *((unsigned long *)&__m256i_op0[0]) = 0x00000000ff01ff02; ++ *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000001; ++ *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000002; ++ *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000003; + *((unsigned long *)&__m256i_op1[3]) = 0x0000800000000000; + *((unsigned long *)&__m256i_op1[2]) = 0x0000000080008001; + *((unsigned long *)&__m256i_op1[1]) = 0x0000800000000000; +@@ -597,36 +521,17 @@ main () + *((unsigned long *)&__m256i_op2[2]) = 0x000000000000ffff; + *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op2[0]) = 0x000000000000ffff; +- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[2]) = 0x0000000080008001; +- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[0]) = 0x0000000080008001; +- __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2); +- ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); +- +- *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[0]) = 
0x0000000000000000; +- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result[3]) = 0x000000000000ffff; + *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result[1]) = 0x0000000080008001; ++ *((unsigned long *)&__m256i_result[0]) = 0x0000800000000000; + __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + + *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000011; ++ *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000022; ++ *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000033; + *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; +@@ -642,44 +547,6 @@ main () + __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; +- __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2); +- ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); +- +- *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[2]) = 0x0008000000000000; +- *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op1[0]) = 0x0008000000000000; +- *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; +- __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2); +- 
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); +- + *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; +@@ -700,9 +567,9 @@ main () + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + + *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[2]) = 0x0000002000000000; +- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[0]) = 0x0000002000000000; ++ *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000010; ++ *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000020; ++ *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000030; + *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; +@@ -718,10 +585,10 @@ main () + __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op0[3]) = 0xfffeb6839ffffd80; +- *((unsigned long *)&__m256i_op0[2]) = 0xfffeb8649d0d6250; +- *((unsigned long *)&__m256i_op0[1]) = 0xfffeb6839ffffd80; +- *((unsigned long *)&__m256i_op0[0]) = 0xfffeb8649d0d6250; ++ *((unsigned long *)&__m256i_op0[3]) = 0x000000000000000a; ++ *((unsigned long *)&__m256i_op0[2]) = 0x000000000000001b; ++ *((unsigned long *)&__m256i_op0[1]) = 0x000000000000002c; ++ *((unsigned long *)&__m256i_op0[0]) = 0x000000000000003d; + *((unsigned long *)&__m256i_op1[3]) = 0xfffeb6839ffffd80; + *((unsigned long *)&__m256i_op1[2]) = 0xfffe97c020010001; + *((unsigned long *)&__m256i_op1[1]) = 0xfffeb6839ffffd80; +@@ -730,17 +597,17 @@ main () + *((unsigned long *)&__m256i_op2[2]) = 0xfffe97c020010001; + *((unsigned long *)&__m256i_op2[1]) = 0xfffeb6839ffffd80; + *((unsigned long *)&__m256i_op2[0]) = 0xfffe97c020010001; +- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result[3]) = 0xfffe97c020010001; ++ *((unsigned long *)&__m256i_result[2]) = 0xfffeb6839ffffd80; ++ *((unsigned long *)&__m256i_result[1]) = 0xfffe97c020010001; ++ *((unsigned long *)&__m256i_result[0]) = 0xfffeb6839ffffd80; + __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +- *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_op0[3]) = 0x000000000000001a; ++ *((unsigned long *)&__m256i_op0[2]) = 0x000000000000001b; ++ *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000002; ++ *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000007; + *((unsigned long *)&__m256i_op1[3]) = 0x0000000000010001; + *((unsigned long *)&__m256i_op1[2]) = 0x0000000000010001; + *((unsigned long *)&__m256i_op1[1]) = 0x0000000000010001; +@@ -749,10 +616,10 @@ main () + *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; 
+- *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result[3]) = 0x0000000000010001; ++ *((unsigned long *)&__m256i_result[2]) = 0x0000000000010001; ++ *((unsigned long *)&__m256i_result[1]) = 0x0000000000010001; ++ *((unsigned long *)&__m256i_result[0]) = 0x0000000000010001; + __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c +index 8153964cf1d..f3b800f8804 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c +@@ -20,7 +20,7 @@ main () + *((unsigned long *)&__m128i_op1[1]) = 0x0000000401000001; + *((unsigned long *)&__m128i_op1[0]) = 0x0001000100000004; + *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op2[0]) = 0x00000000007f0000; ++ *((unsigned long *)&__m128i_op2[0]) = 0x00000000003f0000; + *((unsigned long *)&__m128i_result[1]) = 0x0404040404040404; + *((unsigned long *)&__m128i_result[0]) = 0x0404040404000404; + __m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2); +@@ -31,7 +31,7 @@ main () + *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff; + *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op2[0]) = 0xffffffff00000000; ++ *((unsigned long *)&__m128i_op2[0]) = 0x3f2f1f0f00000000; + *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; + *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; + __m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2); +@@ -63,10 +63,10 @@ main () + *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff; + *((unsigned long *)&__m128i_op1[1]) = 0x52525252adadadad; + *((unsigned long *)&__m128i_op1[0]) = 0x52525252adadadad; +- *((unsigned long *)&__m128i_op2[1]) = 0x800000007fffffff; +- *((unsigned long *)&__m128i_op2[0]) = 0x800000007fffffff; +- *((unsigned long *)&__m128i_result[1]) = 0x00adadad00000000; +- *((unsigned long *)&__m128i_result[0]) = 0x00adadad00000000; ++ *((unsigned long *)&__m128i_op2[1]) = 0x2000000004030201; ++ *((unsigned long *)&__m128i_op2[0]) = 0x2000000014131211; ++ *((unsigned long *)&__m128i_result[1]) = 0xadadadad52adadad; ++ *((unsigned long *)&__m128i_result[0]) = 0xadadadadffffffff; + __m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +@@ -96,10 +96,10 @@ main () + *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000100; + *((unsigned long *)&__m128i_op1[1]) = 0x04040403fafafafc; + *((unsigned long *)&__m128i_op1[0]) = 0x000000000000ff80; +- *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result[1]) = 0x8080808080808080; +- *((unsigned long *)&__m128i_result[0]) = 0x8080808080808080; ++ *((unsigned long *)&__m128i_op2[1]) = 0x00101a1b1c1d1e1f; ++ *((unsigned long *)&__m128i_op2[0]) = 0x0807060504030201; ++ *((unsigned long *)&__m128i_result[1]) = 0x8000020202000000; ++ *((unsigned long *)&__m128i_result[0]) = 0xfc000000000000ff; + __m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2); + 
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +@@ -118,10 +118,10 @@ main () + *((unsigned long *)&__m128i_op0[0]) = 0xffd7ff8dffa4ff7a; + *((unsigned long *)&__m128i_op1[1]) = 0x34947b4b11684f92; + *((unsigned long *)&__m128i_op1[0]) = 0xee297a731e5c5f86; +- *((unsigned long *)&__m128i_op2[1]) = 0x7fffffffffffffff; +- *((unsigned long *)&__m128i_op2[0]) = 0xffc0000000000000; +- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result[0]) = 0x0000868686868686; ++ *((unsigned long *)&__m128i_op2[1]) = 0x1f0710301a2b332d; ++ *((unsigned long *)&__m128i_op2[0]) = 0x1f20000000000000; ++ *((unsigned long *)&__m128i_result[1]) = 0xffee7a7a9811ff7b; ++ *((unsigned long *)&__m128i_result[0]) = 0xff86868686868686; + __m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +@@ -136,19 +136,19 @@ main () + __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +- *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff; +- *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff; ++ *((unsigned long *)&__m128i_op0[1]) = 0x001f002f003f000f; ++ *((unsigned long *)&__m128i_op0[0]) = 0x001f002f003f000f; + *((unsigned long *)&__m128i_op1[1]) = 0x7fffffffffffffff; + *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result[1]) = 0x7fff7fff7fff7fff; ++ *((unsigned long *)&__m128i_result[0]) = 0x7fff7fff7fff7fff; + __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +- *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_op0[1]) = 0x000100040010001f; ++ *((unsigned long *)&__m128i_op0[0]) = 0x0002000300110012; + *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op1[0]) = 0x000000002bfd9461; + *((unsigned long *)&__m128i_op2[1]) = 0x00007fff00007fff; +@@ -169,74 +169,41 @@ main () + __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +- *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff; +- *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; +- __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2); +- ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +- +- *((unsigned long *)&__m128i_op0[1]) = 0x000300037ff000ff; +- *((unsigned long *)&__m128i_op0[0]) = 0x0003000300a10003; ++ *((unsigned long *)&__m128i_op0[1]) = 0x000300030000001f; ++ *((unsigned long *)&__m128i_op0[0]) = 0x0003000300000003; + *((unsigned long *)&__m128i_op1[1]) = 0x000300037ff000ff; + *((unsigned long *)&__m128i_op1[0]) = 0x0003000300a10003; + *((unsigned long *)&__m128i_op2[1]) = 0x000000007ff000ff; + 
*((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000003; + *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; + __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +- *((unsigned long *)&__m128i_op0[1]) = 0x0909000009090000; +- *((unsigned long *)&__m128i_op0[0]) = 0x0909000009090000; ++ *((unsigned long *)&__m128i_op0[1]) = 0x0019000000090000; ++ *((unsigned long *)&__m128i_op0[0]) = 0x0019000000090000; + *((unsigned long *)&__m128i_op1[1]) = 0x0909000009090000; + *((unsigned long *)&__m128i_op1[0]) = 0x0909000009090000; + *((unsigned long *)&__m128i_op2[1]) = 0x002a05a2f059094a; + *((unsigned long *)&__m128i_op2[0]) = 0x05ad3ba576eae048; +- *((unsigned long *)&__m128i_result[1]) = 0x0909e0480909e048; +- *((unsigned long *)&__m128i_result[0]) = 0x0909e0480909e048; +- __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2); +- ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +- +- *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result[1]) = 0x909e0480909e048; ++ *((unsigned long *)&__m128i_result[0]) = 0x909e0480909e048; + __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +- *((unsigned long *)&__m128i_op0[1]) = 0x00000000000000c0; +- *((unsigned long *)&__m128i_op0[0]) = 0x00000001ffffff29; ++ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000030; ++ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000029; + *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op2[1]) = 0x00000000000000c0; + *((unsigned long *)&__m128i_op2[0]) = 0x00000001ffffff29; +- *((unsigned long *)&__m128i_result[1]) = 0xffffff2900000000; ++ *((unsigned long *)&__m128i_result[1]) = 0xffffff29ffffff29; + *((unsigned long *)&__m128i_result[0]) = 0x0000000100000001; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + + *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; +- __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); +- ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +- +- *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op0[0]) = 0x00000000000000ff; ++ *((unsigned long *)&__m128i_op0[0]) = 0x000000000000001f; + *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; + *((unsigned 
long *)&__m128i_op1[0]) = 0x1f54e0ab00000000; + *((unsigned long *)&__m128i_op2[1]) = 0x0101010101010101; +@@ -246,19 +213,8 @@ main () + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +- *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; +- __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); +- ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +- +- *((unsigned long *)&__m128i_op0[1]) = 0x0000000000007fff; +- *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_op0[1]) = 0x0000002f0000002f; ++ *((unsigned long *)&__m128i_op0[0]) = 0x0000001000000000; + *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op2[1]) = 0x0000000020000020; +@@ -279,30 +235,30 @@ main () + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +- *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op0[0]) = 0x0000000004870ba0; ++ *((unsigned long *)&__m128i_op0[1]) = 0x0000000900000010; ++ *((unsigned long *)&__m128i_op0[0]) = 0x0000002000000003; + *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op1[0]) = 0x0000001000000010; + *((unsigned long *)&__m128i_op2[1]) = 0x8000000100000000; + *((unsigned long *)&__m128i_op2[0]) = 0x8000000000000103; + *((unsigned long *)&__m128i_result[1]) = 0x0000010300000103; +- *((unsigned long *)&__m128i_result[0]) = 0x0000010300000000; ++ *((unsigned long *)&__m128i_result[0]) = 0x0000010380000001; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +- *((unsigned long *)&__m128i_op0[1]) = 0x000000ff0000857a; +- *((unsigned long *)&__m128i_op0[0]) = 0x05fafe0101fe000e; ++ *((unsigned long *)&__m128i_op0[1]) = 0x0000001000000007; ++ *((unsigned long *)&__m128i_op0[0]) = 0x0000002000000001; + *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op2[1]) = 0xffffffffffffffff; + *((unsigned long *)&__m128i_op2[0]) = 0xffffffffffffffff; + *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result[0]) = 0xffffffff00000000; ++ *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +- *((unsigned long *)&__m128i_op0[1]) = 0xada4808924882588; +- *((unsigned long *)&__m128i_op0[0]) = 0xacad25090caca5a4; ++ *((unsigned long *)&__m128i_op0[1]) = 0x0000001a0000001b; ++ *((unsigned long *)&__m128i_op0[0]) = 0x0000000a0000000b; + *((unsigned long *)&__m128i_op1[1]) = 0x021b7d24c9678a35; + *((unsigned long *)&__m128i_op1[0]) = 0x030298a6a1030a49; + *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; +@@ -312,8 +268,8 @@ main () + __m128i_out = 
__lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +- *((unsigned long *)&__m128i_op0[1]) = 0x00000000ffffffff; +- *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff; ++ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000003; ++ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000013; + *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; +@@ -323,14 +279,14 @@ main () + __m128i_out = __lsx_vshuf_d (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +- *((unsigned long *)&__m128i_op0[1]) = 0xdfa6e0c6d46cdc13; +- *((unsigned long *)&__m128i_op0[0]) = 0x21fc7081ec69b5f2; ++ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000001; ++ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000011; + *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op1[0]) = 0x000000002c002400; + *((unsigned long *)&__m128i_op2[1]) = 0xffffb96bffff57c9; + *((unsigned long *)&__m128i_op2[0]) = 0xffff6080ffff4417; +- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result[1]) = 0xffffb96bffff57c9; ++ *((unsigned long *)&__m128i_result[0]) = 0xffffb96bffff57c9; + __m128i_out = __lsx_vshuf_d (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +@@ -345,8 +301,8 @@ main () + __m128i_out = __lsx_vshuf_d (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +- *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000020; ++ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000010; + *((unsigned long *)&__m128i_op1[1]) = 0x0000000000002000; + *((unsigned long *)&__m128i_op1[0]) = 0xf0003000f0003000; + *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; +@@ -356,30 +312,30 @@ main () + __m128i_out = __lsx_vshuf_d (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +- *((unsigned long *)&__m128i_op0[1]) = 0x021b7d2449678a35; +- *((unsigned long *)&__m128i_op0[0]) = 0x030298a621030a49; ++ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000007; ++ *((unsigned long *)&__m128i_op0[0]) = 0x000000000000001a; + *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op1[0]) = 0x7fff7fff7fff7fff; + *((unsigned long *)&__m128i_op2[1]) = 0x021b7d24c9678a35; + *((unsigned long *)&__m128i_op2[0]) = 0x030298a6a1030a49; +- *((unsigned long *)&__m128i_result[1]) = 0x021b7d24c9678a35; +- *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result[0]) = 0x7fff7fff7fff7fff; + __m128i_out = __lsx_vshuf_d (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +- *((unsigned long *)&__m128i_op0[1]) = 0x7f7f00007f7f0000; +- *((unsigned long *)&__m128i_op0[0]) = 0x7f7f80807f7f8080; ++ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000002; ++ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000001; + *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op1[0]) = 0x0000fffe0000fffe; + *((unsigned long 
*)&__m128i_op2[1]) = 0x7f8000007f800000; + *((unsigned long *)&__m128i_op2[0]) = 0x7f8000007f800000; +- *((unsigned long *)&__m128i_result[1]) = 0x7f8000007f800000; +- *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result[1]) = 0x0000fffe0000fffe; ++ *((unsigned long *)&__m128i_result[0]) = 0x7f8000007f800000; + __m128i_out = __lsx_vshuf_d (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +- *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff; +- *((unsigned long *)&__m128i_op0[0]) = 0xfffffffffff10000; ++ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000010; ++ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000020; + *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; +-- +2.33.0 + diff --git a/LoongArch-Use-finer-grained-DBAR-hints.patch b/LoongArch-Use-finer-grained-DBAR-hints.patch new file mode 100644 index 0000000..ad549c4 --- /dev/null +++ b/LoongArch-Use-finer-grained-DBAR-hints.patch @@ -0,0 +1,137 @@ +From 4a70bfbf686c2b6a1ecd83fe851de826c612c3e0 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Tue, 14 Nov 2023 05:32:38 +0800 +Subject: [PATCH] LoongArch: Use finer-grained DBAR hints + +LA664 defines DBAR hints 0x1 - 0x1f (except 0xf and 0x1f) as follows [1-2]: + +- Bit 4: kind of constraint (0: completion, 1: ordering) +- Bit 3: barrier for previous read (0: true, 1: false) +- Bit 2: barrier for previous write (0: true, 1: false) +- Bit 1: barrier for succeeding read (0: true, 1: false) +- Bit 0: barrier for succeeding write (0: true, 1: false) + +LLVM has already utilized them for different memory orders [3]: + +- Bit 4 is always set to one because it's only intended to be zero for + things like MMIO devices, which are out of the scope of memory orders. +- An acquire barrier is used to implement acquire loads like + + ld.d $a1, $t0, 0 + dbar acquire_hint + + where the load operation (ld.d) should not be reordered with any load + or store operation after the acquire load. To accomplish this + constraint, we need to prevent the load operation from being reordered + after the barrier, and also prevent any following load/store operation + from being reordered before the barrier. Thus bits 0, 1, and 3 must + be zero, and bit 2 can be one, so acquire_hint should be 0b10100. +- An release barrier is used to implement release stores like + + dbar release_hint + st.d $a1, $t0, 0 + + where the store operation (st.d) should not be reordered with any load + or store operation before the release store. So we need to prevent + the store operation from being reordered before the barrier, and also + prevent any preceding load/store operation from being reordered after + the barrier. So bits 0, 2, 3 must be zero, and bit 1 can be one. So + release_hint should be 0b10010. + +A similar mapping has been utilized for RISC-V GCC [4], LoongArch Linux +kernel [1], and LoongArch LLVM [3]. So the mapping should be correct. +And I've also bootstrapped & regtested GCC on a LA664 with this patch. + +The LoongArch CPUs should treat "unknown" hints as dbar 0, so we can +unconditionally emit the new hints without a compiler switch. 
+ +[1]: https://git.kernel.org/torvalds/c/e031a5f3f1ed +[2]: https://github.com/loongson-community/docs/pull/12 +[3]: https://github.com/llvm/llvm-project/pull/68787 +[4]: https://gcc.gnu.org/r14-406 + +gcc/ChangeLog: + + * config/loongarch/sync.md (mem_thread_fence): Remove redundant + check. + (mem_thread_fence_1): Emit finer-grained DBAR hints for + different memory models, instead of 0. + +Signed-off-by: ticat_fp +--- + gcc/config/loongarch/sync.md | 51 +++++++++++++++++++++++++++++------- + 1 file changed, 42 insertions(+), 9 deletions(-) + +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index 9924d522bcd..1ad0c63e0d9 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -50,23 +50,56 @@ + [(match_operand:SI 0 "const_int_operand" "")] ;; model + "" + { +- if (INTVAL (operands[0]) != MEMMODEL_RELAXED) +- { +- rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); +- MEM_VOLATILE_P (mem) = 1; +- emit_insn (gen_mem_thread_fence_1 (mem, operands[0])); +- } ++ rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (mem) = 1; ++ emit_insn (gen_mem_thread_fence_1 (mem, operands[0])); ++ + DONE; + }) + +-;; Until the LoongArch memory model (hence its mapping from C++) is finalized, +-;; conservatively emit a full FENCE. ++;; DBAR hint encoding for LA664 and later micro-architectures, paraphrased from ++;; the Linux patch revealing it [1]: ++;; ++;; - Bit 4: kind of constraint (0: completion, 1: ordering) ++;; - Bit 3: barrier for previous read (0: true, 1: false) ++;; - Bit 2: barrier for previous write (0: true, 1: false) ++;; - Bit 1: barrier for succeeding read (0: true, 1: false) ++;; - Bit 0: barrier for succeeding write (0: true, 1: false) ++;; ++;; [1]: https://git.kernel.org/torvalds/c/e031a5f3f1ed ++;; ++;; Implementations without support for the finer-granularity hints simply treat ++;; all as the full barrier (DBAR 0), so we can unconditionally start emiting the ++;; more precise hints right away. + (define_insn "mem_thread_fence_1" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER)) + (match_operand:SI 1 "const_int_operand" "")] ;; model + "" +- "dbar\t0") ++ { ++ enum memmodel model = memmodel_base (INTVAL (operands[1])); ++ ++ switch (model) ++ { ++ case MEMMODEL_ACQUIRE: ++ return "dbar\t0b10100"; ++ case MEMMODEL_RELEASE: ++ return "dbar\t0b10010"; ++ case MEMMODEL_ACQ_REL: ++ case MEMMODEL_SEQ_CST: ++ return "dbar\t0b10000"; ++ default: ++ /* GCC internal: "For the '__ATOMIC_RELAXED' model no instructions ++ need to be issued and this expansion is not invoked." ++ ++ __atomic builtins doc: "Consume is implemented using the ++ stronger acquire memory order because of a deficiency in C++11's ++ semantics." See PR 59448 and get_memmodel in builtins.cc. ++ ++ Other values should not be returned by memmodel_base. */ ++ gcc_unreachable (); ++ } ++ }) + + ;; Atomic memory operations. 
+ +-- +2.33.0 + diff --git a/gcc.spec b/gcc.spec index 22d2c85..3da84ce 100644 --- a/gcc.spec +++ b/gcc.spec @@ -1,4 +1,4 @@ -%define anolis_release 3 +%define anolis_release 4 %global DATE 20221121 %global gitrev b3f5a0d53b84ed27cf00cfa2b9c3e2c78935c07d @@ -245,6 +245,10 @@ Patch3123: LoongArch-Change-the-value-of-branch_cost-from-2-to-.patch Patch3124: libsanitizer-add-LoongArch-support.patch Patch3125: LoongArch-fix-error-building.patch Patch3126: libjccjit-do-not-link-objects-contained-same-element.patch +Patch3127: LoongArch-Use-finer-grained-DBAR-hints.patch +Patch3128: LoongArch-Add-LA664-support.patch +Patch3129: LoongArch-Fix-internal-error-running-gcc-march-nativ.patch +Patch3130: LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch # Part 5000 ~ 5999 Patch5001: HYGON-0001-arch-support-for-hygon.patch @@ -892,6 +896,10 @@ The %{name}-doc package contains documentation files for %{name}. %patch3124 -p1 %patch3125 -p1 %patch3126 -p1 +%patch3127 -p1 +%patch3128 -p1 +%patch3129 -p1 +%patch3130 -p1 %endif %ifarch x86_64 %patch5001 -p1 @@ -1883,6 +1891,8 @@ end %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-def.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-tune.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-driver.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/lsxintrin.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/lasxintrin.h %endif %if %{build_libasan} %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/sanitizer @@ -2358,6 +2368,9 @@ end %changelog +* Tue Apr 23 2024 Peng Fan 12.3.0-4 +- LoongArch: add 3a6000 support. + * Mon Apr 08 2024 Zhaoling Bao 12.3.0-3 - Hygon: Add supported patch. -- Gitee
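
For reference, below is a minimal C sketch (not part of any of the patches above) of the fence idiom that the finer-grained DBAR mapping in LoongArch-Use-finer-grained-DBAR-hints.patch targets. The dbar hint values in the comments are taken directly from the patch text and assume a GCC build carrying that patch; they are illustrative expectations, not verified compiler output.

    #include <stdatomic.h>

    int shared;
    _Atomic int flag;

    /* Release side: publish the data, fence, then set the flag.  On a
       GCC carrying the patch this fence should expand to "dbar 0b10010"
       (release hint) instead of the old full-barrier "dbar 0".  */
    void writer (int v)
    {
      shared = v;
      atomic_thread_fence (memory_order_release);
      atomic_store_explicit (&flag, 1, memory_order_relaxed);
    }

    /* Acquire side: wait for the flag, fence, then read the data.  On a
       GCC carrying the patch this fence should expand to "dbar 0b10100"
       (acquire hint); a seq_cst or acq_rel fence would expand to
       "dbar 0b10000".  Cores without the finer-grained hints treat all
       of these as the full barrier "dbar 0", so the code stays correct
       on older micro-architectures.  */
    int reader (void)
    {
      while (!atomic_load_explicit (&flag, memory_order_relaxed))
        ;
      atomic_thread_fence (memory_order_acquire);
      return shared;
    }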