From 3ea471fec9d5305a657ab91947b8f89c1b5889b3 Mon Sep 17 00:00:00 2001 From: Jiang Yi Date: Tue, 7 Nov 2023 10:01:14 +0800 Subject: [PATCH] Backport the support for cluster scheduler level on Kunpeng Server Signed-off-by: Jiang Yi --- kernel.spec | 20 +- ...resent-clusters-of-CPUs-within-a-die.patch | 475 ++++++++++++++++++ ...r-scheduler-level-in-core-and-relate.patch | 264 ++++++++++ ...xport-cluster-attributes-only-if-an-.patch | 128 +++++ ...ology-Remove-unused-cpu_cluster_mask.patch | 49 ++ ...-Limit-span-of-cpu_clustergroup_mask.patch | 73 +++ ...ke-cluster-topology-span-at-least-SM.patch | 72 +++ ...u-cluster-domain-info-and-cpus_share.patch | 176 +++++++ ...cluster-before-scanning-LLC-in-wake-.patch | 224 +++++++++ ...-SDTL_SKIP-flag-to-skip-topology-lev.patch | 78 +++ ...a-new-register_sysctl_init-interface.patch | 196 ++++++++ ...rivers-base-arch_topology-Rebuild-th.patch | 87 ++++ ...rch-arm64-Rebuild-the-sched_domain-h.patch | 58 +++ ...dd-runtime-knob-sysctl_sched_cluster.patch | 237 +++++++++ ...ot-time-enabling-disabling-of-cluste.patch | 72 +++ ...isable-cluster-scheduling-by-default.patch | 38 ++ ...the-kernel-configuration-for-cluster.patch | 35 ++ series.conf | 16 + 18 files changed, 2297 insertions(+), 1 deletion(-) create mode 100644 patches/0117-topology-Represent-clusters-of-CPUs-within-a-die.patch create mode 100644 patches/0118-sched-Add-cluster-scheduler-level-in-core-and-relate.patch create mode 100644 patches/0119-topology-sysfs-export-cluster-attributes-only-if-an-.patch create mode 100644 patches/0120-topology-Remove-unused-cpu_cluster_mask.patch create mode 100644 patches/0121-arch_topology-Limit-span-of-cpu_clustergroup_mask.patch create mode 100644 patches/0122-arch_topology-Make-cluster-topology-span-at-least-SM.patch create mode 100644 patches/0123-sched-Add-per_cpu-cluster-domain-info-and-cpus_share.patch create mode 100644 patches/0124-sched-fair-Scan-cluster-before-scanning-LLC-in-wake-.patch create mode 100644 patches/0125-scheduler-Create-SDTL_SKIP-flag-to-skip-topology-lev.patch create mode 100644 patches/0126-sysctl-add-a-new-register_sysctl_init-interface.patch create mode 100644 patches/0127-sched-topology-drivers-base-arch_topology-Rebuild-th.patch create mode 100644 patches/0128-sched-topology-arch-arm64-Rebuild-the-sched_domain-h.patch create mode 100644 patches/0129-scheduler-Add-runtime-knob-sysctl_sched_cluster.patch create mode 100644 patches/0130-scheduler-Add-boot-time-enabling-disabling-of-cluste.patch create mode 100644 patches/0131-scheduler-Disable-cluster-scheduling-by-default.patch create mode 100644 patches/0132-sched-Open-the-kernel-configuration-for-cluster.patch diff --git a/kernel.spec b/kernel.spec index 5f736aaa..2453253a 100644 --- a/kernel.spec +++ b/kernel.spec @@ -32,7 +32,7 @@ Name: kernel Version: 4.19.90 -Release: %{hulkrelease}.0236 +Release: %{hulkrelease}.0237 Summary: Linux Kernel License: GPLv2 URL: http://www.kernel.org/ @@ -836,6 +836,24 @@ fi %changelog +* Thu Nov 9 2023 Jiang Yi - 4.19.90-2311.1.0.0237 +- sched:Open the kernel configuration for cluster. 
+- scheduler: Disable cluster scheduling by default +- scheduler: Add boot time enabling/disabling of cluster scheduling +- scheduler: Add runtime knob sysctl_sched_cluster +- sched/topology, arch/arm64: Rebuild the sched_domain hierarchy when the CPU capacity changes +- sched/topology, drivers/base/arch_topology: Rebuild the sched_domain hierarchy when capacities change +- sysctl: add a new register_sysctl_init() interface +- scheduler: Create SDTL_SKIP flag to skip topology level +- sched/fair: Scan cluster before scanning LLC in wake-up path +- sched: Add per_cpu cluster domain info and cpus_share_lowest_cache API +- arch_topology: Make cluster topology span at least SMT CPUs +- arch_topology: Limit span of cpu_clustergroup_mask() +- topology: Remove unused cpu_cluster_mask() +- topology/sysfs: export cluster attributes only if an architectures has support +- sched: Add cluster scheduler level in core and related Kconfig for ARM64 +- topology: Represent clusters of CPUs within a die + * Wed Nov 8 2023 Yu Liao - 4.19.90-2311.1.0.0236 - kernel.spec: skip check patches that from linux master or stable diff --git a/patches/0117-topology-Represent-clusters-of-CPUs-within-a-die.patch b/patches/0117-topology-Represent-clusters-of-CPUs-within-a-die.patch new file mode 100644 index 00000000..3e4d33ab --- /dev/null +++ b/patches/0117-topology-Represent-clusters-of-CPUs-within-a-die.patch @@ -0,0 +1,475 @@ +From c94a98a1de262778bb5902d55e10e88d3a89e251 Mon Sep 17 00:00:00 2001 +From: Jonathan Cameron +Date: Thu, 18 Nov 2021 20:43:35 +0800 +Subject: [PATCH 117/132] topology: Represent clusters of CPUs within a die + +mainline inclusion +from mainline-v5.16-rc1 +commit c5e22feffdd736cb02b98b0f5b375c8ebc858dd4 +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I4GEZS +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c5e22feffdd736cb02b98b0f5b375c8ebc858dd4 + +------------------------------------------------------------------------ + +Both ACPI and DT provide the ability to describe additional layers of +topology between that of individual cores and higher level constructs +such as the level at which the last level cache is shared. +In ACPI this can be represented in PPTT as a Processor Hierarchy +Node Structure [1] that is the parent of the CPU cores and in turn +has a parent Processor Hierarchy Nodes Structure representing +a higher level of topology. + +For example Kunpeng 920 has 6 or 8 clusters in each NUMA node, and each +cluster has 4 cpus. All clusters share L3 cache data, but each cluster +has local L3 tag. On the other hand, each clusters will share some +internal system bus. 
+ ++-----------------------------------+ +---------+ +| +------+ +------+ +--------------------------+ | +| | CPU0 | | cpu1 | | +-----------+ | | +| +------+ +------+ | | | | | +| +----+ L3 | | | +| +------+ +------+ cluster | | tag | | | +| | CPU2 | | CPU3 | | | | | | +| +------+ +------+ | +-----------+ | | +| | | | ++-----------------------------------+ | | ++-----------------------------------+ | | +| +------+ +------+ +--------------------------+ | +| | | | | | +-----------+ | | +| +------+ +------+ | | | | | +| | | L3 | | | +| +------+ +------+ +----+ tag | | | +| | | | | | | | | | +| +------+ +------+ | +-----------+ | | +| | | | ++-----------------------------------+ | L3 | + | data | ++-----------------------------------+ | | +| +------+ +------+ | +-----------+ | | +| | | | | | | | | | +| +------+ +------+ +----+ L3 | | | +| | | tag | | | +| +------+ +------+ | | | | | +| | | | | | +-----------+ | | +| +------+ +------+ +--------------------------+ | ++-----------------------------------| | | ++-----------------------------------| | | +| +------+ +------+ +--------------------------+ | +| | | | | | +-----------+ | | +| +------+ +------+ | | | | | +| +----+ L3 | | | +| +------+ +------+ | | tag | | | +| | | | | | | | | | +| +------+ +------+ | +-----------+ | | +| | | | ++-----------------------------------+ | | ++-----------------------------------+ | | +| +------+ +------+ +--------------------------+ | +| | | | | | +-----------+ | | +| +------+ +------+ | | | | | +| | | L3 | | | +| +------+ +------+ +---+ tag | | | +| | | | | | | | | | +| +------+ +------+ | +-----------+ | | +| | | | ++-----------------------------------+ | | ++-----------------------------------+ | | +| +------+ +------+ +--------------------------+ | +| | | | | | +-----------+ | | +| +------+ +------+ | | | | | +| | | L3 | | | +| +------+ +------+ +--+ tag | | | +| | | | | | | | | | +| +------+ +------+ | +-----------+ | | +| | +---------+ ++-----------------------------------+ + +That means spreading tasks among clusters will bring more bandwidth +while packing tasks within one cluster will lead to smaller cache +synchronization latency. So both kernel and userspace will have +a chance to leverage this topology to deploy tasks accordingly to +achieve either smaller cache latency within one cluster or an even +distribution of load among clusters for higher throughput. + +This patch exposes cluster topology to both kernel and userspace. +Libraried like hwloc will know cluster by cluster_cpus and related +sysfs attributes. PoC of HWLOC support at [2]. + +Note this patch only handle the ACPI case. + +Special consideration is needed for SMT processors, where it is +necessary to move 2 levels up the hierarchy from the leaf nodes +(thus skipping the processor core level). + +Note that arm64 / ACPI does not provide any means of identifying +a die level in the topology but that may be unrelate to the cluster +level. 
+ +[1] ACPI Specification 6.3 - section 5.2.29.1 processor hierarchy node + structure (Type 0) +[2] https://github.com/hisilicon/hwloc/tree/linux-cluster + +Signed-off-by: Jonathan Cameron +Signed-off-by: Tian Tao +Signed-off-by: Barry Song +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lore.kernel.org/r/20210924085104.44806-2-21cnbao@gmail.com +Signed-off-by: Yicong Yang +Reviewed-by: tao zeng +Signed-off-by: Zheng Zengkai + +Conflicts: + Documentation/ABI/stable/sysfs-devices-system-cpu + Documentation/admin-guide/cputopology.rst + drivers/base/arch_topology.c + drivers/base/topology.c + include/linux/arch_topology.h + include/linux/topology.h + +Signed-off-by: Jiang Yi +--- + Documentation/cputopology.txt | 26 +++++++++-- + arch/arm64/include/asm/topology.h | 5 +++ + arch/arm64/kernel/topology.c | 17 ++++++++ + drivers/acpi/pptt.c | 72 +++++++++++++++++++++++++++++++ + drivers/base/topology.c | 10 +++++ + include/linux/acpi.h | 5 +++ + include/linux/topology.h | 6 +++ + 7 files changed, 137 insertions(+), 4 deletions(-) + +diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt +index 2ff8a1e9a2db..acd55bf0c718 100644 +--- a/Documentation/cputopology.txt ++++ b/Documentation/cputopology.txt +@@ -18,6 +18,11 @@ die_id: + identifier (rather than the kernel's). The actual value is + architecture and platform dependent. + ++cluster_id: ++ the cluster ID of cpuX. Typically it is the hardware platform's ++ identifier (rather than the kernel's). The actual value is ++ architecture and platform dependent. ++ + core_id: + + the CPU core ID of cpuX. Typically it is the hardware platform's +@@ -36,6 +41,15 @@ drawer_id: + identifier (rather than the kernel's). The actual value is + architecture and platform dependent. + ++cluster_cpus: ++ ++ internal kernel map of CPUs within the same cluster ++ ++cluster_cpus_list: ++ ++ human-readable list of CPUs within the same cluster. ++ The format is like 0-3, 8-11, 14,17. ++ + thread_siblings: + + internal kernel map of cpuX's hardware threads within the same +@@ -88,11 +102,13 @@ these macros in include/asm-XXX/topology.h:: + + #define topology_physical_package_id(cpu) + #define topology_die_id(cpu) ++ #define topology_cluster_id(cpu) + #define topology_core_id(cpu) + #define topology_book_id(cpu) + #define topology_drawer_id(cpu) + #define topology_sibling_cpumask(cpu) + #define topology_core_cpumask(cpu) ++ #define topology_cluster_cpumask(cpu) + #define topology_book_cpumask(cpu) + #define topology_drawer_cpumask(cpu) + +@@ -107,10 +123,12 @@ not defined by include/asm-XXX/topology.h: + + 1) topology_physical_package_id: -1 + 2) topology_die_id: -1 +-3) topology_core_id: 0 +-4) topology_sibling_cpumask: just the given CPU +-5) topology_core_cpumask: just the given CPU +-6) topology_die_cpumask: just the given CPU ++3) topology_cluster_id: -1 ++4) topology_core_id: 0 ++5) topology_sibling_cpumask: just the given CPU ++6) topology_core_cpumask: just the given CPU ++7) topology_cluster_cpumask: just the given CPU ++8) topology_die_cpumask: just the given CPU + + For architectures that don't support books (CONFIG_SCHED_BOOK) there are no + default definitions for topology_book_id() and topology_book_cpumask(). 
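As a quick illustration of how the cluster attributes documented above are consumed, a small userspace reader can be used. This is an illustrative sketch only and is not part of the backported patch series; it assumes the standard /sys/devices/system/cpu/cpuX/topology/ location used by the existing cputopology interface, and on kernels or architectures without cluster support the files are simply absent and the reads fail gracefully.

  /* Minimal sketch: print the cluster topology attributes for CPU 0. */
  #include <stdio.h>
  #include <string.h>

  static void show(const char *attr)
  {
          char path[128], buf[256];
          FILE *f;

          /* Path assumed from the existing cputopology sysfs layout. */
          snprintf(path, sizeof(path),
                   "/sys/devices/system/cpu/cpu0/topology/%s", attr);
          f = fopen(path, "r");
          if (!f) {
                  printf("%-17s: <not exported>\n", attr);
                  return;
          }
          if (fgets(buf, sizeof(buf), f)) {
                  buf[strcspn(buf, "\n")] = '\0';
                  printf("%-17s: %s\n", attr, buf);
          }
          fclose(f);
  }

  int main(void)
  {
          show("cluster_id");
          show("cluster_cpus");
          show("cluster_cpus_list");
          return 0;
  }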
+diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h +index 49a0fee4f89b..164e26035653 100644 +--- a/arch/arm64/include/asm/topology.h ++++ b/arch/arm64/include/asm/topology.h +@@ -7,25 +7,30 @@ + struct cpu_topology { + int thread_id; + int core_id; ++ int cluster_id; + int package_id; + int llc_id; + cpumask_t thread_sibling; + cpumask_t core_sibling; ++ cpumask_t cluster_sibling; + cpumask_t llc_sibling; + }; + + extern struct cpu_topology cpu_topology[NR_CPUS]; + + #define topology_physical_package_id(cpu) (cpu_topology[cpu].package_id) ++#define topology_cluster_id(cpu) (cpu_topology[cpu].cluster_id) + #define topology_core_id(cpu) (cpu_topology[cpu].core_id) + #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) + #define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) ++#define topology_cluster_cpumask(cpu) (&cpu_topology[cpu].cluster_sibling) + #define topology_llc_cpumask(cpu) (&cpu_topology[cpu].llc_sibling) + + void init_cpu_topology(void); + void store_cpu_topology(unsigned int cpuid); + void remove_cpu_topology(unsigned int cpuid); + const struct cpumask *cpu_coregroup_mask(int cpu); ++const struct cpumask *cpu_clustergroup_mask(int cpu); + + #ifdef CONFIG_NUMA + +diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c +index bf937d334b81..e4effe6f3177 100644 +--- a/arch/arm64/kernel/topology.c ++++ b/arch/arm64/kernel/topology.c +@@ -230,6 +230,11 @@ const struct cpumask *cpu_coregroup_mask(int cpu) + return core_mask; + } + ++const struct cpumask *cpu_clustergroup_mask(int cpu) ++{ ++ return &cpu_topology[cpu].cluster_sibling; ++} ++ + static void update_siblings_masks(unsigned int cpuid) + { + struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; +@@ -247,6 +252,12 @@ static void update_siblings_masks(unsigned int cpuid) + if (cpuid_topo->package_id != cpu_topo->package_id) + continue; + ++ if (cpuid_topo->cluster_id == cpu_topo->cluster_id && ++ cpuid_topo->cluster_id != -1) { ++ cpumask_set_cpu(cpu, &cpuid_topo->cluster_sibling); ++ cpumask_set_cpu(cpuid, &cpu_topo->cluster_sibling); ++ } ++ + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + +@@ -312,6 +323,9 @@ static void clear_cpu_topology(int cpu) + cpumask_clear(&cpu_topo->llc_sibling); + cpumask_set_cpu(cpu, &cpu_topo->llc_sibling); + ++ cpumask_clear(&cpu_topo->cluster_sibling); ++ cpumask_set_cpu(cpu, &cpu_topo->cluster_sibling); ++ + cpumask_clear(&cpu_topo->core_sibling); + cpumask_set_cpu(cpu, &cpu_topo->core_sibling); + cpumask_clear(&cpu_topo->thread_sibling); +@@ -327,6 +341,7 @@ static void __init reset_cpu_topology(void) + + cpu_topo->thread_id = -1; + cpu_topo->core_id = 0; ++ cpu_topo->cluster_id = -1; + cpu_topo->package_id = -1; + cpu_topo->llc_id = -1; + +@@ -438,6 +453,8 @@ static int __init parse_acpi_topology(void) + cpu_topology[cpu].thread_id = -1; + cpu_topology[cpu].core_id = topology_id; + } ++ topology_id = find_acpi_cpu_topology_cluster(cpu); ++ cpu_topology[cpu].cluster_id = topology_id; + topology_id = find_acpi_cpu_topology_package(cpu); + cpu_topology[cpu].package_id = topology_id; + +diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c +index 879b9155b7b4..0f382545cf06 100644 +--- a/drivers/acpi/pptt.c ++++ b/drivers/acpi/pptt.c +@@ -617,6 +617,11 @@ static struct acpi_pptt_processor *acpi_find_processor_tag(struct acpi_table_hea + return cpu; + } + ++static void acpi_pptt_warn_missing(void) ++{ ++ pr_warn_once("No PPTT table found, CPU 
and cache topology may be inaccurate\n"); ++} ++ + /** + * topology_get_acpi_cpu_tag() - Find a unique topology value for a feature + * @table: Pointer to the head of the PPTT table +@@ -871,6 +876,73 @@ int find_acpi_cpu_topology_package(unsigned int cpu) + ACPI_PPTT_PHYSICAL_PACKAGE); + } + ++/** ++ * find_acpi_cpu_topology_cluster() - Determine a unique CPU cluster value ++ * @cpu: Kernel logical CPU number ++ * ++ * Determine a topology unique cluster ID for the given CPU/thread. ++ * This ID can then be used to group peers, which will have matching ids. ++ * ++ * The cluster, if present is the level of topology above CPUs. In a ++ * multi-thread CPU, it will be the level above the CPU, not the thread. ++ * It may not exist in single CPU systems. In simple multi-CPU systems, ++ * it may be equal to the package topology level. ++ * ++ * Return: -ENOENT if the PPTT doesn't exist, the CPU cannot be found ++ * or there is no toplogy level above the CPU.. ++ * Otherwise returns a value which represents the package for this CPU. ++ */ ++ ++int find_acpi_cpu_topology_cluster(unsigned int cpu) ++{ ++ struct acpi_table_header *table; ++ acpi_status status; ++ struct acpi_pptt_processor *cpu_node, *cluster_node; ++ u32 acpi_cpu_id; ++ int retval; ++ int is_thread; ++ ++ status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); ++ if (ACPI_FAILURE(status)) { ++ acpi_pptt_warn_missing(); ++ return -ENOENT; ++ } ++ ++ acpi_cpu_id = get_acpi_id_for_cpu(cpu); ++ cpu_node = acpi_find_processor_node(table, acpi_cpu_id); ++ if (cpu_node == NULL || !cpu_node->parent) { ++ retval = -ENOENT; ++ goto put_table; ++ } ++ ++ is_thread = cpu_node->flags & ACPI_PPTT_ACPI_PROCESSOR_IS_THREAD; ++ cluster_node = fetch_pptt_node(table, cpu_node->parent); ++ if (cluster_node == NULL) { ++ retval = -ENOENT; ++ goto put_table; ++ } ++ if (is_thread) { ++ if (!cluster_node->parent) { ++ retval = -ENOENT; ++ goto put_table; ++ } ++ cluster_node = fetch_pptt_node(table, cluster_node->parent); ++ if (cluster_node == NULL) { ++ retval = -ENOENT; ++ goto put_table; ++ } ++ } ++ if (cluster_node->flags & ACPI_PPTT_ACPI_PROCESSOR_ID_VALID) ++ retval = cluster_node->acpi_processor_id; ++ else ++ retval = ACPI_PTR_DIFF(cluster_node, table); ++ ++put_table: ++ acpi_put_table(table); ++ ++ return retval; ++} ++ + /** + * find_acpi_cpu_topology_hetero_id() - Get a core architecture tag + * @cpu: Kernel logical CPU number +diff --git a/drivers/base/topology.c b/drivers/base/topology.c +index da74231de498..7e4bdf65e27a 100644 +--- a/drivers/base/topology.c ++++ b/drivers/base/topology.c +@@ -46,6 +46,9 @@ static DEVICE_ATTR_RO(physical_package_id); + define_id_show_func(die_id); + static DEVICE_ATTR_RO(die_id); + ++define_id_show_func(cluster_id); ++static DEVICE_ATTR_RO(cluster_id); ++ + define_id_show_func(core_id); + static DEVICE_ATTR_RO(core_id); + +@@ -57,6 +60,10 @@ define_siblings_show_func(core_siblings, core_cpumask); + static DEVICE_ATTR_RO(core_siblings); + static DEVICE_ATTR_RO(core_siblings_list); + ++define_siblings_show_func(cluster_cpus, cluster_cpumask); ++static DEVICE_ATTR_RO(cluster_cpus); ++static DEVICE_ATTR_RO(cluster_cpus_list); ++ + #ifdef CONFIG_SCHED_BOOK + define_id_show_func(book_id); + static DEVICE_ATTR_RO(book_id); +@@ -76,11 +83,14 @@ static DEVICE_ATTR_RO(drawer_siblings_list); + static struct attribute *default_attrs[] = { + &dev_attr_physical_package_id.attr, + &dev_attr_die_id.attr, ++ &dev_attr_cluster_id.attr, + &dev_attr_core_id.attr, + &dev_attr_thread_siblings.attr, + 
&dev_attr_thread_siblings_list.attr, + &dev_attr_core_siblings.attr, + &dev_attr_core_siblings_list.attr, ++ &dev_attr_cluster_cpus.attr, ++ &dev_attr_cluster_cpus_list.attr, + #ifdef CONFIG_SCHED_BOOK + &dev_attr_book_id.attr, + &dev_attr_book_siblings.attr, +diff --git a/include/linux/acpi.h b/include/linux/acpi.h +index 4a0142276cb8..2713d2032bff 100644 +--- a/include/linux/acpi.h ++++ b/include/linux/acpi.h +@@ -1328,6 +1328,7 @@ static inline int lpit_read_residency_count_address(u64 *address) + #ifdef CONFIG_ACPI_PPTT + int acpi_pptt_cpu_is_thread(unsigned int cpu); + int find_acpi_cpu_topology(unsigned int cpu, int level); ++int find_acpi_cpu_topology_cluster(unsigned int cpu); + int find_acpi_cpu_topology_package(unsigned int cpu); + int find_acpi_cpu_topology_hetero_id(unsigned int cpu); + int find_acpi_cpu_cache_topology(unsigned int cpu, int level); +@@ -1340,6 +1341,10 @@ static inline int find_acpi_cpu_topology(unsigned int cpu, int level) + { + return -EINVAL; + } ++static inline int find_acpi_cpu_topology_cluster(unsigned int cpu) ++{ ++ return -EINVAL; ++} + static inline int find_acpi_cpu_topology_package(unsigned int cpu) + { + return -EINVAL; +diff --git a/include/linux/topology.h b/include/linux/topology.h +index a19771cd267d..90dd075394b2 100644 +--- a/include/linux/topology.h ++++ b/include/linux/topology.h +@@ -188,6 +188,9 @@ static inline int cpu_to_mem(int cpu) + #ifndef topology_die_id + #define topology_die_id(cpu) ((void)(cpu), -1) + #endif ++#ifndef topology_cluster_id ++#define topology_cluster_id(cpu) ((void)(cpu), -1) ++#endif + #ifndef topology_core_id + #define topology_core_id(cpu) ((void)(cpu), 0) + #endif +@@ -197,6 +200,9 @@ static inline int cpu_to_mem(int cpu) + #ifndef topology_core_cpumask + #define topology_core_cpumask(cpu) cpumask_of(cpu) + #endif ++#ifndef topology_cluster_cpumask ++#define topology_cluster_cpumask(cpu) cpumask_of(cpu) ++#endif + + #ifdef CONFIG_SCHED_SMT + static inline const struct cpumask *cpu_smt_mask(int cpu) +-- +2.23.0 + diff --git a/patches/0118-sched-Add-cluster-scheduler-level-in-core-and-relate.patch b/patches/0118-sched-Add-cluster-scheduler-level-in-core-and-relate.patch new file mode 100644 index 00000000..e22475fa --- /dev/null +++ b/patches/0118-sched-Add-cluster-scheduler-level-in-core-and-relate.patch @@ -0,0 +1,264 @@ +From 73796877f3e5809bf1e5803bf62f4eaf8f5f4764 Mon Sep 17 00:00:00 2001 +From: Barry Song +Date: Fri, 24 Sep 2021 20:51:03 +1200 +Subject: [PATCH 118/132] sched: Add cluster scheduler level in core and + related Kconfig for ARM64 + +mainline inclusion +from mainline-v5.16-rc1 +commit 778c558f49a2cb3dc7b18a80ff515e82aa813627 +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I4GEZS +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=778c558f49a2cb3dc7b18a80ff515e82aa813627 + +------------------------------------------------------------------------ + +This patch adds scheduler level for clusters and automatically enables +the load balance among clusters. It will directly benefit a lot of +workload which loves more resources such as memory bandwidth, caches. + +Testing has widely been done in two different hardware configurations of +Kunpeng920: + + 24 cores in one NUMA(6 clusters in each NUMA node); + 32 cores in one NUMA(8 clusters in each NUMA node) + +Workload is running on either one NUMA node or four NUMA nodes, thus, +this can estimate the effect of cluster spreading w/ and w/o NUMA load +balance. 
+ +* Stream benchmark: + +4threads stream (on 1NUMA * 24cores = 24cores) + stream stream + w/o patch w/ patch +MB/sec copy 29929.64 ( 0.00%) 32932.68 ( 10.03%) +MB/sec scale 29861.10 ( 0.00%) 32710.58 ( 9.54%) +MB/sec add 27034.42 ( 0.00%) 32400.68 ( 19.85%) +MB/sec triad 27225.26 ( 0.00%) 31965.36 ( 17.41%) + +6threads stream (on 1NUMA * 24cores = 24cores) + stream stream + w/o patch w/ patch +MB/sec copy 40330.24 ( 0.00%) 42377.68 ( 5.08%) +MB/sec scale 40196.42 ( 0.00%) 42197.90 ( 4.98%) +MB/sec add 37427.00 ( 0.00%) 41960.78 ( 12.11%) +MB/sec triad 37841.36 ( 0.00%) 42513.64 ( 12.35%) + +12threads stream (on 1NUMA * 24cores = 24cores) + stream stream + w/o patch w/ patch +MB/sec copy 52639.82 ( 0.00%) 53818.04 ( 2.24%) +MB/sec scale 52350.30 ( 0.00%) 53253.38 ( 1.73%) +MB/sec add 53607.68 ( 0.00%) 55198.82 ( 2.97%) +MB/sec triad 54776.66 ( 0.00%) 56360.40 ( 2.89%) + +Thus, it could help memory-bound workload especially under medium load. +Similar improvement is also seen in lkp-pbzip2: + +* lkp-pbzip2 benchmark + +2-96 threads (on 4NUMA * 24cores = 96cores) + lkp-pbzip2 lkp-pbzip2 + w/o patch w/ patch +Hmean tput-2 11062841.57 ( 0.00%) 11341817.51 * 2.52%* +Hmean tput-5 26815503.70 ( 0.00%) 27412872.65 * 2.23%* +Hmean tput-8 41873782.21 ( 0.00%) 43326212.92 * 3.47%* +Hmean tput-12 61875980.48 ( 0.00%) 64578337.51 * 4.37%* +Hmean tput-21 105814963.07 ( 0.00%) 111381851.01 * 5.26%* +Hmean tput-30 150349470.98 ( 0.00%) 156507070.73 * 4.10%* +Hmean tput-48 237195937.69 ( 0.00%) 242353597.17 * 2.17%* +Hmean tput-79 360252509.37 ( 0.00%) 362635169.23 * 0.66%* +Hmean tput-96 394571737.90 ( 0.00%) 400952978.48 * 1.62%* + +2-24 threads (on 1NUMA * 24cores = 24cores) + lkp-pbzip2 lkp-pbzip2 + w/o patch w/ patch +Hmean tput-2 11071705.49 ( 0.00%) 11296869.10 * 2.03%* +Hmean tput-4 20782165.19 ( 0.00%) 21949232.15 * 5.62%* +Hmean tput-6 30489565.14 ( 0.00%) 33023026.96 * 8.31%* +Hmean tput-8 40376495.80 ( 0.00%) 42779286.27 * 5.95%* +Hmean tput-12 61264033.85 ( 0.00%) 62995632.78 * 2.83%* +Hmean tput-18 86697139.39 ( 0.00%) 86461545.74 ( -0.27%) +Hmean tput-24 104854637.04 ( 0.00%) 104522649.46 * -0.32%* + +In the case of 6 threads and 8 threads, we see the greatest performance +improvement. + +Similar improvement can be seen on lkp-pixz though the improvement is +smaller: + +* lkp-pixz benchmark + +2-24 threads lkp-pixz (on 1NUMA * 24cores = 24cores) + lkp-pixz lkp-pixz + w/o patch w/ patch +Hmean tput-2 6486981.16 ( 0.00%) 6561515.98 * 1.15%* +Hmean tput-4 11645766.38 ( 0.00%) 11614628.43 ( -0.27%) +Hmean tput-6 15429943.96 ( 0.00%) 15957350.76 * 3.42%* +Hmean tput-8 19974087.63 ( 0.00%) 20413746.98 * 2.20%* +Hmean tput-12 28172068.18 ( 0.00%) 28751997.06 * 2.06%* +Hmean tput-18 39413409.54 ( 0.00%) 39896830.55 * 1.23%* +Hmean tput-24 49101815.85 ( 0.00%) 49418141.47 * 0.64%* + +* SPECrate benchmark + +4,8,16 copies mcf_r(on 1NUMA * 32cores = 32cores) + Base Base + Run Time Rate + ------- --------- +4 Copies w/o 580 (w/ 570) w/o 11.1 (w/ 11.3) +8 Copies w/o 647 (w/ 605) w/o 20.0 (w/ 21.4, +7%) +16 Copies w/o 844 (w/ 844) w/o 30.6 (w/ 30.6) + +32 Copies(on 4NUMA * 32 cores = 128cores) +[w/o patch] + Base Base Base +Benchmarks Copies Run Time Rate +--------------- ------- --------- --------- +500.perlbench_r 32 584 87.2 * +502.gcc_r 32 503 90.2 * +505.mcf_r 32 745 69.4 * +520.omnetpp_r 32 1031 40.7 * +523.xalancbmk_r 32 597 56.6 * +525.x264_r 1 -- CE +531.deepsjeng_r 32 336 109 * +541.leela_r 32 556 95.4 * +548.exchange2_r 32 513 163 * +557.xz_r 32 530 65.2 * + Est. 
SPECrate2017_int_base 80.3 + +[w/ patch] + Base Base Base +Benchmarks Copies Run Time Rate +--------------- ------- --------- --------- +500.perlbench_r 32 580 87.8 (+0.688%) * +502.gcc_r 32 477 95.1 (+5.432%) * +505.mcf_r 32 644 80.3 (+13.574%) * +520.omnetpp_r 32 942 44.6 (+9.58%) * +523.xalancbmk_r 32 560 60.4 (+6.714%%) * +525.x264_r 1 -- CE +531.deepsjeng_r 32 337 109 (+0.000%) * +541.leela_r 32 554 95.6 (+0.210%) * +548.exchange2_r 32 515 163 (+0.000%) * +557.xz_r 32 524 66.0 (+1.227%) * + Est. SPECrate2017_int_base 83.7 (+4.062%) + +On the other hand, it is slightly helpful to CPU-bound tasks like +kernbench: + +* 24-96 threads kernbench (on 4NUMA * 24cores = 96cores) + kernbench kernbench + w/o cluster w/ cluster +Min user-24 12054.67 ( 0.00%) 12024.19 ( 0.25%) +Min syst-24 1751.51 ( 0.00%) 1731.68 ( 1.13%) +Min elsp-24 600.46 ( 0.00%) 598.64 ( 0.30%) +Min user-48 12361.93 ( 0.00%) 12315.32 ( 0.38%) +Min syst-48 1917.66 ( 0.00%) 1892.73 ( 1.30%) +Min elsp-48 333.96 ( 0.00%) 332.57 ( 0.42%) +Min user-96 12922.40 ( 0.00%) 12921.17 ( 0.01%) +Min syst-96 2143.94 ( 0.00%) 2110.39 ( 1.56%) +Min elsp-96 211.22 ( 0.00%) 210.47 ( 0.36%) +Amean user-24 12063.99 ( 0.00%) 12030.78 * 0.28%* +Amean syst-24 1755.20 ( 0.00%) 1735.53 * 1.12%* +Amean elsp-24 601.60 ( 0.00%) 600.19 ( 0.23%) +Amean user-48 12362.62 ( 0.00%) 12315.56 * 0.38%* +Amean syst-48 1921.59 ( 0.00%) 1894.95 * 1.39%* +Amean elsp-48 334.10 ( 0.00%) 332.82 * 0.38%* +Amean user-96 12925.27 ( 0.00%) 12922.63 ( 0.02%) +Amean syst-96 2146.66 ( 0.00%) 2122.20 * 1.14%* +Amean elsp-96 211.96 ( 0.00%) 211.79 ( 0.08%) + +Note this patch isn't an universal win, it might hurt those workload +which can benefit from packing. Though tasks which want to take +advantages of lower communication latency of one cluster won't +necessarily been packed in one cluster while kernel is not aware of +clusters, they have some chance to be randomly packed. But this +patch will make them more likely spread. + +Signed-off-by: Barry Song +Tested-by: Yicong Yang +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Jiang Yi +--- + arch/arm64/Kconfig | 9 +++++++++ + include/linux/sched/topology.h | 7 +++++++ + include/linux/topology.h | 7 +++++++ + kernel/sched/topology.c | 5 +++++ + 4 files changed, 28 insertions(+) + +diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig +index 88b8031a93b2..47aa27fcd895 100644 +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -784,6 +784,15 @@ config SCHED_MC + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + ++config SCHED_CLUSTER ++ bool "Cluster scheduler support" ++ help ++ Cluster scheduler support improves the CPU scheduler's decision ++ making when dealing with machines that have clusters of CPUs. ++ Cluster usually means a couple of CPUs which are placed closely ++ by sharing mid-level caches, last-level cache tags or internal ++ busses. 
++ + config SCHED_SMT + bool "SMT scheduler support" + help +diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h +index a9032116c13e..15d2e06f690b 100644 +--- a/include/linux/sched/topology.h ++++ b/include/linux/sched/topology.h +@@ -40,6 +40,13 @@ static inline int cpu_smt_flags(void) + } + #endif + ++#ifdef CONFIG_SCHED_CLUSTER ++static inline int cpu_cluster_flags(void) ++{ ++ return SD_SHARE_PKG_RESOURCES; ++} ++#endif ++ + #ifdef CONFIG_SCHED_MC + static inline int cpu_core_flags(void) + { +diff --git a/include/linux/topology.h b/include/linux/topology.h +index 90dd075394b2..58f8a9e9d90b 100644 +--- a/include/linux/topology.h ++++ b/include/linux/topology.h +@@ -211,6 +211,13 @@ static inline const struct cpumask *cpu_smt_mask(int cpu) + } + #endif + ++#if defined(CONFIG_SCHED_CLUSTER) && !defined(cpu_cluster_mask) ++static inline const struct cpumask *cpu_cluster_mask(int cpu) ++{ ++ return topology_cluster_cpumask(cpu); ++} ++#endif ++ + static inline const struct cpumask *cpu_cpu_mask(int cpu) + { + return cpumask_of_node(cpu_to_node(cpu)); +diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c +index ad5591520c99..5d662314c08b 100644 +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -1298,6 +1298,11 @@ static struct sched_domain_topology_level default_topology[] = { + #ifdef CONFIG_SCHED_SMT + { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, + #endif ++ ++#ifdef CONFIG_SCHED_CLUSTER ++ { cpu_clustergroup_mask, cpu_cluster_flags, SD_INIT_NAME(CLS) }, ++#endif ++ + #ifdef CONFIG_SCHED_MC + { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, + #endif +-- +2.23.0 + diff --git a/patches/0119-topology-sysfs-export-cluster-attributes-only-if-an-.patch b/patches/0119-topology-sysfs-export-cluster-attributes-only-if-an-.patch new file mode 100644 index 00000000..10c82afd --- /dev/null +++ b/patches/0119-topology-sysfs-export-cluster-attributes-only-if-an-.patch @@ -0,0 +1,128 @@ +From 8072124e989fd6183877494d17c098ec9f308683 Mon Sep 17 00:00:00 2001 +From: Heiko Carstens +Date: Mon, 29 Nov 2021 14:03:08 +0100 +Subject: [PATCH 119/132] topology/sysfs: export cluster attributes only if an + architectures has support + +mainline inclusion +from mainline-v5.17-rc1 +commit e795707703b32fecdd7467afcc33ff1e92416c05 +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/I4GEZS +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e795707703b32fecdd7467afcc33ff1e92416c05 + +---------------------------------------------------------------------- + +The cluster_id and cluster_cpus topology sysfs attributes have been +added with commit c5e22feffdd7 ("topology: Represent clusters of CPUs +within a die"). + +They are currently only used for x86, arm64, and riscv (via generic +arch topology), however they are still present with bogus default +values for all other architectures. Instead of enforcing such new +sysfs attributes to all architectures, make them only optional visible +if an architecture opts in by defining both the topology_cluster_id +and topology_cluster_cpumask attributes. + +This is similar to what was done when the book and drawer topology +levels were introduced: avoid useless and therefore confusing sysfs +attributes for architectures which cannot make use of them. + +This should not break any existing applications, since this is a +new interface introduced with the v5.16 merge window. 
+ +Acked-by: Peter Zijlstra (Intel) +Signed-off-by: Heiko Carstens +Link: https://lore.kernel.org/r/20211129130309.3256168-3-hca@linux.ibm.com +Signed-off-by: Greg Kroah-Hartman + +Conflicts: + Documentation/admin-guide/cputopology.rst + drivers/base/topology.c + include/linux/topology.h + +Signed-off-by: Jiang Yi +--- + Documentation/cputopology.txt | 3 +++ + drivers/base/topology.c | 8 ++++++++ + include/linux/topology.h | 4 ++++ + 3 files changed, 15 insertions(+) + +diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt +index acd55bf0c718..e6645ff18994 100644 +--- a/Documentation/cputopology.txt ++++ b/Documentation/cputopology.txt +@@ -94,6 +94,9 @@ Architecture-neutral, drivers/base/topology.c, exports these attributes. + However, the book and drawer related sysfs files will only be created if + CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are selected, respectively. + ++The cluster hierarchy related sysfs files will only be created if an ++architecture provides the related macros as described below. ++ + CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are currently only used on s390, + where they reflect the cpu and cache hierarchy. + +diff --git a/drivers/base/topology.c b/drivers/base/topology.c +index 7e4bdf65e27a..66ebcf05030f 100644 +--- a/drivers/base/topology.c ++++ b/drivers/base/topology.c +@@ -46,8 +46,10 @@ static DEVICE_ATTR_RO(physical_package_id); + define_id_show_func(die_id); + static DEVICE_ATTR_RO(die_id); + ++#ifdef TOPOLOGY_CLUSTER_SYSFS + define_id_show_func(cluster_id); + static DEVICE_ATTR_RO(cluster_id); ++#endif + + define_id_show_func(core_id); + static DEVICE_ATTR_RO(core_id); +@@ -60,9 +62,11 @@ define_siblings_show_func(core_siblings, core_cpumask); + static DEVICE_ATTR_RO(core_siblings); + static DEVICE_ATTR_RO(core_siblings_list); + ++#ifdef TOPOLOGY_CLUSTER_SYSFS + define_siblings_show_func(cluster_cpus, cluster_cpumask); + static DEVICE_ATTR_RO(cluster_cpus); + static DEVICE_ATTR_RO(cluster_cpus_list); ++#endif + + #ifdef CONFIG_SCHED_BOOK + define_id_show_func(book_id); +@@ -83,14 +87,18 @@ static DEVICE_ATTR_RO(drawer_siblings_list); + static struct attribute *default_attrs[] = { + &dev_attr_physical_package_id.attr, + &dev_attr_die_id.attr, ++#ifdef TOPOLOGY_CLUSTER_SYSFS + &dev_attr_cluster_id.attr, ++#endif + &dev_attr_core_id.attr, + &dev_attr_thread_siblings.attr, + &dev_attr_thread_siblings_list.attr, + &dev_attr_core_siblings.attr, + &dev_attr_core_siblings_list.attr, ++#ifdef TOPOLOGY_CLUSTER_SYSFS + &dev_attr_cluster_cpus.attr, + &dev_attr_cluster_cpus_list.attr, ++#endif + #ifdef CONFIG_SCHED_BOOK + &dev_attr_book_id.attr, + &dev_attr_book_siblings.attr, +diff --git a/include/linux/topology.h b/include/linux/topology.h +index 58f8a9e9d90b..9033a952ee68 100644 +--- a/include/linux/topology.h ++++ b/include/linux/topology.h +@@ -182,6 +182,10 @@ static inline int cpu_to_mem(int cpu) + + #endif /* [!]CONFIG_HAVE_MEMORYLESS_NODES */ + ++#if defined(topology_cluster_id) && defined(topology_cluster_cpumask) ++#define TOPOLOGY_CLUSTER_SYSFS ++#endif ++ + #ifndef topology_physical_package_id + #define topology_physical_package_id(cpu) ((void)(cpu), -1) + #endif +-- +2.23.0 + diff --git a/patches/0120-topology-Remove-unused-cpu_cluster_mask.patch b/patches/0120-topology-Remove-unused-cpu_cluster_mask.patch new file mode 100644 index 00000000..3ce44fe7 --- /dev/null +++ b/patches/0120-topology-Remove-unused-cpu_cluster_mask.patch @@ -0,0 +1,49 @@ +From 68d8b00dca7138b644e472c797957907b98aa535 Mon Sep 17 00:00:00 2001 +From: Dietmar 
Eggemann +Date: Fri, 13 May 2022 11:34:33 +0200 +Subject: [PATCH 120/132] topology: Remove unused cpu_cluster_mask() + +mainline inclusion +from mainline-v5.19-rc1 +commit 15f214f9bdb7c1f560b4bf863c5a72ff53b442a4 +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/I4GEZS +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=15f214f9bdb7c1f560b4bf863c5a72ff53b442a4 + +------------------------------------------------------------------------ + +default_topology[] uses cpu_clustergroup_mask() for the CLS level +(guarded by CONFIG_SCHED_CLUSTER) which is currently provided by x86 +(arch/x86/kernel/smpboot.c) and arm64 (drivers/base/arch_topology.c). + +Fixes: 778c558f49a2 ("sched: Add cluster scheduler level in core and related Kconfig for ARM64") +Acked-by: Barry Song +Signed-off-by: Dietmar Eggemann +Link: https://lore.kernel.org/r/20220513093433.425163-1-dietmar.eggemann@arm.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Jiang Yi +--- + include/linux/topology.h | 7 ------- + 1 file changed, 7 deletions(-) + +diff --git a/include/linux/topology.h b/include/linux/topology.h +index 9033a952ee68..9a7753fcae6a 100644 +--- a/include/linux/topology.h ++++ b/include/linux/topology.h +@@ -215,13 +215,6 @@ static inline const struct cpumask *cpu_smt_mask(int cpu) + } + #endif + +-#if defined(CONFIG_SCHED_CLUSTER) && !defined(cpu_cluster_mask) +-static inline const struct cpumask *cpu_cluster_mask(int cpu) +-{ +- return topology_cluster_cpumask(cpu); +-} +-#endif +- + static inline const struct cpumask *cpu_cpu_mask(int cpu) + { + return cpumask_of_node(cpu_to_node(cpu)); +-- +2.23.0 + diff --git a/patches/0121-arch_topology-Limit-span-of-cpu_clustergroup_mask.patch b/patches/0121-arch_topology-Limit-span-of-cpu_clustergroup_mask.patch new file mode 100644 index 00000000..b827bbfd --- /dev/null +++ b/patches/0121-arch_topology-Limit-span-of-cpu_clustergroup_mask.patch @@ -0,0 +1,73 @@ +From 7f7344453623deceaa16f6e1cdc71064521bbfca Mon Sep 17 00:00:00 2001 +From: Ionela Voinescu +Date: Mon, 4 Jul 2022 11:16:01 +0100 +Subject: [PATCH 121/132] arch_topology: Limit span of cpu_clustergroup_mask() + +mainline inclusion +from mainline-v6.0-rc1 +commit bfcc4397435dc0407099b9a805391abc05c2313b +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/I88UKS +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=bfcc4397435dc0407099b9a805391abc05c2313b + +---------------------------------------------------------------------- + +Currently the cluster identifier is not set on DT based platforms. +The reset or default value is -1 for all the CPUs. Once we assign the +cluster identifier values correctly, the cluster_sibling mask will be +populated and returned by cpu_clustergroup_mask() to contribute in the +creation of the CLS scheduling domain level, if SCHED_CLUSTER is +enabled. + +To avoid topologies that will result in questionable or incorrect +scheduling domains, impose restrictions regarding the span of clusters, +as presented to scheduling domains building code: cluster_sibling should +not span more or the same CPUs as cpu_coregroup_mask(). + +This is needed in order to obtain a strict separation between the MC and +CLS levels, and maintain the same domains for existing platforms in +the presence of CONFIG_SCHED_CLUSTER, where the new cluster information +is redundant and irrelevant for the scheduler. 
+ +While previously the scheduling domain builder code would have removed MC +as redundant and kept CLS if SCHED_CLUSTER was enabled and the +cpu_coregroup_mask() and cpu_clustergroup_mask() spanned the same CPUs, +now CLS will be removed and MC kept. + +Link: https://lore.kernel.org/r/20220704101605.1318280-18-sudeep.holla@arm.com +Cc: Darren Hart +Tested-by: Conor Dooley +Acked-by: Vincent Guittot +Signed-off-by: Ionela Voinescu +Signed-off-by: Sudeep Holla + +Conflicts: + drivers/base/arch_topology.c + +Signed-off-by: Jiang Yi +--- + arch/arm64/kernel/topology.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c +index e4effe6f3177..b76c92b6ac14 100644 +--- a/arch/arm64/kernel/topology.c ++++ b/arch/arm64/kernel/topology.c +@@ -232,6 +232,14 @@ const struct cpumask *cpu_coregroup_mask(int cpu) + + const struct cpumask *cpu_clustergroup_mask(int cpu) + { ++ /* ++ * Forbid cpu_clustergroup_mask() to span more or the same CPUs as ++ * cpu_coregroup_mask(). ++ */ ++ if (cpumask_subset(cpu_coregroup_mask(cpu), ++ &cpu_topology[cpu].cluster_sibling)) ++ return get_cpu_mask(cpu); ++ + return &cpu_topology[cpu].cluster_sibling; + } + +-- +2.23.0 + diff --git a/patches/0122-arch_topology-Make-cluster-topology-span-at-least-SM.patch b/patches/0122-arch_topology-Make-cluster-topology-span-at-least-SM.patch new file mode 100644 index 00000000..a3f0f872 --- /dev/null +++ b/patches/0122-arch_topology-Make-cluster-topology-span-at-least-SM.patch @@ -0,0 +1,72 @@ +From 91cf59f2bfe15ac0cdcb0da47c286000f7859469 Mon Sep 17 00:00:00 2001 +From: Yicong Yang +Date: Mon, 5 Sep 2022 20:26:15 +0800 +Subject: [PATCH 122/132] arch_topology: Make cluster topology span at least + SMT CPUs + +mainline inclusion +from mainline-v6.0-rc5 +commit 5ac251c8a05ce074e5efac779debf82a15d870a3 +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/I88UKS +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5ac251c8a05ce074e5efac779debf82a15d870a3 + +---------------------------------------------------------------------- + +Currently cpu_clustergroup_mask() will return CPU mask if cluster span more +or the same CPUs as cpu_coregroup_mask(). This will result topology borken +on non-Cluster SMT machines when building with CONFIG_SCHED_CLUSTER=y. + +Test with: +qemu-system-aarch64 -enable-kvm -machine virt \ + -net none \ + -cpu host \ + -bios ./QEMU_EFI.fd \ + -m 2G \ + -smp 48,sockets=2,cores=12,threads=2 \ + -kernel $Image \ + -initrd $Rootfs \ + -nographic + -append "rdinit=init console=ttyAMA0 sched_verbose loglevel=8" + +We'll get below error: +[ 3.084568] BUG: arch topology borken +[ 3.084570] the SMT domain not a subset of the CLS domain + +Since cluster is a level higher than SMT, fix this by making cluster +spans at least SMT CPUs. 
+ +Fixes: bfcc4397435d ("arch_topology: Limit span of cpu_clustergroup_mask()") +Cc: Sudeep Holla +Cc: Vincent Guittot +Cc: Ionela Voinescu +Cc: Greg KH +Reviewed-by: Sudeep Holla +Signed-off-by: Yicong Yang +Link: https://lore.kernel.org/r/20220905122615.12946-1-yangyicong@huawei.com +Signed-off-by: Greg Kroah-Hartman + +Conflicts: + drivers/base/arch_topology.c + +Signed-off-by: Jiang Yi +--- + arch/arm64/kernel/topology.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c +index b76c92b6ac14..46f58120392c 100644 +--- a/arch/arm64/kernel/topology.c ++++ b/arch/arm64/kernel/topology.c +@@ -238,7 +238,7 @@ const struct cpumask *cpu_clustergroup_mask(int cpu) + */ + if (cpumask_subset(cpu_coregroup_mask(cpu), + &cpu_topology[cpu].cluster_sibling)) +- return get_cpu_mask(cpu); ++ return topology_sibling_cpumask(cpu); + + return &cpu_topology[cpu].cluster_sibling; + } +-- +2.23.0 + diff --git a/patches/0123-sched-Add-per_cpu-cluster-domain-info-and-cpus_share.patch b/patches/0123-sched-Add-per_cpu-cluster-domain-info-and-cpus_share.patch new file mode 100644 index 00000000..8e32c4f2 --- /dev/null +++ b/patches/0123-sched-Add-per_cpu-cluster-domain-info-and-cpus_share.patch @@ -0,0 +1,176 @@ +From d99abce298f4d141832460998bd398f38aef4eee Mon Sep 17 00:00:00 2001 +From: Barry Song +Date: Mon, 17 Oct 2022 15:01:55 +0800 +Subject: [PATCH 123/132] sched: Add per_cpu cluster domain info and + cpus_share_lowest_cache API + +kunpeng inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I5W44S +Reference: https://lore.kernel.org/lkml/20220915073423.25535-1-yangyicong@huawei.com/ + +---------------------------------------------------------------------- + +Add per-cpu cluster domain info and cpus_share_lowest_cache() API. +This is the preparation for the optimization of select_idle_cpu() +on platforms with cluster scheduler level. + +Tested-by: K Prateek Nayak +Signed-off-by: Barry Song +Signed-off-by: Yicong Yang +Reviewed-by: Gautham R. 
Shenoy +Reviewed-by: Tim Chen +Reviewed-by: Vincent Guittot +Signed-off-by: Jie Liu + +Conflicts: + include/linux/sched/sd_flags.h + kernel/sched/core.c + kernel/sched/sched.h + kernel/sched/topology.c + +Signed-off-by: Jiang Yi +--- + include/linux/sched/topology.h | 23 +++++++++++++++-------- + kernel/sched/core.c | 14 ++++++++++++++ + kernel/sched/sched.h | 2 ++ + kernel/sched/topology.c | 15 +++++++++++++++ + 4 files changed, 46 insertions(+), 8 deletions(-) + +diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h +index 15d2e06f690b..55eec54e7f1e 100644 +--- a/include/linux/sched/topology.h ++++ b/include/linux/sched/topology.h +@@ -25,13 +25,14 @@ + #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ + #define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */ + #define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */ +-#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ +-#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ +-#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ +-#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ +-#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ +-#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ +-#define SD_NUMA 0x4000 /* cross-node balancing */ ++#define SD_CLUSTER 0x0100 /* Domain members share CPU cluster */ ++#define SD_SHARE_POWERDOMAIN 0x0200 /* Domain members share power domain */ ++#define SD_SHARE_PKG_RESOURCES 0x0400 /* Domain members share cpu pkg resources */ ++#define SD_SERIALIZE 0x0800 /* Only a single load balancing instance */ ++#define SD_ASYM_PACKING 0x1000 /* Place busy groups earlier in the domain */ ++#define SD_PREFER_SIBLING 0x2000 /* Prefer to place tasks in a sibling domain */ ++#define SD_OVERLAP 0x4000 /* sched_domains of this level overlap */ ++#define SD_NUMA 0x8000 /* cross-node balancing */ + + #ifdef CONFIG_SCHED_SMT + static inline int cpu_smt_flags(void) +@@ -43,7 +44,7 @@ static inline int cpu_smt_flags(void) + #ifdef CONFIG_SCHED_CLUSTER + static inline int cpu_cluster_flags(void) + { +- return SD_SHARE_PKG_RESOURCES; ++ return SD_CLUSTER | SD_SHARE_PKG_RESOURCES; + } + #endif + +@@ -180,6 +181,7 @@ cpumask_var_t *alloc_sched_domains(unsigned int ndoms); + void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); + + bool cpus_share_cache(int this_cpu, int that_cpu); ++bool cpus_share_lowest_cache(int this_cpu, int that_cpu); + + typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); + typedef int (*sched_domain_flags_f)(void); +@@ -227,6 +229,11 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu) + return true; + } + ++static inline bool cpus_share_lowest_cache(int this_cpu, int that_cpu) ++{ ++ return true; ++} ++ + #endif /* !CONFIG_SMP */ + + static inline int task_node(const struct task_struct *p) +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 7825ceaae0c4..bbfed1ce2372 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -1851,6 +1851,20 @@ bool cpus_share_cache(int this_cpu, int that_cpu) + + return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); + } ++ ++/* ++ * Whether CPUs are share lowest cache, which means LLC on non-cluster ++ * machines and LLC tag or L2 on machines with clusters. 
++ */ ++bool cpus_share_lowest_cache(int this_cpu, int that_cpu) ++{ ++ if (this_cpu == that_cpu) ++ return true; ++ ++ return per_cpu(sd_lowest_cache_id, this_cpu) == ++ per_cpu(sd_lowest_cache_id, that_cpu); ++} ++ + #endif /* CONFIG_SMP */ + + static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 1d882a2b8d5f..c9019e1a6296 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -1307,7 +1307,9 @@ static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) + DECLARE_PER_CPU(struct sched_domain *, sd_llc); + DECLARE_PER_CPU(int, sd_llc_size); + DECLARE_PER_CPU(int, sd_llc_id); ++DECLARE_PER_CPU(int, sd_lowest_cache_id); + DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); ++DECLARE_PER_CPU(struct sched_domain __rcu *, sd_cluster); + DECLARE_PER_CPU(struct sched_domain *, sd_numa); + DECLARE_PER_CPU(struct sched_domain *, sd_asym); + +diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c +index 5d662314c08b..0b299f9d60cf 100644 +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -412,6 +412,8 @@ static void destroy_sched_domains(struct sched_domain *sd) + DEFINE_PER_CPU(struct sched_domain *, sd_llc); + DEFINE_PER_CPU(int, sd_llc_size); + DEFINE_PER_CPU(int, sd_llc_id); ++DEFINE_PER_CPU(int, sd_lowest_cache_id); ++DEFINE_PER_CPU(struct sched_domain __rcu *, sd_cluster); + DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); + DEFINE_PER_CPU(struct sched_domain *, sd_numa); + DEFINE_PER_CPU(struct sched_domain *, sd_asym); +@@ -445,6 +447,18 @@ static void update_top_cache_domain(int cpu) + per_cpu(sd_llc_id, cpu) = id; + rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds); + ++ sd = lowest_flag_domain(cpu, SD_CLUSTER); ++ if (sd) ++ id = cpumask_first(sched_domain_span(sd)); ++ rcu_assign_pointer(per_cpu(sd_cluster, cpu), sd); ++ ++ /* ++ * This assignment should be placed after the sd_llc_id as ++ * we want this id equals to cluster id on cluster machines ++ * but equals to LLC id on non-Cluster machines. ++ */ ++ per_cpu(sd_lowest_cache_id, cpu) = id; ++ + sd = lowest_flag_domain(cpu, SD_NUMA); + rcu_assign_pointer(per_cpu(sd_numa, cpu), sd); + +@@ -1162,6 +1176,7 @@ static struct cpumask ***sched_domains_numa_masks; + */ + #define TOPOLOGY_SD_FLAGS \ + (SD_SHARE_CPUCAPACITY | \ ++ SD_CLUSTER | \ + SD_SHARE_PKG_RESOURCES | \ + SD_NUMA | \ + SD_ASYM_PACKING | \ +-- +2.23.0 + diff --git a/patches/0124-sched-fair-Scan-cluster-before-scanning-LLC-in-wake-.patch b/patches/0124-sched-fair-Scan-cluster-before-scanning-LLC-in-wake-.patch new file mode 100644 index 00000000..35772e29 --- /dev/null +++ b/patches/0124-sched-fair-Scan-cluster-before-scanning-LLC-in-wake-.patch @@ -0,0 +1,224 @@ +From 30fc24beff2ab85a2e29cbd483fb62f4a99b5283 Mon Sep 17 00:00:00 2001 +From: Barry Song +Date: Mon, 17 Oct 2022 15:34:27 +0800 +Subject: [PATCH 124/132] sched/fair: Scan cluster before scanning LLC in + wake-up path + +kunpeng inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I5W44S +Reference: https://lore.kernel.org/lkml/20220915073423.25535-1-yangyicong@huawei.com/ + +---------------------------------------------------------------------- + +For platforms having clusters like Kunpeng920, CPUs within the same cluster +have lower latency when synchronizing and accessing shared resources like +cache. 
Thus, this patch tries to find an idle cpu within the cluster of the +target CPU before scanning the whole LLC to gain lower latency. + +Testing has been done on Kunpeng920 by pinning tasks to one numa and two +numa. On Kunpeng920, Each numa has 8 clusters and each cluster has 4 CPUs. + +With this patch, We noticed enhancement on tbench within one numa or cross +two numa. + +On numa 0: + 6.0-rc1 patched +Hmean 1 351.20 ( 0.00%) 396.45 * 12.88%* +Hmean 2 700.43 ( 0.00%) 793.76 * 13.32%* +Hmean 4 1404.42 ( 0.00%) 1583.62 * 12.76%* +Hmean 8 2833.31 ( 0.00%) 3147.85 * 11.10%* +Hmean 16 5501.90 ( 0.00%) 6089.89 * 10.69%* +Hmean 32 10428.59 ( 0.00%) 10619.63 * 1.83%* +Hmean 64 8223.39 ( 0.00%) 8306.93 * 1.02%* +Hmean 128 7042.88 ( 0.00%) 7068.03 * 0.36%* + +On numa 0-1: + 6.0-rc1 patched +Hmean 1 363.06 ( 0.00%) 397.13 * 9.38%* +Hmean 2 721.68 ( 0.00%) 789.84 * 9.44%* +Hmean 4 1435.15 ( 0.00%) 1566.01 * 9.12%* +Hmean 8 2776.17 ( 0.00%) 3007.05 * 8.32%* +Hmean 16 5471.71 ( 0.00%) 6103.91 * 11.55%* +Hmean 32 10164.98 ( 0.00%) 11531.81 * 13.45%* +Hmean 64 17143.28 ( 0.00%) 20078.68 * 17.12%* +Hmean 128 14552.70 ( 0.00%) 15156.41 * 4.15%* +Hmean 256 12827.37 ( 0.00%) 13326.86 * 3.89%* + +Note neither Kunpeng920 nor x86 Jacobsville supports SMT, so the SMT branch +in the code has not been tested but it supposed to work. + +Suggested-by: Peter Zijlstra +[https://lore.kernel.org/lkml/Ytfjs+m1kUs0ScSn@worktop.programming.kicks-ass.net] +Tested-by: Yicong Yang +Signed-off-by: Barry Song +Signed-off-by: Yicong Yang +Reviewed-by: Tim Chen +Reviewed-by: Chen Yu +Signed-off-by: Jie Liu + +Conflicts: + kernel/sched/fair.c + kernel/sched/sched.h + kernel/sched/topology.c + +Signed-off-by: Jiang Yi +--- + kernel/sched/fair.c | 50 +++++++++++++++++++++++++++++++++++++---- + kernel/sched/sched.h | 1 + + kernel/sched/topology.c | 11 +++++++++ + 3 files changed, 58 insertions(+), 4 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 1c78e2f29901..8ff0f87f1a76 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -6796,6 +6796,30 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int + cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed); + #endif + ++ if (static_branch_unlikely(&sched_cluster_active)) { ++ struct sched_domain *sdc = ++ rcu_dereference(per_cpu(sd_cluster, target)); ++ ++ if (sdc) { ++ for_each_cpu_wrap(cpu, sched_domain_span(sdc), target) { ++ bool idle = true; ++ ++ if (!cpumask_test_cpu(cpu, cpus)) ++ continue; ++ ++ for_each_cpu(cpu, cpu_smt_mask(core)) { ++ cpumask_clear_cpu(cpu, cpus); ++ if (!available_idle_cpu(cpu)) ++ idle = false; ++ } ++ ++ if (idle) ++ return core; ++ } ++ cpumask_andnot(cpus, cpus, sched_domain_span(sdc)); ++ } ++ } ++ + for_each_cpu_wrap(core, cpus, target) { + bool idle = true; + +@@ -6901,8 +6925,26 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t + cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed); + #endif + ++ if (static_branch_unlikely(&sched_cluster_active)) { ++ struct sched_domain *sdc = ++ rcu_dereference(per_cpu(sd_cluster, target)); ++ ++ if (sdc) { ++ for_each_cpu_wrap(cpu, sched_domain_span(sdc), target) { ++ if (!cpumask_test_cpu(cpu, cpus)) ++ continue; ++ if (--nr <= 0) ++ return -1; ++ if (available_idle_cpu(cpu) || ++ sched_idle_cpu(cpu)) ++ return cpu; ++ } ++ cpumask_andnot(cpus, cpus, sched_domain_span(sdc)); ++ } ++ } ++ + for_each_cpu_wrap(cpu, cpus, target) { +- if (!--nr) ++ if (--nr <= 0) + return -1; + if (available_idle_cpu(cpu) 
|| sched_idle_cpu(cpu)) + break; +@@ -6952,11 +6994,11 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) + * If the previous CPU is cache affine and idle, don't be stupid: + */ + #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +- if (prev != target && cpus_share_cache(prev, target) && ++ if (prev != target && cpus_share_lowest_cache(prev, target) && + cpumask_test_cpu(prev, p->select_cpus) && + (available_idle_cpu(prev) || sched_idle_cpu(prev))) { + #else +- if (prev != target && cpus_share_cache(prev, target) && ++ if (prev != target && cpus_share_lowest_cache(prev, target) && + (available_idle_cpu(prev) || sched_idle_cpu(prev))) { + #endif + SET_STAT(found_idle_cpu_easy); +@@ -6967,7 +7009,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) + recent_used_cpu = p->recent_used_cpu; + if (recent_used_cpu != prev && + recent_used_cpu != target && +- cpus_share_cache(recent_used_cpu, target) && ++ cpus_share_lowest_cache(recent_used_cpu, target) && + (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) && + #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + cpumask_test_cpu(p->recent_used_cpu, p->select_cpus)) { +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index c9019e1a6296..131228b5c268 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -1312,6 +1312,7 @@ DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); + DECLARE_PER_CPU(struct sched_domain __rcu *, sd_cluster); + DECLARE_PER_CPU(struct sched_domain *, sd_numa); + DECLARE_PER_CPU(struct sched_domain *, sd_asym); ++extern struct static_key_false sched_cluster_active; + + struct sched_group_capacity { + atomic_t ref; +diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c +index 0b299f9d60cf..eda15f08577f 100644 +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -418,6 +418,8 @@ DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); + DEFINE_PER_CPU(struct sched_domain *, sd_numa); + DEFINE_PER_CPU(struct sched_domain *, sd_asym); + ++DEFINE_STATIC_KEY_FALSE(sched_cluster_active); ++ + static void update_top_cache_domain(int cpu) + { + #ifdef CONFIG_SCHED_STEAL +@@ -1856,6 +1858,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att + struct s_data d; + struct rq *rq = NULL; + int i, ret = -ENOMEM; ++ bool has_cluster = false; + + alloc_state = __visit_domain_allocation_hell(&d, cpu_map); + if (alloc_state != sa_rootdomain) +@@ -1868,6 +1871,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att + sd = NULL; + for_each_sd_topology(tl) { + sd = build_sched_domain(tl, cpu_map, attr, sd, i); ++ has_cluster |= sd->flags & SD_CLUSTER; + if (tl == sched_domain_topology) + *per_cpu_ptr(d.sd, i) = sd; + if (tl->flags & SDTL_OVERLAP) +@@ -1924,6 +1928,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att + } + rcu_read_unlock(); + ++ if (has_cluster) ++ static_branch_inc_cpuslocked(&sched_cluster_active); ++ + if (rq && sched_debug_enabled) { + pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n", + cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity); +@@ -2018,8 +2025,12 @@ int sched_init_domains(const struct cpumask *cpu_map) + */ + static void detach_destroy_domains(const struct cpumask *cpu_map) + { ++ unsigned int cpu = cpumask_any(cpu_map); + int i; + ++ if (rcu_access_pointer(per_cpu(sd_cluster, cpu))) ++ static_branch_dec_cpuslocked(&sched_cluster_active); ++ + rcu_read_lock(); + for_each_cpu(i, cpu_map) + 
cpu_attach_domain(NULL, &def_root_domain, i); +-- +2.23.0 + diff --git a/patches/0125-scheduler-Create-SDTL_SKIP-flag-to-skip-topology-lev.patch b/patches/0125-scheduler-Create-SDTL_SKIP-flag-to-skip-topology-lev.patch new file mode 100644 index 00000000..acf8ffea --- /dev/null +++ b/patches/0125-scheduler-Create-SDTL_SKIP-flag-to-skip-topology-lev.patch @@ -0,0 +1,78 @@ +From 302c73229a43f4c11f262c971129db0d12f4b8a4 Mon Sep 17 00:00:00 2001 +From: Tim Chen +Date: Fri, 3 Dec 2021 12:32:38 -0800 +Subject: [PATCH 125/132] scheduler: Create SDTL_SKIP flag to skip topology + level + +kunpeng inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I5W44S +Reference: https://lore.kernel.org/lkml/cover.1638563225.git.tim.c.chen@linux.intel.com/ + +---------------------------------------------------------------------- + +A system admin may not want to use cluster scheduling. Make changes to +allow cluster topology level to be skipped when building sched domains. + +Create SDTL_SKIP bit on the sched_domain_topology_level flag so we can +check if the cluster topology level should be skipped when building +sched domains. + +Signed-off-by: Tim Chen +Signed-off-by: Jie Liu + +Conflicts: + kernel/sched/topology.c + +Signed-off-by: Jiang Yi +--- + include/linux/sched/topology.h | 1 + + kernel/sched/topology.c | 12 ++++++++++-- + 2 files changed, 11 insertions(+), 2 deletions(-) + +diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h +index 55eec54e7f1e..ae4ba452c111 100644 +--- a/include/linux/sched/topology.h ++++ b/include/linux/sched/topology.h +@@ -187,6 +187,7 @@ typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); + typedef int (*sched_domain_flags_f)(void); + + #define SDTL_OVERLAP 0x01 ++#define SDTL_SKIP 0x02 + + struct sd_data { + struct sched_domain *__percpu *sd; +diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c +index eda15f08577f..887e2d06d98a 100644 +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -1330,8 +1330,16 @@ static struct sched_domain_topology_level default_topology[] = { + static struct sched_domain_topology_level *sched_domain_topology = + default_topology; + ++static struct sched_domain_topology_level * ++next_tl(struct sched_domain_topology_level *tl) ++{ ++ while (tl->mask && tl->flags & SDTL_SKIP) ++ ++tl; ++ return tl; ++} ++ + #define for_each_sd_topology(tl) \ +- for (tl = sched_domain_topology; tl->mask; tl++) ++ for (tl = next_tl(sched_domain_topology); tl->mask; tl = next_tl(++tl)) + + void set_sched_topology(struct sched_domain_topology_level *tl) + { +@@ -1872,7 +1880,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att + for_each_sd_topology(tl) { + sd = build_sched_domain(tl, cpu_map, attr, sd, i); + has_cluster |= sd->flags & SD_CLUSTER; +- if (tl == sched_domain_topology) ++ if (tl == next_tl(sched_domain_topology)) + *per_cpu_ptr(d.sd, i) = sd; + if (tl->flags & SDTL_OVERLAP) + sd->flags |= SD_OVERLAP; +-- +2.23.0 + diff --git a/patches/0126-sysctl-add-a-new-register_sysctl_init-interface.patch b/patches/0126-sysctl-add-a-new-register_sysctl_init-interface.patch new file mode 100644 index 00000000..422158ca --- /dev/null +++ b/patches/0126-sysctl-add-a-new-register_sysctl_init-interface.patch @@ -0,0 +1,196 @@ +From 39518365ec710c299de64d5fb28ce80f22869536 Mon Sep 17 00:00:00 2001 +From: Xiaoming Ni +Date: Thu, 28 Jul 2022 18:06:57 +0800 +Subject: [PATCH 126/132] sysctl: add a new register_sysctl_init() interface + +mainline inclusion +from 
mainline-v5.17-rc1 +commit 3ddd9a808cee7284931312f2f3e854c9617f44b2 +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I5W44S +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3ddd9a808cee7284931312f2f3e854c9617f44b2 + +---------------------------------------------------------------------- + +Patch series "sysctl: first set of kernel/sysctl cleanups", v2. + +Finally had time to respin the series of the work we had started last +year on cleaning up the kernel/sysct.c kitchen sink. People keeps +stuffing their sysctls in that file and this creates a maintenance +burden. So this effort is aimed at placing sysctls where they actually +belong. + +I'm going to split patches up into series as there is quite a bit of +work. + +This first set adds register_sysctl_init() for uses of registerting a +sysctl on the init path, adds const where missing to a few places, +generalizes common values so to be more easy to share, and starts the +move of a few kernel/sysctl.c out where they belong. + +The majority of rework on v2 in this first patch set is 0-day fixes. +Eric Biederman's feedback is later addressed in subsequent patch sets. + +I'll only post the first two patch sets for now. We can address the +rest once the first two patch sets get completely reviewed / Acked. + +This patch (of 9): + +The kernel/sysctl.c is a kitchen sink where everyone leaves their dirty +dishes, this makes it very difficult to maintain. + +To help with this maintenance let's start by moving sysctls to places +where they actually belong. The proc sysctl maintainers do not want to +know what sysctl knobs you wish to add for your own piece of code, we +just care about the core logic. + +Today though folks heavily rely on tables on kernel/sysctl.c so they can +easily just extend this table with their needed sysctls. In order to +help users move their sysctls out we need to provide a helper which can +be used during code initialization. + +We special-case the initialization use of register_sysctl() since it +*is* safe to fail, given all that sysctls do is provide a dynamic +interface to query or modify at runtime an existing variable. So the +use case of register_sysctl() on init should *not* stop if the sysctls +don't end up getting registered. It would be counter productive to stop +boot if a simple sysctl registration failed. + +Provide a helper for init then, and document the recommended init levels +to use for callers of this routine. We will later use this in +subsequent patches to start slimming down kernel/sysctl.c tables and +moving sysctl registration to the code which actually needs these +sysctls. + +[mcgrof@kernel.org: major commit log and documentation rephrasing also moved to fs/proc/proc_sysctl.c ] + +Link: https://lkml.kernel.org/r/20211123202347.818157-1-mcgrof@kernel.org +Link: https://lkml.kernel.org/r/20211123202347.818157-2-mcgrof@kernel.org +Signed-off-by: Xiaoming Ni +Signed-off-by: Luis Chamberlain +Reviewed-by: Kees Cook +Cc: Iurii Zaikin +Cc: "Eric W. 
Biederman" +Cc: Peter Zijlstra +Cc: Greg Kroah-Hartman +Cc: Paul Turner +Cc: Andy Shevchenko +Cc: Sebastian Reichel +Cc: Tetsuo Handa +Cc: Petr Mladek +Cc: Sergey Senozhatsky +Cc: Qing Wang +Cc: Benjamin LaHaise +Cc: Al Viro +Cc: Jan Kara +Cc: Amir Goldstein +Cc: Stephen Kitt +Cc: Antti Palosaari +Cc: Arnd Bergmann +Cc: Benjamin Herrenschmidt +Cc: Clemens Ladisch +Cc: David Airlie +Cc: Jani Nikula +Cc: Joel Becker +Cc: Joonas Lahtinen +Cc: Joseph Qi +Cc: Julia Lawall +Cc: Lukas Middendorf +Cc: Mark Fasheh +Cc: Phillip Potter +Cc: Rodrigo Vivi +Cc: Douglas Gilbert +Cc: James E.J. Bottomley +Cc: Jani Nikula +Cc: John Ogness +Cc: Martin K. Petersen +Cc: "Rafael J. Wysocki" +Cc: Steven Rostedt (VMware) +Cc: Suren Baghdasaryan +Cc: "Theodore Ts'o" +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Liu Shixin +Reviewed-by: Kefeng Wang +Signed-off-by: Zheng Zengkai + +Conflicts: + both modified: fs/proc/proc_sysctl.c + both modified: include/linux/sysctl.h + +Signed-off-by: Jiang Yi +--- + fs/proc/proc_sysctl.c | 34 ++++++++++++++++++++++++++++++++++ + include/linux/sysctl.h | 4 ++++ + 2 files changed, 38 insertions(+) + +diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c +index c95f32b83a94..2712aa568331 100644 +--- a/fs/proc/proc_sysctl.c ++++ b/fs/proc/proc_sysctl.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + #include "internal.h" + + static const struct dentry_operations proc_sys_dentry_operations; +@@ -1376,6 +1377,39 @@ struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *tab + } + EXPORT_SYMBOL(register_sysctl); + ++/** ++ * __register_sysctl_init() - register sysctl table to path ++ * @path: path name for sysctl base ++ * @table: This is the sysctl table that needs to be registered to the path ++ * @table_name: The name of sysctl table, only used for log printing when ++ * registration fails ++ * ++ * The sysctl interface is used by userspace to query or modify at runtime ++ * a predefined value set on a variable. These variables however have default ++ * values pre-set. Code which depends on these variables will always work even ++ * if register_sysctl() fails. If register_sysctl() fails you'd just loose the ++ * ability to query or modify the sysctls dynamically at run time. Chances of ++ * register_sysctl() failing on init are extremely low, and so for both reasons ++ * this function does not return any error as it is used by initialization code. ++ * ++ * Context: Can only be called after your respective sysctl base path has been ++ * registered. So for instance, most base directories are registered early on ++ * init before init levels are processed through proc_sys_init() and ++ * sysctl_init(). 
++ */ ++void __init __register_sysctl_init(const char *path, struct ctl_table *table, ++ const char *table_name) ++{ ++ struct ctl_table_header *hdr = register_sysctl(path, table); ++ ++ if (unlikely(!hdr)) { ++ pr_err("failed when register_sysctl %s to %s\n", ++ table_name, path); ++ return; ++ } ++ kmemleak_not_leak(hdr); ++} ++ + static char *append_path(const char *path, char *pos, const char *name) + { + int namelen; +diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h +index b769ecfcc3bd..04c822f6e7e9 100644 +--- a/include/linux/sysctl.h ++++ b/include/linux/sysctl.h +@@ -198,6 +198,10 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, + void unregister_sysctl_table(struct ctl_table_header * table); + + extern int sysctl_init(void); ++extern void __register_sysctl_init(const char *path, struct ctl_table *table, ++ const char *table_name); ++#define register_sysctl_init(path, table) \ ++ __register_sysctl_init(path, table, #table) + + extern struct ctl_table sysctl_mount_point[]; + +-- +2.23.0 + diff --git a/patches/0127-sched-topology-drivers-base-arch_topology-Rebuild-th.patch b/patches/0127-sched-topology-drivers-base-arch_topology-Rebuild-th.patch new file mode 100644 index 00000000..4594b3d4 --- /dev/null +++ b/patches/0127-sched-topology-drivers-base-arch_topology-Rebuild-th.patch @@ -0,0 +1,87 @@ +From 965864f440f01ae1212284dfff617ef3e435569b Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen +Date: Fri, 20 Jul 2018 14:32:32 +0100 +Subject: [PATCH 127/132] sched/topology, drivers/base/arch_topology: Rebuild + the sched_domain hierarchy when capacities change + +mainline inclusion +from mainline-v4.20-rc1 +commit bb1fbdd3c3fd12b612c7d8cdf13bd6bfeebdefa3 +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I5W44S +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=bb1fbdd3c3fd12b612c7d8cdf13bd6bfeebdefa3 + +---------------------------------------------------------------------- + +The setting of SD_ASYM_CPUCAPACITY depends on the per-CPU capacities. +These might not have their final values when the hierarchy is initially +built as the values depend on cpufreq to be initialized or the values +being set through sysfs. To ensure that the flags are set correctly we +need to rebuild the sched_domain hierarchy whenever the reported per-CPU +capacity (arch_scale_cpu_capacity()) changes. + +This patch ensure that a full sched_domain rebuild happens when CPU +capacity changes occur. 
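+
+For illustration only: the helper below is not part of this patch (the real
+setter, arch_rebuild_cpu_topology(), is added later in this series), but it
+shows the intended use of the update_topology flag introduced below. The flag
+is raised around a forced rebuild so that arch_update_cpu_topology() reports
+a change for exactly one rebuild cycle:
+
+	/* Sketch only; would live next to update_topology in arch_topology.c.
+	 * force_sched_domain_rebuild() is a hypothetical name. */
+	static void force_sched_domain_rebuild(void)
+	{
+		update_topology = 1;
+		rebuild_sched_domains();	/* ends up in partition_sched_domains() */
+		update_topology = 0;
+	}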
+ +Signed-off-by: Morten Rasmussen +Signed-off-by: Peter Zijlstra (Intel) +Cc: Greg Kroah-Hartman +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: dietmar.eggemann@arm.com +Cc: valentin.schneider@arm.com +Cc: vincent.guittot@linaro.org +Link: http://lkml.kernel.org/r/1532093554-30504-3-git-send-email-morten.rasmussen@arm.com +Signed-off-by: Ingo Molnar + +Conflicts: + drivers/base/arch_topology.c + +Signed-off-by: Jiang Yi +--- + drivers/base/arch_topology.c | 8 ++++++++ + include/linux/arch_topology.h | 1 + + 2 files changed, 9 insertions(+) + +diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c +index 729dded51e7b..5ef5e0198f9e 100644 +--- a/drivers/base/arch_topology.c ++++ b/drivers/base/arch_topology.c +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + + DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE; + +@@ -67,6 +68,13 @@ static int register_cpu_capacity_sysctl(void) + } + subsys_initcall(register_cpu_capacity_sysctl); + ++static int update_topology; ++ ++int topology_update_cpu_topology(void) ++{ ++ return update_topology; ++} ++ + static u32 capacity_scale; + static u32 *raw_capacity; + +diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h +index 80c28bfce557..a0889776a9e0 100644 +--- a/include/linux/arch_topology.h ++++ b/include/linux/arch_topology.h +@@ -9,6 +9,7 @@ + #include + + void topology_normalize_cpu_scale(void); ++int topology_update_cpu_topology(void); + + struct device_node; + bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu); +-- +2.23.0 + diff --git a/patches/0128-sched-topology-arch-arm64-Rebuild-the-sched_domain-h.patch b/patches/0128-sched-topology-arch-arm64-Rebuild-the-sched_domain-h.patch new file mode 100644 index 00000000..29e47d41 --- /dev/null +++ b/patches/0128-sched-topology-arch-arm64-Rebuild-the-sched_domain-h.patch @@ -0,0 +1,58 @@ +From 006c23c028bec016ef2dd7210932b3296330c840 Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen +Date: Fri, 20 Jul 2018 14:32:33 +0100 +Subject: [PATCH 128/132] sched/topology, arch/arm64: Rebuild the sched_domain + hierarchy when the CPU capacity changes + +mainline inclusion +from mainline-v4.20-rc1 +commit 3ba09df4b8b6e3f01ed6381e8fb890840fd0bca3 +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I5W44S +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3ba09df4b8b6e3f01ed6381e8fb890840fd0bca3 + +---------------------------------------------------------------------- + +Asymmetric CPU capacity can not necessarily be determined accurately at +the time the initial sched_domain hierarchy is built during boot. It is +therefore necessary to be able to force a full rebuild of the hierarchy +later triggered by the arch_topology driver. A full rebuild requires the +arch-code to implement arch_update_cpu_topology() which isn't yet +implemented for arm64. This patch points the arm64 implementation to +arch_topology driver to ensure that full hierarchy rebuild happens when +needed. 
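+
+As a simplified illustration of the consumer side (condensed from
+partition_sched_domains() in kernel/sched/topology.c; can_reuse_domain() is a
+made-up name and not part of this patch): once the define below is in place,
+an arch-reported topology change prevents any existing sched_domain from
+being reused, so the whole hierarchy is rebuilt with the updated capacities:
+
+	static bool can_reuse_domain(const struct cpumask *cur,
+				     const struct cpumask *new)
+	{
+		/* any reported topology change forces a full rebuild */
+		if (arch_update_cpu_topology())
+			return false;
+		return cpumask_equal(cur, new);
+	}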
+ +Signed-off-by: Morten Rasmussen +Signed-off-by: Peter Zijlstra (Intel) +Cc: Catalin Marinas +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Will Deacon +Cc: dietmar.eggemann@arm.com +Cc: valentin.schneider@arm.com +Cc: vincent.guittot@linaro.org +Link: http://lkml.kernel.org/r/1532093554-30504-4-git-send-email-morten.rasmussen@arm.com +Signed-off-by: Ingo Molnar +Signed-off-by: Jiang Yi +--- + arch/arm64/include/asm/topology.h | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h +index 164e26035653..7f0de9ec65f7 100644 +--- a/arch/arm64/include/asm/topology.h ++++ b/arch/arm64/include/asm/topology.h +@@ -50,6 +50,9 @@ int pcibus_to_node(struct pci_bus *bus); + /* Replace task scheduler's default cpu-invariant accounting */ + #define arch_scale_cpu_capacity topology_get_cpu_scale + ++/* Enable topology flag updates */ ++#define arch_update_cpu_topology topology_update_cpu_topology ++ + #include + + #endif /* _ASM_ARM_TOPOLOGY_H */ +-- +2.23.0 + diff --git a/patches/0129-scheduler-Add-runtime-knob-sysctl_sched_cluster.patch b/patches/0129-scheduler-Add-runtime-knob-sysctl_sched_cluster.patch new file mode 100644 index 00000000..6b46431f --- /dev/null +++ b/patches/0129-scheduler-Add-runtime-knob-sysctl_sched_cluster.patch @@ -0,0 +1,237 @@ +From 54062cb62d4060a08ebe460e82eb94a49da70f6a Mon Sep 17 00:00:00 2001 +From: Tim Chen +Date: Fri, 3 Dec 2021 12:32:40 -0800 +Subject: [PATCH 129/132] scheduler: Add runtime knob sysctl_sched_cluster + +kunpeng inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I5W44S +Reference: https://lore.kernel.org/lkml/cover.1638563225.git.tim.c.chen@linux.intel.com/ + +---------------------------------------------------------------------- + +Allow run time configuration of the scheduler to use cluster +scheduling. Configuration can be changed via the sysctl variable +/proc/sys/kernel/sched_cluster. Setting it to 1 enable cluster +scheduling and setting it to 0 turns it off. + +Cluster scheduling should benefit independent tasks by load balancing +them between clusters. It reaps the most benefit when the system's CPUs +are not fully busy, so we can spread the tasks out between the clusters to +reduce contention on cluster resource (e.g. L2 cache). + +However, if the system is expected to operate close to full utilization, +the system admin could turn this feature off so as not to incur +extra load balancing overhead between the cluster domains. 
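+
+A minimal userspace sketch (not part of this patch; set_cluster_sched() is a
+hypothetical helper, equivalent to "echo 1 > /proc/sys/kernel/sched_cluster"):
+
+	#include <stdio.h>
+
+	/* returns 0 on success; needs root and a kernel with CONFIG_SCHED_CLUSTER=y */
+	static int set_cluster_sched(int enable)
+	{
+		FILE *f = fopen("/proc/sys/kernel/sched_cluster", "w");
+
+		if (!f)
+			return -1;
+		fprintf(f, "%d\n", !!enable);
+		return fclose(f);
+	}
+
+	int main(void)
+	{
+		return set_cluster_sched(1) ? 1 : 0;
+	}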
+ +Signed-off-by: Tim Chen +Signed-off-by: Jie Liu + +Conflicts: + arch/x86/kernel/smpboot.c + drivers/base/arch_topology.c + include/linux/sched/sysctl.h + +Signed-off-by: Jiang Yi +--- + arch/x86/kernel/smpboot.c | 8 +++++ + drivers/base/arch_topology.c | 10 +++++- + include/linux/sched/sysctl.h | 7 ++++ + include/linux/topology.h | 1 + + kernel/sched/core.c | 1 + + kernel/sched/sched.h | 6 ++++ + kernel/sched/topology.c | 67 ++++++++++++++++++++++++++++++++++++ + 7 files changed, 99 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c +index e9dd01f7d602..1993690cfd80 100644 +--- a/arch/x86/kernel/smpboot.c ++++ b/arch/x86/kernel/smpboot.c +@@ -56,6 +56,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -116,6 +117,13 @@ int arch_update_cpu_topology(void) + return retval; + } + ++void arch_rebuild_cpu_topology(void) ++{ ++ x86_topology_update = true; ++ rebuild_sched_domains(); ++ x86_topology_update = false; ++} ++ + static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) + { + unsigned long flags; +diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c +index 5ef5e0198f9e..f601eb3238a1 100644 +--- a/drivers/base/arch_topology.c ++++ b/drivers/base/arch_topology.c +@@ -68,6 +68,7 @@ static int register_cpu_capacity_sysctl(void) + } + subsys_initcall(register_cpu_capacity_sysctl); + ++static u32 capacity_scale; + static int update_topology; + + int topology_update_cpu_topology(void) +@@ -75,7 +76,14 @@ int topology_update_cpu_topology(void) + return update_topology; + } + +-static u32 capacity_scale; ++void __weak arch_rebuild_cpu_topology(void) ++{ ++ update_topology = 1; ++ rebuild_sched_domains(); ++ pr_debug("sched_domain hierarchy rebuilt, flags updated\n"); ++ update_topology = 0; ++} ++ + static u32 *raw_capacity; + + static int free_raw_capacity(void) +diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h +index ad472760e97d..691037143faa 100644 +--- a/include/linux/sched/sysctl.h ++++ b/include/linux/sched/sysctl.h +@@ -104,4 +104,11 @@ extern int sysctl_schedstats(struct ctl_table *table, int write, + loff_t *ppos); + + extern int sysctl_umh_affinity; ++ ++#ifdef CONFIG_SCHED_CLUSTER ++extern unsigned int sysctl_sched_cluster; ++int sched_cluster_handler(struct ctl_table *table, int write, ++ void *buffer, size_t *lenp, loff_t *ppos); ++#endif ++ + #endif /* _LINUX_SCHED_SYSCTL_H */ +diff --git a/include/linux/topology.h b/include/linux/topology.h +index 9a7753fcae6a..63fb192f425b 100644 +--- a/include/linux/topology.h ++++ b/include/linux/topology.h +@@ -43,6 +43,7 @@ + if (nr_cpus_node(node)) + + int arch_update_cpu_topology(void); ++void arch_rebuild_cpu_topology(void); + + /* Conform to ACPI 2.0 SLIT distance definitions */ + #define LOCAL_DISTANCE 10 +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index bbfed1ce2372..e518fc08fd41 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -5968,6 +5968,7 @@ int sched_cpu_dying(unsigned int cpu) + void __init sched_init_smp(void) + { + sched_init_numa(); ++ set_sched_cluster(); + + /* + * There's no userspace yet to cause hotplug operations; hence all the +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 131228b5c268..7e2c49032615 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -1190,6 +1190,12 @@ static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf) + #endif + } + ++#ifdef CONFIG_SCHED_CLUSTER ++extern void set_sched_cluster(void); 
++#else ++static inline void set_sched_cluster(void) { } ++#endif ++ + #ifdef CONFIG_NUMA + enum numa_topology_type { + NUMA_DIRECT, +diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c +index 887e2d06d98a..8157e9fb9bfa 100644 +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -1330,6 +1330,73 @@ static struct sched_domain_topology_level default_topology[] = { + static struct sched_domain_topology_level *sched_domain_topology = + default_topology; + ++#ifdef CONFIG_SCHED_CLUSTER ++void set_sched_cluster(void) ++{ ++ struct sched_domain_topology_level *tl; ++ ++ for (tl = sched_domain_topology; tl->mask; tl++) { ++ if (tl->sd_flags && (tl->sd_flags() & SD_CLUSTER)) { ++ if (!sysctl_sched_cluster) ++ tl->flags |= SDTL_SKIP; ++ else ++ tl->flags &= ~SDTL_SKIP; ++ break; ++ } ++ } ++} ++ ++/* set via /proc/sys/kernel/sched_cluster */ ++unsigned int __read_mostly sysctl_sched_cluster = 1; ++ ++static DEFINE_MUTEX(sched_cluster_mutex); ++int sched_cluster_handler(struct ctl_table *table, int write, ++ void *buffer, size_t *lenp, loff_t *ppos) ++{ ++ int ret; ++ unsigned int oldval; ++ ++ if (write && !capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ mutex_lock(&sched_cluster_mutex); ++ oldval = sysctl_sched_cluster; ++ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); ++ if (!ret && write) { ++ if (oldval != sysctl_sched_cluster) { ++ set_sched_cluster(); ++ arch_rebuild_cpu_topology(); ++ } ++ } ++ mutex_unlock(&sched_cluster_mutex); ++ ++ return ret; ++} ++ ++static int zero; ++static int one = 1; ++ ++static struct ctl_table sched_cluster_sysctls[] = { ++ { ++ .procname = "sched_cluster", ++ .data = &sysctl_sched_cluster, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = sched_cluster_handler, ++ .extra1 = (void *)&zero, ++ .extra2 = (void *)&one, ++ }, ++ {} ++}; ++ ++static int __init sched_cluster_sysctl_init(void) ++{ ++ register_sysctl_init("kernel", sched_cluster_sysctls); ++ return 0; ++} ++late_initcall(sched_cluster_sysctl_init); ++#endif ++ + static struct sched_domain_topology_level * + next_tl(struct sched_domain_topology_level *tl) + { +-- +2.23.0 + diff --git a/patches/0130-scheduler-Add-boot-time-enabling-disabling-of-cluste.patch b/patches/0130-scheduler-Add-boot-time-enabling-disabling-of-cluste.patch new file mode 100644 index 00000000..c690683f --- /dev/null +++ b/patches/0130-scheduler-Add-boot-time-enabling-disabling-of-cluste.patch @@ -0,0 +1,72 @@ +From 57ed48e6a2f3f540d27a795c875e957a72272245 Mon Sep 17 00:00:00 2001 +From: Tim Chen +Date: Fri, 3 Dec 2021 12:32:41 -0800 +Subject: [PATCH 130/132] scheduler: Add boot time enabling/disabling of + cluster scheduling + +kunpeng inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I5W44S +Reference: https://lore.kernel.org/lkml/cover.1638563225.git.tim.c.chen@linux.intel.com/ + +---------------------------------------------------------------------- + +Add boot time parameter sched_cluster to enable or disable cluster +scheduling. 
Set boot parameter as follow: + + sched_cluster=0 disables cluster scheduling + sched_cluster=1 enables cluster scheduling + +Signed-off-by: Tim Chen +Signed-off-by: Jie Liu +Signed-off-by: Jiang Yi +--- + Documentation/admin-guide/kernel-parameters.txt | 4 ++++ + kernel/sched/topology.c | 16 ++++++++++++++++ + 2 files changed, 20 insertions(+) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 81c3e5e6447f..cd413b202ea5 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -4310,6 +4310,10 @@ + + sched_debug [KNL] Enables verbose scheduler debug messages. + ++ sched_cluster= Enable or disable cluster scheduling. ++ 0 -- disable. ++ 1 -- enable. ++ + schedstats= [KNL,X86] Enable or disable scheduled statistics. + Allowed values are enable and disable. This feature + incurs a small amount of overhead in the scheduler +diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c +index 8157e9fb9bfa..fdc3ae9e1bc0 100644 +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -1395,6 +1395,22 @@ static int __init sched_cluster_sysctl_init(void) + return 0; + } + late_initcall(sched_cluster_sysctl_init); ++ ++static int __init sched_cluster_option(char *str) ++{ ++ int enable; ++ ++ if (get_option(&str, &enable)) { ++ if (enable != 0 && enable != 1) ++ return -EINVAL; ++ ++ sysctl_sched_cluster = enable; ++ return 0; ++ } ++ ++ return -EINVAL; ++} ++early_param("sched_cluster", sched_cluster_option); + #endif + + static struct sched_domain_topology_level * +-- +2.23.0 + diff --git a/patches/0131-scheduler-Disable-cluster-scheduling-by-default.patch b/patches/0131-scheduler-Disable-cluster-scheduling-by-default.patch new file mode 100644 index 00000000..f2e27545 --- /dev/null +++ b/patches/0131-scheduler-Disable-cluster-scheduling-by-default.patch @@ -0,0 +1,38 @@ +From cab126264ad499e2b122e2926b8dc24ca2eb4a7c Mon Sep 17 00:00:00 2001 +From: Yicong Yang +Date: Mon, 13 Feb 2023 10:48:54 +0800 +Subject: [PATCH 131/132] scheduler: Disable cluster scheduling by default + +kunpeng inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I5W44S + +---------------------------------------------------------------------- + +Disable cluster scheduling by default since it's not a universal win. +User can choose to enable it through sysctl or at boot time according to +their scenario. 
+ +Signed-off-by: Yicong Yang +Signed-off-by: Jie Liu +Signed-off-by: Jiang Yi +--- + kernel/sched/topology.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c +index fdc3ae9e1bc0..1cc9ec74d24b 100644 +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -1347,7 +1347,7 @@ void set_sched_cluster(void) + } + + /* set via /proc/sys/kernel/sched_cluster */ +-unsigned int __read_mostly sysctl_sched_cluster = 1; ++unsigned int __read_mostly sysctl_sched_cluster; + + static DEFINE_MUTEX(sched_cluster_mutex); + int sched_cluster_handler(struct ctl_table *table, int write, +-- +2.23.0 + diff --git a/patches/0132-sched-Open-the-kernel-configuration-for-cluster.patch b/patches/0132-sched-Open-the-kernel-configuration-for-cluster.patch new file mode 100644 index 00000000..ea12d9ba --- /dev/null +++ b/patches/0132-sched-Open-the-kernel-configuration-for-cluster.patch @@ -0,0 +1,35 @@ +From 9714cea606654bd9b4118c7604db5854ae68e626 Mon Sep 17 00:00:00 2001 +From: Jie Liu +Date: Mon, 24 Oct 2022 09:34:57 +0800 +Subject: [PATCH 132/132] sched:Open the kernel configuration for cluster. + +kunpeng inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I5W44S + +---------------------------------------------------------------------- + +In the past configuration, CONFIG_SCHED_CLUSTER was not set. Now, we need +to open the configuration. + +Signed-off-by: Jie Liu +Signed-off-by: Jiang Yi +--- + arch/arm64/configs/openeuler_defconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig +index b04256636d4b..5f818e5e1790 100644 +--- a/arch/arm64/configs/openeuler_defconfig ++++ b/arch/arm64/configs/openeuler_defconfig +@@ -420,6 +420,7 @@ CONFIG_ARM64_PA_BITS=48 + CONFIG_SCHED_MC=y + # CONFIG_SCHED_SMT is not set + CONFIG_NR_CPUS=1024 ++CONFIG_SCHED_CLUSTER=y + CONFIG_HOTPLUG_CPU=y + CONFIG_ARM64_ERR_RECOV=y + CONFIG_MPAM=y +-- +2.23.0 + diff --git a/series.conf b/series.conf index c02d844c..fc6d3050 100644 --- a/series.conf +++ b/series.conf @@ -117,3 +117,19 @@ patches/0113-perf-auxtrace-arm-Refactor-event-list-iteration-in-a.patch patches/0114-perf-auxtrace-arm64-Add-support-for-HiSilicon-PCIe-T.patch patches/0115-perf-auxtrace-arm64-Add-support-for-parsing-HiSilico.patch patches/0116-Fix-the-header-file-location-error-and-adjust-the-fu.patch +patches/0117-topology-Represent-clusters-of-CPUs-within-a-die.patch +patches/0118-sched-Add-cluster-scheduler-level-in-core-and-relate.patch +patches/0119-topology-sysfs-export-cluster-attributes-only-if-an-.patch +patches/0120-topology-Remove-unused-cpu_cluster_mask.patch +patches/0121-arch_topology-Limit-span-of-cpu_clustergroup_mask.patch +patches/0122-arch_topology-Make-cluster-topology-span-at-least-SM.patch +patches/0123-sched-Add-per_cpu-cluster-domain-info-and-cpus_share.patch +patches/0124-sched-fair-Scan-cluster-before-scanning-LLC-in-wake-.patch +patches/0125-scheduler-Create-SDTL_SKIP-flag-to-skip-topology-lev.patch +patches/0126-sysctl-add-a-new-register_sysctl_init-interface.patch +patches/0127-sched-topology-drivers-base-arch_topology-Rebuild-th.patch +patches/0128-sched-topology-arch-arm64-Rebuild-the-sched_domain-h.patch +patches/0129-scheduler-Add-runtime-knob-sysctl_sched_cluster.patch +patches/0130-scheduler-Add-boot-time-enabling-disabling-of-cluste.patch +patches/0131-scheduler-Disable-cluster-scheduling-by-default.patch 
+patches/0132-sched-Open-the-kernel-configuration-for-cluster.patch -- Gitee