diff --git a/0001-Revert-percpu_counter-introduce-atomic-mode-for-perc.patch b/0001-Revert-percpu_counter-introduce-atomic-mode-for-perc.patch
new file mode 100644
index 0000000000000000000000000000000000000000..e2f60eab4dc22fcdaa8bdaa1033df4bc0f61a6a9
--- /dev/null
+++ b/0001-Revert-percpu_counter-introduce-atomic-mode-for-perc.patch
@@ -0,0 +1,199 @@
+From fbb1507f90066c8794e089a1180a342c38e9d5f9 Mon Sep 17 00:00:00 2001
+From: liutianyu1250
+Date: Mon, 20 Jan 2025 10:05:38 +0800
+Subject: [PATCH 1/2] Revert "percpu_counter: introduce atomic mode for
+ percpu_counter"
+
+This reverts commit 69381c36f1ac06079e7a27999503d6b1ceb10c09.
+
+The reverted commit leads to a sleeping function being called with IRQs
+disabled, as shown in the log below:
+
+[ 467.014851][T10602] BUG: sleeping function called from invalid
+context at kernel/locking/spinlock_rt.c:48
+[ 467.014865][T10602] in_atomic(): 1, irqs_disabled(): 1, non_block: 0,
+pid: 10602, name: snap
+[ 467.014874][T10602] preempt_count: 1, expected: 0
+[ 467.014879][T10602] RCU nest depth: 0, expected: 0
+[ 467.014884][T10602] 1 lock held by snap/10602:
+[ 467.014890][T10602] #0: ffffffc080d234e0 (pcpu_lock){....}-{2:2},
+at: pcpu_alloc+0x15c/0x748
+[ 467.014923][T10602] CPU: 4 PID: 10602 Comm: snap Tainted: G W
+6.6.0-openeuler-phytium-embedded-v3.0+ #22
+76582bb8da3acb4739dc2acf08c42407448fd930
+[ 467.014936][T10602] Hardware name: Pd2308 DEMO (DT)
+[ 467.014941][T10602] Call trace:
+[ 467.014944][T10602] dump_backtrace+0xb0/0xd4
+[ 467.014961][T10602] show_stack+0x18/0x24
+[ 467.014973][T10602] dump_stack_lvl+0x54/0x78
+[ 467.014986][T10602] dump_stack+0x18/0x24
+[ 467.014995][T10602] __might_resched+0x130/0x14c
+[ 467.015004][T10602] rt_spin_lock+0x54/0xcc
+[ 467.015016][T10602] pcpu_alloc+0x15c/0x748
+[ 467.015024][T10602] __alloc_percpu_gfp+0x18/0x24
+[ 467.015033][T10602] __percpu_counter_init_many+0x44/0x140
+[ 467.015044][T10602] percpu_counter_switch_to_pcpu_many+0x54/0xc0
+[ 467.015053][T10602] copy_process+0xc30/0x1bc4
+[ 467.015062][T10602] kernel_clone+0xa0/0x484
+[ 467.015069][T10602] __se_sys_clone3+0xdc/0xf0
+[ 467.015076][T10602] __arm64_sys_clone3+0x14/0x20
+[ 467.015083][T10602] invoke_syscall+0x74/0xfc
+[ 467.015094][T10602] el0_svc_common.constprop.0+0xb8/0xd4
+[ 467.015106][T10602] do_el0_svc+0x1c/0x28
+[ 467.015115][T10602] el0_svc+0xb0/0x110
+[ 467.015126][T10602] el0t_64_sync_handler+0x84/0x12c
+[ 467.015137][T10602] el0t_64_sync+0x174/0x178
+---
+ include/linux/percpu_counter.h | 48 +++-------------------------------
+ lib/percpu_counter.c | 35 ++-----------------------
+ 2 files changed, 5 insertions(+), 78 deletions(-)
+
+diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
+index 1a0f25a27d7b..d01351b1526f 100644
+--- a/include/linux/percpu_counter.h
++++ b/include/linux/percpu_counter.h
+@@ -21,18 +21,7 @@
+
+ struct percpu_counter {
+ raw_spinlock_t lock;
+- /*
+- * Depending on whether counters is NULL, we can support two modes,
+- * atomic mode using count_atomic and perpcu mode using count.
+- * The single-thread processes should use atomic mode to reduce the
+- * memory consumption and performance regression.
+- * The multiple-thread processes should use percpu mode to reduce the
+- * error margin.
+- */
+- union {
+- s64 count;
+- atomic64_t count_atomic;
+- };
++ s64 count;
+ #ifdef CONFIG_HOTPLUG_CPU
+ struct list_head list; /* All percpu_counters are on a list */
+ #endif
+@@ -43,14 +32,14 @@ extern int percpu_counter_batch;
+
+ int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount,
+ gfp_t gfp, u32 nr_counters,
+- struct lock_class_key *key, bool switch_mode);
++ struct lock_class_key *key);
+
+ #define percpu_counter_init_many(fbc, value, gfp, nr_counters) \
+ ({ \
+ static struct lock_class_key __key; \
+ \
+ __percpu_counter_init_many(fbc, value, gfp, nr_counters,\
+- &__key, false); \
++ &__key); \
+ })
+
+
+@@ -132,20 +121,6 @@ static inline bool percpu_counter_initialized(struct percpu_counter *fbc)
+ return (fbc->counters != NULL);
+ }
+
+-static inline s64 percpu_counter_atomic_read(struct percpu_counter *fbc)
+-{
+- return atomic64_read(&fbc->count_atomic);
+-}
+-
+-static inline void percpu_counter_atomic_add(struct percpu_counter *fbc,
+- s64 amount)
+-{
+- atomic64_add(amount, &fbc->count_atomic);
+-}
+-
+-int percpu_counter_switch_to_pcpu_many(struct percpu_counter *fbc,
+- u32 nr_counters);
+-
+ #else /* !CONFIG_SMP */
+
+ struct percpu_counter {
+@@ -255,23 +230,6 @@ static inline bool percpu_counter_initialized(struct percpu_counter *fbc)
+ static inline void percpu_counter_sync(struct percpu_counter *fbc)
+ {
+ }
+-
+-static inline s64 percpu_counter_atomic_read(struct percpu_counter *fbc)
+-{
+- return fbc->count;
+-}
+-
+-static inline void percpu_counter_atomic_add(struct percpu_counter *fbc,
+- s64 amount)
+-{
+- percpu_counter_add(fbc, amount);
+-}
+-
+-static inline int percpu_counter_switch_to_pcpu_many(struct percpu_counter *fbc,
+- u32 nr_counters)
+-{
+- return 0;
+-}
+ #endif /* CONFIG_SMP */
+
+ static inline void percpu_counter_inc(struct percpu_counter *fbc)
+diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
+index 7d2eaba4db1d..9073430dc865 100644
+--- a/lib/percpu_counter.c
++++ b/lib/percpu_counter.c
+@@ -153,7 +153,7 @@ EXPORT_SYMBOL(__percpu_counter_sum);
+
+ int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount,
+ gfp_t gfp, u32 nr_counters,
+- struct lock_class_key *key, bool switch_mode)
++ struct lock_class_key *key)
+ {
+ unsigned long flags __maybe_unused;
+ size_t counter_size;
+@@ -174,8 +174,7 @@ int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount,
+ #ifdef CONFIG_HOTPLUG_CPU
+ INIT_LIST_HEAD(&fbc[i].list);
+ #endif
+- if (likely(!switch_mode))
+- fbc[i].count = amount;
++ fbc[i].count = amount;
+ fbc[i].counters = (void *)counters + (i * counter_size);
+
+ debug_percpu_counter_activate(&fbc[i]);
+@@ -279,36 +278,6 @@ int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch)
+ }
+ EXPORT_SYMBOL(__percpu_counter_compare);
+
+-/*
+- * percpu_counter_switch_to_pcpu_many: Converts struct percpu_counters from
+- * atomic mode to percpu mode.
+- *
+- * Return: 0 if percpu_counter is already in atomic mode or successfully
+- * switched to atomic mode; -ENOMEM if perpcu memory allocation fails,
+- * perpcu_counter is still in atomic mode.
+- */
+-int percpu_counter_switch_to_pcpu_many(struct percpu_counter *fbc,
+- u32 nr_counters)
+-{
+- static struct lock_class_key __key;
+- unsigned long flags;
+- bool ret = 0;
+-
+- if (percpu_counter_initialized(fbc))
+- return 0;
+-
+- preempt_disable();
+- local_irq_save(flags);
+- if (likely(!percpu_counter_initialized(fbc)))
+- ret = __percpu_counter_init_many(fbc, 0,
+- GFP_ATOMIC|__GFP_NOWARN|__GFP_ZERO,
+- nr_counters, &__key, true);
+- local_irq_restore(flags);
+- preempt_enable();
+-
+- return ret;
+-}
+-
+ static int __init percpu_counter_startup(void)
+ {
+ int ret;
+--
+2.25.1
+
diff --git a/0002-Revert-mm-convert-mm-s-rss-stats-to-use-atomic-mode.patch b/0002-Revert-mm-convert-mm-s-rss-stats-to-use-atomic-mode.patch
new file mode 100644
index 0000000000000000000000000000000000000000..2bbf294bd635ef758890a13930b30299db3b96de
--- /dev/null
+++ b/0002-Revert-mm-convert-mm-s-rss-stats-to-use-atomic-mode.patch
@@ -0,0 +1,167 @@
+From 486861e51620347f002b13fdcd94d68ae6283d5b Mon Sep 17 00:00:00 2001
+From: liutianyu1250
+Date: Mon, 20 Jan 2025 10:09:34 +0800
+Subject: [PATCH 2/2] Revert "mm: convert mm's rss stats to use atomic mode"
+
+This reverts commit c333c44449530463d06fb3feb9e50959aed06061.
+
+Related to the revert of "percpu_counter: introduce atomic mode for percpu_counter".
+---
+ include/linux/mm.h | 50 ++++++-------------------------------------
+ include/trace/events/kmem.h | 4 +--
+ kernel/fork.c | 20 ++++++---------
+ 3 files changed, 17 insertions(+), 57 deletions(-)
+
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index 49f4fac2dcf7..e45b328850c1 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -2611,66 +2611,30 @@ static inline bool get_user_page_fast_only(unsigned long addr,
+ */
+ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
+ {
+- struct percpu_counter *fbc = &mm->rss_stat[member];
+-
+- if (percpu_counter_initialized(fbc))
+- return percpu_counter_read_positive(fbc);
+-
+- return percpu_counter_atomic_read(fbc);
++ return percpu_counter_read_positive(&mm->rss_stat[member]);
+ }
+
+ void mm_trace_rss_stat(struct mm_struct *mm, int member);
+
+ static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
+ {
+- struct percpu_counter *fbc = &mm->rss_stat[member];
+-
+- if (percpu_counter_initialized(fbc))
+- percpu_counter_add(fbc, value);
+- else
+- percpu_counter_atomic_add(fbc, value);
++ percpu_counter_add(&mm->rss_stat[member], value);
+
+ mm_trace_rss_stat(mm, member);
+ }
+
+ static inline void inc_mm_counter(struct mm_struct *mm, int member)
+ {
+- add_mm_counter(mm, member, 1);
+-}
+-
+-static inline void dec_mm_counter(struct mm_struct *mm, int member)
+-{
+- add_mm_counter(mm, member, -1);
+-}
+-
+-static inline s64 mm_counter_sum(struct mm_struct *mm, int member)
+-{
+- struct percpu_counter *fbc = &mm->rss_stat[member];
++ percpu_counter_inc(&mm->rss_stat[member]);
+
+- if (percpu_counter_initialized(fbc))
+- return percpu_counter_sum(fbc);
+-
+- return percpu_counter_atomic_read(fbc);
+-}
+-
+-static inline s64 mm_counter_sum_positive(struct mm_struct *mm, int member)
+-{
+- struct percpu_counter *fbc = &mm->rss_stat[member];
+-
+- if (percpu_counter_initialized(fbc))
+- return percpu_counter_sum_positive(fbc);
+-
+- return percpu_counter_atomic_read(fbc);
++ mm_trace_rss_stat(mm, member);
+ }
+
+-static inline int mm_counter_switch_to_pcpu(struct mm_struct *mm)
++static inline void dec_mm_counter(struct mm_struct *mm, int member)
+ {
+- return percpu_counter_switch_to_pcpu_many(mm->rss_stat, NR_MM_COUNTERS);
+-}
++ percpu_counter_dec(&mm->rss_stat[member]);
+
+-static inline void mm_counter_destroy(struct mm_struct *mm)
+-{
+- percpu_counter_destroy_many(mm->rss_stat, NR_MM_COUNTERS);
++ mm_trace_rss_stat(mm, member);
+ }
+
+ /* Optimized variant when folio is already known not to be anon */
+diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
+index be39ca5af0ba..58688768ef0f 100644
+--- a/include/trace/events/kmem.h
++++ b/include/trace/events/kmem.h
+@@ -361,8 +361,8 @@ TRACE_EVENT(rss_stat,
+ __entry->mm_id = mm_ptr_to_hash(mm);
+ __entry->curr = !!(current->mm == mm);
+ __entry->member = member;
+- __entry->size = (mm_counter_sum_positive(mm, member)
+- << PAGE_SHIFT);
++ __entry->size = (percpu_counter_sum_positive(&mm->rss_stat[member])
++ << PAGE_SHIFT);
+ ),
+
+ TP_printk("mm_id=%u curr=%d type=%s size=%ldB",
+diff --git a/kernel/fork.c b/kernel/fork.c
+index e033388b11bd..899e8c639fac 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -854,7 +854,7 @@ static void check_mm(struct mm_struct *mm)
+ "Please make sure 'struct resident_page_types[]' is updated as well");
+
+ for (i = 0; i < NR_MM_COUNTERS; i++) {
+- long x = mm_counter_sum(mm, i);
++ long x = percpu_counter_sum(&mm->rss_stat[i]);
+
+ if (unlikely(x))
+ pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n",
+@@ -955,7 +955,7 @@ void __mmdrop(struct mm_struct *mm)
+ put_user_ns(mm->user_ns);
+ mm_pasid_drop(mm);
+ mm_destroy_cid(mm);
+- mm_counter_destroy(mm);
++ percpu_counter_destroy_many(mm->rss_stat, NR_MM_COUNTERS);
+
+ free_mm(mm);
+ }
+@@ -1358,11 +1358,17 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
+ if (mm_alloc_cid(mm))
+ goto fail_cid;
+
++ if (percpu_counter_init_many(mm->rss_stat, 0, GFP_KERNEL_ACCOUNT,
++ NR_MM_COUNTERS))
++ goto fail_pcpu;
++
+ sp_init_mm(mm);
+ mm->user_ns = get_user_ns(user_ns);
+ lru_gen_init_mm(mm);
+ return mm;
+
++fail_pcpu:
++ mm_destroy_cid(mm);
+ fail_cid:
+ destroy_context(mm);
+ fail_nocontext:
+@@ -1778,16 +1784,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
+ if (!oldmm)
+ return 0;
+
+- /*
+- * For single-thread processes, rss_stat is in atomic mode, which
+- * reduces the memory consumption and performance regression caused by
+- * using percpu. For multiple-thread processes, rss_stat is switched to
+- * the percpu mode to reduce the error margin.
+- */
+- if (clone_flags & CLONE_THREAD)
+- if (mm_counter_switch_to_pcpu(oldmm))
+- return -ENOMEM;
+-
+ if (clone_flags & CLONE_VM) {
+ mmget(oldmm);
+ mm = oldmm;
+--
+2.25.1
+