From 9e82de74b3627a44eae24db0ffec854b376a2bb6 Mon Sep 17 00:00:00 2001 From: Min Fanlei Date: Thu, 18 Dec 2025 15:23:13 +0800 Subject: [PATCH 01/40] sw64: kvm: fix srcu lock leak in pv steal time Some early return paths in kvm_sw64_record_steal_time() fail to release the SRCU read lock, which can lead to resource leak. Fix it. Signed-off-by: Min Fanlei Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/kvm/pvtime.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/sw_64/kvm/pvtime.c b/arch/sw_64/kvm/pvtime.c index 767617d4c5e0..806e662b1100 100644 --- a/arch/sw_64/kvm/pvtime.c +++ b/arch/sw_64/kvm/pvtime.c @@ -27,19 +27,19 @@ void kvm_sw64_record_steal_time(struct kvm_vcpu *vcpu) hva = kvm_vcpu_gfn_to_hva(vcpu, gfn); if (WARN_ON(kvm_is_error_hva(hva))) { vcpu->arch.steal.base = INVALID_GPA; - return; + goto out_unlock; } steal_ptr = (__u64 __user *)(hva + offset_in_page(base) + offset_s); version_ptr = (__u32 __user *)(hva + offset_in_page(base) + offset_v); if (WARN_ON(get_user(version, version_ptr))) - return; + goto out_unlock; version += 1; if (WARN_ON(put_user(version, version_ptr))) - return; + goto out_unlock; if (!WARN_ON(get_user(steal, steal_ptr))) { vcpu->arch.steal.last_steal = READ_ONCE(current->sched_info.run_delay); @@ -54,6 +54,8 @@ void kvm_sw64_record_steal_time(struct kvm_vcpu *vcpu) WARN_ON(put_user(version, version_ptr)); kvm_vcpu_mark_page_dirty(vcpu, gfn); + +out_unlock: srcu_read_unlock(&kvm->srcu, idx); } -- Gitee From 8cf6465977d22991bd95a2892b7e6bbcd4a94c89 Mon Sep 17 00:00:00 2001 From: Xiao Ziwang Date: Tue, 23 Dec 2025 12:07:30 +0800 Subject: [PATCH 02/40] sw64: kvm: add qemu-gdb watchpoint support We add kvm support for qemu-gdb watchpoint. When qemu inserts a watchpoint, kvm receives the information and converts it. We save and restore the debug CSRs before entering VM and after exiting VM. If a qemu-gdb watchpoint is triggered, we do VM exit to handle it. 
We also create a file when host kernel booting to indicate that host kernel supports qemu-gdb watchpoint. Signed-off-by: Xiao Ziwang Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/cpu.h | 1 + arch/sw_64/include/asm/kvm_host.h | 5 +++++ arch/sw_64/include/asm/vcpu.h | 4 +++- arch/sw_64/include/uapi/asm/kvm.h | 4 ++++ arch/sw_64/kernel/setup.c | 14 +++++++++++++ arch/sw_64/kvm/handle_exit.c | 8 +++++++- arch/sw_64/kvm/kvm_core4.c | 18 +++++++++++++++++ arch/sw_64/kvm/sw64.c | 33 +++++++++++++++++++++++++++++++ 8 files changed, 85 insertions(+), 2 deletions(-) diff --git a/arch/sw_64/include/asm/cpu.h b/arch/sw_64/include/asm/cpu.h index 9fc739dacfab..cfb6090baded 100644 --- a/arch/sw_64/include/asm/cpu.h +++ b/arch/sw_64/include/asm/cpu.h @@ -25,6 +25,7 @@ enum hmcall_cpuid_cmd { #define CPU_FEAT_SIMD 0x2 #define CPU_FEAT_UNA 0x4 #define CPU_FEAT_VINT 0x8 +#define CPU_FEAT_WP 0x20 enum sunway_cpu_model { CPU_SW3231 = 0x31, diff --git a/arch/sw_64/include/asm/kvm_host.h b/arch/sw_64/include/asm/kvm_host.h index 3d1ec0a2ea5d..6f02a6fb5386 100644 --- a/arch/sw_64/include/asm/kvm_host.h +++ b/arch/sw_64/include/asm/kvm_host.h @@ -119,6 +119,10 @@ struct kvm_vcpu_arch { /* Don't run the guest (internal implementation need) */ bool pause; + /* vcpu debug state */ + struct kvm_guest_debug_arch host_debug_state; + struct kvm_guest_debug_arch guest_debug_state; + struct kvm_decode mmio_decode; /* Cache some mmu pages needed inside spinlock regions */ @@ -207,6 +211,7 @@ void kvm_sw64_destroy_vm(struct kvm *kvm); int kvm_sw64_vcpu_reset(struct kvm_vcpu *vcpu); long kvm_sw64_set_vcb(struct file *filp, unsigned long arg); long kvm_sw64_get_vcb(struct file *filp, unsigned long arg); +void kvm_sw64_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg); void update_aptp(unsigned long pgd); void vcpu_set_numa_affinity(struct kvm_vcpu *vcpu); diff --git a/arch/sw_64/include/asm/vcpu.h b/arch/sw_64/include/asm/vcpu.h index 
38066427afdb..2026aea49b3e 100644 --- a/arch/sw_64/include/asm/vcpu.h +++ b/arch/sw_64/include/asm/vcpu.h @@ -113,7 +113,9 @@ struct vcpucb { unsigned long reserved1[10]; /* USE IN HMCODE */ DECLARE_BITMAP(irqs_pending, CORE4VM_IRQS); /* Pending virtual interrupts */ unsigned long irqs_addr; - unsigned long reserved2[30]; + unsigned long reserved2[3]; /* USE IN HMCODE */ + unsigned long watchpoint_in_use; + unsigned long reserved3[26]; }; #endif diff --git a/arch/sw_64/include/uapi/asm/kvm.h b/arch/sw_64/include/uapi/asm/kvm.h index 2b220eed1441..cd180f705647 100644 --- a/arch/sw_64/include/uapi/asm/kvm.h +++ b/arch/sw_64/include/uapi/asm/kvm.h @@ -47,10 +47,14 @@ struct kvm_fpu { struct kvm_debug_exit_arch { unsigned long epc; + unsigned long reason;//indicate breakpoint or watchpoint }; /* for KVM_SET_GUEST_DEBUG */ struct kvm_guest_debug_arch { + uint64_t addr; + uint64_t mask; + uint64_t ctl; }; /* definition of registers in kvm_run */ diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c index f8925a8a1b61..b7e12d9bdeff 100644 --- a/arch/sw_64/kernel/setup.c +++ b/arch/sw_64/kernel/setup.c @@ -831,6 +831,20 @@ static int __init debugfs_mclk_init(void) return 0; } late_initcall(debugfs_mclk_init); + +static int __init debugfs_watchpoint_init(void) +{ + struct dentry *dir = sw64_debugfs_dir; + static u64 feature_wp; + + feature_wp = (cpuid(GET_FEATURES, 0) & CPU_FEAT_WP); + if (feature_wp) { + debugfs_create_u64("watchpoint", 0644, dir, &feature_wp); + } + + return 0; +} +late_initcall(debugfs_watchpoint_init); #endif #ifdef CONFIG_OF diff --git a/arch/sw_64/kvm/handle_exit.c b/arch/sw_64/kvm/handle_exit.c index 3a623c59895a..33849d981427 100644 --- a/arch/sw_64/kvm/handle_exit.c +++ b/arch/sw_64/kvm/handle_exit.c @@ -61,7 +61,13 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, case SW64_KVM_EXIT_DEBUG: vcpu->stat.debug_exits++; vcpu->run->exit_reason = KVM_EXIT_DEBUG; - vcpu->run->debug.arch.epc = vcpu->arch.regs.pc; + /* hargs-arg0 
is assigned in hmcode to indicate qemu-gdb watchpoint */ + if (hargs->arg0 == 2) { + vcpu->run->debug.arch.epc = hargs->arg2; + vcpu->run->debug.arch.reason = hargs->arg0; + } else { + vcpu->run->debug.arch.epc = vcpu->arch.regs.pc; + } return 0; #ifdef CONFIG_SUBARCH_C4 case SW64_KVM_EXIT_APT_FAULT: diff --git a/arch/sw_64/kvm/kvm_core4.c b/arch/sw_64/kvm/kvm_core4.c index a13e9fd57249..9f777e434b83 100644 --- a/arch/sw_64/kvm/kvm_core4.c +++ b/arch/sw_64/kvm/kvm_core4.c @@ -102,6 +102,24 @@ long kvm_sw64_set_vcb(struct file *filp, unsigned long arg) return 0; } +void kvm_sw64_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) +{ + uint64_t match_ctl, match_ctl_mode; + vcpu->guest_debug = dbg->control; + if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) { + vcpu->guest_debug = 0; + } else { + vcpu->arch.guest_debug_state = dbg->arch; + match_ctl_mode = (vcpu->arch.guest_debug_state.ctl >> 8) & 0x3; + match_ctl = sw64_read_csr(CSR_DC_CTLP); + match_ctl &= ~((0x1UL << 3) | (0x3UL << DA_MATCH_EN_S) | + (0x1UL << DV_MATCH_EN_S) | (0x1UL << DAV_MATCH_EN_S)); + match_ctl |= (match_ctl_mode << DA_MATCH_EN_S) | (0x1UL << DPM_MATCH_EN_S) | + (0x2UL << DPM_MATCH); + vcpu->arch.guest_debug_state.ctl = match_ctl; + } +} + int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { if (feature_vint) diff --git a/arch/sw_64/kvm/sw64.c b/arch/sw_64/kvm/sw64.c index 665e11656706..e9fe07cce1f1 100644 --- a/arch/sw_64/kvm/sw64.c +++ b/arch/sw_64/kvm/sw64.c @@ -398,6 +398,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { trace_kvm_set_guest_debug(vcpu, dbg->control); +#ifdef CONFIG_SUBARCH_C4 + kvm_sw64_set_guest_debug(vcpu, dbg); +#endif return 0; } @@ -408,6 +411,28 @@ void update_vcpu_stat_time(struct kvm_vcpu_stat *vcpu_stat) vcpu_stat->gtime = current->gtime; } +void kvm_sw64_switch_debug_state_pre_run(struct kvm_vcpu *vcpu) +{ + vcpu->arch.host_debug_state.addr = sw64_read_csr(CSR_DA_MATCH); + 
vcpu->arch.host_debug_state.mask = sw64_read_csr(CSR_DA_MASK); + vcpu->arch.host_debug_state.ctl = sw64_read_csr(CSR_DC_CTLP); + + sw64_write_csr(vcpu->arch.guest_debug_state.addr, CSR_DA_MATCH); + sw64_write_csr(vcpu->arch.guest_debug_state.mask, CSR_DA_MASK); + sw64_write_csr(vcpu->arch.guest_debug_state.ctl, CSR_DC_CTLP); +} + +void kvm_sw64_switch_debug_state_post_run(struct kvm_vcpu *vcpu) +{ + vcpu->arch.guest_debug_state.addr = sw64_read_csr(CSR_DA_MATCH); + vcpu->arch.guest_debug_state.mask = sw64_read_csr(CSR_DA_MASK); + vcpu->arch.guest_debug_state.ctl = sw64_read_csr(CSR_DC_CTLP); + + sw64_write_csr(vcpu->arch.host_debug_state.addr, CSR_DA_MATCH); + sw64_write_csr(vcpu->arch.host_debug_state.mask, CSR_DA_MASK); + sw64_write_csr(vcpu->arch.host_debug_state.ctl, CSR_DC_CTLP); +} + /* * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on * proper exit to userspace. @@ -477,6 +502,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) /* update aptp before the guest runs */ update_aptp((unsigned long)vcpu->kvm->arch.pgd); + if (vcpu->guest_debug) { + kvm_sw64_switch_debug_state_pre_run(vcpu); + } + /* Enter the guest */ trace_kvm_sw64_entry(vcpu->vcpu_id, vcpu->arch.regs.pc); vcpu->mode = IN_GUEST_MODE; @@ -492,6 +521,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) trace_kvm_sw64_exit(ret, vcpu->arch.regs.pc); + if (vcpu->guest_debug) { + kvm_sw64_switch_debug_state_post_run(vcpu); + } + preempt_enable(); /* ret = 0 indicate interrupt in guest mode, ret > 0 indicate hcall */ -- Gitee From 5c0fe034c82590e8707986eb527171166ddc995d Mon Sep 17 00:00:00 2001 From: Gu Yuchen Date: Tue, 6 Jan 2026 10:28:01 +0800 Subject: [PATCH 03/40] sw64: add definitions for kernel-required CPU capabilities Add definitions for CPU capabilities that are required by the kernel. 
Signed-off-by: Gu Yuchen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/cpucaps.h | 8 ++++++ arch/sw_64/include/asm/cpufeature.h | 30 +++++++++++++++++++++++ arch/sw_64/include/asm/elf.h | 4 +-- arch/sw_64/include/asm/hwcap.h | 14 +++++++++++ arch/sw_64/include/uapi/asm/hwcap.h | 7 ++++++ arch/sw_64/kernel/Makefile | 2 +- arch/sw_64/kernel/cpu.c | 3 ++- arch/sw_64/kernel/cpufeature.c | 38 +++++++++++++++++++++++++++++ arch/sw_64/kernel/setup.c | 5 +++- 9 files changed, 106 insertions(+), 5 deletions(-) create mode 100644 arch/sw_64/include/asm/cpucaps.h create mode 100644 arch/sw_64/include/asm/cpufeature.h create mode 100644 arch/sw_64/include/asm/hwcap.h create mode 100644 arch/sw_64/include/uapi/asm/hwcap.h create mode 100644 arch/sw_64/kernel/cpufeature.c diff --git a/arch/sw_64/include/asm/cpucaps.h b/arch/sw_64/include/asm/cpucaps.h new file mode 100644 index 000000000000..b39abe2c53c2 --- /dev/null +++ b/arch/sw_64/include/asm/cpucaps.h @@ -0,0 +1,8 @@ +#ifndef __ASM_CPUCAPS_H +#define __ASM_CPUCAPS_H + +#define CPU_FEATURE_HWUNA 0 + +#define SW64_NCAPS 1 + +#endif /* __ASM_CPUCAPS_H */ diff --git a/arch/sw_64/include/asm/cpufeature.h b/arch/sw_64/include/asm/cpufeature.h new file mode 100644 index 000000000000..44105b53f483 --- /dev/null +++ b/arch/sw_64/include/asm/cpufeature.h @@ -0,0 +1,30 @@ +#ifndef __ASM_CPUFEATURE_H +#define __ASM_CPUFEATURE_H + +#include +#include +#include +#include + +#include +#include + +#define MAX_CPU_FEATURES 64 +#define cpu_feature(x) KERNEL_HWCAP_SW64_ ## x +#define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name)) +#define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name)) + +extern DECLARE_BITMAP(system_cpucaps, SW64_NCAPS); + +void __init setup_cpu_features(void); +void cpu_set_feature(unsigned int num); +bool cpu_have_feature(unsigned int num); + +static inline bool cpus_have_cap(unsigned int num) +{ + if (num >= SW64_NCAPS) + return false; + return test_bit(num, 
system_cpucaps); +} + +#endif diff --git a/arch/sw_64/include/asm/elf.h b/arch/sw_64/include/asm/elf.h index 7aec6327901f..6e26ec976367 100644 --- a/arch/sw_64/include/asm/elf.h +++ b/arch/sw_64/include/asm/elf.h @@ -136,8 +136,8 @@ do { \ * This yields a mask that user programs can use to figure out what * instruction set this CPU supports. */ - -#define ELF_HWCAP 0 +#define ELF_HWCAP (elf_hwcap) +extern unsigned long elf_hwcap; /* * This yields a string that ld.so will use to load implementation diff --git a/arch/sw_64/include/asm/hwcap.h b/arch/sw_64/include/asm/hwcap.h new file mode 100644 index 000000000000..6e1cb87caa47 --- /dev/null +++ b/arch/sw_64/include/asm/hwcap.h @@ -0,0 +1,14 @@ +#ifndef __ASM_HWCAP_H +#define __ASM_HWCAP_H + +#include +#include + +#ifndef __ASSEMBLY__ +#include +#define __khwcap_feature(x) const_ilog2(HWCAP_SW64_ ## x) + +#define KERNEL_HWCAP_SW64_HWUNA __khwcap_feature(HWUNA) + +#endif +#endif diff --git a/arch/sw_64/include/uapi/asm/hwcap.h b/arch/sw_64/include/uapi/asm/hwcap.h new file mode 100644 index 000000000000..36b247e92b20 --- /dev/null +++ b/arch/sw_64/include/uapi/asm/hwcap.h @@ -0,0 +1,7 @@ +#ifndef _UAPI__ASM_HWCAP_H +#define _UAPI__ASM_HWCAP_H + +/* HWCAP flags */ +#define HWCAP_SW64_HWUNA (1 << 0) + +#endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/sw_64/kernel/Makefile b/arch/sw_64/kernel/Makefile index caf6de81dbde..8d195f6df15e 100644 --- a/arch/sw_64/kernel/Makefile +++ b/arch/sw_64/kernel/Makefile @@ -23,7 +23,7 @@ obj-y := fpu.o traps.o process.o sys_sw64.o irq.o cpu.o \ systbls.o dup_print.o chip_setup.o \ insn.o early_init.o topology.o cacheinfo.o \ vdso.o vdso/ hmcall.o stacktrace.o idle.o reset.o \ - head.o termios.o trap_unalign.o + head.o termios.o trap_unalign.o cpufeature.o obj-$(CONFIG_SUBARCH_C3B) += entry_c3.o tc.o obj-$(CONFIG_SUBARCH_C4) += entry_c4.o diff --git a/arch/sw_64/kernel/cpu.c b/arch/sw_64/kernel/cpu.c index 7683fedda70f..63095e0f58bf 100644 --- a/arch/sw_64/kernel/cpu.c +++ 
b/arch/sw_64/kernel/cpu.c @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -266,7 +267,7 @@ static int show_cpuinfo(struct seq_file *f, void *slot) (loops_per_jiffy / (5000/HZ)) % 100); seq_printf(f, "flags\t\t: fpu simd vpn upn cpuid%s\n", - (cpuid(GET_FEATURES, 0) & CPU_FEAT_UNA) ? " una" : ""); + (cpu_have_named_feature(HWUNA)) ? " una" : ""); seq_printf(f, "page size\t: %d\n", 8192); seq_printf(f, "cache_alignment\t: %d\n", l3_cachline_size); seq_printf(f, "address sizes\t: %u bits physical, %u bits virtual\n\n", diff --git a/arch/sw_64/kernel/cpufeature.c b/arch/sw_64/kernel/cpufeature.c new file mode 100644 index 000000000000..611d14b0ddcd --- /dev/null +++ b/arch/sw_64/kernel/cpufeature.c @@ -0,0 +1,38 @@ +#include + +#include +#include +#include + +unsigned long elf_hwcap __read_mostly; + +DECLARE_BITMAP(system_cpucaps, SW64_NCAPS); +EXPORT_SYMBOL(system_cpucaps); + +void cpu_set_feature(unsigned int num) +{ + WARN_ON(num >= MAX_CPU_FEATURES); + elf_hwcap |= BIT(num); +} +EXPORT_SYMBOL_GPL(cpu_set_feature); + +bool cpu_have_feature(unsigned int num) +{ + WARN_ON(num >= MAX_CPU_FEATURES); + return elf_hwcap & BIT(num); +} +EXPORT_SYMBOL_GPL(cpu_have_feature); + +static void setup_cpu_features_common(void) +{ + elf_hwcap = 0; + if (cpuid(GET_FEATURES, 0) & CPU_FEAT_UNA) { + cpu_set_named_feature(HWUNA); + set_bit(CPU_FEATURE_HWUNA, system_cpucaps); + } +} + +void __init setup_cpu_features(void) +{ + setup_cpu_features_common(); +} diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c index b7e12d9bdeff..4407c0ef9aec 100644 --- a/arch/sw_64/kernel/setup.c +++ b/arch/sw_64/kernel/setup.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -578,7 +579,7 @@ static void __init setup_firmware_fdt(void) static void __init setup_cpu_caps(void) { - if (cpuid(GET_FEATURES, 0) & CPU_FEAT_UNA) + if (cpu_have_named_feature(HWUNA)) static_branch_enable(&hw_una_enabled); } @@ -673,6 +674,8 @@ setup_arch(char 
**cmdline_p) */ trap_init(); + setup_cpu_features(); + jump_label_init(); #ifdef CONFIG_SUBARCH_C3B -- Gitee From be1794974f52b310d30fe7206c12e6cb3a9a84d5 Mon Sep 17 00:00:00 2001 From: Xu Yiwei Date: Mon, 5 Jan 2026 07:30:20 +0000 Subject: [PATCH 04/40] sw64: pci: add sunway pci controller for Command Completed errata Sunway PCI controller does not set the Command Completed bit unless writes to the Slot Command register change "Control" bits. Add a quirk on sunway pci controller. Signed-off-by: Xu Yiwei Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/pci/pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/sw_64/pci/pci.c b/arch/sw_64/pci/pci.c index 48aff446e3a7..87a34cad3f2b 100644 --- a/arch/sw_64/pci/pci.c +++ b/arch/sw_64/pci/pci.c @@ -232,6 +232,10 @@ static void fixup_root_complex(struct pci_dev *dev) } dev->no_msi = 1; + +#ifdef CONFIG_HOTPLUG_PCI_PCIE + dev->broken_cmd_compl = 1; +#endif } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_JN, PCI_DEVICE_ID_SW64_ROOT_BRIDGE, fixup_root_complex); -- Gitee From da509fa3cb2c32432e16feec6cffe7ee9f69f5b0 Mon Sep 17 00:00:00 2001 From: Gao Chen Date: Thu, 8 Jan 2026 16:35:19 +0800 Subject: [PATCH 05/40] sw64: use ioremap to map spaces for cpufreq and S3 Map SPBU, INTPU and GPIO spaces to fix page faults during cpufreq and S3. 
Signed-off-by: Gao Chen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/uncore_io_junzhang.h | 4 ++++ arch/sw_64/include/asm/uncore_io_xuelang.h | 4 ++++ drivers/platform/sw64/misc-platform.c | 24 +++++++++++++++------ 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/arch/sw_64/include/asm/uncore_io_junzhang.h b/arch/sw_64/include/asm/uncore_io_junzhang.h index 24c1e3be32f3..6f78eb766801 100644 --- a/arch/sw_64/include/asm/uncore_io_junzhang.h +++ b/arch/sw_64/include/asm/uncore_io_junzhang.h @@ -62,6 +62,10 @@ #define LPC_FIRMWARE_IO (0x3UL << 28 | IO_BASE | LPC_BASE) #define PCI_VT_LEGACY_IO (IO_BASE | PCI_BASE | PCI_LEGACY_IO) +#define SPBU_SIZE 0xe000 +#define INTPU_SIZE 0x1900 +#define GPIO_SIZE 0x4000 + #define CORE0_CID (rcid_to_domain_id(cpu_to_rcid(0)) << 7 | \ rcid_to_thread_id(cpu_to_rcid(0)) << 6 | \ rcid_to_core_id(cpu_to_rcid(0))) diff --git a/arch/sw_64/include/asm/uncore_io_xuelang.h b/arch/sw_64/include/asm/uncore_io_xuelang.h index c8f01cb01e52..243b4d34c291 100644 --- a/arch/sw_64/include/asm/uncore_io_xuelang.h +++ b/arch/sw_64/include/asm/uncore_io_xuelang.h @@ -67,6 +67,10 @@ #define DLI_PHY_CTL (0x10UL << 24) #define PCI_VT_LEGACY_IO (IO_BASE | PCI_BASE | PCI_LEGACY_IO) +#define SPBU_SIZE 0x8f00 +#define INTPU_SIZE 0x1680 +#define GPIO_SIZE 0x3c00 + #define CORE0_CID (rcid_to_domain_id(cpu_to_rcid(0)) << 6 | \ rcid_to_core_id(cpu_to_rcid(0))) #define PME_ENABLE_INTD_CORE0 (0x1UL << 62 | 0x1UL << 10 | CORE0_CID) diff --git a/drivers/platform/sw64/misc-platform.c b/drivers/platform/sw64/misc-platform.c index af426429ef23..8959b6d3cdd4 100644 --- a/drivers/platform/sw64/misc-platform.c +++ b/drivers/platform/sw64/misc-platform.c @@ -102,16 +102,28 @@ static int misc_platform_probe(struct platform_device *pdev) gpio_base = __va(SW64_IO_BASE(node) | GPIO_BASE); if (!device_property_read_u64(dev, "sunway,spbu_base", - &base_address)) - spbu_base = __va(base_address); + &base_address)) { + if (is_junzhang_v1() 
|| is_junzhang_v2()) + spbu_base = ioremap(base_address, SPBU_SIZE); + else + spbu_base = __va(base_address); + } if (!device_property_read_u64(dev, "sunway,intpu_base", - &base_address)) - intpu_base = __va(base_address); + &base_address)) { + if (is_junzhang_v1() || is_junzhang_v2()) + intpu_base = ioremap(base_address, INTPU_SIZE); + else + intpu_base = __va(base_address); + } if (!device_property_read_u64(dev, "sunway,gpio_base", - &base_address)) - gpio_base = __va(base_address); + &base_address)) { + if (is_junzhang_v1() || is_junzhang_v2()) + gpio_base = ioremap(base_address, GPIO_SIZE); + else + gpio_base = __va(base_address); + } misc_platform_devices[node].spbu_base = spbu_base; misc_platform_devices[node].intpu_base = intpu_base; -- Gitee From d17c7bcc53d24473f72e6bb3760343bfbaf8f4f1 Mon Sep 17 00:00:00 2001 From: Xu Yiwei Date: Mon, 22 Dec 2025 07:17:34 +0000 Subject: [PATCH 06/40] sw64: pci: fix IO_BASE if numa_off The current implementation assembles a wrong PCI_IO_BASE address when numa is off because hose->node is always all 0 in this case. This will further interfere device usage on other nodes. 
Signed-off-by: Xu Yiwei Signed-off-by: Jinyu Tang Signed-off-by: Yizhou Chen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- drivers/usb/host/pci-quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index 88c493b60d9a..f382d1712bb9 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -1328,7 +1328,7 @@ fixup_usb_xhci_reset(struct pci_dev *dev) if (offset == 0) return; - base = ioremap(SW64_PCI_IO_BASE(hose->node, hose->index) | offset, SZ_8K); + base = ioremap(hose->dense_mem_base | offset, SZ_8K); ext_cap_offset = xhci_find_next_ext_cap(base, 0, XHCI_EXT_CAPS_LEGACY); if (!ext_cap_offset) -- Gitee From 1b173aac35ac4ae4169bff7c85a8947deac15093 Mon Sep 17 00:00:00 2001 From: Gu Yuchen Date: Tue, 6 Jan 2026 14:56:00 +0800 Subject: [PATCH 07/40] sw64: add alternative runtime patching Introduce the alternative mechanism to sw64. In the future, we can use this mechanism to optimize hotspot functions according to cpu features. 
Signed-off-by: Gu Yuchen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/alternative-asm.h | 80 +++++++++++ arch/sw_64/include/asm/alternative.h | 105 +++++++++++++++ arch/sw_64/include/asm/insn.h | 9 ++ arch/sw_64/include/asm/module.h | 15 +++ arch/sw_64/kernel/Makefile | 3 +- arch/sw_64/kernel/alternative.c | 164 +++++++++++++++++++++++ arch/sw_64/kernel/module.c | 16 +++ arch/sw_64/kernel/setup.c | 3 + arch/sw_64/kernel/vdso/vdso.lds.S | 5 + arch/sw_64/kernel/vmlinux.lds.S | 7 + 10 files changed, 406 insertions(+), 1 deletion(-) create mode 100644 arch/sw_64/include/asm/alternative-asm.h create mode 100644 arch/sw_64/include/asm/alternative.h create mode 100644 arch/sw_64/kernel/alternative.c diff --git a/arch/sw_64/include/asm/alternative-asm.h b/arch/sw_64/include/asm/alternative-asm.h new file mode 100644 index 000000000000..088ed1987283 --- /dev/null +++ b/arch/sw_64/include/asm/alternative-asm.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ALTERNATIVE_ASM_H +#define _ASM_ALTERNATIVE_ASM_H + +#ifdef __ASSEMBLY__ + +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ +.macro altinstruction_entry orig alt feature orig_len alt_len + .long \orig - . + .long \alt - . + .short \feature + .byte \orig_len + .byte \alt_len +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".fill" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. 
+ */ +.macro ALTERNATIVE oldinstr, newinstr, feature +140 : + \oldinstr +141 : + .fill - (((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)) / 4, 4, 0x43ff075f +142 : + + .pushsection .altinstructions, "a" + altinstruction_entry 140b, 143f, \feature, 142b-140b, 144f-143f + .popsection + + .subsection 1 +143 : + \newinstr +144 : + .previous +.endm + +#define old_len (141b-140b) +#define new_len1 (144f-143f) +#define new_len2 (145f-144f) + +#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) + +/* + * Same as ALTERNATIVE macro above but for two alternatives. If CPU + * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has + * @feature2, it replaces @oldinstr with @newinstr2. + */ +.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 +140 : + \oldinstr +141 : + .fill - ((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ + (alt_max_short(new_len1, new_len2) - (old_len)) / 4, 4, 0x43ff075f +142 : + + .pushsection .altinstructions, "a" + altinstruction_entry 140b, 143f, \feature1, 142b-140b, 144f-143f, 142b-141b + altinstruction_entry 140b, 144f, \feature2, 142b-140b, 145f-144f, 142b-141b + .popsection + + .subsection 1 +143 : + \newinstr1 +144 : + \newinstr2 +145 : + .previous +.endm + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_ALTERNATIVE_ASM_H */ diff --git a/arch/sw_64/include/asm/alternative.h b/arch/sw_64/include/asm/alternative.h new file mode 100644 index 000000000000..81138e21472b --- /dev/null +++ b/arch/sw_64/include/asm/alternative.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ALTERNATIVE_H +#define _ASM_ALTERNATIVE_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include + +#include + +struct alt_instr { + s32 instr_offset; /* offset to original instruction */ + s32 replace_offset; /* offset to replacement instruction */ + u16 feature; /* feature bit set for replacement */ + u8 instrlen; /* length of original instruction */ + u8 replacementlen; /* length of new 
instruction */ +} __packed; + +extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); +extern void apply_alternatives_all(void); + +#define b_replacement(num) "664"#num +#define e_replacement(num) "665"#num + +#define alt_end_marker "663" +#define alt_slen "662b-661b" +#define alt_total_slen alt_end_marker"b-661b" +#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" + +#define __OLDINSTR(oldinstr, num) \ + "661:\n\t" oldinstr "\n662:\n" \ + ".fill -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \ + "((" alt_rlen(num) ")-(" alt_slen ")) / 4, 4, 0x43ff075f\n" + +#define OLDINSTR(oldinstr, num) \ + __OLDINSTR(oldinstr, num) \ + alt_end_marker ":\n" + +#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))" + +/* + * Pad the second replacement alternative with additional NOPs if it is + * additionally longer than the first replacement alternative. + */ +#define OLDINSTR_2(oldinstr, num1, num2) \ + "661:\n\t" oldinstr "\n662:\n" \ + ".fill -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \ + "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) / 4, " \ + "4, 0x43ff075f\n" \ + alt_end_marker ":\n" + +#define ALTINSTR_ENTRY(feature, num) \ + " .long 661b - .\n" /* label */ \ + " .long " b_replacement(num)"f - .\n" /* new instruction */ \ + " .short " __stringify(feature) "\n" /* feature bit */ \ + " .byte " alt_total_slen "\n" /* source len */ \ + " .byte " alt_rlen(num) "\n" /* replacement len */ + +#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \ + b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t" + +/* alternative assembly primitive: */ +#define ALTERNATIVE(oldinstr, newinstr, feature) \ + OLDINSTR(oldinstr, 1) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature, 1) \ + ".popsection\n" \ + ".subsection 1\n" \ + ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ + ".previous\n" + +#define 
ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ + OLDINSTR_2(oldinstr, 1, 2) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature1, 1) \ + ALTINSTR_ENTRY(feature2, 2) \ + ".popsection\n" \ + ".subsection 1\n" \ + ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ + ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ + ".previous\n" + +/* + * Alternative instructions for different CPU types or capabilities. + * + * This allows to use optimized instructions even on generic binary + * kernels. + * + * length of oldinstr must be longer or equal the length of newinstr + * It can be padded with nops as needed. + * + * For non barrier like inlines please define new variants + * without volatile and memory clobber. + */ +#define alternative(oldinstr, newinstr, feature) \ + (asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")) + +#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ + (asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")) + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_ALTERNATIVE_H */ diff --git a/arch/sw_64/include/asm/insn.h b/arch/sw_64/include/asm/insn.h index 437cb48d1e93..7bd24d6f6598 100644 --- a/arch/sw_64/include/asm/insn.h +++ b/arch/sw_64/include/asm/insn.h @@ -77,6 +77,7 @@ SW64_INSN(imemb, 0x18000001, 0xfc00ffff); SW64_INSN(rtc, 0x18000020, 0xfc00ffff); SW64_INSN(halt, 0x18000080, 0xfc00ffff); SW64_INSN(rd_f, 0x18001000, 0xfc00ffff); +SW64_INSN(lbr, 0x74000000, 0xfc000000); SW64_INSN(beq, 0xc0000000, 0xfc000000); SW64_INSN(bne, 0xc4000000, 0xfc000000); SW64_INSN(blt, 0xc8000000, 0xfc000000); @@ -94,4 +95,12 @@ SW64_INSN(fbge, 0xf4000000, 0xfc000000); SW64_INSN(lldw, 0x20000000, 0xfc00f000); SW64_INSN(lldl, 0x20001000, 0xfc00f000); +static inline bool sw64_insn_is_branch(u32 insn) +{ + bool is_branch = ((insn & 0xfc000000) >= 0x10000000 && (insn & 0xfc000000) <= 0x14000000) || \ + ((insn & 0xfc000000) >= 0xc0000000 && (insn & 0xfc000000) 
<= 0xf4000000) || \ + ((insn & 0xfc000000) == 0x74000000); + return is_branch; +} + #endif /* _ASM_SW64_INSN_H */ diff --git a/arch/sw_64/include/asm/module.h b/arch/sw_64/include/asm/module.h index d1663aab4097..708adc93bca7 100644 --- a/arch/sw_64/include/asm/module.h +++ b/arch/sw_64/include/asm/module.h @@ -14,4 +14,19 @@ struct mod_arch_specific { asm(".section .got, \"aw\", @progbits; .align 3; .previous"); #endif +static inline const Elf_Shdr *find_section(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *name) +{ + const Elf_Shdr *s, *se; + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { + if (strcmp(name, secstrs + s->sh_name) == 0) + return s; + } + + return NULL; +} + #endif /* _ASM_SW64_MODULE_H */ diff --git a/arch/sw_64/kernel/Makefile b/arch/sw_64/kernel/Makefile index 8d195f6df15e..fd166b642113 100644 --- a/arch/sw_64/kernel/Makefile +++ b/arch/sw_64/kernel/Makefile @@ -23,7 +23,8 @@ obj-y := fpu.o traps.o process.o sys_sw64.o irq.o cpu.o \ systbls.o dup_print.o chip_setup.o \ insn.o early_init.o topology.o cacheinfo.o \ vdso.o vdso/ hmcall.o stacktrace.o idle.o reset.o \ - head.o termios.o trap_unalign.o cpufeature.o + head.o termios.o trap_unalign.o alternative.o \ + cpufeature.o obj-$(CONFIG_SUBARCH_C3B) += entry_c3.o tc.o obj-$(CONFIG_SUBARCH_C4) += entry_c4.o diff --git a/arch/sw_64/kernel/alternative.c b/arch/sw_64/kernel/alternative.c new file mode 100644 index 000000000000..87b1d76acc2d --- /dev/null +++ b/arch/sw_64/kernel/alternative.c @@ -0,0 +1,164 @@ +#define pr_fmt(fmt) "alternatives: " fmt + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#define MAX_PATCH_SIZE (((u8)(-1)) / SW64_INSN_SIZE) + +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + +/* Use this to add nops to a buffer, then text_poke the whole buffer. 
*/ +static void __init_or_module add_nops(u32 *insn, int count) +{ + while (count--) { + *insn = SW64_NOP; + insn++; + } +} + +/* Is the jump addr in local .altinstructions */ +static inline bool in_alt_jump(unsigned long jump, void *start, void *end) +{ + return jump >= (unsigned long)start && jump < (unsigned long)end; +} + +static void __init_or_module recompute_jump(u32 *buf, u32 *dest, u32 *src, + void *start, void *end) +{ + unsigned long si_lo21, si_lo26, disp; + unsigned long cur_pc, jump_addr, pc; + + cur_pc = (unsigned long)src; + pc = (unsigned long)dest; + + si_lo21 = *src & 0x1fffff; + si_lo26 = *src & 0x3ffffff; + *buf = *src; + + if (sw64_insn_is_lbr(*src)) { + jump_addr = cur_pc + SW64_INSN_SIZE * si_lo26; + if (in_alt_jump(jump_addr, start, end)) + return; + disp = (jump_addr - pc) / SW64_INSN_SIZE; + *buf = (*buf & ~0x3ffffff) | disp; + } else { + jump_addr = cur_pc + SW64_INSN_SIZE * si_lo21; + if (in_alt_jump(jump_addr, start, end)) + return; + disp = (jump_addr - pc) / SW64_INSN_SIZE; + *buf = (*buf & ~0x1fffff) | disp; + } + + return; +} + +/* Not support pcrel instruction at present! */ +static int __init_or_module copy_alt_insns(u32 *buf, + u32 *dest, u32 *src, int nr) +{ + int i; + + for (i = 0; i < nr; i++) { + buf[i] = src[i]; + + if (sw64_insn_is_branch(src[i])) { + recompute_jump(&buf[i], &dest[i], &src[i], src, src + nr); + } + } + + return 0; +} + +static void *__init_or_module text_poke_early(u32 *insn, u32 *buf, unsigned int nr) +{ + int i; + unsigned long flags; + + local_irq_save(flags); + + for (i = 0; i < nr; i++) + insn[i] = buf[i]; + + local_irq_restore(flags); + + tbiv(); + + return insn; +} + +void __init_or_module apply_alternatives(struct alt_instr *start, struct alt_instr *end) +{ + struct alt_instr *a; + unsigned int nr_instr, nr_repl, nr_insnbuf; + u32 *instr, *replacement; + u32 insnbuf[MAX_PATCH_SIZE]; + /* + * The scan order should be from start to end. 
A later scanned + * alternative code can overwrite previously scanned alternative code. + * Some kernel functions (e.g. memcpy, memset, etc) use this order to + * patch code. + * + * So be careful if you want to change the scan order to any other + * order. + */ + for (a = start; a < end; a++) { + nr_insnbuf = 0; + + instr = (void *)&a->instr_offset + a->instr_offset; + replacement = (void *)&a->replace_offset + a->replace_offset; + + nr_instr = a->instrlen / SW64_INSN_SIZE; + nr_repl = a->replacementlen / SW64_INSN_SIZE; + + if (!cpus_have_cap(a->feature)) + continue; + + copy_alt_insns(insnbuf, instr, replacement, nr_repl); + nr_insnbuf = nr_repl; + + if (nr_instr > nr_repl) { + add_nops(insnbuf + nr_repl, nr_instr - nr_repl); + nr_insnbuf += nr_instr - nr_repl; + } + + text_poke_early(instr, insnbuf, nr_insnbuf); + } +} + +static void __init apply_vdso_alternatives(void) +{ + const Elf_Ehdr *hdr; + const Elf_Shdr *shdr; + const Elf_Shdr *alt; + struct alt_instr *begin, *end; + + hdr = (Elf_Ehdr *)vdso_start; + shdr = (void *)hdr + hdr->e_shoff; + alt = find_section(hdr, shdr, ".altinstructions"); + if (!alt) + return; + + begin = (void *)hdr + alt->sh_offset, + end = (void *)hdr + alt->sh_offset + alt->sh_size, + + apply_alternatives((struct alt_instr *)begin, + (struct alt_instr *)end); +} + +void __init apply_alternatives_all(void) +{ + apply_vdso_alternatives(); + + apply_alternatives(__alt_instructions, __alt_instructions_end); +} diff --git a/arch/sw_64/kernel/module.c b/arch/sw_64/kernel/module.c index fb7c61c1b481..56bd1ba07f23 100644 --- a/arch/sw_64/kernel/module.c +++ b/arch/sw_64/kernel/module.c @@ -5,6 +5,9 @@ #include #include +#include +#include + #define DEBUGP(fmt...) /* Allocate the GOT at the end of the core sections. 
*/ @@ -287,3 +290,16 @@ void *module_alloc(unsigned long size) GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, __builtin_return_address(0)); } + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + const Elf_Shdr *s; + + s = find_section(hdr, sechdrs, ".altinstructions"); + if (s) + apply_alternatives((void *)s->sh_addr, (void *)s->sh_addr + s->sh_size); + + return 0; +} diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c index 4407c0ef9aec..dcfd0acf89e1 100644 --- a/arch/sw_64/kernel/setup.c +++ b/arch/sw_64/kernel/setup.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -676,6 +677,8 @@ setup_arch(char **cmdline_p) setup_cpu_features(); + apply_alternatives_all(); + jump_label_init(); #ifdef CONFIG_SUBARCH_C3B diff --git a/arch/sw_64/kernel/vdso/vdso.lds.S b/arch/sw_64/kernel/vdso/vdso.lds.S index 0dde95a9e912..212b9d10d2a3 100644 --- a/arch/sw_64/kernel/vdso/vdso.lds.S +++ b/arch/sw_64/kernel/vdso/vdso.lds.S @@ -48,6 +48,11 @@ SECTIONS PROVIDE (_etext = .); PROVIDE (etext = .); + . = ALIGN(4); + .altinstructions : { + *(.altinstructions) + } + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr .eh_frame : { KEEP (*(.eh_frame)) } :text diff --git a/arch/sw_64/kernel/vmlinux.lds.S b/arch/sw_64/kernel/vmlinux.lds.S index 8bbce3e743fe..6e2b59bf4292 100644 --- a/arch/sw_64/kernel/vmlinux.lds.S +++ b/arch/sw_64/kernel/vmlinux.lds.S @@ -53,6 +53,13 @@ SECTIONS } PERCPU_SECTION(L1_CACHE_BYTES) + . 
= ALIGN(4); + .altinstructions : { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + /* * Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page * needed for the THREAD_SIZE aligned init_task gets freed after init -- Gitee From da3069809d3826678b7558e4b02d721129ce5a14 Mon Sep 17 00:00:00 2001 From: Jing Li Date: Wed, 21 Jan 2026 12:28:07 +0800 Subject: [PATCH 08/40] sw64: cache: fix shared_cpu_map when PPTT is invalid Add check to the code to prevent access to null pointer. Signed-off-by: Jing Li Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/kernel/cacheinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sw_64/kernel/cacheinfo.c b/arch/sw_64/kernel/cacheinfo.c index a6e8298cff50..9aa1890420d1 100644 --- a/arch/sw_64/kernel/cacheinfo.c +++ b/arch/sw_64/kernel/cacheinfo.c @@ -131,7 +131,7 @@ static void setup_shared_cpu_map(unsigned int cpu) struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i); if ((rcid_to_domain_id(sib_rcid) != rcid_to_domain_id(rcid)) || - (i == cpu)) + (i == cpu) || !sib_cpu_ci->info_list) continue; sib_leaf = sib_cpu_ci->info_list + index; -- Gitee From 6ebb11d1cf489c8f0f92c6f471ba8be12d29b883 Mon Sep 17 00:00:00 2001 From: Jing Li Date: Fri, 5 Dec 2025 19:02:23 +0800 Subject: [PATCH 09/40] sw64: iommu: refactor iommu v2 initialization Move the early initialization code of Sunway IOMMU version 2 to the PCI driver and add device tree description for the IOMMU. 
Signed-off-by: Jing Li Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/pci/acpi.c | 7 + drivers/iommu/sw64/iommu.c | 4 +- drivers/iommu/sw64/iommu.h | 4 +- drivers/iommu/sw64/iommu_v2.c | 382 +++++++++++----------------- drivers/pci/controller/pci-sunway.c | 7 + 5 files changed, 172 insertions(+), 232 deletions(-) diff --git a/arch/sw_64/pci/acpi.c b/arch/sw_64/pci/acpi.c index 1b748074fdf8..a45a4e232c26 100644 --- a/arch/sw_64/pci/acpi.c +++ b/arch/sw_64/pci/acpi.c @@ -173,6 +173,11 @@ static int pci_acpi_prepare_root_resources(struct acpi_pci_root_info *ci) return status; } +int __weak sunway_iommu_early_init(struct pci_controller *hose) +{ + return -ENOENT; +} + /** * This function is called from ACPI code and used to * setup PCI host controller. @@ -229,6 +234,8 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) pcie_bus_configure_settings(child); } + sunway_iommu_early_init(pci_bus_to_pci_controller(bus)); + return bus; setup_ecam_err: diff --git a/drivers/iommu/sw64/iommu.c b/drivers/iommu/sw64/iommu.c index de135a7e648b..a161aec1069c 100644 --- a/drivers/iommu/sw64/iommu.c +++ b/drivers/iommu/sw64/iommu.c @@ -739,7 +739,7 @@ struct syscore_ops iommu_cpu_syscore_ops = { static struct iommu_domain *sunway_iommu_domain_alloc(unsigned int type); -static struct sunway_iommu *sunway_iommu_early_init(struct pci_controller *hose) +static struct sunway_iommu *iommu_early_init(struct pci_controller *hose) { struct sunway_iommu *iommu; struct page *page; @@ -805,7 +805,7 @@ static int sunway_iommu_init(void) if (hose->iommu_enable) continue; - iommu = sunway_iommu_early_init(hose); + iommu = iommu_early_init(hose); if (!iommu) { pr_err("Allocating sunway_iommu failed\n"); hose->iommu_enable = false; diff --git a/drivers/iommu/sw64/iommu.h b/drivers/iommu/sw64/iommu.h index a8b49139fade..5a64943e8e9a 100644 --- a/drivers/iommu/sw64/iommu.h +++ b/drivers/iommu/sw64/iommu.h @@ -21,7 +21,7 @@ struct sunway_iommu { unsigned long *iommu_dtbr; 
spinlock_t dt_lock; /* Device Table Lock */ - struct pci_controller *hose_pt; + struct pci_controller *hose; struct iommu_device iommu; /* IOMMU core code handle */ struct list_head list; }; @@ -85,3 +85,5 @@ struct sunway_iommu_group { #define PAGE_SIZE_IOMMU (_AC(1, UL) << PAGE_SHIFT_IOMMU) #define PCACHE_FLUSHPADDR_MASK 0xffffffffff80UL + +extern int sunway_pci_init_iommu(struct pci_controller *hose); diff --git a/drivers/iommu/sw64/iommu_v2.c b/drivers/iommu/sw64/iommu_v2.c index 7e363bfbdaaf..d93d5fff6001 100644 --- a/drivers/iommu/sw64/iommu_v2.c +++ b/drivers/iommu/sw64/iommu_v2.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -26,9 +27,8 @@ #include #include #include + #include -#include -#include #include "iommu.h" #include "../dma-iommu.h" @@ -57,7 +57,7 @@ struct acpi_table_header *dmar_tbl; #define MAX_NR_IOMMU_PER_NODE 16 -LIST_HEAD(iommu_list); +static LIST_HEAD(iommu_list); /* IOMMU Exceptional Status */ enum exceptype { @@ -565,54 +565,6 @@ static struct sunway_iommu_dev *search_dev_data(u16 devid) return NULL; } -/********************************************************************** - * - * Following functions describe IOMMU init ops - * - **********************************************************************/ - -static struct sunway_iommu *sunway_iommu_early_init(struct pci_controller *hose) -{ - struct sunway_iommu *iommu; - struct page *page; - unsigned long base; - int ret = 0; - int node; - - iommu = kzalloc(sizeof(struct sunway_iommu), GFP_KERNEL); - if (!iommu) { - ret = -ENOMEM; - goto out; - } - - spin_lock_init(&iommu->dt_lock); - - iommu->node = hose->node; - iommu->index = hose->index; - - node = node_online(iommu->node) ? 
iommu->node : NUMA_NO_NODE; - page = alloc_pages_node(node, __GFP_ZERO, get_order(PAGE_SIZE)); - if (!page) { - ret = -ENOMEM; - goto free_iommu; - } - - iommu->iommu_dtbr = page_address(page); - base = __pa(iommu->iommu_dtbr) & PAGE_MASK; - iommu->reg_base_addr = hose->piu_ior0_base; - writeq(base, iommu->reg_base_addr + DTBASEADDR); - - hose->pci_iommu = iommu; - iommu->enabled = true; - - return iommu; - -free_iommu: - kfree(iommu); -out: - return ERR_PTR(ret); -} - unsigned long fetch_dte(struct sunway_iommu *iommu, unsigned long devid, enum exceptype type) { @@ -802,30 +754,27 @@ struct irqaction iommu_irqaction = { .name = "sunway_iommu", }; -void sunway_enable_iommu_func(struct pci_controller *hose) +static void sunway_enable_iommu_func(struct sunway_iommu *iommu) { - struct sunway_iommu *iommu; + struct pci_controller *hose = iommu->hose; unsigned int iommu_irq, err; unsigned long iommu_conf, iommu_ctrl; iommu_irq = hose->int_irq; - pr_debug("%s node %ld rc %ld iommu_irq %d\n", - __func__, hose->node, hose->index, iommu_irq); err = request_irq(iommu_irq, iommu_interrupt, IRQF_SHARED, "sunway_iommu", hose); if (err < 0) pr_info("sw iommu request irq failed!\n"); - iommu = hose->pci_iommu; iommu_ctrl = (1UL << 63) | (0x100UL << 10); writeq(iommu_ctrl, iommu->reg_base_addr + IOMMUEXCPT_CTRL); iommu_conf = readq(iommu->reg_base_addr + PIUCONFIG0); iommu_conf = iommu_conf | (0x3 << 7); writeq(iommu_conf, iommu->reg_base_addr + PIUCONFIG0); writeq(0xf, iommu->reg_base_addr + TIMEOUT_CONFIG); - iommu_conf = readq(iommu->reg_base_addr + PIUCONFIG0); - pr_debug("SW arch configure node %ld hose-%ld iommu_conf = %#lx\n", - hose->node, hose->index, iommu_conf); + + iommu->enabled = true; + hose->iommu_enable = true; } /* iommu cpu syscore ops */ @@ -844,219 +793,213 @@ struct syscore_ops iommu_cpu_syscore_ops = { .resume = iommu_cpu_resume, }; -static struct iommu_domain *sunway_iommu_domain_alloc(unsigned int type); - -/* Init functions */ -static int do_detect(void) 
+static bool sunway_iommu_is_enabled(unsigned long pnode, unsigned long index) { - acpi_status status = AE_OK; - - status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl); - - if (ACPI_SUCCESS(status) && !dmar_tbl) { - pr_warn("No DMAR found!\n"); - status = AE_NOT_FOUND; - } + unsigned long which_iommu = MAX_NR_IOMMU_PER_NODE * pnode + index; - return ACPI_SUCCESS(status) ? 0 : -ENOENT; + return test_bit(which_iommu, iommu_bitmap); } -static struct pci_controller *find_hose_by_rcid(int node, int index) +#ifdef CONFIG_ACPI +static const struct acpi_dmar_sw_hardware_unit * +find_dmar_entry(unsigned long pnode, unsigned long index) { - struct pci_controller *hose; + struct acpi_table_header *dmar_header; + struct acpi_table_sw_dmar *dmar; + const struct acpi_sw_dmar_header *entry, *start, *end; + const struct acpi_dmar_sw_hardware_unit *unit; + size_t len; + u16 iommu_index = ((u16)pnode << 8) | ((u16)index); + + acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_header); + if (!dmar_header) { + pr_warn("No DMAR table found\n"); + return NULL; + } - for (hose = hose_head; hose; hose = hose->next) - if (hose->node == node && hose->index == index) - return hose; + dmar = (struct acpi_table_sw_dmar *)dmar_header; + len = dmar->header.length - sizeof(*dmar); + entry = start = (struct acpi_sw_dmar_header *)(dmar + 1); + end = (void *)start + len; - return NULL; -} + for (; entry < end; entry = (void *)entry + entry->length) { + unit = (const struct acpi_dmar_sw_hardware_unit *)entry; -static int parse_one_drhd_unit(struct acpi_sw_dmar_header *header) -{ - struct acpi_dmar_sw_hardware_unit *drhd; - struct sunway_iommu *iommu; - struct pci_controller *hose; - struct page *page; - unsigned long base; - int cmdline_enabled; - int rc_mask, ret, node; - int rc_node, rc_index; + if (!entry->length || + (entry->type >= ACPI_SW_DMAR_TYPE_RESERVED)) { + pr_err(FW_BUG "Invalid DMAR table\n"); + acpi_put_table(dmar_header); + return NULL; + } - drhd = (struct acpi_dmar_sw_hardware_unit *)header; - 
if (!drhd->enable) - return 0; + if (unit->index == iommu_index) + break; + } - rc_node = (drhd->index >> 8) & 0xff; - rc_index = drhd->index & 0xff; + acpi_put_table(dmar_header); - hose = find_hose_by_rcid(rc_node, rc_index); - if (!hose) - return 0; + return (entry < end) ? unit : NULL; +} - iommu = kzalloc(sizeof(struct sunway_iommu), GFP_KERNEL); - if (!iommu) - return -ENOMEM; +static int pci_acpi_init_iommu(struct pci_controller *hose) +{ + struct pci_config_window *cfg; + struct device *dev; + struct acpi_device *adev; + unsigned long long pxm; + acpi_status status; + const struct acpi_dmar_sw_hardware_unit *entry; - iommu->node = rc_node; - iommu->index = rc_index; - iommu->reg_base_addr = ioremap(drhd->address, drhd->size); + cfg = hose->bus->sysdata; + dev = cfg->parent; + adev = to_acpi_device(dev); - rc_mask = MAX_NR_IOMMU_PER_NODE * iommu->node + iommu->index; - cmdline_enabled = test_bit(rc_mask, iommu_bitmap); - if (!cmdline_enabled) { - iommu->enabled = false; - ret = 0; - goto free_iommu; + status = acpi_evaluate_integer(acpi_device_handle(adev), + "_PXM", NULL, &pxm); + if (ACPI_FAILURE(status)) { + dev_err(dev, "failed to retrieve _PXM\n"); + return -EINVAL; } - node = node_online(iommu->node) ? 
iommu->node : NUMA_NO_NODE; - page = alloc_pages_node(node, __GFP_ZERO, get_order(PAGE_SIZE)); - if (!page) { - ret = -ENOMEM; - goto free_iommu; + entry = find_dmar_entry((unsigned long)pxm, hose->index); + if (!entry) { + dev_err(dev, "failed to find dmar entry\n"); + return -ENODEV; } - iommu->iommu_dtbr = page_address(page); - base = __pa(iommu->iommu_dtbr) & PAGE_MASK; - writeq(base, iommu->reg_base_addr + DTBASEADDR); + if (!entry->enable) { + dev_info(dev, "IOMMU disabled by firmware\n"); + return -ENODEV; + } - list_add(&iommu->list, &iommu_list); - iommu->enabled = true; + if (!sunway_iommu_is_enabled((unsigned long)pxm, hose->index)) { + dev_info(dev, "IOMMU disabled by cmdline\n"); + return -ENODEV; + } - hose->pci_iommu = iommu; + dev_info(dev, "IOMMU with physical node %llu index %lu enabled\n", + pxm, hose->index); - pr_info("iommu: node: %ld index: %ld IOMMU enabled!\n", - iommu->node, iommu->index); return 0; - -free_iommu: - kfree(iommu); - return ret; } +#endif /* CONFIG_ACPI */ -static int parse_drhd_units(struct acpi_table_sw_dmar *dmar) +#ifdef CONFIG_OF +static int pci_of_init_iommu(struct pci_controller *hose) { - struct acpi_sw_dmar_header *iter, *start, *next, *end; - size_t len = dmar->header.length - sizeof(*dmar); - int ret, count = 0; + struct pci_config_window *cfg; + struct device *dev; + struct device_node *np; + u32 pnode = NUMA_NO_NODE, enable, width; - /* Skip DMAR table, point to first DRHD table. 
*/ - start = (struct acpi_sw_dmar_header *)(dmar + 1); - end = ((void *)start) + len; + cfg = hose->bus->sysdata; + dev = cfg->parent; - for (iter = start; iter < end; iter = next) { - next = (void *)iter + iter->length; - if (iter->length == 0) { - pr_warn(FW_BUG "Invalid 0-length structure\n"); - break; - } else if (next > end) { - pr_warn(FW_BUG "Record passes table end\n"); - return -EINVAL; - } + np = of_node_get(dev->of_node); - if (iter->type >= ACPI_SW_DMAR_TYPE_RESERVED) { - pr_info("Unknown DMAR structure type %d\n", - iter->type); - } else if (iter->type == 0) { - ret = parse_one_drhd_unit(iter); - if (ret) - return ret; - } - count++; + of_property_read_u32(np, "numa-node-id", &pnode); + if (pnode == NUMA_NO_NODE) { + dev_err(dev, "failed to retrieve numa-node-id\n"); + return -EINVAL; } - return 0; -} + if (of_property_read_u32(np, "sunway,iommu-width", &width)) + width = 42; // Backward compatibility -static int sunway_iommu_acpi_early_init(void) -{ - int ret; + if (of_property_read_u32(np, "sunway,iommu-enable", &enable)) + enable = 1; // Backward compatibility - struct acpi_table_sw_dmar *dmar; + of_node_put(np); - ret = do_detect(); - if (ret) - return ret; - - dmar = (struct acpi_table_sw_dmar *)dmar_tbl; - if (!dmar) + if (!enable) { + dev_info(dev, "IOMMU disabled by firmware\n"); return -ENODEV; + } - if (dmar->width < 42) { - pr_warn("Invalid DMAR haw\n"); - return -EINVAL; + if (!sunway_iommu_is_enabled(pnode, hose->index)) { + dev_info(dev, "IOMMU disabled by cmdline\n"); + return -ENODEV; } - pr_info("Host address width: %d\n", dmar->width); - ret = parse_drhd_units(dmar); + dev_info(dev, "IOMMU with physical node %u index %lu enabled\n", + pnode, hose->index); - return ret; + return 0; +} +#endif /* CONFIG_OF */ + +static int pci_init_iommu(struct pci_controller *hose) +{ +#ifdef CONFIG_OF + if (acpi_disabled) + return pci_of_init_iommu(hose); +#endif + +#ifdef CONFIG_ACPI + if (!acpi_disabled) + return pci_acpi_init_iommu(hose); +#endif + 
+ return -EINVAL; } -static int sunway_iommu_acpi_init(void) +int sunway_iommu_early_init(struct pci_controller *hose) { struct sunway_iommu *iommu; - struct pci_controller *hose; - int iommu_index = 0; - int ret; + struct page *page; + unsigned long base; + int ret, node; - ret = sunway_iommu_acpi_early_init(); + ret = pci_init_iommu(hose); if (ret) return ret; - for_each_iommu(iommu) { - hose = find_hose_by_rcid(iommu->node, iommu->index); - if (!hose) - continue; + iommu = kzalloc_node(sizeof(struct sunway_iommu), GFP_KERNEL, + hose->node); + if (!iommu) + return -ENOMEM; - if (!iommu->enabled || hose->iommu_enable) - continue; + iommu->node = hose->node; + iommu->index = hose->index; - sunway_enable_iommu_func(hose); - hose->iommu_enable = true; - iommu_device_sysfs_add(&iommu->iommu, NULL, NULL, "%d", - iommu_index); - iommu_device_register(&iommu->iommu, &sunway_iommu_ops, NULL); - iommu_index++; - piu_flush_all(iommu); + node = node_online(iommu->node) ? iommu->node : NUMA_NO_NODE; + page = alloc_pages_node(node, __GFP_ZERO, get_order(PAGE_SIZE)); + if (!page) { + kfree(iommu); + return -ENOMEM; } - ret = iova_cache_get(); - if (ret) - return ret; + iommu->iommu_dtbr = page_address(page); + base = __pa(iommu->iommu_dtbr) & PAGE_MASK; + iommu->reg_base_addr = hose->piu_ior0_base; + writeq(base, iommu->reg_base_addr + DTBASEADDR); - register_syscore_ops(&iommu_cpu_syscore_ops); + hose->pci_iommu = iommu; + iommu->hose = hose; + + list_add(&iommu->list, &iommu_list); return 0; } -static int sunway_iommu_legacy_init(void) +static int sunway_iommu_init(void) { - struct pci_controller *hose; struct sunway_iommu *iommu; - unsigned long rc_mask; - int iommu_index = 0; - int ret; + int iommu_index = 0, ret = 0; - /* Do the loop */ - for (hose = hose_head; hose; hose = hose->next) { - rc_mask = MAX_NR_IOMMU_PER_NODE * hose->node + hose->index; - if (!test_bit(rc_mask, iommu_bitmap)) { - hose->iommu_enable = false; - continue; - } + sunway_iommu_domain_bitmap = + (void 
*)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(MAX_DOMAIN_NUM / 8)); + if (!sunway_iommu_domain_bitmap) + return -ENOMEM; - if (hose->iommu_enable) - continue; + __set_bit(0, sunway_iommu_domain_bitmap); - iommu = sunway_iommu_early_init(hose); - sunway_enable_iommu_func(hose); - hose->iommu_enable = true; + for_each_iommu(iommu) { + sunway_enable_iommu_func(iommu); iommu_device_sysfs_add(&iommu->iommu, NULL, NULL, "%d", - iommu_index); + iommu_index++); iommu_device_register(&iommu->iommu, &sunway_iommu_ops, NULL); - iommu_index++; piu_flush_all(iommu); } @@ -1068,25 +1011,6 @@ static int sunway_iommu_legacy_init(void) return 0; } - -static int sunway_iommu_init(void) -{ - int ret; - - sunway_iommu_domain_bitmap = - (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(MAX_DOMAIN_NUM / 8)); - if (!sunway_iommu_domain_bitmap) - return 0; - __set_bit(0, sunway_iommu_domain_bitmap); - - if (!acpi_disabled) - ret = sunway_iommu_acpi_init(); - else - ret = sunway_iommu_legacy_init(); - - return ret; -} subsys_initcall_sync(sunway_iommu_init); /******************************************************************************* diff --git a/drivers/pci/controller/pci-sunway.c b/drivers/pci/controller/pci-sunway.c index e3487e2ea43c..d8fc6b280120 100644 --- a/drivers/pci/controller/pci-sunway.c +++ b/drivers/pci/controller/pci-sunway.c @@ -1058,6 +1058,11 @@ static struct pci_host_bridge *sunway_pcie_of_init(struct platform_device *pdev) return bridge; } +int __weak sunway_iommu_early_init(struct pci_controller *hose) +{ + return -ENOENT; +} + static int sunway_pcie_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1095,6 +1100,8 @@ static int sunway_pcie_probe(struct platform_device *pdev) pci_bus_add_devices(bus); + sunway_iommu_early_init(pci_bus_to_pci_controller(bus)); + return 0; } -- Gitee From 7f18154dfabc23e96661185366a89c9095190908 Mon Sep 17 00:00:00 2001 From: Wu Liliu Date: Thu, 5 Feb 2026 14:06:38 +0800 Subject: [PATCH 
10/40] sw64: fix: getcpu passed a null pointer error The parameters of the getcpu system call can be NULL. During processing, it is necessary to check whether the parameters are NULL before assigning values. Signed-off-by: Wu Liliu Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/kernel/vdso/vgetcpu.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/sw_64/kernel/vdso/vgetcpu.c b/arch/sw_64/kernel/vdso/vgetcpu.c index d17f1b16ccb8..6398abd16736 100644 --- a/arch/sw_64/kernel/vdso/vgetcpu.c +++ b/arch/sw_64/kernel/vdso/vgetcpu.c @@ -27,12 +27,16 @@ static void __getcpu(unsigned int *cpu, unsigned int *node, "mov $0, %0\n" : "=&r"(cpuid) : "i"(HMC_uwhami)); - *cpu = data->vdso_whami_to_cpu[cpuid]; - *node = data->vdso_whami_to_node[cpuid]; + if (cpu) + *cpu = data->vdso_whami_to_cpu[cpuid]; + if (node) + *node = data->vdso_whami_to_node[cpuid]; #else asm volatile ("csrr %0, %1" : "=&r"(cpuid) : "i"(CSR_SOFTCID)); - *cpu = cpuid; - *node = data->vdso_cpu_to_node[*cpu]; + if (cpu) + *cpu = cpuid; + if (node) + *node = data->vdso_cpu_to_node[*cpu]; #endif } -- Gitee From 7461ca31125ade7b30f2aadcde35d1a8bd8a55ed Mon Sep 17 00:00:00 2001 From: He Chuyue Date: Wed, 24 Dec 2025 09:53:39 +0800 Subject: [PATCH 11/40] sw64: add EDAC support with dynamic node and MC detection Add EDAC support for the SW64 platform. The driver dynamically detects the presence of nodes and memory controllers, and supports both ACPI and Device Tree based firmware descriptions. 
Signed-off-by: He Chuyue Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/Kconfig | 1 + arch/sw_64/include/asm/uncore_io_junzhang.h | 24 + .../include/asm/uncore_io_ops_junzhang.h | 2 + drivers/edac/Kconfig | 7 + drivers/edac/Makefile | 2 + drivers/edac/sw64_edac.c | 483 ++++++++++++++++++ drivers/edac/sw64_edac.h | 5 + 7 files changed, 524 insertions(+) create mode 100644 drivers/edac/sw64_edac.c create mode 100644 drivers/edac/sw64_edac.h diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index ac645c8eda64..c0e293b0bbc9 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -63,6 +63,7 @@ config SW64 select AUDIT_ARCH select COMMON_CLK select DMA_OPS if PCI + select EDAC_SUPPORT select GENERIC_ARCH_TOPOLOGY select GENERIC_CLOCKEVENTS select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO diff --git a/arch/sw_64/include/asm/uncore_io_junzhang.h b/arch/sw_64/include/asm/uncore_io_junzhang.h index 6f78eb766801..2cb23480ecd8 100644 --- a/arch/sw_64/include/asm/uncore_io_junzhang.h +++ b/arch/sw_64/include/asm/uncore_io_junzhang.h @@ -75,4 +75,28 @@ #define PIUCONFIG0_INIT_VAL 0x38016 +/* MC IO REG */ +enum { + CFGDEC = 0x400UL, + CFGCR = 0x480UL, + INIT_CTRL = 0x580UL, + CFGERR = 0xd00UL, + FSMSTAT = 0xe00UL, + PUB_INTERFACE = 0x1000UL, + POWERCTRL = 0x1080UL, + CFGMR0 = 0x1280UL, + CFGMR1 = 0x1300UL, + CFGMR2 = 0x1380UL, + CFGMR3 = 0x1400UL, + PERF_CTRL = 0x1480UL, + MC_PERF0 = 0x1500UL, + CFGMR4 = 0x1800UL, + CFGMR5 = 0x1880UL, + CFGMR6 = 0x1900UL, + MC_CTRL = 0x1c00UL, + MEMSERR_P = 0x1c80UL, + MEMSERR = 0x1d00UL, + MERRADDR = 0x2280UL, +}; + #endif /* _ASM_SW64_UNCORE_IO_JUNZHANG_H */ diff --git a/arch/sw_64/include/asm/uncore_io_ops_junzhang.h b/arch/sw_64/include/asm/uncore_io_ops_junzhang.h index eff4f40886d8..472c495ebcdf 100644 --- a/arch/sw_64/include/asm/uncore_io_ops_junzhang.h +++ b/arch/sw_64/include/asm/uncore_io_ops_junzhang.h @@ -3,6 +3,8 @@ #define _ASM_SW64_UNCORE_IO_OPS_JUNZHANG_H #define OFFSET_CFG_INFO 0x1100UL +#define 
OFFSET_FAULT_SOURCE 0xb00UL +#define OFFSET_MC_ONLINE 0x3780UL static inline int __get_cpu_nums(void) { diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 2484f0795925..daca7959a3db 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -571,4 +571,11 @@ config EDAC_LOONGSON errors (CE) only. Loongson-3A5000/3C5000/3D5000/3A6000/3C6000 are compatible. +config EDAC_SW64 + tristate "SW64 UNCORE_JUNZHANG" + depends on SW64 && SUBARCH_C4 + help + Support for error detection and correction on the + UNCORE_JUNZHANG chipsets. + endif # EDAC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 699b818ac7cb..aeb56cf0eba8 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -45,6 +45,8 @@ obj-$(CONFIG_EDAC_X38) += x38_edac.o obj-$(CONFIG_EDAC_I82860) += i82860_edac.o obj-$(CONFIG_EDAC_R82600) += r82600_edac.o obj-$(CONFIG_EDAC_AMD64) += amd64_edac.o +obj-$(CONFIG_EDAC_SW64) += sw64_edac.o + obj-$(CONFIG_EDAC_PASEMI) += pasemi_edac.o diff --git a/drivers/edac/sw64_edac.c b/drivers/edac/sw64_edac.c new file mode 100644 index 000000000000..2f6529c00542 --- /dev/null +++ b/drivers/edac/sw64_edac.c @@ -0,0 +1,483 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "edac_module.h" + +#define EDAC_MOD_STR "sw64_edac" + +struct sw64_edac { + struct device *dev; + void __iomem *spbu_base; + struct list_head mc; + int node; + int edac_idx; +}; + +struct sw64_edac_mc { + struct fwnode_handle *fwnode; + struct list_head next; + char *name; + struct mem_ctl_info *mci; + struct sw64_edac *edac; + void __iomem *mc_vbase; + u32 mc_id; + int irq; +}; + +struct sw64_platform_data { + struct sw64_edac_mc *properties; + unsigned int nports; +}; + +static int edac_mc_idx; +static const char *sw64_ctl_name = "SW64"; + +/*********************** DRAM err device **********************************/ + +static void 
sw64_edac_mc_check(struct mem_ctl_info *mci) +{ + struct sw64_edac_mc *mc = mci->pvt_info; + u32 reg; + u32 err_addr; + + reg = readq(mc->mc_vbase + MEMSERR) >> 32; + err_addr = readq(mc->mc_vbase + MERRADDR); + + /* first bit clear in ECC Err Reg, 1 bit error, correctable by HW */ + if (reg & 0x1) { + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + err_addr >> PAGE_SHIFT, + err_addr & PAGE_MASK, 0, + 0, 0, -1, + mci->ctl_name, ""); + /* clear the error */ + writeq(0x1, mc->mc_vbase + MEMSERR); + writeq(1UL << 32, mc->mc_vbase + MEMSERR); + } + if (reg & 0x2) { /* 2 bit error, UE */ + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + err_addr >> PAGE_SHIFT, + err_addr & PAGE_MASK, 0, + 0, 0, -1, + mci->ctl_name, ""); + /* clear the error */ + writeq(1UL << 33, mc->mc_vbase + MEMSERR); + } + +} + +static irqreturn_t sw64_edac_isr(int irq, void *dev_id) +{ + struct sw64_edac *edac = dev_id; + struct sw64_edac_mc *mc; + u32 cause; + void __iomem *spbu_base = edac->spbu_base; + + cause = (readq(spbu_base + OFFSET_FAULT_SOURCE) << 2) & 0x1; + if (!cause) + return IRQ_NONE; + + /* writing 0's to the ECC err addr in check function clears irq */ + list_for_each_entry(mc, &edac->mc, next) + sw64_edac_mc_check(mc->mci); + + return IRQ_HANDLED; +} + +static unsigned long get_total_mem(struct sw64_edac *edac) +{ + unsigned long total_mem; + void __iomem *spbu_base = edac->spbu_base; + + total_mem = readq(spbu_base + OFFSET_CFG_INFO) >> 3; + total_mem = (total_mem & 0xffff) << 28; + + return total_mem; +} + +static void sw64_init_csrows(struct mem_ctl_info *mci, + struct sw64_edac_mc *mc) +{ + struct csrow_info *csrow; + struct dimm_info *dimm; + unsigned long total_mem; + + u32 devtype; + + total_mem = get_total_mem(mc->edac); + + csrow = mci->csrows[0]; + dimm = csrow->channels[0]->dimm; + + dimm->nr_pages = total_mem >> PAGE_SHIFT; + dimm->grain = 8; + + dimm->mtype = MEM_DDR4; + + devtype = readq(mc->mc_vbase + MC_CTRL) >> 20; + switch (devtype & 0x3) { + case 
0x0: + dimm->dtype = DEV_X4; + break; + case 0x2: + dimm->dtype = DEV_X8; + break; + case 0x3: + dimm->dtype = DEV_X16; + break; + default: + dimm->dtype = DEV_UNKNOWN; + break; + } + + dimm->edac_mode = EDAC_SECDED; +} + +#ifdef CONFIG_NUMA +static void sw64_edac_get_node(struct sw64_edac *edac, + struct device *dev) +{ + if (numa_off) + return; + + if (acpi_disabled) { + if (device_property_read_u32(dev, "numa-node-id", &edac->node)) + dev_warn(dev, "sw64_edac: node ID unknown\n"); + } else + edac->node = dev_to_node(dev); + + /** + * If numa_off is not set, we expect a valid node ID. + * If not, fallback to node 0. + */ + if (edac->node == NUMA_NO_NODE) { + pr_warn("Invalid node ID\n"); + edac->node = 0; + } +} +#endif + +static int sw64_edac_mc_add(struct device *dev, struct fwnode_handle *fwnode, + struct sw64_platform_data *pdata, int i) +{ + struct sw64_edac *edac; + struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; + struct sw64_edac_mc tmp_mc; + struct sw64_edac_mc *mc; + struct resource res; + struct resource *r; + int ret; + int numa_node; + struct platform_device *child_pdev; + acpi_status status; + unsigned long long sta; + + edac = dev_get_drvdata(dev); + memset(&tmp_mc, 0, sizeof(tmp_mc)); + + if (!devres_open_group(edac->dev, sw64_edac_mc_add, GFP_KERNEL)) + return -ENOMEM; + + tmp_mc.fwnode = fwnode; + tmp_mc = pdata->properties[i++]; + + if (fwnode_property_read_u32(fwnode, "memory-controller", + &tmp_mc.mc_id)) { + dev_err(dev, "Failed to get memory-controller ID\n"); + ret = -ENODEV; + goto err_group; + } + + if (acpi_disabled) { + unsigned long mc_online; + void __iomem *spbu_base; + + numa_node = edac->node; + spbu_base = edac->spbu_base; + mc_online = readq(spbu_base + OFFSET_MC_ONLINE) & 0xff; + + if (!(mc_online & (1 << tmp_mc.mc_id))) { + pr_info("mc %d.%d is offline, skip init\n", numa_node, tmp_mc.mc_id); + goto err_group; + } + } else { + if (fwnode_property_read_u32(fwnode, "numa-node-id", + &numa_node)) { + 
dev_info(edac->dev, "Failed to get numa node ID\n"); + ret = -ENODEV; + goto err_group; + } + + status = acpi_evaluate_integer(ACPI_HANDLE(dev), "_STA", NULL, &sta); + if (ACPI_FAILURE(status)) + goto err_group; + if (!sta) { + pr_info("mc %d.%d is offline, skip init\n", numa_node, tmp_mc.mc_id); + goto err_group; + } + } + + + if (acpi_disabled) { + u64 regs[2]; + + if (fwnode_property_read_u64_array(fwnode, "reg", regs, 2)) { + dev_err(dev, "Failed to get MC registers\n"); + fwnode_handle_put(fwnode); + ret = -ENODEV; + } + res.start = regs[0]; + res.end = regs[0] + regs[1] - 1; + res.flags = IORESOURCE_MEM; + tmp_mc.mc_vbase = devm_ioremap_resource(edac->dev, &res); + } else { + child_pdev = to_platform_device(fwnode->dev); + r = platform_get_resource(child_pdev, IORESOURCE_MEM, 0); + if (!r) { + dev_err(dev, "Failed to get MC registers\n"); + ret = -ENODEV; + } + tmp_mc.mc_vbase = devm_ioremap_resource(edac->dev, r); + } + + + if (IS_ERR(tmp_mc.mc_vbase)) { + dev_err(dev, "unable to map MCU resource\n"); + devm_kfree(dev, &mc); + ret = -ENODEV; + goto err_group; + } + + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = 2; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = 1; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers, + sizeof(struct sw64_edac_mc)); + if (!mci) { + ret = -ENOMEM; + goto err_group; + } + + mc = mci->pvt_info; + *mc = tmp_mc; /* Copy over resource value */ + mc->edac = edac; + mc->name = "sw64_edac_mc_err"; + mc->mci = mci; + mci->pdev = &mci->dev; + mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->mod_name = EDAC_MOD_STR; + mci->ctl_name = sw64_ctl_name; + mci->dev_name = mc->name; + mci->edac_check = sw64_edac_mc_check; + mci->ctl_page_to_phys = NULL; + mci->scrub_mode = SCRUB_SW_SRC; + edac->edac_idx = edac_mc_idx++; + + 
sw64_init_csrows(mci, mc); + + if (edac_mc_add_mc(mci)) { + dev_err(dev, "edac_mc_add_mc failed\n"); + ret = -EINVAL; + goto err_free; + } + + list_add(&mc->next, &edac->mc); + + devres_remove_group(edac->dev, sw64_edac_mc_add); + + dev_info(dev, "SW64 EDAC MC registered\n"); + + return 0; + +err_free: + edac_mc_free(mci); +err_group: + devres_release_group(edac->dev, sw64_edac_mc_add); + return ret; +} + +static int sw64_edac_probe(struct platform_device *pdev) +{ + struct sw64_edac *edac; + struct device *dev = &pdev->dev; + struct resource *res; + int ret; + acpi_status status; + unsigned long long sta; + struct fwnode_handle *fwnode; + struct sw64_platform_data *pdata; + struct sw64_edac_mc *mc; + int nports; + int i; + + edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL); + if (!edac) + return -ENOMEM; + +#ifdef CONFIG_NUMA + sw64_edac_get_node(edac, dev); +#endif + + edac->spbu_base = misc_platform_get_spbu_base(edac->node); + if (IS_ERR(edac->spbu_base)) + return PTR_ERR(edac->spbu_base); + + edac->dev = &pdev->dev; + platform_set_drvdata(pdev, edac); + INIT_LIST_HEAD(&edac->mc); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + ret = -ENOENT; + goto out_err; + } + + if (!acpi_disabled) { + status = acpi_evaluate_integer(ACPI_HANDLE(dev), "_STA", NULL, &sta); + if (ACPI_FAILURE(status)) { + ret = -EIO; + goto out_err; + } + if (!sta) { + ret = -EIO; + goto out_err; + } + } + + if (edac_op_state == EDAC_OPSTATE_INT) { + int irq; + /* acquire interrupt that reports errors */ + irq = platform_get_irq(pdev, 0); + ret = devm_request_irq(&pdev->dev, + irq, + sw64_edac_isr, + IRQF_SHARED, + "[EDAC] MC err", + edac); + if (ret < 0) { + ret = -ENODEV; + goto out_err; + } + } + + i = 0; + nports = device_get_child_node_count(dev); + + if (nports == 0) { + ret = -ENODEV; + goto out_err; + } + + pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) { + ret = -ENOMEM; + goto out_err; + } + + pdata->properties = 
devm_kcalloc(dev, nports, sizeof(*mc), GFP_KERNEL); + if (!pdata->properties) { + ret = -ENOMEM; + goto out_err; + } + + pdata->nports = nports; + + i = 0; + device_for_each_child_node(dev, fwnode) { + sw64_edac_mc_add(dev, fwnode, pdata, i); + } + + return 0; + +out_err: + return ret; +} + +static int sw64_edac_mc_remove(struct sw64_edac_mc *mc) +{ + edac_mc_del_mc(&mc->mci->dev); + edac_mc_free(mc->mci); + return 0; +} + +static int sw64_edac_remove(struct platform_device *pdev) +{ + struct sw64_edac *edac = dev_get_drvdata(&pdev->dev); + struct sw64_edac_mc *mc, *tmp_mc; + + list_for_each_entry_safe(mc, tmp_mc, &edac->mc, next) + sw64_edac_mc_remove(mc); + + return 0; +} + +static const struct of_device_id sw64_edac_of_match[] = { + { .compatible = "sunway,edac", .data = (void *)0 }, + { /* Sentinel */ } +}; +MODULE_DEVICE_TABLE(of, sw64_edac_of_match); + +#ifdef CONFIG_ACPI +static const struct acpi_device_id sw64_edac_acpi_match[] = { + { "SUNW0201", 0 }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, sw64_edac_acpi_match); +#endif + +static struct platform_driver sw64_edac_driver = { + .probe = sw64_edac_probe, + .remove = sw64_edac_remove, + .driver = { + .name = "sw64-edac", + .of_match_table = of_match_ptr(sw64_edac_of_match), + .acpi_match_table = ACPI_PTR(sw64_edac_acpi_match), + } +}; + +static int __init sw64_edac_init(void) +{ + /* make sure error reporting method is sane */ + switch (edac_op_state) { + case EDAC_OPSTATE_POLL: + case EDAC_OPSTATE_INT: + break; + default: + edac_op_state = EDAC_OPSTATE_POLL; + break; + } + + return platform_driver_register(&sw64_edac_driver); +} +module_init(sw64_edac_init); + +static void __exit sw64_edac_exit(void) +{ + platform_driver_unregister(&sw64_edac_driver); +} +module_exit(sw64_edac_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("He Chuyue "); +MODULE_DESCRIPTION("SW64 EDAC driver"); +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, + "EDAC Error Reporting state: 0=Poll, 2=Interrupt"); diff 
--git a/drivers/edac/sw64_edac.h b/drivers/edac/sw64_edac.h new file mode 100644 index 000000000000..b48b506bb7c9 --- /dev/null +++ b/drivers/edac/sw64_edac.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SW64_EDAC_H_ +#define _SW64_EDAC_H_ + +#endif -- Gitee From 053ffad625b38f1e22aae119a3bb2995490dcbde Mon Sep 17 00:00:00 2001 From: Gao Chen Date: Tue, 10 Feb 2026 16:25:41 +0800 Subject: [PATCH 12/40] sw64: add memb before/after atomic To fix potential atomicity issue, this patch removes the architecture-specific implementations of __smp_mb__before_atomic and __smp_mb__after_atomic and replaces them with the common implementations using memb. Signed-off-by: Gao Chen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/barrier.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/sw_64/include/asm/barrier.h b/arch/sw_64/include/asm/barrier.h index bff199126c9f..ceadeb323f38 100644 --- a/arch/sw_64/include/asm/barrier.h +++ b/arch/sw_64/include/asm/barrier.h @@ -22,9 +22,6 @@ #define __ASM_SMP_MB #endif -#define __smp_mb__before_atomic() barrier() -#define __smp_mb__after_atomic() barrier() - #include #endif /* _ASM_SW64_BARRIER_H */ -- Gitee From 4b03a26939aa6aab2e7eb4c761d16f1373763aea Mon Sep 17 00:00:00 2001 From: Gu Yuchen Date: Mon, 2 Mar 2026 09:25:24 +0800 Subject: [PATCH 13/40] sw64: fix mmap_base exceeding Java assertion address According to commit 4fd4e1ddbb83 ("sw64: fix random mmap base range"), the mmap_base address where Java dynamic libraries are mapped exceeded Java's assertion address of 0x7fff7ff8000. This commit fixes it. 
Signed-off-by: Gu Yuchen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sw_64/include/asm/processor.h b/arch/sw_64/include/asm/processor.h index 4360140e9535..03980e7cde72 100644 --- a/arch/sw_64/include/asm/processor.h +++ b/arch/sw_64/include/asm/processor.h @@ -45,7 +45,7 @@ #define TASK_SIZE_MAX TASK_SIZE_64 #define TASK_SIZE TASK_SIZE_64 -#define DEFAULT_MAP_WINDOW DEFAULT_MAP_WINDOW_64 +#define DEFAULT_MAP_WINDOW (DEFAULT_MAP_WINDOW_64 - SZ_8G) #ifdef CONFIG_SW64_FORCE_52BIT #define STACK_TOP_MAX TASK_SIZE -- Gitee From d4c35ad44fd73077490ab88d0288d497f8625444 Mon Sep 17 00:00:00 2001 From: Gao Chen Date: Thu, 17 Jul 2025 17:24:04 +0800 Subject: [PATCH 14/40] sw64: add fixmap support Allocate virtual address space and establish page tables for fixmap, enabling runtime modification of kernel page table contents. Signed-off-by: Gao Chen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/Kconfig | 8 ++++++ arch/sw_64/include/asm/fixmap.h | 42 ++++++++++++++++++++++++++++++++ arch/sw_64/include/asm/pgtable.h | 6 +++++ arch/sw_64/mm/init.c | 29 ++++++++++++++++++++++ 4 files changed, 85 insertions(+) create mode 100644 arch/sw_64/include/asm/fixmap.h diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index c0e293b0bbc9..f1438771a6d2 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -294,6 +294,14 @@ config SW64_CPUAUTOPLUG Turns on the interface for SW64_CPU CPUAUTOPLUG. endmenu + +config SW64_KERNEL_PAGE_TABLE + bool "sw64 kernel page table" + depends on SUBARCH_C4 + default y + help + Map the kernel and the memory by page table. + # clear all implied options (don't want default values for those): # Most of these machines have ISA slots; not exactly sure which don't, # and this doesn't activate hordes of code, so do it always. 
diff --git a/arch/sw_64/include/asm/fixmap.h b/arch/sw_64/include/asm/fixmap.h new file mode 100644 index 000000000000..e39b831666b6 --- /dev/null +++ b/arch/sw_64/include/asm/fixmap.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SW64_FIXMAP_H +#define _ASM_SW64_FIXMAP_H + +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE + +#include +#include +#include +#include +#include + +enum fixed_addresses { + FIX_HOLE, + FIX_PTE, + FIX_PMD, + FIX_PUD, + FIX_TEXT_POKE0, + FIX_EARLYCON_MEM_BASE, + + __end_of_permanent_fixed_addresses, +#define FIX_BTMAPS_SLOTS 7 +#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) + + FIX_BTMAP_END = __end_of_permanent_fixed_addresses, + FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, + + __end_of_fixed_addresses +}; + +#define FIXMAP_PAGE_IO PAGE_KERNEL + +#define __early_set_fixmap __set_fixmap + +#define __late_set_fixmap __set_fixmap +#define __late_clear_fixmap(idx) __set_fixmap((idx), 0, FIXMAP_PAGE_CLEAR) +extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); + +#include + +#endif /* CONFIG_SW64_KERNEL_PAGE_TABLE */ +#endif /* _ASM_SW64_FIXMAP_H */ diff --git a/arch/sw_64/include/asm/pgtable.h b/arch/sw_64/include/asm/pgtable.h index 2614b47d25dc..849828140aa7 100644 --- a/arch/sw_64/include/asm/pgtable.h +++ b/arch/sw_64/include/asm/pgtable.h @@ -89,6 +89,12 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) #define VMALLOC_END ((unsigned long)vmemmap) #endif +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE +#define FIXADDR_TOP MODULES_VADDR +#define FIXADDR_SIZE SZ_8M +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#endif + /* * HMcode-imposed page table bits */ diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c index d5e1baf23f72..5e6dd219f377 100644 --- a/arch/sw_64/mm/init.c +++ b/arch/sw_64/mm/init.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include struct mem_desc_t mem_desc; #ifndef CONFIG_NUMA @@ -43,6 +45,14 @@ static pud_t vmalloc_pud[1024] 
__aligned(PAGE_SIZE); static phys_addr_t mem_start; static phys_addr_t mem_size_limit; +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE +pgd_t early_pg_dir[1024] __initdata __attribute__((__aligned__(PAGE_SIZE))); + +pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; +pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; +pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss; +#endif /* CONFIG_SW64_KERNEL_PAGE_TABLE */ + #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE unsigned long memory_block_size_bytes(void) { @@ -142,6 +152,25 @@ void __init zone_sizes_init(void) free_area_init(max_zone_pfns); } +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE +void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) +{ + unsigned long addr = __fix_to_virt(idx); + pte_t *ptep; + + BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); + + ptep = &fixmap_pte[pte_index(addr)]; + + if (pgprot_val(prot)) + set_pte(ptep, pfn_pte(PHYS_PFN(phys), prot)); + else + pte_clear(&init_mm, addr, ptep); + + local_flush_tlb_all(); +} +#endif /* CONFIG_SW64_KERNEL_PAGE_TABLE */ + /* * paging_init() sets up the memory map. */ -- Gitee From eef24fadac5930cebf97a747af62bbea9a28d669 Mon Sep 17 00:00:00 2001 From: Gao Chen Date: Thu, 17 Jul 2025 17:30:44 +0800 Subject: [PATCH 15/40] sw64: map the early and the final page table Create temporary page tables in early_paging_init() and final page tables in paging_init(). When kernel page table support is detected via cpuid(), write CSR_ATC to switch the core to paging mode after setting up the temporary page tables. 
Signed-off-by: Gao Chen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/cpu.h | 2 + arch/sw_64/include/asm/hmcall.h | 8 + arch/sw_64/include/asm/mmu.h | 7 + arch/sw_64/include/asm/pgtable.h | 81 ++++++-- arch/sw_64/include/asm/platform.h | 11 + arch/sw_64/kernel/dup_print.c | 6 - arch/sw_64/kernel/early_init.c | 3 + arch/sw_64/kernel/setup.c | 10 +- arch/sw_64/kernel/smp.c | 15 ++ arch/sw_64/mm/init.c | 335 +++++++++++++++++++++++++++++- 10 files changed, 446 insertions(+), 32 deletions(-) diff --git a/arch/sw_64/include/asm/cpu.h b/arch/sw_64/include/asm/cpu.h index cfb6090baded..1697c2a3369e 100644 --- a/arch/sw_64/include/asm/cpu.h +++ b/arch/sw_64/include/asm/cpu.h @@ -71,4 +71,6 @@ static inline unsigned long get_cpu_freq(unsigned int cpu) } #endif +extern bool sunway_support_kpt; + #endif /* _ASM_SW64_CPU_H */ diff --git a/arch/sw_64/include/asm/hmcall.h b/arch/sw_64/include/asm/hmcall.h index 60f68e31c568..24ef9e4b768b 100644 --- a/arch/sw_64/include/asm/hmcall.h +++ b/arch/sw_64/include/asm/hmcall.h @@ -22,6 +22,7 @@ #define HMC_mtinten 0x0F #define HMC_wrap_asid 0x10 #define HMC_load_mm 0x11 +#define HMC_rwatc 0x12 #define HMC_tbisasid 0x14 #define HMC_tbivpn 0x19 #define HMC_ret 0x1A @@ -172,6 +173,8 @@ __CALL_HMC_R0(rdksp, unsigned long); __CALL_HMC_W1(wrksp, unsigned long); __CALL_HMC_R0(rdhtctl, unsigned long); +__CALL_HMC_RW2(rwatc, unsigned long, unsigned long, unsigned long); + /* * Load a mm context. This is needed when we change the page * table pointer(CSR:PTBR) or when we update the ASID. 
@@ -253,6 +256,11 @@ static inline void wrap_asid(unsigned long asid, unsigned long ptbr) #define set_nmi(irq) setup_nmi(1, (irq)) #define clear_nmi(irq) setup_nmi(0, (irq)) +#define ATC_PAGE 1 +#define ATC_KSEG 3 +#define set_atc(val) rwatc(1, (val)) +#define get_atc() rwatc(0, 0) + #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/sw_64/include/asm/mmu.h b/arch/sw_64/include/asm/mmu.h index f24219fac654..395df786d114 100644 --- a/arch/sw_64/include/asm/mmu.h +++ b/arch/sw_64/include/asm/mmu.h @@ -7,4 +7,11 @@ typedef struct { unsigned long asid[NR_CPUS]; void *vdso; } mm_context_t; + +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE +void create_pgd_mapping(pgd_t *pgdir, unsigned long virt, unsigned long phys, + unsigned long size, pgprot_t prot, + void *(*pgtable_alloc)(void)); +#endif + #endif /* _ASM_SW64_MMU_H */ diff --git a/arch/sw_64/include/asm/pgtable.h b/arch/sw_64/include/asm/pgtable.h index 849828140aa7..e0fef676dd7c 100644 --- a/arch/sw_64/include/asm/pgtable.h +++ b/arch/sw_64/include/asm/pgtable.h @@ -23,26 +23,6 @@ struct mm_struct; struct vm_area_struct; -static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) -{ - *pmdp = pmd; -} - -static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t pmdval) -{ - set_pmd(pmdp, pmdval); -} - -static inline void set_pud(pud_t *pudp, pud_t pud) -{ - *pudp = pud; -} - -static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) -{ - *p4dp = p4d; -} /* PGDIR_SHIFT determines what a forth-level page table entry can map */ #define PGDIR_SHIFT (PAGE_SHIFT + 3 * (PAGE_SHIFT - 3)) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) @@ -196,7 +176,11 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) #define PAGE_NONE __pgprot(__ACCESS_BITS | _PAGE_FOR | _PAGE_FOW | _PAGE_FOE | _PAGE_LEAF | _PAGE_PROTNONE) #define PAGE_KERNEL __pgprot(_PAGE_VALID | _PAGE_KERN | _PAGE_LEAF) #define _PAGE_NORMAL(x) __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_LEAF | (x)) -#define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL) 
+ +/* prot for kernel page table */ +#define PAGE_KERNEL_NOEXEC __pgprot(_PAGE_VALID | _PAGE_KERN | _PAGE_LEAF | _PAGE_FOE) +#define PAGE_KERNEL_READONLY __pgprot(_PAGE_VALID | _PAGE_KERN | _PAGE_LEAF | _PAGE_FOW | _PAGE_FOE) +#define PAGE_KERNEL_READONLY_EXEC __pgprot(_PAGE_VALID | _PAGE_KERN | _PAGE_LEAF | _PAGE_FOW) #define page_valid_kern(x) ((x & (_PAGE_VALID | _PAGE_KERN)) == (_PAGE_VALID | _PAGE_KERN)) #endif @@ -227,6 +211,8 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) #define PAGE_COPY PAGE_COPY_EXEC #define PAGE_SHARED PAGE_SHARED_EXEC +#define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL) + /* * The hardware can handle write-only mappings, but as the sw64 * architecture does byte-wide writes with a read-modify-write @@ -252,6 +238,12 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC +#define cont_pmd_addr_end(addr, end) \ +({ \ + unsigned long __boundary = ((addr) + CONT_PMD_SIZE) & CONT_PMD_MASK;\ + (__boundary - 1 < (end) - 1) ? __boundary : (end); \ +}) + /* * pgprot_noncached() is only for infiniband pci support, and a real * implementation for RAM would be more complicated. 
@@ -282,6 +274,38 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, set_pte(ptep, pteval); } +static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; + + if (page_valid_kern(pmd_val(pmd))) { + mb(); + if ((pmd_val(pmd) & _PAGE_FOE) == 0) + imemb(); + } +} + +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmdval) +{ + set_pmd(pmdp, pmdval); +} + +static inline void set_pud(pud_t *pudp, pud_t pud) +{ + *pudp = pud; + + if (page_valid_kern(pud_val(pud))) { + mb(); + if ((pud_val(pud) & _PAGE_FOE) == 0) + imemb(); + } +} + +static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) +{ + *p4dp = p4d; +} #define pud_write pud_write static inline int pud_write(pud_t pud) { @@ -400,6 +424,13 @@ static inline int pmd_none(pmd_t pmd) return !pmd_val(pmd); } +#define pmd_leaf pmd_leaf +static inline int pmd_leaf(pmd_t pmd) +{ + return !pmd_none(pmd) && + (pmd_val(pmd) & (_PAGE_PRESENT|_PAGE_LEAF)) != _PAGE_PRESENT; +} + static inline int pmd_bad(pmd_t pmd) { return (pmd_val(pmd) & ~_PFN_MASK) != _PAGE_TABLE; @@ -505,6 +536,13 @@ static inline int pud_none(pud_t pud) return !pud_val(pud); } +#define pud_leaf pud_leaf +static inline int pud_leaf(pud_t pud) +{ + return !pud_none(pud) && + (pud_val(pud) & (_PAGE_PRESENT|_PAGE_LEAF)) != _PAGE_PRESENT; +} + static inline int pud_bad(pud_t pud) { return (pud_val(pud) & ~_PFN_MASK) != _PAGE_TABLE; @@ -838,6 +876,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) pr_err("%s: %d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e)) #define pgd_ERROR(e) \ pr_err("%s: %d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e)) +extern void early_paging_init(void); extern void paging_init(void); #define HAVE_ARCH_UNMAPPED_AREA diff --git a/arch/sw_64/include/asm/platform.h b/arch/sw_64/include/asm/platform.h index 227f8eeb7f95..a1164258c640 100644 --- a/arch/sw_64/include/asm/platform.h +++ b/arch/sw_64/include/asm/platform.h @@ -9,6 +9,17 @@ #include #endif 
+#ifdef CONFIG_SW64_RRU +#define USER_PRINT_BUFF_BASE (0x600000UL + __START_KERNEL_map) +#define USER_PRINT_BUFF_LEN 0x100000UL +#define USER_MESSAGE_MAX_LEN 0x100000UL +#endif + +#ifdef CONFIG_SW64_RRK +#define KERNEL_PRINTK_BUFF_BASE (0x700000UL + __START_KERNEL_map) +#define PRINTK_SIZE 0x100000UL +#endif + extern struct boot_params *sunway_boot_params; extern unsigned long sunway_boot_magic; extern unsigned long sunway_dtb_address; diff --git a/arch/sw_64/kernel/dup_print.c b/arch/sw_64/kernel/dup_print.c index 2d77ccae30d2..f15dbacd1c1b 100644 --- a/arch/sw_64/kernel/dup_print.c +++ b/arch/sw_64/kernel/dup_print.c @@ -8,12 +8,9 @@ #ifdef CONFIG_SW64_RRK -#define KERNEL_PRINTK_BUFF_BASE (0x700000UL + __START_KERNEL_map) - static DEFINE_SPINLOCK(printk_lock); static unsigned long sw64_printk_offset; -#define PRINTK_SIZE 0x100000UL static bool rrk_last_newline_end; static unsigned long rrk_last_id; @@ -113,9 +110,6 @@ void sw64_rrk_store(const char *text, u16 text_len, u64 ts_nsec, int level, #include static DEFINE_SPINLOCK(printf_lock); -#define USER_PRINT_BUFF_BASE (0x600000UL + __START_KERNEL_map) -#define USER_PRINT_BUFF_LEN 0x100000UL -#define USER_MESSAGE_MAX_LEN 0x100000UL unsigned long sw64_printf_offset; int sw64_user_printf(const char __user *buf, int len) { diff --git a/arch/sw_64/kernel/early_init.c b/arch/sw_64/kernel/early_init.c index 2ec7a3e99443..8fb15a9e385c 100644 --- a/arch/sw_64/kernel/early_init.c +++ b/arch/sw_64/kernel/early_init.c @@ -2,6 +2,9 @@ #include #include +#include + +bool sunway_support_kpt; asmlinkage __visible void __init sw64_start_kernel(void) { diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c index dcfd0acf89e1..8cb5e97fc280 100644 --- a/arch/sw_64/kernel/setup.c +++ b/arch/sw_64/kernel/setup.c @@ -675,6 +675,8 @@ setup_arch(char **cmdline_p) */ trap_init(); + early_paging_init(); + setup_cpu_features(); apply_alternatives_all(); @@ -701,8 +703,6 @@ setup_arch(char **cmdline_p) 
atomic_notifier_chain_register(&panic_notifier_list, &sw64_panic_block); - callback_init(); - /* * Process command-line arguments. */ @@ -724,6 +724,10 @@ setup_arch(char **cmdline_p) sw64_memblock_init(); + paging_init(); + + callback_init(); + /* Try to upgrade ACPI tables via initrd */ acpi_table_upgrade(); @@ -743,8 +747,6 @@ setup_arch(char **cmdline_p) zone_sizes_init(); - paging_init(); - kexec_control_page_init(); /* diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c index 968772b7013f..34f9d0ebbc82 100644 --- a/arch/sw_64/kernel/smp.c +++ b/arch/sw_64/kernel/smp.c @@ -186,6 +186,14 @@ void smp_callin(void) complete(&cpu_running); +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE + /* switch to paging mode */ + if (sunway_support_kpt) { + set_atc(ATC_PAGE); + tbiv(); + } +#endif + /* Must have completely accurate bogos. */ local_irq_enable(); @@ -256,12 +264,19 @@ static void __init process_nr_cpu_ids(void) nr_cpu_ids = num_possible_cpus(); } +extern void * __init pgtable_alloc_fixmap(void); + void __init smp_rcb_init(struct smp_rcb_struct *smp_rcb_base_addr) { if (smp_rcb != NULL) return; smp_rcb = smp_rcb_base_addr; +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE + create_pgd_mapping((&init_mm)->pgd, (unsigned long)smp_rcb, __pa(smp_rcb), + CONFIG_PHYSICAL_START - __pa(smp_rcb), + PAGE_KERNEL_NOEXEC, pgtable_alloc_fixmap); +#endif memset(smp_rcb, 0, sizeof(struct smp_rcb_struct)); /* Setup SMP_RCB fields that uses to activate secondary CPU */ smp_rcb->restart_entry = __smp_callin; diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c index 5e6dd219f377..8e8b14fcaea9 100644 --- a/arch/sw_64/mm/init.c +++ b/arch/sw_64/mm/init.c @@ -48,6 +48,13 @@ static phys_addr_t mem_size_limit; #ifdef CONFIG_SW64_KERNEL_PAGE_TABLE pgd_t early_pg_dir[1024] __initdata __attribute__((__aligned__(PAGE_SIZE))); +pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); +pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); +pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata 
__aligned(PAGE_SIZE); +pud_t early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); +pmd_t early_printk_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); +pud_t early_printk_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); + pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss; @@ -117,7 +124,9 @@ pgd_alloc(struct mm_struct *mm) static inline void switch_to_system_map(void) { +#ifndef CONFIG_SW64_KERNEL_PAGE_TABLE memset(swapper_pg_dir, 0, PAGE_SIZE); +#endif update_ptbr_sys(virt_to_phys(swapper_pg_dir)); #ifdef CONFIG_SUBARCH_C4 update_ptbr_usr(__pa_symbol(empty_zero_page)); @@ -169,13 +178,337 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) local_flush_tlb_all(); } + +static pte_t *__init get_pte_virt_fixmap(phys_addr_t phys) +{ + clear_fixmap(FIX_PTE); + return (pte_t *)set_fixmap_offset(FIX_PTE, phys); +} + +static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t phys) +{ + clear_fixmap(FIX_PMD); + return (pmd_t *)set_fixmap_offset(FIX_PMD, phys); +} + +static pud_t *__init get_pud_virt_fixmap(phys_addr_t phys) +{ + clear_fixmap(FIX_PUD); + return (pud_t *)set_fixmap_offset(FIX_PUD, phys); +} + +void * __init pgtable_alloc_fixmap(void) +{ + return (void *)__va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE)); +} + +static void __init +create_pte_mapping(pte_t *pte_first, unsigned long virt, unsigned long phys, + unsigned long size, pgprot_t prot) +{ + pte_t *pte; + unsigned long addr, next, end, pfn; + + addr = virt; + end = virt + size; + pte_first = get_pte_virt_fixmap(__pa(pte_first)); + for (; addr < end; addr = next) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + pte = pte_first + pte_index(addr); + pfn = PHYS_PFN(phys); + set_pte(pte, pfn_pte(pfn, prot)); + phys += next - addr; + } +} + +static void __init +create_pmd_mapping(pmd_t *pmd_first, unsigned long virt, unsigned long phys, + unsigned long size, pgprot_t prot, + void 
*(*pgtable_alloc)(void)) +{ + pmd_t *pmd; + pte_t *pte; + unsigned long addr, next, end, pfn; + + addr = virt; + end = virt + size; + for (; addr < end; addr = next) { + next = pmd_addr_end(addr, end); + pmd = pmd_first + pmd_index(addr); + + if (next - addr == PMD_SIZE) { + pfn = PHYS_PFN(phys); + set_pmd(pmd, pfn_pmd(pfn, prot)); + } else { + if (!pmd_none(*pmd)) + pte = pte_offset_kernel(pmd, 0); + else { + pte = (pte_t *)pgtable_alloc(); + memset(get_pte_virt_fixmap(__pa(pte)), 0, + PAGE_SIZE); + pmd_populate(NULL, pmd, + virt_to_page((unsigned long)pte)); + } + create_pte_mapping(pte, addr, phys, next - addr, prot); + } + phys += next - addr; + } +} + +static void __init +create_cont_pmd_mapping(pmd_t *pmd_first, unsigned long virt, + unsigned long phys, unsigned long size, pgprot_t prot, + void *(*pgtable_alloc)(void)) +{ + pmd_t *pmd; + unsigned long addr, next, end, pfn, i; + + addr = virt; + end = virt + size; + pmd_first = get_pmd_virt_fixmap(__pa(pmd_first)); + for (; addr < end; addr = next) { + next = cont_pmd_addr_end(addr, end); + pmd = pmd_first + pmd_index(addr); + + if ((next - addr == CONT_PMD_SIZE) && + (PTRS_PER_PMD - pmd_index(addr) >= CONT_PMDS)) { + pfn = PHYS_PFN(phys); + for (i = 0; i < CONT_PMDS; i++) + set_pmd(pmd + i, pfn_pmd(pfn, + __pgprot(pgprot_val(prot) | + _PAGE_CONT))); + } else + create_pmd_mapping(pmd_first, addr, phys, next - addr, + prot, pgtable_alloc); + phys += next - addr; + } +} + +static void __init +create_pud_mapping(pud_t *pud_first, unsigned long virt, unsigned long phys, + unsigned long size, pgprot_t prot, + void *(*pgtable_alloc)(void)) +{ + pud_t *pud; + pmd_t *pmd; + unsigned long addr, next, end, pfn; + + addr = virt; + end = virt + size; + pud_first = get_pud_virt_fixmap(__pa(pud_first)); + for (; addr < end; addr = next) { + next = pud_addr_end(addr, end); + pud = pud_first + pud_index(addr); + if (next - addr == PUD_SIZE) { + pfn = PHYS_PFN(phys); + set_pud(pud, pfn_pud(pfn, prot)); + } else { + if 
(!pud_none(*pud)) + pmd = pmd_offset(pud, 0); + else { + pmd = (pmd_t *)pgtable_alloc(); + memset(get_pmd_virt_fixmap(__pa(pmd)), 0, + PAGE_SIZE); + pud_populate(NULL, pud, pmd); + } + create_cont_pmd_mapping(pmd, addr, phys, next - addr, + prot, pgtable_alloc); + } + phys += next - addr; + } +} + +void __init +create_pgd_mapping(pgd_t *pgdir, unsigned long virt, unsigned long phys, + unsigned long size, pgprot_t prot, + void *(*pgtable_alloc)(void)) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + unsigned long addr, next, end; + + addr = virt & PAGE_MASK; + phys &= PAGE_MASK; + end = PAGE_ALIGN(virt + size); + for (; addr < end; addr = next) { + next = pgd_addr_end(addr, end); + pgd = pgd_offset_pgd(pgdir, addr); + p4d = p4d_offset(pgd, addr); + + if (!p4d_none(*p4d)) + pud = pud_offset(p4d, 0); + else { + pud = (pud_t *)pgtable_alloc(); + memset(get_pud_virt_fixmap(__pa(pud)), 0, PAGE_SIZE); + p4d_populate(NULL, p4d, pud); + } + create_pud_mapping(pud, addr, phys, next - addr, prot, + pgtable_alloc); + phys += next - addr; + } + clear_fixmap(FIX_PTE); + clear_fixmap(FIX_PMD); + clear_fixmap(FIX_PUD); +} + +static void __init early_create_pmd(pgd_t *pgdir, pud_t *pud, pmd_t *pmd, + unsigned long start_va, unsigned long size, unsigned long pa) +{ + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + unsigned long addr, end_va; + int pmd_num, i; + + addr = start_va & PMD_MASK; + end_va = start_va + size; + pmd_num = (end_va - addr) / PMD_SIZE; + if (end_va % PMD_SIZE) + pmd_num += 1; + + pgdp = pgd_offset_pgd(pgdir, addr); + p4dp = p4d_offset(pgdp, addr); + if (p4d_none(*p4dp)) { + BUG_ON(!pud); + p4d_populate(NULL, p4dp, pud); + } + pudp = pud_offset(p4dp, addr); + if (pud_none(*pudp)) { + BUG_ON(!pmd); + pud_populate(NULL, pudp, pmd); + } + + for (i = 0; i < pmd_num; i++) { + pmdp = pmd_offset(pudp, addr); + set_pmd(pmdp, pfn_pmd(PHYS_PFN(pa), PAGE_KERNEL)); + addr += PMD_SIZE; + pa += PMD_SIZE; + } +} + +static void __init fixmap_init(pgd_t *pgdir) +{ + 
pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + unsigned long addr = FIXADDR_START & PMD_MASK; + + pgdp = pgd_offset_pgd(pgdir, addr); + p4dp = p4d_offset(pgdp, addr); + if (p4d_none(*p4dp)) + p4d_populate(NULL, p4dp, (pud_t *)fixmap_pud); + pudp = pud_offset(p4dp, addr); + if (pud_none(*pudp)) + pud_populate(NULL, pudp, (pmd_t *)fixmap_pmd); + pmdp = pmd_offset(pudp, addr); + if (pmd_none(*pmdp)) + pmd_populate(NULL, pmdp, virt_to_page(fixmap_pte)); +} #endif /* CONFIG_SW64_KERNEL_PAGE_TABLE */ /* - * paging_init() sets up the memory map. + * early_paging_init sets up a temporary memory map. + */ +void __init early_paging_init(void) +{ +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE + unsigned long img_start, img_size; + unsigned long dtb_start, dtb_size = 0; + + img_start = (unsigned long)(KERNEL_START_PHYS + __START_KERNEL_map); + img_size = (unsigned long)_end - img_start; + dtb_start = sunway_dtb_address; + + fixmap_init(early_pg_dir); + +#ifdef CONFIG_SW64_RRK + early_create_pmd(early_pg_dir, (pud_t *)early_printk_pud, + (pmd_t *)early_printk_pmd, KERNEL_PRINTK_BUFF_BASE, + PRINTK_SIZE, __pa(KERNEL_PRINTK_BUFF_BASE)); +#endif + early_create_pmd(early_pg_dir, (pud_t *)early_pud, (pmd_t *)early_pmd, + img_start, img_size, __pa(img_start)); + if (dtb_start) { + dtb_size = (unsigned long)fdt_totalsize((void *)dtb_start); + early_create_pmd(early_pg_dir, (pud_t *)early_dtb_pud, + (pmd_t *)early_dtb_pmd, dtb_start, dtb_size, + __pa(dtb_start)); + } + update_ptbr_sys(virt_to_phys(early_pg_dir)); + + /* switch to paging mode */ + if (sunway_support_kpt) { + pr_info("SW64 kernel page table enabled\n"); + set_atc(ATC_PAGE); + } + + tbiv(); +#endif /* CONFIG_SW64_KERNEL_PAGE_TABLE */ +} + +/* + * paging_init() sets up the final memory map. 
*/ void __init paging_init(void) { +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE + unsigned long sw64_vcpucb_start = PAGE_OFFSET + 0x20000; + unsigned long sw64_vcpucb_size = 0x60000; + unsigned long sw64_reserve_start = CONFIG_PHYSICAL_START + PAGE_OFFSET; + unsigned long sw64_reserve_size = (unsigned long)_stext - sw64_reserve_start; + unsigned long text_start = (unsigned long)_stext; + unsigned long text_size = (unsigned long)_etext - text_start; + unsigned long ro_start = (unsigned long)__start_rodata; + unsigned long ro_size = (unsigned long)__init_begin - ro_start; + unsigned long init_start = (unsigned long)__init_begin; + unsigned long init_size = (unsigned long)__init_end - init_start; + unsigned long data_start = (unsigned long)_sdata; + unsigned long data_size = (unsigned long)_end - data_start; + pgd_t *pgdir = (&init_mm)->pgd; + phys_addr_t start, end; + u64 i; + + fixmap_init(pgdir); + + create_pgd_mapping(pgdir, sw64_vcpucb_start, __pa(sw64_vcpucb_start), + sw64_vcpucb_size, PAGE_KERNEL_NOEXEC, pgtable_alloc_fixmap); +#ifdef CONFIG_SW64_RRU + create_pgd_mapping(pgdir, USER_PRINT_BUFF_BASE, __pa(USER_PRINT_BUFF_BASE), + USER_PRINT_BUFF_LEN, PAGE_KERNEL_NOEXEC, + pgtable_alloc_fixmap); +#endif +#ifdef CONFIG_SW64_RRK + create_pgd_mapping(pgdir, KERNEL_PRINTK_BUFF_BASE, __pa(KERNEL_PRINTK_BUFF_BASE), + PRINTK_SIZE, PAGE_KERNEL_NOEXEC, pgtable_alloc_fixmap); +#endif + create_pgd_mapping(pgdir, sw64_reserve_start, __pa(sw64_reserve_start), + sw64_reserve_size, PAGE_KERNEL_NOEXEC, + pgtable_alloc_fixmap); + create_pgd_mapping(pgdir, text_start, __pa(text_start), text_size, + PAGE_KERNEL_READONLY_EXEC, pgtable_alloc_fixmap); + create_pgd_mapping(pgdir, ro_start, __pa(ro_start), ro_size, + PAGE_KERNEL_NOEXEC, pgtable_alloc_fixmap); + create_pgd_mapping(pgdir, init_start, __pa(init_start), init_size, + PAGE_KERNEL, pgtable_alloc_fixmap); + create_pgd_mapping(pgdir, data_start, __pa(data_start), data_size, + PAGE_KERNEL_NOEXEC, pgtable_alloc_fixmap); + + 
memblock_mark_nomap(__pa(sw64_reserve_start), + __pa((unsigned long)_end - sw64_reserve_start)); + for_each_mem_range(i, &start, &end) { + if (start >= end) + break; + create_pgd_mapping(pgdir, (unsigned long)__va(start), + (unsigned long)start, + (unsigned long)(end - start), + PAGE_KERNEL_NOEXEC, pgtable_alloc_fixmap); + } + memblock_clear_nomap(__pa(sw64_reserve_start), + __pa((unsigned long)_end - sw64_reserve_start)); +#endif /* CONFIG_SW64_KERNEL_PAGE_TABLE */ } static void __init setup_socket_info(void) -- Gitee From c4f03809aa58f2c9574adf804817c73d07a67971 Mon Sep 17 00:00:00 2001 From: Gao Chen Date: Tue, 22 Jul 2025 10:54:30 +0800 Subject: [PATCH 16/40] sw64: modify codes with fixmap Create a temporary fixmap mapping for a text page, modify through this mapping, then remove the mapping. Signed-off-by: Gao Chen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/insn.h | 3 ++ arch/sw_64/kernel/ftrace.c | 16 ++++---- arch/sw_64/kernel/insn.c | 67 +++++++++++++++++++++++++++------- arch/sw_64/kernel/jump_label.c | 4 ++ 4 files changed, 67 insertions(+), 23 deletions(-) diff --git a/arch/sw_64/include/asm/insn.h b/arch/sw_64/include/asm/insn.h index 7bd24d6f6598..901df28338c5 100644 --- a/arch/sw_64/include/asm/insn.h +++ b/arch/sw_64/include/asm/insn.h @@ -56,6 +56,9 @@ extern unsigned int sw64_insn_nop(void); extern unsigned int sw64_insn_call(unsigned int ra, unsigned int rb); extern unsigned int sw64_insn_sys_call(unsigned int num); extern unsigned int sw64_insn_br(unsigned int ra, unsigned long pc, unsigned long new_pc); +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE +extern int sw64_patch_text_nosync(void *addr, u32 insn); +#endif #define SW64_OPCODE_RA(opcode) ((opcode >> 21) & 0x1f) diff --git a/arch/sw_64/kernel/ftrace.c b/arch/sw_64/kernel/ftrace.c index 84ba10d93c9f..2f36ef89df70 100644 --- a/arch/sw_64/kernel/ftrace.c +++ b/arch/sw_64/kernel/ftrace.c @@ -77,10 +77,9 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) 
insn[1] = SW64_CALL(R28, R28, 0); insn[2] = SW64_NOP; - *((u32 *)pc) = insn[0]; - mb(); - *((u32 *)(pc + 4)) = insn[1]; - *((u32 *)(pc + 8)) = insn[2]; + ftrace_modify_code(pc, insn[0]); + ftrace_modify_code(pc + 4, insn[1]); + ftrace_modify_code(pc + 8, insn[2]); return 0; } @@ -94,10 +93,9 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long pc = rec->ip + MCOUNT_LDGP_SIZE; unsigned int insn[3] = {SW64_NOP, SW64_NOP, SW64_NOP}; - *((u32 *)(pc + 8)) = insn[2]; - *((u32 *)(pc + 4)) = insn[1]; - mb(); - *((u32 *)pc) = insn[0]; + ftrace_modify_code(pc + 8, insn[2]); + ftrace_modify_code(pc + 4, insn[1]); + ftrace_modify_code(pc, insn[0]); return 0; } @@ -135,7 +133,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, /* ldl r28,(ftrace_addr_offset)(r8) */ insn[0] = (0x23U << 26) | (28U << 21) | (8U << 16) | offset; - copy_to_kernel_nofault((void *)pc, insn, SW64_INSN_SIZE); + ftrace_modify_code(pc, insn[0]); return 0; } diff --git a/arch/sw_64/kernel/insn.c b/arch/sw_64/kernel/insn.c index 281578e1bfc0..1ef13ff18f27 100644 --- a/arch/sw_64/kernel/insn.c +++ b/arch/sw_64/kernel/insn.c @@ -17,7 +17,38 @@ #include #include -//static DEFINE_RAW_SPINLOCK(patch_lock); +#include + +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE +static DEFINE_RAW_SPINLOCK(patch_lock); + +static void __kprobes *patch_map(void *addr, int fixmap) +{ + unsigned long uintaddr = (uintptr_t)addr; + struct page *page; + + if (core_kernel_text((unsigned long)addr)) + page = pfn_to_page(PHYS_PFN(__pa(addr))); + else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + page = vmalloc_to_page(addr); + else + return addr; + + BUG_ON(!page); + return (void *)set_fixmap_offset(fixmap, page_to_pa(page) + + (uintaddr & ~PAGE_MASK)); +} + +static void __kprobes patch_unmap(int fixmap) +{ + clear_fixmap(fixmap); +} + +int __kprobes sw64_patch_text_nosync(void *addr, u32 insn) +{ + return sw64_insn_write(addr, insn); +} +#endif /* CONFIG_SW64_KERNEL_PAGE_TABLE */ int __kprobes 
sw64_insn_read(void *addr, u32 *insnp) { @@ -33,30 +64,37 @@ int __kprobes sw64_insn_read(void *addr, u32 *insnp) static int __kprobes __sw64_insn_write(void *addr, __le32 insn) { - void *waddr = addr; int ret; +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE + void *waddr; + unsigned long flags = 0; - //raw_spin_lock_irqsave(&patch_lock, flags); - + raw_spin_lock_irqsave(&patch_lock, flags); + waddr = patch_map(addr, FIX_TEXT_POKE0); ret = copy_to_kernel_nofault(waddr, &insn, SW64_INSN_SIZE); - - //raw_spin_unlock_irqrestore(&patch_lock, flags); - + patch_unmap(FIX_TEXT_POKE0); + raw_spin_unlock_irqrestore(&patch_lock, flags); +#else + ret = copy_to_kernel_nofault(addr, &insn, SW64_INSN_SIZE); +#endif return ret; } static int __kprobes __sw64_insn_double_write(void *addr, __le64 insn) { - void *waddr = addr; - //unsigned long flags = 0; int ret; +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE + void *waddr; + unsigned long flags = 0; - //raw_spin_lock_irqsave(&patch_lock, flags); - + raw_spin_lock_irqsave(&patch_lock, flags); + waddr = patch_map(addr, FIX_TEXT_POKE0); ret = copy_to_kernel_nofault(waddr, &insn, 2 * SW64_INSN_SIZE); - - //raw_spin_unlock_irqrestore(&patch_lock, flags); - + patch_unmap(FIX_TEXT_POKE0); + raw_spin_unlock_irqrestore(&patch_lock, flags); +#else + ret = copy_to_kernel_nofault(addr, &insn, 2 * SW64_INSN_SIZE); +#endif return ret; } @@ -77,6 +115,7 @@ int __kprobes sw64_insn_double_write(void *addr, u64 insn) return -EINVAL; return __sw64_insn_double_write(addr, cpu_to_le64(insn)); } + unsigned int __kprobes sw64_insn_nop(void) { return SW64_BIS(R31, R31, R31); diff --git a/arch/sw_64/kernel/jump_label.c b/arch/sw_64/kernel/jump_label.c index f3bc40370e4d..4e448dea4221 100644 --- a/arch/sw_64/kernel/jump_label.c +++ b/arch/sw_64/kernel/jump_label.c @@ -18,7 +18,11 @@ void arch_jump_label_transform(struct jump_entry *entry, insn = sw64_insn_nop(); } +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE + sw64_patch_text_nosync(insnp, insn); +#else *insnp = insn; +#endif 
flush_icache_range(entry->code, entry->code + SW64_INSN_SIZE); } -- Gitee From c55fe751b434e262f77656a475478ebd1ca609ff Mon Sep 17 00:00:00 2001 From: Gao Chen Date: Tue, 22 Jul 2025 15:53:00 +0800 Subject: [PATCH 17/40] sw64: set protection for kernel page table Use mark_rodata_ro() to configure distinct page permissions for rodata section in the kernel page table. Signed-off-by: Gao Chen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/Kconfig | 4 +- arch/sw_64/include/asm/set_memory.h | 13 +++ arch/sw_64/mm/Makefile | 1 + arch/sw_64/mm/init.c | 12 +++ arch/sw_64/mm/pageattr.c | 128 ++++++++++++++++++++++++++++ 5 files changed, 157 insertions(+), 1 deletion(-) create mode 100644 arch/sw_64/include/asm/set_memory.h create mode 100644 arch/sw_64/mm/pageattr.c diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index f1438771a6d2..620699b359b2 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -17,7 +17,9 @@ config SW64 select ARCH_HAS_PMEM_API select ARCH_HAS_PTE_DEVMAP select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_SET_MEMORY if SW64_KERNEL_PAGE_TABLE select ARCH_HAS_SG_CHAIN + select ARCH_HAS_STRICT_KERNEL_RWX if SW64_KERNEL_PAGE_TABLE select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_VM_GET_PAGE_PROT select ARCH_HAS_ZONE_DEVICE @@ -67,7 +69,7 @@ config SW64 select GENERIC_ARCH_TOPOLOGY select GENERIC_CLOCKEVENTS select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO - select GENERIC_IOREMAP if SUBARCH_C4 + select GENERIC_IOREMAP if SW64_KERNEL_PAGE_TABLE select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP select GENERIC_IRQ_LEGACY select GENERIC_IRQ_MIGRATION if SMP diff --git a/arch/sw_64/include/asm/set_memory.h b/arch/sw_64/include/asm/set_memory.h new file mode 100644 index 000000000000..6429975a7ad7 --- /dev/null +++ b/arch/sw_64/include/asm/set_memory.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_SW64_SET_MEMORY_H +#define _ASM_SW64_SET_MEMORY_H + +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE +int set_memory_ro(unsigned long 
addr, int numpages); +int set_memory_rw(unsigned long addr, int numpages); +int set_memory_x(unsigned long addr, int numpages); +int set_memory_nx(unsigned long addr, int numpages); +#endif + +#endif /* _ASM_SW64_SET_MEMORY_H */ diff --git a/arch/sw_64/mm/Makefile b/arch/sw_64/mm/Makefile index 8b9d6e4d2ebf..1f6cd3965633 100644 --- a/arch/sw_64/mm/Makefile +++ b/arch/sw_64/mm/Makefile @@ -7,6 +7,7 @@ obj-y := init.o fault.o physaddr.o mmap.o extable.o +obj-$(CONFIG_SW64_KERNEL_PAGE_TABLE) += pageattr.o obj-$(CONFIG_NUMA) += numa.o ifeq ($(CONFIG_SUBARCH_C4),y) obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage_c4.o diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c index 8e8b14fcaea9..433b5be03573 100644 --- a/arch/sw_64/mm/init.c +++ b/arch/sw_64/mm/init.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -511,6 +512,17 @@ void __init paging_init(void) #endif /* CONFIG_SW64_KERNEL_PAGE_TABLE */ } +#ifdef CONFIG_STRICT_KERNEL_RWX +void mark_rodata_ro(void) +{ + unsigned long ro_start = (unsigned long)__start_rodata; + unsigned long ro_size = (unsigned long)__init_begin - ro_start; + + if (sunway_support_kpt) + set_memory_ro(ro_start, PAGE_ALIGN(ro_size) >> PAGE_SHIFT); +} +#endif + static void __init setup_socket_info(void) { int i; diff --git a/arch/sw_64/mm/pageattr.c b/arch/sw_64/mm/pageattr.c new file mode 100644 index 000000000000..2949fa7b42dd --- /dev/null +++ b/arch/sw_64/mm/pageattr.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + +#include +#include + +struct pageattr_masks { + pgprot_t set_mask; + pgprot_t clear_mask; +}; + +static unsigned long +set_pageattr_masks(unsigned long val, struct mm_walk *walk) +{ + struct pageattr_masks *masks = walk->private; + unsigned long new_val = val; + + new_val &= ~(pgprot_val(masks->clear_mask)); + new_val |= (pgprot_val(masks->set_mask)); + + return new_val; +} + +static int pageattr_pud_entry(pud_t *pud, unsigned long addr, + unsigned long next, 
struct mm_walk *walk) +{ + pud_t val = READ_ONCE(*pud); + + if (pud_huge(val)) { + val = __pud(set_pageattr_masks(pud_val(val), walk)); + set_pud(pud, val); + } + + return 0; +} + +static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pmd_t val = READ_ONCE(*pmd); + + if (pmd_huge(val)) { + val = __pmd(set_pageattr_masks(pmd_val(val), walk)); + set_pmd(pmd, val); + } + + return 0; +} + +static int pageattr_pte_entry(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pte_t val = READ_ONCE(*pte); + + val = __pte(set_pageattr_masks(pte_val(val), walk)); + set_pte(pte, val); + + return 0; +} + +static const struct mm_walk_ops pageattr_ops = { + .pud_entry = pageattr_pud_entry, + .pmd_entry = pageattr_pmd_entry, + .pte_entry = pageattr_pte_entry, +}; + +static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, + pgprot_t clear_mask) +{ + int ret; + unsigned long start = addr; + unsigned long end = start + PAGE_SIZE * numpages; + struct pageattr_masks masks = { + .set_mask = set_mask, + .clear_mask = clear_mask + }; + + if (!numpages) + return 0; + + mmap_read_lock(&init_mm); + ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, + &masks); + mmap_read_unlock(&init_mm); + + local_flush_tlb_all(); + + return ret; +} + +int set_memory_ro(unsigned long addr, int numpages) +{ + if (sunway_support_kpt) + return __set_memory(addr, numpages, __pgprot(_PAGE_FOW), + __pgprot(_PAGE_FOR)); + else + return 0; +} + +int set_memory_rw(unsigned long addr, int numpages) +{ + if (sunway_support_kpt) + return __set_memory(addr, numpages, __pgprot(0), + __pgprot(_PAGE_FOR | _PAGE_FOW)); + else + return 0; +} + +int set_memory_x(unsigned long addr, int numpages) +{ + if (sunway_support_kpt) + return __set_memory(addr, numpages, __pgprot(0), + __pgprot(_PAGE_FOE)); + else + return 0; +} + +int set_memory_nx(unsigned long addr, int numpages) +{ + if (sunway_support_kpt) + 
return __set_memory(addr, numpages, __pgprot(_PAGE_FOE), + __pgprot(0)); + else + return 0; +} -- Gitee From fbaa5e7b58550004ceff24df202658c2ffffb61a Mon Sep 17 00:00:00 2001 From: Gao Chen Date: Thu, 18 Sep 2025 11:28:12 +0800 Subject: [PATCH 18/40] sw64: setup mapping for efi_mm Create page table mappings in efi_mm for all efi memory descs. During efi_call_virt, first switch to efi_mm to perform the callback, then switch back to init_mm. Signed-off-by: Gao Chen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/efi.h | 11 +++++ arch/sw_64/kernel/efi.c | 43 +++++++++++++++++++ drivers/firmware/efi/sunway-runtime.c | 59 ++++++++++++++++++++++++++- 3 files changed, 112 insertions(+), 1 deletion(-) diff --git a/arch/sw_64/include/asm/efi.h b/arch/sw_64/include/asm/efi.h index f2a470127c47..63d1d3d32c50 100644 --- a/arch/sw_64/include/asm/efi.h +++ b/arch/sw_64/include/asm/efi.h @@ -3,6 +3,9 @@ #ifndef _ASM_SW64_EFI_H #define _ASM_SW64_EFI_H +#include + +#include #include #ifdef CONFIG_EFI @@ -21,8 +24,16 @@ extern unsigned long sunway_bios_version; #define sunway_bios_version (0) #endif /* CONFIG_EFI */ +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE +int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); +#define arch_efi_call_virt_setup() efi_virtmap_load() +#define arch_efi_call_virt_teardown() efi_virtmap_unload() +void efi_virtmap_load(void); +void efi_virtmap_unload(void); +#else #define arch_efi_call_virt_setup() #define arch_efi_call_virt_teardown() +#endif #define ARCH_EFI_IRQ_FLAGS_MASK 0x00000001 diff --git a/arch/sw_64/kernel/efi.c b/arch/sw_64/kernel/efi.c index 7955fa840a43..ac6d8dd42a71 100644 --- a/arch/sw_64/kernel/efi.c +++ b/arch/sw_64/kernel/efi.c @@ -2,8 +2,10 @@ #include #include +#include #include +#include bool efi_poweroff_required(void) { @@ -17,3 +19,44 @@ bool efi_poweroff_required(void) return efi_enabled(EFI_RUNTIME_SERVICES); } + +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE +static __init pgprot_t 
create_mapping_protection(efi_memory_desc_t *md) +{ + switch (md->type) { + case EFI_RUNTIME_SERVICES_CODE: + case EFI_PAL_CODE: + return PAGE_KERNEL_READONLY_EXEC; + case EFI_RESERVED_TYPE: + case EFI_RUNTIME_SERVICES_DATA: + case EFI_UNUSABLE_MEMORY: + case EFI_ACPI_MEMORY_NVS: + case EFI_MEMORY_MAPPED_IO: + case EFI_MEMORY_MAPPED_IO_PORT_SPACE: + case EFI_MAX_MEMORY_TYPE: + return PAGE_KERNEL_NOEXEC; + default: + break; + } + + return PAGE_KERNEL; +} + +static void * __init pgtable_alloc_late(void) +{ + return (void *)__get_free_page(GFP_KERNEL); +} + +int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) +{ + pgprot_t prot = create_mapping_protection(md); + unsigned long start, size; + + start = (unsigned long)__va(md->phys_addr); + size = (unsigned long)(md->num_pages << EFI_PAGE_SHIFT); + create_pgd_mapping(mm->pgd, start, (unsigned long)md->phys_addr, size, + prot, pgtable_alloc_late); + + return 0; +} +#endif diff --git a/drivers/firmware/efi/sunway-runtime.c b/drivers/firmware/efi/sunway-runtime.c index 6bd96cff7d5d..489792adc6b1 100644 --- a/drivers/firmware/efi/sunway-runtime.c +++ b/drivers/firmware/efi/sunway-runtime.c @@ -29,6 +29,57 @@ #include #include #include +#include + +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE + +DEFINE_PER_CPU(unsigned long, atc_state); + +static bool __init efi_virtmap_init(void) +{ + efi_memory_desc_t *md; + + efi_mm.pgd = pgd_alloc(&efi_mm); + memcpy(efi_mm.pgd + USER_PTRS_PER_PGD, + init_mm.pgd + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + mm_init_cpumask(&efi_mm); + init_new_context(NULL, &efi_mm); + + for_each_efi_memory_desc(md) { + phys_addr_t phys = md->phys_addr; + int ret; + + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + + ret = efi_create_mapping(&efi_mm, md); + if (ret) { + pr_warn("EFI remap %pa: failed to create mapping (%d)\n", + &phys, ret); + return false; + } + } + + return true; +} + +void efi_virtmap_load(void) +{ + preempt_disable(); + 
update_ptbr_sys(virt_to_phys(efi_mm.pgd)); + /* switch CSR_ATC for bios compatibility */ + this_cpu_write(atc_state, get_atc()); + set_atc(ATC_KSEG); +} + +void efi_virtmap_unload(void) +{ + set_atc(this_cpu_read(atc_state)); + update_ptbr_sys(virt_to_phys(init_mm.pgd)); + preempt_enable(); +} +#endif /* CONFIG_SW64_KERNEL_PAGE_TABLE */ /* * Enable the UEFI Runtime Services if all prerequisites are in place, i.e., @@ -63,6 +114,13 @@ static int __init sunway_enable_runtime_services(void) return 0; } +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE + if (!efi_virtmap_init()) { + pr_err("UEFI virtual mapping missing or invalid -- runtime services will not be available\n"); + return -ENOMEM; + } +#endif + /* Set up runtime services function pointers */ efi_native_runtime_setup(); set_bit(EFI_RUNTIME_SERVICES, &efi.flags); @@ -71,7 +129,6 @@ static int __init sunway_enable_runtime_services(void) } early_initcall(sunway_enable_runtime_services); - static int __init sunway_dmi_init(void) { /* -- Gitee From 01beecb44da94eb44fe8cf9c194e49fdf03b034a Mon Sep 17 00:00:00 2001 From: Gao Chen Date: Wed, 23 Jul 2025 09:35:20 +0800 Subject: [PATCH 19/40] sw64: reset ATC for kvm Reset vcb.atc to "3" when kvm restarts. Signed-off-by: Gao Chen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/kvm/sw64.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/sw_64/kvm/sw64.c b/arch/sw_64/kvm/sw64.c index e9fe07cce1f1..90031e61b095 100644 --- a/arch/sw_64/kvm/sw64.c +++ b/arch/sw_64/kvm/sw64.c @@ -481,6 +481,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (vcpu->arch.restart == 1) { /* handle reset vCPU */ vcpu->arch.regs.pc = GUEST_RESET_PC; +#ifdef CONFIG_SUBARCH_C4 + vcpu->arch.vcb.atc = 3; +#endif vcpu->arch.restart = 0; } -- Gitee From 940734b40feb7c479ddcf9d86c53276f06182fb2 Mon Sep 17 00:00:00 2001 From: Gao Chen Date: Wed, 23 Jul 2025 11:02:58 +0800 Subject: [PATCH 20/40] sw64: setup early_ioremap Select GENERIC_EARLY_IOREMAP for C4, which map IO space by fixmap. 
Signed-off-by: Gao Chen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/Kconfig | 1 + arch/sw_64/include/asm/Kbuild | 1 + arch/sw_64/include/asm/dmi.h | 4 ++-- arch/sw_64/include/asm/early_ioremap.h | 8 ++++++++ arch/sw_64/kernel/setup.c | 8 ++++++++ 5 files changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index 620699b359b2..db5ebfe253be 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -68,6 +68,7 @@ config SW64 select EDAC_SUPPORT select GENERIC_ARCH_TOPOLOGY select GENERIC_CLOCKEVENTS + select GENERIC_EARLY_IOREMAP if SW64_KERNEL_PAGE_TABLE select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO select GENERIC_IOREMAP if SW64_KERNEL_PAGE_TABLE select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP diff --git a/arch/sw_64/include/asm/Kbuild b/arch/sw_64/include/asm/Kbuild index bbd410bda0a4..beeffdb0188d 100644 --- a/arch/sw_64/include/asm/Kbuild +++ b/arch/sw_64/include/asm/Kbuild @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +generic-$(CONFIG_SW64_KERNEL_PAGE_TABLE) += early_ioremap.h generic-y += clkdev.h generic-y += export.h generic-y += mcs_spinlock.h diff --git a/arch/sw_64/include/asm/dmi.h b/arch/sw_64/include/asm/dmi.h index 05e80c9a3a76..6cf70a01475d 100644 --- a/arch/sw_64/include/asm/dmi.h +++ b/arch/sw_64/include/asm/dmi.h @@ -23,8 +23,8 @@ /* Use early IO mappings for DMI because it's initialized early */ #define dmi_early_remap(x, l) early_ioremap(x, l) #define dmi_early_unmap(x, l) early_iounmap(x, l) -#define dmi_remap(x, l) early_ioremap(x, l) -#define dmi_unmap(x) early_iounmap(x, 0) +#define dmi_remap(x, l) ioremap(x, l) +#define dmi_unmap(x) iounmap(x) #define dmi_alloc(l) kzalloc(l, GFP_KERNEL) #endif /* _ASM_SW64_DMI_H */ diff --git a/arch/sw_64/include/asm/early_ioremap.h b/arch/sw_64/include/asm/early_ioremap.h index 172b96a401cb..dd57d34b26f4 100644 --- a/arch/sw_64/include/asm/early_ioremap.h +++ b/arch/sw_64/include/asm/early_ioremap.h @@ -5,6 +5,8 @@ #include #include +#ifndef 
CONFIG_GENERIC_EARLY_IOREMAP + static inline void __iomem * early_ioremap(unsigned long phys_addr, unsigned long size) { @@ -26,4 +28,10 @@ static inline void early_iounmap(volatile void __iomem *addr, unsigned long size } #define early_memunmap(addr, size) early_iounmap(addr, size) +#else + +#include + +#endif /* !CONFIG_GENERIC_EARLY_IOREMAP */ + #endif /* _ASM_SW64_EARLY_IOREMAP_H */ diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c index 8cb5e97fc280..f06204022c9f 100644 --- a/arch/sw_64/kernel/setup.c +++ b/arch/sw_64/kernel/setup.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -676,6 +677,9 @@ setup_arch(char **cmdline_p) trap_init(); early_paging_init(); +#ifdef CONFIG_GENERIC_EARLY_IOREMAP + early_ioremap_setup(); +#endif setup_cpu_features(); @@ -737,6 +741,10 @@ setup_arch(char **cmdline_p) if (acpi_disabled) device_tree_init(); +#ifdef CONFIG_GENERIC_EARLY_IOREMAP + early_ioremap_reset(); +#endif + setup_smp(); sw64_numa_init(); -- Gitee From 9d6a671bbada45296e8146ae5afde2177d24ef4b Mon Sep 17 00:00:00 2001 From: Gao Chen Date: Mon, 13 Oct 2025 15:07:49 +0800 Subject: [PATCH 21/40] sw64: map legacy I/O to K segment Map legacy I/O regions for LPC and PCI into the K segment to allow access via __va(). Signed-off-by: Gao Chen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/mm/init.c | 31 +++++++++++++++++++++++++++++ drivers/pci/controller/pci-sunway.c | 4 ++++ 2 files changed, 35 insertions(+) diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c index 433b5be03573..b61b29f276b7 100644 --- a/arch/sw_64/mm/init.c +++ b/arch/sw_64/mm/init.c @@ -408,6 +408,35 @@ static void __init fixmap_init(pgd_t *pgdir) if (pmd_none(*pmdp)) pmd_populate(NULL, pmdp, virt_to_page(fixmap_pte)); } + +/* + * Map legacy io to K segmemt in advance. 
+ */ +extern unsigned long legacy_io_base; +extern unsigned long legacy_io_shift; +static void __init map_legacy_io(pgd_t *pgdir) +{ + unsigned long pci_io_start; + unsigned long lpc_legacy_io_start = LPC_LEGACY_IO; + unsigned long legacy_io_start = legacy_io_base; + unsigned long size = 0x10000; + unsigned long i, j; + pgprot_t prot_none; + + prot_none = __pgprot(pgprot_val(PAGE_KERNEL_READONLY) | _PAGE_FOW); + + for (i = 0; i < 2; i++) { + for (j = 0; j < 6; j++) { + pci_io_start = SW64_PCI_IO_BASE(i, j) | PCI_LEGACY_IO; + create_pgd_mapping(pgdir, (unsigned long)__va(pci_io_start), pci_io_start, + size, prot_none, pgtable_alloc_fixmap); + } + } + create_pgd_mapping(pgdir, (unsigned long)__va(legacy_io_start), legacy_io_start, + size << legacy_io_shift, PAGE_KERNEL_NOEXEC, pgtable_alloc_fixmap); + create_pgd_mapping(pgdir, (unsigned long)__va(lpc_legacy_io_start), lpc_legacy_io_start, + size, PAGE_KERNEL_NOEXEC, pgtable_alloc_fixmap); +} #endif /* CONFIG_SW64_KERNEL_PAGE_TABLE */ /* @@ -474,6 +503,8 @@ void __init paging_init(void) fixmap_init(pgdir); + map_legacy_io(pgdir); + create_pgd_mapping(pgdir, sw64_vcpucb_start, __pa(sw64_vcpucb_start), sw64_vcpucb_size, PAGE_KERNEL_NOEXEC, pgtable_alloc_fixmap); #ifdef CONFIG_SW64_RRU diff --git a/drivers/pci/controller/pci-sunway.c b/drivers/pci/controller/pci-sunway.c index d8fc6b280120..7225f6f48903 100644 --- a/drivers/pci/controller/pci-sunway.c +++ b/drivers/pci/controller/pci-sunway.c @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -865,6 +866,9 @@ static int pci_prepare_controller(struct pci_controller *hose, hose->sparse_io_base = 0; hose->dense_mem_base = props[PROP_PCIE_IO_BASE]; hose->dense_io_base = props[PROP_EP_IO_BASE]; +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE + set_memory_rw((unsigned long)__va(hose->dense_io_base), 0x10000 >> PAGE_SHIFT); +#endif if (!is_guest_or_emul()) { hose->rc_config_space_base = ioremap(props[PROP_RC_CONFIG_BASE], SUNWAY_RC_SIZE); -- Gitee From 
34f7286a904f6a384b1a4629b7315a4088716f74 Mon Sep 17 00:00:00 2001 From: Lei Yilong Date: Thu, 16 Oct 2025 13:42:02 +0800 Subject: [PATCH 22/40] sw64: use ioremap to map IO address Readq is no longer used in function get_vt_smp_info(). Use ioremap to map the IO physical address before accessing the IO virtual address. Signed-off-by: Lei Yilong Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/kernel/topology.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/sw_64/kernel/topology.c b/arch/sw_64/kernel/topology.c index 76b80593a9d1..605a3e73953b 100644 --- a/arch/sw_64/kernel/topology.c +++ b/arch/sw_64/kernel/topology.c @@ -22,13 +22,16 @@ static void __init get_vt_smp_info(void) { unsigned long smp_info; void __iomem *spbu_base = misc_platform_get_spbu_base(0); + unsigned long *smp_info_addr; - smp_info = readq(spbu_base + OFFSET_SMP_INFO); + smp_info_addr = ioremap((phys_addr_t)(__pa(spbu_base) + OFFSET_SMP_INFO), 0x80); + smp_info = *smp_info_addr; if (smp_info == -1UL) smp_info = 0; topo_nr_threads = (smp_info >> VT_THREADS_SHIFT) & VT_THREADS_MASK; topo_nr_cores = (smp_info >> VT_CORES_SHIFT) & VT_CORES_MASK; topo_nr_maxcpus = (smp_info >> VT_MAX_CPUS_SHIFT) & VT_MAX_CPUS_MASK; + iounmap(smp_info_addr); } static void __init init_topo_threads(void) -- Gitee From 3f274152ef85a47e5c62c7e038465b17b2368d05 Mon Sep 17 00:00:00 2001 From: Gu Yuchen Date: Fri, 31 Oct 2025 17:41:27 +0800 Subject: [PATCH 23/40] sw64: move non-boot core's kernel page table enabling logic to hmcode Currently, the enabling of non-boot core's kernel page table is in smp_callin. Accessing __smp_callin can result in a PTINVALID error because kernel page table is not enabled for non-boot cores. Move non-boot core's kernel page table enabling logic before calling __smp_callin to hmcode, so that non-boot core is already running in kernel page table mode when jumping to __smp_callin. 
Signed-off-by: Gu Yuchen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/smp.h | 1 + arch/sw_64/kernel/smp.c | 14 ++++++-------- arch/sw_64/mm/init.c | 5 +++++ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/sw_64/include/asm/smp.h b/arch/sw_64/include/asm/smp.h index 0b1ebf2143f2..cdd9b29a0792 100644 --- a/arch/sw_64/include/asm/smp.h +++ b/arch/sw_64/include/asm/smp.h @@ -35,6 +35,7 @@ struct smp_rcb_struct { unsigned long ready; unsigned long init_done; unsigned long feat_vint; + unsigned long ptbr; }; extern bool __init is_rcid_duplicate(int rcid); diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c index 34f9d0ebbc82..0c11735d39b1 100644 --- a/arch/sw_64/kernel/smp.c +++ b/arch/sw_64/kernel/smp.c @@ -186,14 +186,6 @@ void smp_callin(void) complete(&cpu_running); -#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE - /* switch to paging mode */ - if (sunway_support_kpt) { - set_atc(ATC_PAGE); - tbiv(); - } -#endif - /* Must have completely accurate bogos. 
*/ local_irq_enable(); @@ -281,6 +273,12 @@ void __init smp_rcb_init(struct smp_rcb_struct *smp_rcb_base_addr) /* Setup SMP_RCB fields that uses to activate secondary CPU */ smp_rcb->restart_entry = __smp_callin; smp_rcb->init_done = 0xDEADBEEFUL; +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE + if (sunway_support_kpt) { + smp_rcb->init_done = 0x2025DEADBEEFUL; + smp_rcb->ptbr = virt_to_phys(init_mm.pgd); + } +#endif mb(); } diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c index b61b29f276b7..8bd24cdfeef4 100644 --- a/arch/sw_64/mm/init.c +++ b/arch/sw_64/mm/init.c @@ -497,6 +497,8 @@ void __init paging_init(void) unsigned long init_size = (unsigned long)__init_end - init_start; unsigned long data_start = (unsigned long)_sdata; unsigned long data_size = (unsigned long)_end - data_start; + unsigned long sw64_guest_reset_start = (unsigned long)(__va(0x10000)); + unsigned long sw64_guest_reset_size = PAGE_SIZE; pgd_t *pgdir = (&init_mm)->pgd; phys_addr_t start, end; u64 i; @@ -527,6 +529,9 @@ void __init paging_init(void) PAGE_KERNEL, pgtable_alloc_fixmap); create_pgd_mapping(pgdir, data_start, __pa(data_start), data_size, PAGE_KERNEL_NOEXEC, pgtable_alloc_fixmap); + if (is_in_guest()) + create_pgd_mapping(pgdir, sw64_guest_reset_start, __pa(sw64_guest_reset_start), sw64_guest_reset_size, + PAGE_KERNEL_NOEXEC, pgtable_alloc_fixmap); memblock_mark_nomap(__pa(sw64_reserve_start), __pa((unsigned long)_end - sw64_reserve_start)); -- Gitee From 890abcf72b1f92c7d650cc37b8a5249fe5178c53 Mon Sep 17 00:00:00 2001 From: Gu Yuchen Date: Fri, 31 Oct 2025 18:32:14 +0800 Subject: [PATCH 24/40] sw64: use ioremap to map IO address in functions like __get_cpu_nums() Use ioremap to map the IO physical address in functions like __get_cpu_nums() before accessing the IO virtual address. 
Signed-off-by: Gu Yuchen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/uncore_io_ops_junzhang.h | 10 ++++++++-- arch/sw_64/kernel/smp.c | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/sw_64/include/asm/uncore_io_ops_junzhang.h b/arch/sw_64/include/asm/uncore_io_ops_junzhang.h index 472c495ebcdf..74fe919f4528 100644 --- a/arch/sw_64/include/asm/uncore_io_ops_junzhang.h +++ b/arch/sw_64/include/asm/uncore_io_ops_junzhang.h @@ -11,13 +11,16 @@ static inline int __get_cpu_nums(void) int cpus; unsigned long cfg_info; void __iomem *spbu_base; + unsigned long *cfg_info_addr; spbu_base = misc_platform_get_spbu_base(0); - cfg_info = readq(spbu_base + OFFSET_CFG_INFO); + cfg_info_addr = ioremap((phys_addr_t)(__pa(spbu_base) + OFFSET_CFG_INFO), 0x8); + cfg_info = readq(cfg_info_addr); cfg_info = (cfg_info >> 33) & 0x3; cpus = 1 << cfg_info; + iounmap(cfg_info_addr); return cpus; } @@ -26,13 +29,16 @@ static inline unsigned long __get_node_mem(int node) unsigned long node_mem; unsigned long total_mem; void __iomem *spbu_base; + unsigned long *cfg_info_addr; spbu_base = misc_platform_get_spbu_base(node); - total_mem = readq(spbu_base + OFFSET_CFG_INFO) >> 3; + cfg_info_addr = ioremap((phys_addr_t)(__pa(spbu_base) + OFFSET_CFG_INFO), 0x8); + total_mem = readq(cfg_info_addr) >> 3; total_mem = (total_mem & 0xffff) << 28; node_mem = total_mem / __get_cpu_nums(); + iounmap(cfg_info_addr); return node_mem; } diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c index 0c11735d39b1..10b063ae881b 100644 --- a/arch/sw_64/kernel/smp.c +++ b/arch/sw_64/kernel/smp.c @@ -68,6 +68,8 @@ static void upshift_freq(void) { int i, cpu_num; void __iomem *spbu_base; + unsigned long *clu_lv2_selh_addr; + unsigned long *clu_lv2_sell_addr; if (is_guest_or_emul()) return; @@ -78,10 +80,14 @@ static void upshift_freq(void) cpu_num = sw64_chip->get_cpu_num(); for (i = 0; i < cpu_num; i++) { spbu_base = misc_platform_get_spbu_base(i); - 
writeq(-1UL, spbu_base + OFFSET_CLU_LV2_SELH); - writeq(-1UL, spbu_base + OFFSET_CLU_LV2_SELL); + clu_lv2_selh_addr = ioremap((phys_addr_t)(__pa(spbu_base) + OFFSET_CLU_LV2_SELH), 0x8); + clu_lv2_sell_addr = ioremap((phys_addr_t)(__pa(spbu_base) + OFFSET_CLU_LV2_SELL), 0x8); + writeq(-1UL, clu_lv2_selh_addr); + writeq(-1UL, clu_lv2_sell_addr); udelay(1000); } + iounmap(clu_lv2_selh_addr); + iounmap(clu_lv2_sell_addr); } static void downshift_freq(void) -- Gitee From 9785e935d9bcc7ab21d6211b13ff0b5b89998a51 Mon Sep 17 00:00:00 2001 From: Gao Chen Date: Thu, 24 Jul 2025 16:27:44 +0800 Subject: [PATCH 25/40] sw64: enable kernel page table Check if we support sunway kernel page table by cpuid(). Signed-off-by: Gao Chen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/cpu.h | 9 +++++++++ arch/sw_64/kernel/early_init.c | 1 + 2 files changed, 10 insertions(+) diff --git a/arch/sw_64/include/asm/cpu.h b/arch/sw_64/include/asm/cpu.h index 1697c2a3369e..8f0b7f3d31d8 100644 --- a/arch/sw_64/include/asm/cpu.h +++ b/arch/sw_64/include/asm/cpu.h @@ -25,6 +25,7 @@ enum hmcall_cpuid_cmd { #define CPU_FEAT_SIMD 0x2 #define CPU_FEAT_UNA 0x4 #define CPU_FEAT_VINT 0x8 +#define CPU_FEAT_KPT 0x10 #define CPU_FEAT_WP 0x20 enum sunway_cpu_model { @@ -73,4 +74,12 @@ static inline unsigned long get_cpu_freq(unsigned int cpu) extern bool sunway_support_kpt; +static inline void check_sunway_kpt_support(void) +{ + if (cpuid(GET_FEATURES, 0) & CPU_FEAT_KPT) + sunway_support_kpt = true; + else + sunway_support_kpt = false; +} + #endif /* _ASM_SW64_CPU_H */ diff --git a/arch/sw_64/kernel/early_init.c b/arch/sw_64/kernel/early_init.c index 8fb15a9e385c..d5e3d578211d 100644 --- a/arch/sw_64/kernel/early_init.c +++ b/arch/sw_64/kernel/early_init.c @@ -8,6 +8,7 @@ bool sunway_support_kpt; asmlinkage __visible void __init sw64_start_kernel(void) { + check_sunway_kpt_support(); fixup_hmcall(); save_ktp(); start_kernel(); -- Gitee From 05c4d0dad895893c2441c958daf158cede9ab81d Mon 
Sep 17 00:00:00 2001 From: Gao Chen Date: Mon, 26 Jan 2026 08:46:11 +0800 Subject: [PATCH 26/40] sw64: fix an inappropriate lock in __set_memory Using mmap_read_lock() in __set_memory is inappropriate, and this patch modifies it to mmap_write_lock(). Signed-off-by: Gao Chen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/mm/pageattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sw_64/mm/pageattr.c b/arch/sw_64/mm/pageattr.c index 2949fa7b42dd..0d55065fbe54 100644 --- a/arch/sw_64/mm/pageattr.c +++ b/arch/sw_64/mm/pageattr.c @@ -81,10 +81,10 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, if (!numpages) return 0; - mmap_read_lock(&init_mm); + mmap_write_lock(&init_mm); ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, &masks); - mmap_read_unlock(&init_mm); + mmap_write_unlock(&init_mm); local_flush_tlb_all(); -- Gitee From bd75dcb3c6837fa776d6b941602c33f3508f2974 Mon Sep 17 00:00:00 2001 From: Gao Chen Date: Tue, 27 Jan 2026 13:41:03 +0800 Subject: [PATCH 27/40] sw64: adapt kernel page table for S4 Fix kernel page table adaptation for hibernation with the following adjustmemts: - PCI reserved space and memmap reserved space are not saved. - Use restore code page to prevent self-overwriting of the code during resume. - Reserve extra soft_csrs, gp, uts_version, header_entry and csr_atc in hibernate header. - Switch csr_atc to KSEG to ensure access permission during resume. 
Signed-off-by: Gao Chen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/Kconfig | 3 + arch/sw_64/include/asm/csr.h | 159 ++++++++++++++++++++++++++++++ arch/sw_64/include/asm/suspend.h | 2 + arch/sw_64/kernel/asm-offsets.c | 1 + arch/sw_64/kernel/hibernate.c | 104 ++++++++++++++++--- arch/sw_64/kernel/hibernate_asm.S | 20 ++-- arch/sw_64/kernel/setup.c | 22 +++++ 7 files changed, 292 insertions(+), 19 deletions(-) diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index db5ebfe253be..a6987ccf1949 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -702,6 +702,9 @@ config ARCH_HIBERNATION_POSSIBLE depends on SW64 def_bool y +config ARCH_HIBERNATION_HEADER + def_bool HIBERNATION + config SW64_POWERCAP bool "Sunway powercap driver" select IPMI_HANDLER diff --git a/arch/sw_64/include/asm/csr.h b/arch/sw_64/include/asm/csr.h index 39688acbda36..b879d15d9f80 100644 --- a/arch/sw_64/include/asm/csr.h +++ b/arch/sw_64/include/asm/csr.h @@ -82,6 +82,55 @@ #endif +#define SOFTCSR0 0xe0 +#define SOFTCSR1 0xe1 +#define SOFTCSR2 0xe2 +#define SOFTCSR3 0xe3 +#define SOFTCSR4 0xe4 +#define SOFTCSR5 0xe5 +#define SOFTCSR6 0xe6 +#define SOFTCSR7 0xe7 +#define SOFTCSR8 0xe8 +#define SOFTCSR9 0xe9 +#define SOFTCSR10 0xea +#define SOFTCSR11 0xeb +#define SOFTCSR12 0xec +#define SOFTCSR13 0xed +#define SOFTCSR14 0xee +#define SOFTCSR15 0xef +#define SOFTCSR16 0xf0 +#define SOFTCSR17 0xf1 +#define SOFTCSR18 0xf2 +#define SOFTCSR19 0xf3 +#define SOFTCSR20 0xf4 +#define SOFTCSR21 0xf5 +#define SOFTCSR22 0xf6 +#define SOFTCSR23 0xf7 +#define SOFTCSR24 0xf8 +#define SOFTCSR25 0xf9 +#define SOFTCSR26 0xfa +#define SOFTCSR27 0xfb +#define SOFTCSR28 0xfc +#define SOFTCSR29 0xfd +#define SOFTCSR30 0xfe +#define SOFTCSR31 0xff + +#define SOFTCSR32 0xd0 +#define SOFTCSR33 0xd1 +#define SOFTCSR34 0xd2 +#define SOFTCSR35 0xd3 +#define SOFTCSR36 0xd4 +#define SOFTCSR37 0xd5 +#define SOFTCSR38 0xd6 +#define SOFTCSR39 0xd7 +#define SOFTCSR40 0xd8 +#define SOFTCSR41 0xd9 +#define 
SOFTCSR42 0xda +#define SOFTCSR43 0xdb +#define SOFTCSR44 0xdc +#define SOFTCSR45 0xdd +#define SOFTCSR46 0xde +#define SOFTCSR47 0xdf #ifdef CONFIG_HAVE_CSRRW #ifndef __ASSEMBLY__ @@ -130,6 +179,116 @@ static inline void update_ptbr_sys(unsigned long ptbr) wrptbr(ptbr); } #endif +#endif /* CONFIG_HAVE_CSRRW */ + +#ifndef __ASSEMBLY__ +struct soft_csrs { + unsigned long sc[48]; +}; +static inline void save_all_soft_csrs(struct soft_csrs *sc) +{ + sc->sc[0] = sw64_read_csr(SOFTCSR0); + sc->sc[1] = sw64_read_csr(SOFTCSR1); + sc->sc[2] = sw64_read_csr(SOFTCSR2); + sc->sc[3] = sw64_read_csr(SOFTCSR3); + sc->sc[4] = sw64_read_csr(SOFTCSR4); + sc->sc[5] = sw64_read_csr(SOFTCSR5); + sc->sc[6] = sw64_read_csr(SOFTCSR6); + sc->sc[7] = sw64_read_csr(SOFTCSR7); + sc->sc[8] = sw64_read_csr(SOFTCSR8); + sc->sc[9] = sw64_read_csr(SOFTCSR9); + sc->sc[10] = sw64_read_csr(SOFTCSR10); + sc->sc[11] = sw64_read_csr(SOFTCSR11); + sc->sc[12] = sw64_read_csr(SOFTCSR12); + sc->sc[13] = sw64_read_csr(SOFTCSR13); + sc->sc[14] = sw64_read_csr(SOFTCSR14); + sc->sc[15] = sw64_read_csr(SOFTCSR15); + sc->sc[16] = sw64_read_csr(SOFTCSR16); + sc->sc[17] = sw64_read_csr(SOFTCSR17); + sc->sc[18] = sw64_read_csr(SOFTCSR18); + sc->sc[19] = sw64_read_csr(SOFTCSR19); + sc->sc[20] = sw64_read_csr(SOFTCSR20); + sc->sc[21] = sw64_read_csr(SOFTCSR21); + sc->sc[22] = sw64_read_csr(SOFTCSR22); + sc->sc[23] = sw64_read_csr(SOFTCSR23); + sc->sc[24] = sw64_read_csr(SOFTCSR24); + sc->sc[25] = sw64_read_csr(SOFTCSR25); + sc->sc[26] = sw64_read_csr(SOFTCSR26); + sc->sc[27] = sw64_read_csr(SOFTCSR27); + sc->sc[28] = sw64_read_csr(SOFTCSR28); + sc->sc[29] = sw64_read_csr(SOFTCSR29); + sc->sc[30] = sw64_read_csr(SOFTCSR30); + sc->sc[31] = sw64_read_csr(SOFTCSR31); + sc->sc[32] = sw64_read_csr(SOFTCSR32); + sc->sc[33] = sw64_read_csr(SOFTCSR33); + sc->sc[34] = sw64_read_csr(SOFTCSR34); + sc->sc[35] = sw64_read_csr(SOFTCSR35); + sc->sc[36] = sw64_read_csr(SOFTCSR36); + sc->sc[37] = sw64_read_csr(SOFTCSR37); + sc->sc[38] 
= sw64_read_csr(SOFTCSR38); + sc->sc[39] = sw64_read_csr(SOFTCSR39); + sc->sc[40] = sw64_read_csr(SOFTCSR40); + sc->sc[41] = sw64_read_csr(SOFTCSR41); + sc->sc[42] = sw64_read_csr(SOFTCSR42); + sc->sc[43] = sw64_read_csr(SOFTCSR43); + sc->sc[44] = sw64_read_csr(SOFTCSR44); + sc->sc[45] = sw64_read_csr(SOFTCSR45); + sc->sc[46] = sw64_read_csr(SOFTCSR46); + sc->sc[47] = sw64_read_csr(SOFTCSR47); +} + +static inline void restore_all_soft_csrs(struct soft_csrs *sc) +{ + sw64_write_csr(sc->sc[0], SOFTCSR0); + sw64_write_csr(sc->sc[1], SOFTCSR1); + sw64_write_csr(sc->sc[2], SOFTCSR2); + sw64_write_csr(sc->sc[3], SOFTCSR3); + sw64_write_csr(sc->sc[4], SOFTCSR4); + sw64_write_csr(sc->sc[5], SOFTCSR5); + sw64_write_csr(sc->sc[6], SOFTCSR6); + sw64_write_csr(sc->sc[7], SOFTCSR7); + sw64_write_csr(sc->sc[8], SOFTCSR8); + sw64_write_csr(sc->sc[9], SOFTCSR9); + sw64_write_csr(sc->sc[10], SOFTCSR10); + sw64_write_csr(sc->sc[11], SOFTCSR11); + sw64_write_csr(sc->sc[12], SOFTCSR12); + sw64_write_csr(sc->sc[13], SOFTCSR13); + sw64_write_csr(sc->sc[14], SOFTCSR14); + sw64_write_csr(sc->sc[15], SOFTCSR15); + sw64_write_csr(sc->sc[16], SOFTCSR16); + sw64_write_csr(sc->sc[17], SOFTCSR17); + sw64_write_csr(sc->sc[18], SOFTCSR18); + sw64_write_csr(sc->sc[19], SOFTCSR19); + sw64_write_csr(sc->sc[20], SOFTCSR20); + sw64_write_csr(sc->sc[21], SOFTCSR21); + sw64_write_csr(sc->sc[22], SOFTCSR22); + sw64_write_csr(sc->sc[23], SOFTCSR23); + sw64_write_csr(sc->sc[24], SOFTCSR24); + sw64_write_csr(sc->sc[25], SOFTCSR25); + sw64_write_csr(sc->sc[26], SOFTCSR26); + sw64_write_csr(sc->sc[27], SOFTCSR27); + sw64_write_csr(sc->sc[28], SOFTCSR28); + sw64_write_csr(sc->sc[29], SOFTCSR29); + sw64_write_csr(sc->sc[30], SOFTCSR30); + sw64_write_csr(sc->sc[31], SOFTCSR31); + sw64_write_csr(sc->sc[32], SOFTCSR32); + sw64_write_csr(sc->sc[33], SOFTCSR33); + sw64_write_csr(sc->sc[34], SOFTCSR34); + sw64_write_csr(sc->sc[35], SOFTCSR35); + sw64_write_csr(sc->sc[36], SOFTCSR36); + sw64_write_csr(sc->sc[37], 
SOFTCSR37); + sw64_write_csr(sc->sc[38], SOFTCSR38); + sw64_write_csr(sc->sc[39], SOFTCSR39); + sw64_write_csr(sc->sc[40], SOFTCSR40); + sw64_write_csr(sc->sc[41], SOFTCSR41); + sw64_write_csr(sc->sc[42], SOFTCSR42); + sw64_write_csr(sc->sc[43], SOFTCSR43); + sw64_write_csr(sc->sc[44], SOFTCSR44); + sw64_write_csr(sc->sc[45], SOFTCSR45); + sw64_write_csr(sc->sc[46], SOFTCSR46); + sw64_write_csr(sc->sc[47], SOFTCSR47); +} #endif + #endif /* _ASM_SW64_CSR_H */ diff --git a/arch/sw_64/include/asm/suspend.h b/arch/sw_64/include/asm/suspend.h index b76db1580543..127a7481c34f 100644 --- a/arch/sw_64/include/asm/suspend.h +++ b/arch/sw_64/include/asm/suspend.h @@ -41,7 +41,9 @@ struct processor_state { unsigned long ktp; #ifdef CONFIG_HIBERNATION unsigned long sp; + unsigned long gp; struct vcpucb vcb; + struct soft_csrs sc; #endif }; diff --git a/arch/sw_64/kernel/asm-offsets.c b/arch/sw_64/kernel/asm-offsets.c index 1a68486da270..dd728be0820f 100644 --- a/arch/sw_64/kernel/asm-offsets.c +++ b/arch/sw_64/kernel/asm-offsets.c @@ -38,6 +38,7 @@ void foo(void) OFFSET(PSTATE_KTP, processor_state, ktp); #ifdef CONFIG_HIBERNATION OFFSET(PSTATE_SP, processor_state, sp); + OFFSET(PSTATE_GP, processor_state, gp); #endif OFFSET(PBE_ADDR, pbe, address); OFFSET(PBE_ORIG_ADDR, pbe, orig_address); diff --git a/arch/sw_64/kernel/hibernate.c b/arch/sw_64/kernel/hibernate.c index e84f93762f13..eaef5b4a76d2 100644 --- a/arch/sw_64/kernel/hibernate.c +++ b/arch/sw_64/kernel/hibernate.c @@ -1,16 +1,42 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include +#include + #include #include +#include struct processor_state hibernate_state; /* Defined in hibernate_asm.S */ extern int restore_image(void); +extern int trampoline_restore_image(unsigned long code, + unsigned long processor_state_addr); + +/** + * struct arch_hibernate_hdr_invariants - container to store kernel build version. + * @uts_version: put the build number and date so that we do not resume with a + * different kernel. 
+ */ +struct arch_hibernate_hdr_invariants { + char uts_version[__NEW_UTS_LEN + 1]; +}; + +/** + * struct arch_hibernate_hdr - helper paramenters that help us to restore the image. + * @invariants: container to store kernel build version. + * @processor_state_addr: address of image where the processor_state is saved. + */ +static struct arch_hibernate_hdr { + struct arch_hibernate_hdr_invariants invariants; + unsigned long processor_state_addr; +} resume_hdr; void save_processor_state(void) { struct vcpucb *vcb = &(hibernate_state.vcb); + struct soft_csrs *sc = &(hibernate_state.sc); vcb->ksp = rdksp(); vcb->usp = rdusp(); @@ -20,12 +46,15 @@ void save_processor_state(void) #elif defined(CONFIG_SUBARCH_C4) vcb->ptbr_usr = sw64_read_csr(CSR_PTBR_USR); vcb->ptbr_sys = sw64_read_csr(CSR_PTBR_SYS); + vcb->atc = get_atc(); + save_all_soft_csrs(sc); #endif } void restore_processor_state(void) { struct vcpucb *vcb = &(hibernate_state.vcb); + struct soft_csrs *sc = &(hibernate_state.sc); wrksp(vcb->ksp); wrusp(vcb->usp); @@ -35,16 +64,53 @@ void restore_processor_state(void) #elif defined(CONFIG_SUBARCH_C4) sw64_write_csr_imb(vcb->ptbr_usr, CSR_PTBR_USR); sw64_write_csr_imb(vcb->ptbr_sys, CSR_PTBR_SYS); + set_atc(vcb->atc); + restore_all_soft_csrs(sc); #endif sflush(); tbiv(); } +static unsigned long prepare_restore_code_page(void) +{ + void *page; + + /* + * Memory allocated by get_safe_page() will be handled by the hibernate code, so + * there is no need to free it here. + */ + page = (void *)get_safe_page(GFP_ATOMIC); + if (!page) + return -ENOMEM; + copy_page(page, restore_image); + + /* Make sure this page is executable. */ + set_memory_x((unsigned long)page, 1); + + return (unsigned long)page; +} + int swsusp_arch_resume(void) { - restore_image(); + unsigned long relocated_restore_code; + +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE + /* + * To ensure executable and writeable permission for the pages during resuming, + * set CSR_ATC to KSEG. 
Then restore_processor_state() will restore CSR_ATC to + * the value saved in the image. + */ + set_atc(ATC_KSEG); +#endif + + /* Prepare a retore code page so that it doesn't get overwritten by itself. */ + relocated_restore_code = prepare_restore_code_page(); + + trampoline_restore_image(relocated_restore_code, resume_hdr.processor_state_addr); + return 0; } + /* References to section boundaries */ extern const void __nosave_begin, __nosave_end; int pfn_is_nosave(unsigned long pfn) @@ -55,26 +121,30 @@ int pfn_is_nosave(unsigned long pfn) return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); } -struct restore_data_record { - unsigned long magic; -}; - -#define RESTORE_MAGIC 0x0123456789ABCDEFUL +static void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i) +{ + memset(i, 0, sizeof(*i)); + memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version)); +} /** * arch_hibernation_header_save - populate the architecture specific part - * of a hibernation image header + * of a hibernation image header, and save helper paramenters. * @addr: address to save the data at */ int arch_hibernation_header_save(void *addr, unsigned int max_size) { - struct restore_data_record *rdr = addr; + struct arch_hibernate_hdr *hdr = addr; - if (max_size < sizeof(struct restore_data_record)) + if (max_size < sizeof(*hdr)) return -EOVERFLOW; - rdr->magic = RESTORE_MAGIC; + + arch_hdr_invariants(&hdr->invariants); + hdr->processor_state_addr = (unsigned long)&hibernate_state; + return 0; } +EXPORT_SYMBOL(arch_hibernation_header_save); /** * arch_hibernation_header_restore - read the architecture specific data @@ -83,7 +153,17 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size) */ int arch_hibernation_header_restore(void *addr) { - struct restore_data_record *rdr = addr; + struct arch_hibernate_hdr_invariants invariants; + struct arch_hibernate_hdr *hdr = addr; - return (rdr->magic == RESTORE_MAGIC) ? 
0 : -EINVAL; + arch_hdr_invariants(&invariants); + if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) { + pr_crit("Hibernate image not generated by this kernel!\n"); + return -EINVAL; + } + + resume_hdr = *hdr; + + return 0; } +EXPORT_SYMBOL(arch_hibernation_header_restore); diff --git a/arch/sw_64/kernel/hibernate_asm.S b/arch/sw_64/kernel/hibernate_asm.S index 0655efc59a25..134bc0d2a212 100644 --- a/arch/sw_64/kernel/hibernate_asm.S +++ b/arch/sw_64/kernel/hibernate_asm.S @@ -26,6 +26,7 @@ ENTRY(swsusp_arch_suspend) stl $14, CALLEE_R14($1) stl $15, CALLEE_R15($1) stl $26, CALLEE_RA($1) + /* SIMD-FP */ ldi $1, PSTATE_FPREGS($16) vstd $f2, CALLEE_F2($1) @@ -41,6 +42,7 @@ ENTRY(swsusp_arch_suspend) stl $8, PSTATE_KTP($16) stl sp, PSTATE_SP($16) + stl gp, PSTATE_GP($16) call swsusp_save ldi $16, hibernate_state ldi $1, PSTATE_REGS($16) @@ -50,6 +52,7 @@ ENTRY(swsusp_arch_suspend) ret END(swsusp_arch_suspend) +/* The restore_image code will be copied to a 'safe' page and executed there. 
*/ ENTRY(restore_image) /* prepare to copy image data to their original locations */ ldi t0, restore_pblist @@ -74,13 +77,11 @@ $cpyloop: ldl t0, PBE_NEXT(t0) bne t0, $loop $done: - /* tell the hibernation core that we've just restored the memory */ ldi $0, in_suspend stl $31, 0($0) - ldi $16, hibernate_state - ldi $1, PSTATE_REGS($16) + ldi $1, PSTATE_REGS($17) ldl $9, CALLEE_R9($1) ldl $10, CALLEE_R10($1) @@ -91,7 +92,7 @@ $done: ldl $15, CALLEE_R15($1) ldl $26, CALLEE_RA($1) /* SIMD-FP */ - fldd $f0, PSTATE_FPCR($16) + fldd $f0, PSTATE_FPCR($17) wfpcr $f0 fimovd $f0, $2 and $2, 0x3, $2 @@ -111,7 +112,7 @@ $hibernate_setfpec_1: $hibernate_setfpec_2: setfpec2 $hibernate_setfpec_over: - ldi $1, PSTATE_FPREGS($16) + ldi $1, PSTATE_FPREGS($17) vldd $f2, CALLEE_F2($1) vldd $f3, CALLEE_F3($1) vldd $f4, CALLEE_F4($1) @@ -121,10 +122,15 @@ $hibernate_setfpec_over: vldd $f8, CALLEE_F8($1) vldd $f9, CALLEE_F9($1) - ldl sp, PSTATE_SP($16) - ldl $8, PSTATE_KTP($16) + ldl sp, PSTATE_SP($17) + ldl gp, PSTATE_GP($17) + ldl $8, PSTATE_KTP($17) SAVE_KTP ldi $0, 0($31) ret END(restore_image) + +ENTRY(trampoline_restore_image) + call $26, ($16) +END(trampoline_restore_image) diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c index f06204022c9f..655b3b851e08 100644 --- a/arch/sw_64/kernel/setup.c +++ b/arch/sw_64/kernel/setup.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -333,6 +334,21 @@ int __init add_memmap_region(u64 addr, u64 size, enum memmap_types type) return 0; } +static void __init memmap_nosave_init(void) +{ + int i; + phys_addr_t start, end; + + for (i = 0; i < memmap_nr; i++) { + if (memmap_map[i].type == memmap_reserved || + memmap_map[i].type == memmap_pci) { + start = memmap_map[i].addr; + end = start + memmap_map[i].size; + register_nosave_region(PFN_DOWN(start), PFN_UP(end)); + } + } +} + static struct resource* __init insert_ram_resource(u64 start, u64 end, bool reserved) { @@ -732,6 +748,12 @@ setup_arch(char 
**cmdline_p) callback_init(); + /* + * After linear mapping is established, register no-save regions to ensure + * these spaces are unsaveable during hibernation. + */ + memmap_nosave_init(); + /* Try to upgrade ACPI tables via initrd */ acpi_table_upgrade(); -- Gitee From 222a0fad6da3611a11421876298c1d10bc36a9ff Mon Sep 17 00:00:00 2001 From: Gu Yuchen Date: Tue, 27 Jan 2026 09:28:29 +0800 Subject: [PATCH 28/40] sw64: adjust the marking and definitions of some kernel mapping functions Currently, the create_pgd_mapping function is marked as __init type, making it inaccessible after the initialization phase. Add a non-__init interface __create_pgd_mapping. The same applies to the pgtable_alloc_late function, and its declaration is moved to mmu.h. Signed-off-by: Gu Yuchen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/mmu.h | 4 ++++ arch/sw_64/kernel/efi.c | 5 ----- arch/sw_64/kernel/smp.c | 3 +-- arch/sw_64/mm/init.c | 31 ++++++++++++++++++++++--------- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/arch/sw_64/include/asm/mmu.h b/arch/sw_64/include/asm/mmu.h index 395df786d114..da9f268baeee 100644 --- a/arch/sw_64/include/asm/mmu.h +++ b/arch/sw_64/include/asm/mmu.h @@ -12,6 +12,10 @@ typedef struct { void create_pgd_mapping(pgd_t *pgdir, unsigned long virt, unsigned long phys, unsigned long size, pgprot_t prot, void *(*pgtable_alloc)(void)); + +void * __init pgtable_alloc_fixmap(void); +void *pgtable_alloc_late(void); + #endif #endif /* _ASM_SW64_MMU_H */ diff --git a/arch/sw_64/kernel/efi.c b/arch/sw_64/kernel/efi.c index ac6d8dd42a71..4120b53a14ac 100644 --- a/arch/sw_64/kernel/efi.c +++ b/arch/sw_64/kernel/efi.c @@ -42,11 +42,6 @@ static __init pgprot_t create_mapping_protection(efi_memory_desc_t *md) return PAGE_KERNEL; } -static void * __init pgtable_alloc_late(void) -{ - return (void *)__get_free_page(GFP_KERNEL); -} - int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) { pgprot_t prot = 
create_mapping_protection(md); diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c index 10b063ae881b..07047d0d0ae1 100644 --- a/arch/sw_64/kernel/smp.c +++ b/arch/sw_64/kernel/smp.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -262,8 +263,6 @@ static void __init process_nr_cpu_ids(void) nr_cpu_ids = num_possible_cpus(); } -extern void * __init pgtable_alloc_fixmap(void); - void __init smp_rcb_init(struct smp_rcb_struct *smp_rcb_base_addr) { if (smp_rcb != NULL) diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c index 8bd24cdfeef4..d95403371a72 100644 --- a/arch/sw_64/mm/init.c +++ b/arch/sw_64/mm/init.c @@ -180,19 +180,19 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) local_flush_tlb_all(); } -static pte_t *__init get_pte_virt_fixmap(phys_addr_t phys) +static pte_t *get_pte_virt_fixmap(phys_addr_t phys) { clear_fixmap(FIX_PTE); return (pte_t *)set_fixmap_offset(FIX_PTE, phys); } -static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t phys) +static pmd_t *get_pmd_virt_fixmap(phys_addr_t phys) { clear_fixmap(FIX_PMD); return (pmd_t *)set_fixmap_offset(FIX_PMD, phys); } -static pud_t *__init get_pud_virt_fixmap(phys_addr_t phys) +static pud_t *get_pud_virt_fixmap(phys_addr_t phys) { clear_fixmap(FIX_PUD); return (pud_t *)set_fixmap_offset(FIX_PUD, phys); @@ -203,7 +203,12 @@ void * __init pgtable_alloc_fixmap(void) return (void *)__va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE)); } -static void __init +void *pgtable_alloc_late(void) +{ + return (void *)__get_free_page(GFP_KERNEL); +} + +static void create_pte_mapping(pte_t *pte_first, unsigned long virt, unsigned long phys, unsigned long size, pgprot_t prot) { @@ -222,7 +227,7 @@ create_pte_mapping(pte_t *pte_first, unsigned long virt, unsigned long phys, } } -static void __init +static void create_pmd_mapping(pmd_t *pmd_first, unsigned long virt, unsigned long phys, unsigned long size, pgprot_t prot, void *(*pgtable_alloc)(void)) @@ -256,7 
+261,7 @@ create_pmd_mapping(pmd_t *pmd_first, unsigned long virt, unsigned long phys, } } -static void __init +static void create_cont_pmd_mapping(pmd_t *pmd_first, unsigned long virt, unsigned long phys, unsigned long size, pgprot_t prot, void *(*pgtable_alloc)(void)) @@ -285,7 +290,7 @@ create_cont_pmd_mapping(pmd_t *pmd_first, unsigned long virt, } } -static void __init +static void create_pud_mapping(pud_t *pud_first, unsigned long virt, unsigned long phys, unsigned long size, pgprot_t prot, void *(*pgtable_alloc)(void)) @@ -319,8 +324,8 @@ create_pud_mapping(pud_t *pud_first, unsigned long virt, unsigned long phys, } } -void __init -create_pgd_mapping(pgd_t *pgdir, unsigned long virt, unsigned long phys, +static void +__create_pgd_mapping(pgd_t *pgdir, unsigned long virt, unsigned long phys, unsigned long size, pgprot_t prot, void *(*pgtable_alloc)(void)) { @@ -353,6 +358,14 @@ create_pgd_mapping(pgd_t *pgdir, unsigned long virt, unsigned long phys, clear_fixmap(FIX_PUD); } +void __init +create_pgd_mapping(pgd_t *pgdir, unsigned long virt, unsigned long phys, + unsigned long size, pgprot_t prot, + void *(*pgtable_alloc)(void)) +{ + __create_pgd_mapping(pgdir, virt, phys, size, prot, pgtable_alloc); +} + static void __init early_create_pmd(pgd_t *pgdir, pud_t *pud, pmd_t *pmd, unsigned long start_va, unsigned long size, unsigned long pa) { -- Gitee From 0dbeaa2cc7c08189982dc91ef6c33885167d6182 Mon Sep 17 00:00:00 2001 From: Gu Yuchen Date: Thu, 20 Nov 2025 11:04:04 +0800 Subject: [PATCH 29/40] sw64: set execute permission for virtual machine's hmcode text section The execute permission for the virtual machine's hmcode section is incorrectly set to non-executable before. This commit updates its permission to read and execute. 
Signed-off-by: Gu Yuchen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c index d95403371a72..9a28c84d218b 100644 --- a/arch/sw_64/mm/init.c +++ b/arch/sw_64/mm/init.c @@ -544,7 +544,7 @@ void __init paging_init(void) PAGE_KERNEL_NOEXEC, pgtable_alloc_fixmap); if (is_in_guest()) create_pgd_mapping(pgdir, sw64_guest_reset_start, __pa(sw64_guest_reset_start), sw64_guest_reset_size, - PAGE_KERNEL_NOEXEC, pgtable_alloc_fixmap); + PAGE_KERNEL_READONLY_EXEC, pgtable_alloc_fixmap); memblock_mark_nomap(__pa(sw64_reserve_start), __pa((unsigned long)_end - sw64_reserve_start)); -- Gitee From a6760af9304021f815cd84624cf12957647ba7f0 Mon Sep 17 00:00:00 2001 From: Gu Yuchen Date: Wed, 26 Nov 2025 10:22:46 +0800 Subject: [PATCH 30/40] sw64: set PTBR_SYS before accessing vmalloc area In commit c054792f93b3 ("sw64: use ioremap to map IO address in functions like __get_cpu_nums()"), some IO addresses are mapped to vmalloc area via ioremap() before being accessed. Move update_ptbr_sys() to the beginning of smp_callin() to make sure PTBR_SYS is set before accessing vmalloc area. Signed-off-by: Gu Yuchen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c index 07047d0d0ae1..072165260321 100644 --- a/arch/sw_64/kernel/smp.c +++ b/arch/sw_64/kernel/smp.c @@ -142,6 +142,8 @@ void smp_callin(void) unsigned long __maybe_unused nmi_stack; save_ktp(); + /* update csr:ptbr */ + update_ptbr_sys(virt_to_phys(init_mm.pgd)); upshift_freq(); cpuid = smp_processor_id(); WARN_ON_ONCE(!irqs_disabled()); @@ -163,8 +165,6 @@ void smp_callin(void) /* All kernel threads share the same mm context. 
*/ mmgrab(&init_mm); current->active_mm = &init_mm; - /* update csr:ptbr */ - update_ptbr_sys(virt_to_phys(init_mm.pgd)); #ifdef CONFIG_SUBARCH_C4 update_ptbr_usr(__pa_symbol(empty_zero_page)); #endif -- Gitee From b618fc263ba00f2bb21b640297eb4826481df0a7 Mon Sep 17 00:00:00 2001 From: Gu Yuchen Date: Wed, 4 Feb 2026 13:57:30 +0800 Subject: [PATCH 31/40] sw64: adapt kernel page table for memory hotplug Add the mapping operation for the memory block to the memory hot-add function, and the unmapping operation to the memory hot-remove function, so as to enable the kernel page table to support memory hot-plug. Signed-off-by: Gu Yuchen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/pgtable-4level.h | 1 + arch/sw_64/mm/init.c | 336 ++++++++++++++++++++++++ 2 files changed, 337 insertions(+) diff --git a/arch/sw_64/include/asm/pgtable-4level.h b/arch/sw_64/include/asm/pgtable-4level.h index 719e2c5377e3..bbe93ee82bbe 100644 --- a/arch/sw_64/include/asm/pgtable-4level.h +++ b/arch/sw_64/include/asm/pgtable-4level.h @@ -27,6 +27,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; #endif /* !__ASSEMBLY__ */ #define PAGE_OFFSET 0xfff0000000000000 +#define PAGE_END 0xfff07fffffffffff #endif #endif /* _ASM_SW64_PGTABLE_4LEVEL_H */ diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c index 9a28c84d218b..4ed79bc92617 100644 --- a/arch/sw_64/mm/init.c +++ b/arch/sw_64/mm/init.c @@ -872,16 +872,349 @@ void vmemmap_free(unsigned long start, unsigned long end, #endif #ifdef CONFIG_MEMORY_HOTPLUG +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE +static void free_hotplug_page_range(struct page *page, size_t size, + struct vmem_altmap *altmap) +{ + if (altmap) { + vmem_altmap_free(altmap, size >> PAGE_SHIFT); + } else { + WARN_ON(PageReserved(page)); + free_pages((unsigned long)page_address(page), get_order(size)); + } +} + +static void free_hotplug_pgtable_page(struct page *page) +{ + free_hotplug_page_range(page, PAGE_SIZE, NULL); +} + +static bool 
pgtable_range_aligned(unsigned long start, unsigned long end, + unsigned long floor, unsigned long ceiling, + unsigned long mask) +{ + start &= mask; + if (start < floor) + return false; + + if (ceiling) { + ceiling &= mask; + if (!ceiling) + return false; + } + + if (end - 1 > ceiling - 1) + return false; + return true; +} + +static void unmap_hotplug_pte_range(pmd_t *pmdp, unsigned long addr, + unsigned long end, bool free_mapped, + struct vmem_altmap *altmap) +{ + pte_t *ptep, pte; + + do { + ptep = pte_offset_kernel(pmdp, addr); + pte = READ_ONCE(*ptep); + if (pte_none(pte)) + continue; + + WARN_ON(!pte_present(pte)); + pte_clear(&init_mm, addr, ptep); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + if (free_mapped) + free_hotplug_page_range(pte_page(pte), + PAGE_SIZE, altmap); + } while (addr += PAGE_SIZE, addr < end); +} + +static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr, + unsigned long end, bool free_mapped, + struct vmem_altmap *altmap) +{ + unsigned long next; + pmd_t *pmdp, pmd; + + for (; addr < end; addr = next) { + next = pmd_addr_end(addr, end); + pmdp = pmd_offset(pudp, addr); + pmd = READ_ONCE(*pmdp); + if (pmd_none(pmd)) + continue; + + WARN_ON(!pmd_present(pmd)); + if (next - addr == PMD_SIZE) { + pmd_clear(pmdp); + flush_tlb_kernel_range(addr, addr + PMD_SIZE); + if (free_mapped) + free_hotplug_page_range(pmd_page(pmd), + PMD_SIZE, altmap); + continue; + } + unmap_hotplug_pte_range(pmdp, addr, next, free_mapped, altmap); + } +} + +static void unmap_hotplug_cont_pmd_range(pud_t *pudp, unsigned long addr, + unsigned long end, bool free_mapped, + struct vmem_altmap *altmap) +{ + unsigned long next, i; + pmd_t *pmdp, pmd; + + for (; addr < end; addr = next) { + next = cont_pmd_addr_end(addr, end); + pmdp = pmd_offset(pudp, addr); + pmd = READ_ONCE(*pmdp); + if (pmd_none(pmd)) + continue; + + WARN_ON(!pmd_present(pmd)); + if ((next - addr == CONT_PMD_SIZE) && + (PTRS_PER_PMD - pmd_index(addr) >= CONT_PMDS)) { + for (i = 0; i < 
CONT_PMDS; i++) { + pmd_clear(pmdp + i); + if (free_mapped) + free_hotplug_page_range(pmd_page(pmd), + PMD_SIZE, altmap); + } + flush_tlb_kernel_range(addr, addr + CONT_PMD_SIZE); + continue; + } + unmap_hotplug_pmd_range(pudp, addr, next, free_mapped, altmap); + } +} + +static void unmap_hotplug_pud_range(p4d_t *p4dp, unsigned long addr, + unsigned long end, bool free_mapped, + struct vmem_altmap *altmap) +{ + unsigned long next; + pud_t *pudp, pud; + + for (; addr < end; addr = next) { + next = pud_addr_end(addr, end); + pudp = pud_offset(p4dp, addr); + pud = READ_ONCE(*pudp); + if (pud_none(pud)) + continue; + + WARN_ON(!pud_present(pud)); + if (next - addr == PUD_SIZE) { + pud_clear(pudp); + flush_tlb_kernel_range(addr, addr + PUD_SIZE); + if (free_mapped) + free_hotplug_page_range(pud_page(pud), + PUD_SIZE, altmap); + continue; + } + unmap_hotplug_cont_pmd_range(pudp, addr, next, free_mapped, altmap); + } +} + +static void unmap_hotplug_range(unsigned long addr, unsigned long end, + bool free_mapped, struct vmem_altmap *altmap) +{ + unsigned long next; + pgd_t *pgdp; + p4d_t *p4dp, p4d; + + /* + * altmap can only be used as vmemmap mapping backing memory. + * In case the backing memory itself is not being freed, then + * altmap is irrelevant. Warn about this inconsistency when + * encountered. 
+ */ + WARN_ON(!free_mapped && altmap); + + for (; addr < end; addr = next) { + next = pgd_addr_end(addr, end); + pgdp = pgd_offset_k(addr); + p4dp = p4d_offset(pgdp, addr); + p4d = READ_ONCE(*p4dp); + if (p4d_none(p4d)) + continue; + + WARN_ON(!p4d_present(p4d)); + unmap_hotplug_pud_range(p4dp, addr, next, free_mapped, altmap); + } +} + +static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr, + unsigned long end, unsigned long floor, + unsigned long ceiling) +{ + pte_t *ptep, pte; + unsigned long i, start = addr; + + do { + ptep = pte_offset_kernel(pmdp, addr); + pte = READ_ONCE(*ptep); + + /* + * This is just a sanity check here which verifies that + * pte clearing has been done by earlier unmap loops. + */ + WARN_ON(!pte_none(pte)); + } while (addr += PAGE_SIZE, addr < end); + + if (!pgtable_range_aligned(start, end, floor, ceiling, PMD_MASK)) + return; + + /* + * Check whether we can free the pte page if the rest of the + * entries are empty. Overlap with other regions have been + * handled by the floor/ceiling check. + */ + ptep = pte_offset_kernel(pmdp, 0UL); + for (i = 0; i < PTRS_PER_PTE; i++) { + if (!pte_none(READ_ONCE(ptep[i]))) + return; + } + + pmd_clear(pmdp); + + /* invalidate the walk caches */ + flush_tlb_all(); + free_hotplug_pgtable_page(virt_to_page(ptep)); +} + +static void free_empty_pmd_table(pud_t *pudp, unsigned long addr, + unsigned long end, unsigned long floor, + unsigned long ceiling) +{ + pmd_t *pmdp, pmd; + unsigned long i, next, start = addr; + + for (; addr < end; addr = next) { + next = pmd_addr_end(addr, end); + pmdp = pmd_offset(pudp, addr); + pmd = READ_ONCE(*pmdp); + if (pmd_none(pmd)) + continue; + + WARN_ON(!pmd_present(pmd)); + free_empty_pte_table(pmdp, addr, next, floor, ceiling); + }; + + if (CONFIG_PGTABLE_LEVELS <= 2) + return; + + if (!pgtable_range_aligned(start, end, floor, ceiling, PUD_MASK)) + return; + + /* + * Check whether we can free the pmd page if the rest of the + * entries are empty. 
Overlap with other regions have been + * handled by the floor/ceiling check. + */ + pmdp = pmd_offset(pudp, 0UL); + for (i = 0; i < PTRS_PER_PMD; i++) { + if (!pmd_none(READ_ONCE(pmdp[i]))) + return; + } + + pud_clear(pudp); + + /* invalidate the walk caches */ + flush_tlb_all(); + free_hotplug_pgtable_page(virt_to_page(pmdp)); +} + +static void free_empty_pud_table(p4d_t *p4dp, unsigned long addr, + unsigned long end, unsigned long floor, + unsigned long ceiling) +{ + pud_t *pudp, pud; + unsigned long i, next, start = addr; + + for (; addr < end; addr = next) { + next = pud_addr_end(addr, end); + pudp = pud_offset(p4dp, addr); + pud = READ_ONCE(*pudp); + if (pud_none(pud)) + continue; + + WARN_ON(!pud_present(pud)); + free_empty_pmd_table(pudp, addr, next, floor, ceiling); + } + + if (CONFIG_PGTABLE_LEVELS <= 3) + return; + + if (!pgtable_range_aligned(start, end, floor, ceiling, PGDIR_MASK)) + return; + + /* + * Check whether we can free the pud page if the rest of the + * entries are empty. Overlap with other regions have been + * handled by the floor/ceiling check. 
+ */ + pudp = pud_offset(p4dp, 0UL); + for (i = 0; i < PTRS_PER_PUD; i++) { + if (!pud_none(READ_ONCE(pudp[i]))) + return; + } + + p4d_clear(p4dp); + + /* invalidate the walk caches */ + flush_tlb_all(); + free_hotplug_pgtable_page(virt_to_page(pudp)); +} + +static void free_empty_tables(unsigned long addr, unsigned long end, + unsigned long floor, unsigned long ceiling) +{ + unsigned long next; + pgd_t *pgdp; + p4d_t *p4dp, p4d; + + for (; addr < end; addr = next) { + next = pgd_addr_end(addr, end); + pgdp = pgd_offset_k(addr); + p4dp = p4d_offset(pgdp, addr); + p4d = READ_ONCE(*p4dp); + if (p4d_none(p4d)) + continue; + + WARN_ON(!p4d_present(p4d)); + free_empty_pud_table(p4dp, addr, next, floor, ceiling); + } +} + +static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size) +{ + unsigned long end = start + size; + + WARN_ON(pgdir != init_mm.pgd); + WARN_ON((start < PAGE_OFFSET) || (end > PAGE_END)); + + unmap_hotplug_range(start, end, false, NULL); + free_empty_tables(start, end, PAGE_OFFSET, PAGE_END); +} +#endif /* CONFIG_SW64_KERNEL_PAGE_TABLE */ + int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE + __create_pgd_mapping(swapper_pg_dir, (unsigned long)__va(start), start, + size, params->pgprot, pgtable_alloc_late); +#endif /* CONFIG_SW64_KERNEL_PAGE_TABLE */ + ret = __add_pages(nid, start_pfn, nr_pages, params); if (ret) pr_warn("%s: Problem encountered in __add_pages() as ret=%d\n", __func__, ret); + else { + max_pfn = PFN_UP(start + size); + max_low_pfn = max_pfn; + } return ret; } @@ -892,6 +1225,9 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) unsigned long nr_pages = size >> PAGE_SHIFT; __remove_pages(start_pfn, nr_pages, altmap); +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE + __remove_pgd_mapping(swapper_pg_dir, (unsigned long)__va(start), size); 
+#endif /* CONFIG_SW64_KERNEL_PAGE_TABLE */ } #endif -- Gitee From 2ac468840e1c0c09ca448c01387eea3c9c409fc5 Mon Sep 17 00:00:00 2001 From: Gao Chen Date: Thu, 5 Feb 2026 09:29:45 +0800 Subject: [PATCH 32/40] sw64: adapt kernel page table for kdump This patch fixes the adaptation of kernel page table for kdump with the following adjustments: - Move kexec_start_address and kexec_indirection_page out of the text section to ensure they have write permission. - Add execute permission to the page of reboot_code_buffer. - Set csr_atc to KSEG before jumping to the second kernel. - Clear soft_csrs before jumping to the second kernel. - Create a mapping for the sunway FDT. Signed-off-by: Gao Chen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/csr.h | 52 +++++++++++++++++++ arch/sw_64/include/asm/setup.h | 3 +- arch/sw_64/kernel/machine_kexec.c | 15 +++++- arch/sw_64/kernel/relocate_kernel.S | 10 ---- arch/sw_64/mm/init.c | 79 +++++++++++++++++++++++++++-- 5 files changed, 142 insertions(+), 17 deletions(-) diff --git a/arch/sw_64/include/asm/csr.h b/arch/sw_64/include/asm/csr.h index b879d15d9f80..b7b2a3fb9467 100644 --- a/arch/sw_64/include/asm/csr.h +++ b/arch/sw_64/include/asm/csr.h @@ -289,6 +289,58 @@ static inline void restore_all_soft_csrs(struct soft_csrs *sc) sw64_write_csr(sc->sc[46], SOFTCSR46); sw64_write_csr(sc->sc[47], SOFTCSR47); } + +static inline void clear_soft_csrs(void) +{ + sw64_write_csr_imb(0, SOFTCSR0); + sw64_write_csr_imb(0, SOFTCSR1); + sw64_write_csr_imb(0, SOFTCSR2); + sw64_write_csr_imb(0, SOFTCSR3); + sw64_write_csr_imb(0, SOFTCSR4); + sw64_write_csr_imb(0, SOFTCSR5); + sw64_write_csr_imb(0, SOFTCSR6); + sw64_write_csr_imb(0, SOFTCSR7); + sw64_write_csr_imb(0, SOFTCSR8); + sw64_write_csr_imb(0, SOFTCSR9); + sw64_write_csr_imb(0, SOFTCSR10); + sw64_write_csr_imb(0, SOFTCSR11); + sw64_write_csr_imb(0, SOFTCSR12); + sw64_write_csr_imb(0, SOFTCSR13); + sw64_write_csr_imb(0, SOFTCSR14); + sw64_write_csr_imb(0, SOFTCSR15); + 
sw64_write_csr_imb(0, SOFTCSR16); + sw64_write_csr_imb(0, SOFTCSR17); + sw64_write_csr_imb(0, SOFTCSR18); + sw64_write_csr_imb(0, SOFTCSR19); + sw64_write_csr_imb(0, SOFTCSR20); + sw64_write_csr_imb(0, SOFTCSR21); + sw64_write_csr_imb(0, SOFTCSR22); + sw64_write_csr_imb(0, SOFTCSR23); + sw64_write_csr_imb(0, SOFTCSR24); + sw64_write_csr_imb(0, SOFTCSR25); + sw64_write_csr_imb(0, SOFTCSR26); + sw64_write_csr_imb(0, SOFTCSR27); + sw64_write_csr_imb(0, SOFTCSR28); + sw64_write_csr_imb(0, SOFTCSR29); + sw64_write_csr_imb(0, SOFTCSR30); + sw64_write_csr_imb(0, SOFTCSR31); + sw64_write_csr_imb(0, SOFTCSR32); + sw64_write_csr_imb(0, SOFTCSR33); + sw64_write_csr_imb(0, SOFTCSR34); + sw64_write_csr_imb(0, SOFTCSR35); + sw64_write_csr_imb(0, SOFTCSR36); + sw64_write_csr_imb(0, SOFTCSR37); + sw64_write_csr_imb(0, SOFTCSR38); + sw64_write_csr_imb(0, SOFTCSR39); + sw64_write_csr_imb(0, SOFTCSR40); + sw64_write_csr_imb(0, SOFTCSR41); + sw64_write_csr_imb(0, SOFTCSR42); + sw64_write_csr_imb(0, SOFTCSR43); + sw64_write_csr_imb(0, SOFTCSR44); + sw64_write_csr_imb(0, SOFTCSR45); + sw64_write_csr_imb(0, SOFTCSR46); + sw64_write_csr_imb(0, SOFTCSR47); +} #endif #endif /* _ASM_SW64_CSR_H */ diff --git a/arch/sw_64/include/asm/setup.h b/arch/sw_64/include/asm/setup.h index 0a2edf9af3ca..fdcbb9debbd2 100644 --- a/arch/sw_64/include/asm/setup.h +++ b/arch/sw_64/include/asm/setup.h @@ -32,7 +32,8 @@ #define INITRD_SIZE (*(unsigned long *)(PARAM + 0x108)) #define DTB_START (*(unsigned long *)(PARAM + 0x118)) -#define _TEXT_START (KERNEL_START + 0x10000) +#define TEXT_OFFSET 0x10000 +#define _TEXT_START (KERNEL_START + TEXT_OFFSET) #define COMMAND_LINE_OFF (0x10000UL - 0xB000UL) #define INITRD_START_OFF (0x10000UL - 0xA100UL) diff --git a/arch/sw_64/kernel/machine_kexec.c b/arch/sw_64/kernel/machine_kexec.c index 9547f12a5299..9d8f3a6a7ba9 100644 --- a/arch/sw_64/kernel/machine_kexec.c +++ b/arch/sw_64/kernel/machine_kexec.c @@ -15,6 +15,7 @@ #include #include #include +#include #include 
#include @@ -22,8 +23,8 @@ extern const unsigned char relocate_new_kernel[]; extern const size_t relocate_new_kernel_size; -extern unsigned long kexec_start_address; -extern unsigned long kexec_indirection_page; +unsigned long kexec_start_address; +unsigned long kexec_indirection_page; static atomic_t waiting_for_crash_ipi; static void *kexec_control_page; @@ -346,6 +347,8 @@ void machine_kexec(struct kimage *image) unsigned long *ptr; reboot_code_buffer = kexec_control_page; + /* Since we do not have kimg, use set_memory to add exec_prot. */ + set_memory_x((unsigned long)reboot_code_buffer, 1); pr_info("reboot_code_buffer = %px\n", reboot_code_buffer); kexec_start_address = phys_to_ktext(image->start); pr_info("kexec_start_address = %#lx\n", kexec_start_address); @@ -383,6 +386,14 @@ void machine_kexec(struct kimage *image) pr_info("Will call new kernel at %08lx\n", image->start); pr_info("Bye ...\n"); +#ifdef CONFIG_SW64_KERNEL_PAGE_TABLE + /* + * We will jump directly instead of following the full reboot + * path, so softcsrs and CSR_ATC need to be reset here. 
+ */ + clear_soft_csrs(); + set_atc(ATC_KSEG); +#endif smp_wmb(); ((noretfun_t) reboot_code_buffer)(sunway_boot_magic, sunway_dtb_address); diff --git a/arch/sw_64/kernel/relocate_kernel.S b/arch/sw_64/kernel/relocate_kernel.S index a4b0d27778b9..793ab7d548d9 100644 --- a/arch/sw_64/kernel/relocate_kernel.S +++ b/arch/sw_64/kernel/relocate_kernel.S @@ -120,16 +120,6 @@ kexec_flag: .quad 0x1 #endif -kexec_start_address: - .globl kexec_start_address - .quad 0x0 - .size kexec_start_address, 8 - -kexec_indirection_page: - .globl kexec_indirection_page - .quad 0 - .size kexec_indirection_page, 8 - relocate_new_kernel_end: relocate_new_kernel_size: diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c index 4ed79bc92617..b2edfe37fff5 100644 --- a/arch/sw_64/mm/init.c +++ b/arch/sw_64/mm/init.c @@ -450,6 +450,75 @@ static void __init map_legacy_io(pgd_t *pgdir) create_pgd_mapping(pgdir, (unsigned long)__va(lpc_legacy_io_start), lpc_legacy_io_start, size, PAGE_KERNEL_NOEXEC, pgtable_alloc_fixmap); } + +static bool check_present(pgd_t *pgdir, unsigned long addr) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + bool ret; + + if (!addr) + return false; + + pgd = pgd_offset_pgd(pgdir, addr); + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) + return false; + + pud = pud_offset(p4d, addr); + pud = get_pud_virt_fixmap(__pa(pud)); + if (pud_none(*pud)) { + ret = false; + goto out; + } + if (pud_leaf(*pud)) { + ret = true; + goto out; + } + + pmd = pmd_offset(pud, addr); + pmd = get_pmd_virt_fixmap(__pa(pmd)); + if (!pmd_present(*pmd)) { + ret = false; + goto out; + } + if (pmd_leaf(*pmd)) { + ret = true; + goto out; + } + + pte = pte_offset_kernel(pmd, addr); + pte = get_pte_virt_fixmap(__pa(pte)); + if (pte_none(*pte)) + ret = false; + else + ret = true; + +out: + clear_fixmap(FIX_PTE); + clear_fixmap(FIX_PMD); + clear_fixmap(FIX_PUD); + + return ret; +} + +static void __init map_fdt(pgd_t *pgdir) +{ + unsigned long dtb_size = 0; + + /* + * If 
sunway_dtb_address is not included in the memory mapping, create a mapping + * for it. + */ + if (!check_present(pgdir, sunway_dtb_address)) { + dtb_size = (unsigned long)fdt_totalsize((void *)sunway_dtb_address); + create_pgd_mapping(pgdir, sunway_dtb_address, __pa(sunway_dtb_address), + dtb_size, PAGE_KERNEL_READONLY, pgtable_alloc_fixmap); + } +} #endif /* CONFIG_SW64_KERNEL_PAGE_TABLE */ /* @@ -461,7 +530,7 @@ void __init early_paging_init(void) unsigned long img_start, img_size; unsigned long dtb_start, dtb_size = 0; - img_start = (unsigned long)(KERNEL_START_PHYS + __START_KERNEL_map); + img_start = (unsigned long)_text - TEXT_OFFSET; img_size = (unsigned long)_end - img_start; dtb_start = sunway_dtb_address; @@ -500,9 +569,9 @@ void __init paging_init(void) #ifdef CONFIG_SW64_KERNEL_PAGE_TABLE unsigned long sw64_vcpucb_start = PAGE_OFFSET + 0x20000; unsigned long sw64_vcpucb_size = 0x60000; - unsigned long sw64_reserve_start = CONFIG_PHYSICAL_START + PAGE_OFFSET; - unsigned long sw64_reserve_size = (unsigned long)_stext - sw64_reserve_start; - unsigned long text_start = (unsigned long)_stext; + unsigned long sw64_reserve_start = (unsigned long)_text - TEXT_OFFSET; + unsigned long sw64_reserve_size = (unsigned long)_text - sw64_reserve_start; + unsigned long text_start = (unsigned long)_text; unsigned long text_size = (unsigned long)_etext - text_start; unsigned long ro_start = (unsigned long)__start_rodata; unsigned long ro_size = (unsigned long)__init_begin - ro_start; @@ -558,6 +627,8 @@ void __init paging_init(void) } memblock_clear_nomap(__pa(sw64_reserve_start), __pa((unsigned long)_end - sw64_reserve_start)); + + map_fdt(pgdir); #endif /* CONFIG_SW64_KERNEL_PAGE_TABLE */ } -- Gitee From 5920dad14999271ca40d2a2a5730f614f7f1a36f Mon Sep 17 00:00:00 2001 From: Gao Chen Date: Fri, 6 Feb 2026 09:16:56 +0800 Subject: [PATCH 33/40] sw64: add barriers and tlb flush for set_atc Add barriers and TLB flush before and after switching CSR_ATC to ensure correctness. 
Signed-off-by: Gao Chen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/hmcall.h | 2 +- arch/sw_64/mm/init.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/sw_64/include/asm/hmcall.h b/arch/sw_64/include/asm/hmcall.h index 24ef9e4b768b..e0cd6bbb17d2 100644 --- a/arch/sw_64/include/asm/hmcall.h +++ b/arch/sw_64/include/asm/hmcall.h @@ -258,7 +258,7 @@ static inline void wrap_asid(unsigned long asid, unsigned long ptbr) #define ATC_PAGE 1 #define ATC_KSEG 3 -#define set_atc(val) rwatc(1, (val)) +#define set_atc(val) ({mb(); imemb(); rwatc(1, (val)); tbiv(); imemb(); }) #define get_atc() rwatc(0, 0) #endif /* !__ASSEMBLY__ */ diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c index b2edfe37fff5..837725a1b116 100644 --- a/arch/sw_64/mm/init.c +++ b/arch/sw_64/mm/init.c @@ -556,8 +556,6 @@ void __init early_paging_init(void) pr_info("SW64 kernel page table enabled\n"); set_atc(ATC_PAGE); } - - tbiv(); #endif /* CONFIG_SW64_KERNEL_PAGE_TABLE */ } -- Gitee From 3e532e6c610d591523c1c52ee36f25a80fc00594 Mon Sep 17 00:00:00 2001 From: Wang Yicheng Date: Thu, 5 Mar 2026 14:42:40 +0800 Subject: [PATCH 34/40] sw64: cpuidle: refine C6 state stability This commit refines the sw64 C6 deep idle state by setting CPUIDLE_FLAG_RCU_IDLE for manual RCU management and update clock events to recalibrate timer interrupts during complex frequency transitions. However, software compensations cannot fully eliminate hardware-level timekeeping drift, the cpuidle driver is disabled under tickless systems to guarantee overall system stability. 
Signed-off-by: Wang Yicheng Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- drivers/clocksource/timer-sw64.c | 9 +++++++++ drivers/cpuidle/Kconfig.sw64 | 5 +++++ drivers/cpuidle/cpuidle-sw64.c | 28 +++++++++++++++++++++------- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/drivers/clocksource/timer-sw64.c b/drivers/clocksource/timer-sw64.c index d925d2650bde..2840b0e2dbe2 100644 --- a/drivers/clocksource/timer-sw64.c +++ b/drivers/clocksource/timer-sw64.c @@ -384,6 +384,15 @@ static void sw64_update_clockevents(void *data) clockevents_update_freq(swevt, freqs->new * 1000); } +void sw64_cpuidle_updatevents(int freq) +{ + + struct clock_event_device *swevt = this_cpu_ptr(&timer_events); + + clockevents_update_freq(swevt, freq * 1000); + +} + static int sw64_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { diff --git a/drivers/cpuidle/Kconfig.sw64 b/drivers/cpuidle/Kconfig.sw64 index 0f23175ad36a..f36ace6e2a01 100644 --- a/drivers/cpuidle/Kconfig.sw64 +++ b/drivers/cpuidle/Kconfig.sw64 @@ -4,3 +4,8 @@ # config SW64_CPUIDLE bool "Generic SW64 CPU idle Driver" + depends on !NO_HZ + depends on !NO_HZ_IDLE + help + Enable cpuidle support for SW64 architecture. + Disabled when tickless system (NO_HZ or NO_HZ_IDLE) is active. diff --git a/drivers/cpuidle/cpuidle-sw64.c b/drivers/cpuidle/cpuidle-sw64.c index 7af9d75185c1..59635640c93a 100644 --- a/drivers/cpuidle/cpuidle-sw64.c +++ b/drivers/cpuidle/cpuidle-sw64.c @@ -38,9 +38,13 @@ static bool can_switch_freq(int cpu_sibling) * Handles frequency scaling by writing specific bits to * CLU_LV2_SEL when CPU core wants a deeper idle state. 
*/ +extern unsigned int cpufreq_quick_get(unsigned int cpu); +extern void sw64_cpuidle_updatevents(int freq); + static void sw64_do_deeper_idle(int cpu) { int core_id, node_id, cpu_sibling; + int cur_freq, downshift_freq = 200000; unsigned long flags; u64 freq_scaling; @@ -48,6 +52,8 @@ static void sw64_do_deeper_idle(int cpu) node_id = rcid_to_domain_id(cpu_to_rcid(cpu)); cpu_sibling = cpu_siblings[cpu]; + cur_freq = cpufreq_quick_get(cpu); + /* downshift frequency before idle if possible*/ spin_lock_irqsave(&per_core_lock[node_id][core_id], flags); cpu_deeper_states[cpu] = true; @@ -55,15 +61,23 @@ static void sw64_do_deeper_idle(int cpu) freq_scaling = 0x1UL << (2 * (core_id & CORE_ID_BITMASK)); writeq(freq_scaling, spbu_base[node_id] + OFFSET_CLU_LV2(core_id)); } + + sw64_cpuidle_updatevents(downshift_freq); + spin_unlock_irqrestore(&per_core_lock[node_id][core_id], flags); + rcu_idle_enter(); arch_cpu_idle(); + local_irq_disable(); + rcu_idle_exit(); + local_irq_enable(); /* upshift frequency after idle */ spin_lock_irqsave(&per_core_lock[node_id][core_id], flags); cpu_deeper_states[cpu] = false; freq_scaling = 0x3UL << (2 * (core_id & CORE_ID_BITMASK)); writeq(freq_scaling, spbu_base[node_id] + OFFSET_CLU_LV2(core_id)); + sw64_cpuidle_updatevents(cur_freq); spin_unlock_irqrestore(&per_core_lock[node_id][core_id], flags); } @@ -117,21 +131,21 @@ static struct cpuidle_driver sw64_idle_driver = { * be unavailable. 
*/ .states[0] = { - .name = "idle0", - .desc = "sw64 idle 0", + .name = "C1", + .desc = "halt", .exit_latency = 1, .target_residency = 1, .enter = sw64_idle_enter, }, .states[1] = { - .name = "idle1", - .desc = "sw64 idle 1", - .exit_latency = 100, - .target_residency = 100, + .name = "C6", + .desc = "freq downshift", + .exit_latency = 200, + .target_residency = 200, .enter = sw64_idle_enter, + .flags = CPUIDLE_FLAG_RCU_IDLE, }, .state_count = 2, - .cpumask = (struct cpumask *) cpu_possible_mask, }; static int get_sibling_cpu(int cpu) -- Gitee From c28ffcc29627c57dfee6dae248e60d8cc0542648 Mon Sep 17 00:00:00 2001 From: Gu Yuchen Date: Sat, 28 Feb 2026 16:10:02 +0800 Subject: [PATCH 35/40] sw64: define get_cycles macro Define get_cycles macro. Signed-off-by: Gu Yuchen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/timex.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sw_64/include/asm/timex.h b/arch/sw_64/include/asm/timex.h index a5760bf8abd4..2564bbf59f3e 100644 --- a/arch/sw_64/include/asm/timex.h +++ b/arch/sw_64/include/asm/timex.h @@ -19,5 +19,6 @@ static inline cycles_t get_cycles(void) { return rdtc(); } +#define get_cycles get_cycles #endif /* _ASM_SW64_TIMEX_H */ -- Gitee From d64d996419f382407180cd424cf54e65f0af74da Mon Sep 17 00:00:00 2001 From: Jinyu Tang Date: Mon, 2 Mar 2026 08:19:20 +0800 Subject: [PATCH 36/40] sw64: add HUGE_VMAP support for c4 Enable HAVE_ARCH_HUGE_VMALLOC and HAVE_ARCH_HUGE_VMAP for the sw64 c4 architecture to allow ioremap() and vmalloc() to use huge pages. This significantly reduces TLB pressure and page table memory overhead when mapping large contiguous physical regions like PCIe BARs. 
Signed-off-by: Jinyu Tang Signed-off-by: Gu Yuchen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/Kconfig | 2 + arch/sw_64/include/asm/vmalloc.h | 19 +++++++ arch/sw_64/mm/Makefile | 2 +- arch/sw_64/mm/pgtable.c | 90 ++++++++++++++++++++++++++++++++ 4 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 arch/sw_64/mm/pgtable.c diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index a6987ccf1949..5e1bdeb07679 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -93,6 +93,8 @@ config SW64 select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP + select HAVE_ARCH_HUGE_VMAP if SUBARCH_C4 select HAVE_ASM_MODVERSIONS select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_BUGVERBOSE diff --git a/arch/sw_64/include/asm/vmalloc.h b/arch/sw_64/include/asm/vmalloc.h index a76d1133d6c6..548e32d159b6 100644 --- a/arch/sw_64/include/asm/vmalloc.h +++ b/arch/sw_64/include/asm/vmalloc.h @@ -2,4 +2,23 @@ #ifndef _ASM_SW64_VMALLOC_H #define _ASM_SW64_VMALLOC_H +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP + +#include + +#define IOREMAP_MAX_ORDER (PUD_SHIFT) + +#define arch_vmap_pud_supported arch_vmap_pud_supported +static inline bool arch_vmap_pud_supported(pgprot_t prot) +{ + return true; +} + +#define arch_vmap_pmd_supported arch_vmap_pmd_supported +static inline bool arch_vmap_pmd_supported(pgprot_t prot) +{ + return true; +} + +#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ #endif /* _ASM_SW64_VMALLOC_H */ diff --git a/arch/sw_64/mm/Makefile b/arch/sw_64/mm/Makefile index 1f6cd3965633..700345bd3c40 100644 --- a/arch/sw_64/mm/Makefile +++ b/arch/sw_64/mm/Makefile @@ -5,7 +5,7 @@ #ccflags-y := -Werror -obj-y := init.o fault.o physaddr.o mmap.o extable.o +obj-y := init.o fault.o physaddr.o mmap.o extable.o pgtable.o obj-$(CONFIG_SW64_KERNEL_PAGE_TABLE) += pageattr.o obj-$(CONFIG_NUMA) += numa.o diff --git a/arch/sw_64/mm/pgtable.c b/arch/sw_64/mm/pgtable.c new file mode 100644 
index 000000000000..b47ec8c96b7a --- /dev/null +++ b/arch/sw_64/mm/pgtable.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include + +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP + +int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot) +{ + pmd_t new_pmd = pfn_pmd(phys >> PAGE_SHIFT, prot); + + new_pmd = __pmd(pmd_val(new_pmd) | _PAGE_LEAF); + set_pmd(pmd, new_pmd); + + return 1; +} + +int pmd_clear_huge(pmd_t *pmd) +{ + if (!(pmd_val(*pmd) & _PAGE_LEAF)) + return 0; + + pmd_clear(pmd); + return 1; +} + +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) +{ + pte_t *pte; + + if (pmd_val(*pmd) & _PAGE_LEAF) + return 0; + + pte = (pte_t *)pmd_page_vaddr(*pmd); + + pmd_clear(pmd); + + flush_tlb_kernel_range(addr, addr + PMD_SIZE); + + pte_free_kernel(NULL, pte); + + return 1; +} + +int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot) +{ + pud_t new_pud = pfn_pud(phys >> PAGE_SHIFT, prot); + + new_pud = __pud(pud_val(new_pud) | _PAGE_LEAF); + + set_pud(pud, new_pud); + return 1; +} + +int pud_clear_huge(pud_t *pud) +{ + if (!(pud_val(*pud) & _PAGE_LEAF)) + return 0; + + pud_clear(pud); + return 1; +} + +int pud_free_pmd_page(pud_t *pud, unsigned long addr) +{ + pmd_t *pmd; + int i; + + if (pud_val(*pud) & _PAGE_LEAF) + return 0; + + pmd = (pmd_t *)pud_page_vaddr(*pud); + pud_clear(pud); + flush_tlb_kernel_range(addr, addr + PUD_SIZE); + + for (i = 0; i < PTRS_PER_PMD; i++) { + if (!pmd_none(pmd[i])) { + pte_t *pte = (pte_t *)pmd_page_vaddr(pmd[i]); + + pte_free_kernel(NULL, pte); + } + } + + pmd_free(NULL, pmd); + + return 1; +} +#endif -- Gitee From af158ca19444c1de516989d25dac1dbf77aa1c97 Mon Sep 17 00:00:00 2001 From: Gu Yuchen Date: Thu, 12 Mar 2026 16:34:16 +0800 Subject: [PATCH 37/40] sw64: fix the parameter passing logic of memblock_mark_nomap Fix the parameter passing logic of memblock_mark_nomap and memblock_clear_nomap. 
Signed-off-by: Gu Yuchen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/mm/init.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c index 837725a1b116..ec9125914a71 100644 --- a/arch/sw_64/mm/init.c +++ b/arch/sw_64/mm/init.c @@ -613,8 +613,7 @@ void __init paging_init(void) create_pgd_mapping(pgdir, sw64_guest_reset_start, __pa(sw64_guest_reset_start), sw64_guest_reset_size, PAGE_KERNEL_READONLY_EXEC, pgtable_alloc_fixmap); - memblock_mark_nomap(__pa(sw64_reserve_start), - __pa((unsigned long)_end - sw64_reserve_start)); + memblock_mark_nomap(__pa(sw64_reserve_start), (unsigned long)_end - sw64_reserve_start); for_each_mem_range(i, &start, &end) { if (start >= end) break; @@ -623,8 +622,7 @@ void __init paging_init(void) (unsigned long)(end - start), PAGE_KERNEL_NOEXEC, pgtable_alloc_fixmap); } - memblock_clear_nomap(__pa(sw64_reserve_start), - __pa((unsigned long)_end - sw64_reserve_start)); + memblock_clear_nomap(__pa(sw64_reserve_start), (unsigned long)_end - sw64_reserve_start); map_fdt(pgdir); #endif /* CONFIG_SW64_KERNEL_PAGE_TABLE */ -- Gitee From eab52a48c7ac7f9990b4751d4f75eadf6df1768d Mon Sep 17 00:00:00 2001 From: Jinyu Tang Date: Wed, 4 Mar 2026 17:30:01 +0800 Subject: [PATCH 38/40] sw64: add support for batched TLB flushing during unmap This patch enables ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH for sw64 architecture. By deferring TLB flushes during memory reclamation, we can significantly reduce the number of Inter-Processor Interrupts (IPIs) sent between cores. This is highly beneficial in multi-threaded workloads sharing the same memory space, preventing IPI storms when swapping out pages actively cached in multiple CPUs. 
Signed-off-by: Jinyu Tang Signed-off-by: Gu Yuchen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/Kconfig | 1 + arch/sw_64/include/asm/tlbbatch.h | 11 +++++++++++ arch/sw_64/include/asm/tlbflush.h | 24 ++++++++++++++++++++++++ arch/sw_64/kernel/smp.c | 18 ++++++++++++++++++ 4 files changed, 54 insertions(+) create mode 100644 arch/sw_64/include/asm/tlbbatch.h diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index 5e1bdeb07679..968b1edb50f7 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -59,6 +59,7 @@ config SW64 select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_IPC_PARSE_VERSION diff --git a/arch/sw_64/include/asm/tlbbatch.h b/arch/sw_64/include/asm/tlbbatch.h new file mode 100644 index 000000000000..b48a199f92aa --- /dev/null +++ b/arch/sw_64/include/asm/tlbbatch.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_SW_64_TLBBATCH_H +#define _ASM_SW_64_TLBBATCH_H + +#include + +struct arch_tlbflush_unmap_batch { + struct cpumask cpumask; +}; + +#endif /* _ASM_SW_64_TLBBATCH_H */ diff --git a/arch/sw_64/include/asm/tlbflush.h b/arch/sw_64/include/asm/tlbflush.h index 73995d9663a6..ce9b9f42e673 100644 --- a/arch/sw_64/include/asm/tlbflush.h +++ b/arch/sw_64/include/asm/tlbflush.h @@ -91,4 +91,28 @@ extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); #endif /* CONFIG_SMP */ + +static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm) +{ + bool should_defer = false; + + /* If remote CPUs need to be flushed then defer batch the flush */ + if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids) + should_defer = true; + put_cpu(); + + return should_defer; +} + +static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm) +{ + flush_tlb_mm(mm); +} + +extern void arch_tlbbatch_add_pending(struct 
arch_tlbflush_unmap_batch *batch, + struct mm_struct *mm, + unsigned long uaddr); +extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); + + #endif /* _ASM_SW64_TLBFLUSH_H */ diff --git a/arch/sw_64/kernel/smp.c b/arch/sw_64/kernel/smp.c index 072165260321..0a9326ec8cc7 100644 --- a/arch/sw_64/kernel/smp.c +++ b/arch/sw_64/kernel/smp.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -821,6 +822,23 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) } EXPORT_SYMBOL(flush_tlb_kernel_range); +void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, + struct mm_struct *mm, + unsigned long uaddr) +{ + cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); + + mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); +} + +void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) +{ + if (!cpumask_empty(&batch->cpumask)) { + on_each_cpu_mask(&batch->cpumask, ipi_flush_tlb_all, NULL, 1); + cpumask_clear(&batch->cpumask); + } +} + #ifdef CONFIG_HOTPLUG_CPU extern int can_unplug_cpu(void); int __cpu_disable(void) -- Gitee From 6d4e1f2a3957faa98884129623671284be6a4a7b Mon Sep 17 00:00:00 2001 From: Gu Yuchen Date: Thu, 12 Mar 2026 16:47:24 +0800 Subject: [PATCH 39/40] sw64: fix the compilation error when CONFIG_DEBUG_VIRTUAL is enabled Compiling with CONFIG_DEBUG_VIRTUAL=y caused the following error: implicit declaration of function 'phys_addr_valid' This commit fixes the compilation error. 
Signed-off-by: Gu Yuchen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/kernel/setup.c | 2 +- arch/sw_64/mm/physaddr.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c index 655b3b851e08..fd3a24dc3643 100644 --- a/arch/sw_64/kernel/setup.c +++ b/arch/sw_64/kernel/setup.c @@ -46,7 +46,7 @@ DEFINE_PER_CPU(unsigned long, hard_node_id) = { 0 }; static DEFINE_PER_CPU(struct cpu, cpu_devices); -static inline int phys_addr_valid(unsigned long addr) +inline int phys_addr_valid(unsigned long addr) { /* * At this point memory probe has not been done such that max_pfn diff --git a/arch/sw_64/mm/physaddr.c b/arch/sw_64/mm/physaddr.c index 3c6ecb8ee86a..bdbfbe9c9920 100644 --- a/arch/sw_64/mm/physaddr.c +++ b/arch/sw_64/mm/physaddr.c @@ -4,6 +4,7 @@ #include #include +extern int phys_addr_valid(unsigned long); unsigned long __phys_addr(unsigned long addr) { VIRTUAL_BUG_ON(addr < PAGE_OFFSET); -- Gitee From b40409023018564be65076729f271a3d7745acd2 Mon Sep 17 00:00:00 2001 From: Gu Zitao Date: Tue, 17 Mar 2026 14:21:22 +0800 Subject: [PATCH 40/40] sw64: always define pxx_pgprot() according to upstream Signed-off-by: Gu Zitao Reviewed-by: He Sheng --- arch/sw_64/mm/hugetlbpage_c4.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sw_64/mm/hugetlbpage_c4.c b/arch/sw_64/mm/hugetlbpage_c4.c index 40094107eff2..93fdffde9f90 100644 --- a/arch/sw_64/mm/hugetlbpage_c4.c +++ b/arch/sw_64/mm/hugetlbpage_c4.c @@ -40,6 +40,7 @@ EXPORT_SYMBOL(pud_huge); /* * Select all bits except the pfn */ +#define pte_pgprot pte_pgprot static inline pgprot_t pte_pgprot(pte_t pte) { unsigned long pfn = pte_pfn(pte); -- Gitee