From 4adb47a74ba6507b8d29af4507502d72bb8db89f Mon Sep 17 00:00:00 2001 From: YunYi Yang Date: Sat, 25 Nov 2023 10:15:19 +0800 Subject: [PATCH] Support handle CXL devices AER errors in firmware-first mode --- kernel.spec | 24 +- ...RR-Return-status-of-pcie_do_recovery.patch | 88 ++++ ...e-aer-variable-for-capability-offset.patch | 486 ++++++++++++++++++ ...ci_aer_clear_device_status-to-pcie_c.patch | 130 +++++ ...Ie-Device-Status-errors-only-if-OS-o.patch | 134 +++++ ...R-Capability-only-when-we-control-it.patch | 97 ++++ ...RCEC-devices-to-the-Root-Port-driver.patch | 106 ++++ ...EC-EA-Capability-offset-in-pci_init_.patch | 207 ++++++++ ...implify-by-using-pci_upstream_bridge.patch | 50 ++ ...lify-by-computing-pci_pcie_type-once.patch | 107 ++++ ...idge-for-clarity-in-pcie_do_recovery.patch | 127 +++++ ...void-negated-conditional-for-clarity.patch | 57 ++ ...-pci_walk_bridge-to-pcie_do_recovery.patch | 117 +++++ ...-AER-status-only-when-we-control-AER.patch | 73 +++ ...PCI-ERR-Recover-from-RCEC-AER-errors.patch | 220 ++++++++ ...d-pcie_link_rcec-to-associate-RCiEPs.patch | 191 +++++++ ...CI-ERR-Recover-from-RCiEP-AER-errors.patch | 141 +++++ ...-pcie_walk_rcec-to-RCEC-AER-handling.patch | 163 ++++++ ...-pcie_walk_rcec-to-RCEC-PME-handling.patch | 114 ++++ ...Add-RCEC-AER-error-injection-support.patch | 57 ++ ...Fix-RCiEP-device-to-RCEC-association.patch | 58 +++ ...CI-fix-kabi-change-in-struct-pci_dev.patch | 66 +++ series.conf | 23 +- 23 files changed, 2834 insertions(+), 2 deletions(-) create mode 100644 patches/0757-PCI-ERR-Return-status-of-pcie_do_recovery.patch create mode 100644 patches/0758-PCI-AER-Use-aer-variable-for-capability-offset.patch create mode 100644 patches/0759-PCI-ERR-Rename-pci_aer_clear_device_status-to-pcie_c.patch create mode 100644 patches/0760-PCI-ERR-Clear-PCIe-Device-Status-errors-only-if-OS-o.patch create mode 100644 patches/0761-PCI-AER-Write-AER-Capability-only-when-we-control-it.patch create mode 100644 
patches/0762-PCI-ERR-Bind-RCEC-devices-to-the-Root-Port-driver.patch create mode 100644 patches/0763-PCI-ERR-Cache-RCEC-EA-Capability-offset-in-pci_init_.patch create mode 100644 patches/0764-PCI-ERR-Simplify-by-using-pci_upstream_bridge.patch create mode 100644 patches/0765-PCI-ERR-Simplify-by-computing-pci_pcie_type-once.patch create mode 100644 patches/0766-PCI-ERR-Use-bridge-for-clarity-in-pcie_do_recovery.patch create mode 100644 patches/0767-PCI-ERR-Avoid-negated-conditional-for-clarity.patch create mode 100644 patches/0768-PCI-ERR-Add-pci_walk_bridge-to-pcie_do_recovery.patch create mode 100644 patches/0769-PCI-ERR-Clear-AER-status-only-when-we-control-AER.patch create mode 100644 patches/0770-PCI-ERR-Recover-from-RCEC-AER-errors.patch create mode 100644 patches/0771-PCI-ERR-Add-pcie_link_rcec-to-associate-RCiEPs.patch create mode 100644 patches/0772-PCI-ERR-Recover-from-RCiEP-AER-errors.patch create mode 100644 patches/0773-PCI-AER-Add-pcie_walk_rcec-to-RCEC-AER-handling.patch create mode 100644 patches/0774-PCI-PME-Add-pcie_walk_rcec-to-RCEC-PME-handling.patch create mode 100644 patches/0775-PCI-AER-Add-RCEC-AER-error-injection-support.patch create mode 100644 patches/0776-PCI-RCEC-Fix-RCiEP-device-to-RCEC-association.patch create mode 100644 patches/0777-PCI-fix-kabi-change-in-struct-pci_dev.patch diff --git a/kernel.spec b/kernel.spec index 65d48447..2dd6eca2 100644 --- a/kernel.spec +++ b/kernel.spec @@ -32,7 +32,7 @@ Name: kernel Version: 4.19.90 -Release: %{hulkrelease}.0250 +Release: %{hulkrelease}.0251 Summary: Linux Kernel License: GPLv2 URL: http://www.kernel.org/ @@ -849,6 +849,28 @@ fi %endif %changelog +* Sat Nov 25 2023 YunYi Yang - 4.19.90-2311.4.0.0251 +- PCI: fix kabi change in struct pci_dev +- PCI/RCEC: Fix RCiEP device to RCEC association +- PCI/AER: Add RCEC AER error injection support +- PCI/PME: Add pcie_walk_rcec() to RCEC PME handling +- PCI/AER: Add pcie_walk_rcec() to RCEC AER handling +- PCI/ERR: Recover from RCiEP AER errors +- 
PCI/ERR: Add pcie_link_rcec() to associate RCiEPs +- PCI/ERR: Recover from RCEC AER errors +- PCI/ERR: Clear AER status only when we control AER +- PCI/ERR: Add pci_walk_bridge() to pcie_do_recovery() +- PCI/ERR: Avoid negated conditional for clarity +- PCI/ERR: Use "bridge" for clarity in pcie_do_recovery() +- PCI/ERR: Simplify by computing pci_pcie_type() once +- PCI/ERR: Simplify by using pci_upstream_bridge() +- PCI/ERR: Cache RCEC EA Capability offset in pci_init_capabilities() +- PCI/ERR: Bind RCEC devices to the Root Port driver +- PCI/AER: Write AER Capability only when we control it +- PCI/ERR: Clear PCIe Device Status errors only if OS owns AER +- PCI/ERR: Rename pci_aer_clear_device_status() to pcie_clear_device_status() +- PCI/AER: Use "aer" variable for capability offset +- PCI/ERR: Return status of pcie_do_recovery() * Thu Nov 23 2023 Keyi Zhong - 4.19.90-2311.4.0.0250 - crypto: hisilicon - Add value profile support for kernel diff --git a/patches/0757-PCI-ERR-Return-status-of-pcie_do_recovery.patch b/patches/0757-PCI-ERR-Return-status-of-pcie_do_recovery.patch new file mode 100644 index 00000000..ad92cf6a --- /dev/null +++ b/patches/0757-PCI-ERR-Return-status-of-pcie_do_recovery.patch @@ -0,0 +1,88 @@ +From 8f61a3bd168def697a48584b62086238ffdfb9d0 Mon Sep 17 00:00:00 2001 +From: Kuppuswamy Sathyanarayanan +Date: Mon, 23 Mar 2020 17:26:03 -0700 +Subject: [PATCH 01/21] PCI/ERR: Return status of pcie_do_recovery() + +mainline inclusion +from mainline-v5.7-rc1 +commit e8e5ff2aeec19ade42f0535f4b554a3f6e1a58f7 +category: bugfix +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EAHA +CVE: NA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e8e5ff2aeec19ade42f0535f4b554a3f6e1a58f7 + +---------------------------------------------------------------------------- + +As per the DPC Enhancements ECN [1], sec 4.5.1, table 4-4, if the OS +supports Error Disconnect Recover (EDR), it must invalidate the software 
+state associated with child devices of the port without attempting to +access the child device hardware. In addition, if the OS supports DPC, it +must attempt to recover the child devices if the port implements the DPC +Capability. If the OS continues operation, the OS must inform the firmware +of the status of the recovery operation via the _OST method. + +Return the result of pcie_do_recovery() so we can report it to firmware via +_OST. + +[1] Downstream Port Containment Related Enhancements ECN, Jan 28, 2019, + affecting PCI Firmware Specification, Rev. 3.2 + https://members.pcisig.com/wg/PCI-SIG/document/12888 + +Link: https://lore.kernel.org/r/eb60ec89448769349c6722954ffbf2de163155b5.1585000084.git.sathyanarayanan.kuppuswamy@linux.intel.com +Signed-off-by: Kuppuswamy Sathyanarayanan +Signed-off-by: Bjorn Helgaas +Signed-off-by: YunYi Yang + + Conflicts: + drivers/pci/pci.h + drivers/pci/pcie/err.c +--- + drivers/pci/pci.h | 2 +- + drivers/pci/pcie/err.c | 6 ++++-- + 2 files changed, 5 insertions(+), 3 deletions(-) + +diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h +index ff5dde9e6..8ccf608c7 100644 +--- a/drivers/pci/pci.h ++++ b/drivers/pci/pci.h +@@ -511,7 +511,7 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev) + #endif + + /* PCI error reporting and recovery */ +-void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, ++pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, + u32 service); + + bool pcie_wait_for_link(struct pci_dev *pdev, bool active); +diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c +index 6d3d5b6a5..68093505a 100644 +--- a/drivers/pci/pcie/err.c ++++ b/drivers/pci/pcie/err.c +@@ -185,7 +185,7 @@ static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) + return status; + } + +-void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, ++pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, + 
u32 service) + { + pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; +@@ -236,11 +236,13 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, + pci_aer_clear_device_status(dev); + pci_cleanup_aer_uncorrect_error_status(dev); + pci_info(dev, "AER: Device recovery successful\n"); +- return; ++ return status; + + failed: + pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); + + /* TODO: Should kernel panic here? */ + pci_info(dev, "AER: Device recovery failed\n"); ++ ++ return status; + } +-- +2.27.0 + diff --git a/patches/0758-PCI-AER-Use-aer-variable-for-capability-offset.patch b/patches/0758-PCI-AER-Use-aer-variable-for-capability-offset.patch new file mode 100644 index 00000000..7929b730 --- /dev/null +++ b/patches/0758-PCI-AER-Use-aer-variable-for-capability-offset.patch @@ -0,0 +1,486 @@ +From 8fce6bd596e455302bf7ecd7f23f79b5d989bacb Mon Sep 17 00:00:00 2001 +From: Bjorn Helgaas +Date: Fri, 29 May 2020 17:56:09 -0500 +Subject: [PATCH 02/21] PCI/AER: Use "aer" variable for capability offset + +mainline inclusion +from mainline-v5.8-rc1 +commit 07b2fbb565e2df7ccc41e5c977b19f5f1f9fe013 +category: bugfix +bugzilla: https://e.gitee.com/open_euler/dashboard?issue=I8EAHA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=07b2fbb565e2df7ccc41e5c977b19f5f1f9fe013 + +---------------------------------------------------------------------- + +Previously we used "pos" or "aer_pos" for the offset of the AER Capability. +Use "aer" consistently and initialize it the same way everywhere. No +functional change intended. 
+ +Link: https://lore.kernel.org/r/20200529230915.GA479883@bjorn-Precision-5520 +Signed-off-by: Bjorn Helgaas +Reviewed-by: Kuppuswamy Sathyanarayanan +Signed-off-by: YunYi Yang + + Conflicts: + drivers/pci/pcie/aer.c +--- + drivers/pci/pcie/aer.c | 185 +++++++++++++++++++---------------------- + 1 file changed, 86 insertions(+), 99 deletions(-) + +diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c +index 57584cbaa..71feb30a2 100644 +--- a/drivers/pci/pcie/aer.c ++++ b/drivers/pci/pcie/aer.c +@@ -135,22 +135,21 @@ static const char * const ecrc_policy_str[] = { + */ + static int enable_ecrc_checking(struct pci_dev *dev) + { +- int pos; ++ int aer = dev->aer_cap; + u32 reg32; + + if (!pci_is_pcie(dev)) + return -ENODEV; + +- pos = dev->aer_cap; +- if (!pos) ++ if (!aer) + return -ENODEV; + +- pci_read_config_dword(dev, pos + PCI_ERR_CAP, &reg32); ++ pci_read_config_dword(dev, aer + PCI_ERR_CAP, &reg32); + if (reg32 & PCI_ERR_CAP_ECRC_GENC) + reg32 |= PCI_ERR_CAP_ECRC_GENE; + if (reg32 & PCI_ERR_CAP_ECRC_CHKC) + reg32 |= PCI_ERR_CAP_ECRC_CHKE; +- pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32); ++ pci_write_config_dword(dev, aer + PCI_ERR_CAP, reg32); + + return 0; + } +@@ -163,19 +162,18 @@ static int enable_ecrc_checking(struct pci_dev *dev) + */ + static int disable_ecrc_checking(struct pci_dev *dev) + { +- int pos; ++ int aer = dev->aer_cap; + u32 reg32; + + if (!pci_is_pcie(dev)) + return -ENODEV; + +- pos = dev->aer_cap; +- if (!pos) ++ if (!aer) + return -ENODEV; + +- pci_read_config_dword(dev, pos + PCI_ERR_CAP, &reg32); ++ pci_read_config_dword(dev, aer + PCI_ERR_CAP, &reg32); + reg32 &= ~(PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); +- pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32); ++ pci_write_config_dword(dev, aer + PCI_ERR_CAP, reg32); + + return 0; + } +@@ -378,22 +376,21 @@ void pci_aer_clear_device_status(struct pci_dev *dev) + + int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) + { +- int pos; ++ int aer = dev->aer_cap; + 
u32 status, sev; + +- pos = dev->aer_cap; +- if (!pos) ++ if (!aer) + return -EIO; + + if (pcie_aer_get_firmware_first(dev)) + return -EIO; + + /* Clear status bits for ERR_NONFATAL errors only */ +- pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); +- pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev); ++ pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status); ++ pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sev); + status &= ~sev; + if (status) +- pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); ++ pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status); + + return 0; + } +@@ -401,35 +398,33 @@ EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status); + + void pci_aer_clear_fatal_status(struct pci_dev *dev) + { +- int pos; ++ int aer = dev->aer_cap; + u32 status, sev; + +- pos = dev->aer_cap; +- if (!pos) ++ if (!aer) + return; + + if (pcie_aer_get_firmware_first(dev)) + return; + + /* Clear status bits for ERR_FATAL errors only */ +- pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); +- pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev); ++ pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status); ++ pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sev); + status &= sev; + if (status) +- pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); ++ pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status); + } + + int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) + { +- int pos; ++ int aer = dev->aer_cap; + u32 status; + int port_type; + + if (!pci_is_pcie(dev)) + return -ENODEV; + +- pos = dev->aer_cap; +- if (!pos) ++ if (!aer) + return -EIO; + + if (pcie_aer_get_firmware_first(dev)) +@@ -437,27 +432,26 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) + + port_type = pci_pcie_type(dev); + if (port_type == PCI_EXP_TYPE_ROOT_PORT) { +- pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status); +- 
pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, status); ++ pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &status); ++ pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, status); + } + +- pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); +- pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status); ++ pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &status); ++ pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS, status); + +- pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); +- pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); ++ pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status); ++ pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status); + + return 0; + } + + void pci_save_aer_state(struct pci_dev *dev) + { ++ int aer = dev->aer_cap; + struct pci_cap_saved_state *save_state; + u32 *cap; +- int pos; + +- pos = dev->aer_cap; +- if (!pos) ++ if (!aer) + return; + + save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR); +@@ -465,22 +459,21 @@ void pci_save_aer_state(struct pci_dev *dev) + return; + + cap = &save_state->cap.data[0]; +- pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, cap++); +- pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, cap++); +- pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, cap++); +- pci_read_config_dword(dev, pos + PCI_ERR_CAP, cap++); ++ pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, cap++); ++ pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, cap++); ++ pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, cap++); ++ pci_read_config_dword(dev, aer + PCI_ERR_CAP, cap++); + if (pcie_cap_has_rtctl(dev)) +- pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, cap++); ++ pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, cap++); + } + + void pci_restore_aer_state(struct pci_dev *dev) + { ++ int aer = dev->aer_cap; + struct pci_cap_saved_state *save_state; + u32 *cap; +- int pos; + +- pos = dev->aer_cap; +- if (!pos) ++ 
if (!aer) + return; + + save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR); +@@ -488,12 +481,12 @@ void pci_restore_aer_state(struct pci_dev *dev) + return; + + cap = &save_state->cap.data[0]; +- pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, *cap++); +- pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, *cap++); +- pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, *cap++); +- pci_write_config_dword(dev, pos + PCI_ERR_CAP, *cap++); ++ pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, *cap++); ++ pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, *cap++); ++ pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, *cap++); ++ pci_write_config_dword(dev, aer + PCI_ERR_CAP, *cap++); + if (pcie_cap_has_rtctl(dev)) +- pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, *cap++); ++ pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, *cap++); + } + + void pci_aer_init(struct pci_dev *dev) +@@ -923,7 +916,7 @@ static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev) + */ + static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) + { +- int pos; ++ int aer = dev->aer_cap; + u32 status, mask; + u16 reg16; + +@@ -958,17 +951,16 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) + if (!(reg16 & PCI_EXP_AER_FLAGS)) + return false; + +- pos = dev->aer_cap; +- if (!pos) ++ if (!aer) + return false; + + /* Check if error is recorded */ + if (e_info->severity == AER_CORRECTABLE) { +- pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); +- pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &mask); ++ pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &status); ++ pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask); + } else { +- pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); +- pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask); ++ pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status); ++ pci_read_config_dword(dev, aer + 
PCI_ERR_UNCOR_MASK, &mask); + } + if (status & ~mask) + return true; +@@ -1040,16 +1032,15 @@ static bool find_source_device(struct pci_dev *parent, + */ + static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) + { +- int pos; ++ int aer = dev->aer_cap; + + if (info->severity == AER_CORRECTABLE) { + /* + * Correctable error does not need software intervention. + * No need to go through error recovery process. + */ +- pos = dev->aer_cap; +- if (pos) +- pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, ++ if (aer) ++ pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS, + info->status); + pci_aer_clear_device_status(dev); + } else if (info->severity == AER_NONFATAL) +@@ -1142,22 +1133,21 @@ EXPORT_SYMBOL_GPL(aer_recover_queue); + */ + int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) + { +- int pos, temp; ++ int aer = dev->aer_cap; ++ int temp; + + /* Must reset in this function */ + info->status = 0; + info->tlp_header_valid = 0; + +- pos = dev->aer_cap; +- + /* The device might not support AER */ +- if (!pos) ++ if (!aer) + return 0; + + if (info->severity == AER_CORRECTABLE) { +- pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, ++ pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, + &info->status); +- pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, ++ pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, + &info->mask); + if (!(info->status & ~info->mask)) + return 0; +@@ -1166,27 +1156,27 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) + info->severity == AER_NONFATAL) { + + /* Link is still healthy for IO reads */ +- pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, ++ pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, + &info->status); +- pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, ++ pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, + &info->mask); + if (!(info->status & ~info->mask)) + return 0; + + /* Get First Error Pointer */ +- 
pci_read_config_dword(dev, pos + PCI_ERR_CAP, &temp); ++ pci_read_config_dword(dev, aer + PCI_ERR_CAP, &temp); + info->first_error = PCI_ERR_CAP_FEP(temp); + + if (info->status & AER_LOG_TLP_MASKS) { + info->tlp_header_valid = 1; + pci_read_config_dword(dev, +- pos + PCI_ERR_HEADER_LOG, &info->tlp.dw0); ++ aer + PCI_ERR_HEADER_LOG, &info->tlp.dw0); + pci_read_config_dword(dev, +- pos + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1); ++ aer + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1); + pci_read_config_dword(dev, +- pos + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2); ++ aer + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2); + pci_read_config_dword(dev, +- pos + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3); ++ aer + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3); + } + } + +@@ -1292,15 +1282,15 @@ static irqreturn_t aer_irq(int irq, void *context) + struct pcie_device *pdev = (struct pcie_device *)context; + struct aer_rpc *rpc = get_service_data(pdev); + struct pci_dev *rp = rpc->rpd; ++ int aer = rp->aer_cap; + struct aer_err_source e_src = {}; +- int pos = rp->aer_cap; + +- pci_read_config_dword(rp, pos + PCI_ERR_ROOT_STATUS, &e_src.status); ++ pci_read_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, &e_src.status); + if (!(e_src.status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV))) + return IRQ_NONE; + +- pci_read_config_dword(rp, pos + PCI_ERR_ROOT_ERR_SRC, &e_src.id); +- pci_write_config_dword(rp, pos + PCI_ERR_ROOT_STATUS, e_src.status); ++ pci_read_config_dword(rp, aer + PCI_ERR_ROOT_ERR_SRC, &e_src.id); ++ pci_write_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, e_src.status); + + if (!kfifo_put(&rpc->aer_fifo, e_src)) + return IRQ_HANDLED; +@@ -1352,7 +1342,7 @@ static void set_downstream_devices_error_reporting(struct pci_dev *dev, + static void aer_enable_rootport(struct aer_rpc *rpc) + { + struct pci_dev *pdev = rpc->rpd; +- int aer_pos; ++ int aer = pdev->aer_cap; + u16 reg16; + u32 reg32; + +@@ -1364,14 +1354,13 @@ static void aer_enable_rootport(struct aer_rpc *rpc) + 
pcie_capability_clear_word(pdev, PCI_EXP_RTCTL, + SYSTEM_ERROR_INTR_ON_MESG_MASK); + +- aer_pos = pdev->aer_cap; + /* Clear error status */ +- pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32); +- pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32); +- pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, &reg32); +- pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32); +- pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, &reg32); +- pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32); ++ pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, &reg32); ++ pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, reg32); ++ pci_read_config_dword(pdev, aer + PCI_ERR_COR_STATUS, &reg32); ++ pci_write_config_dword(pdev, aer + PCI_ERR_COR_STATUS, reg32); ++ pci_read_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, &reg32); ++ pci_write_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, reg32); + + /* + * Enable error reporting for the root port device and downstream port +@@ -1380,9 +1369,9 @@ static void aer_enable_rootport(struct aer_rpc *rpc) + set_downstream_devices_error_reporting(pdev, true); + + /* Enable Root Port's interrupt in response to error messages */ +- pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, &reg32); ++ pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32); + reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; +- pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, reg32); ++ pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32); + } + + /** +@@ -1394,8 +1383,8 @@ static void aer_enable_rootport(struct aer_rpc *rpc) + static void aer_disable_rootport(struct aer_rpc *rpc) + { + struct pci_dev *pdev = rpc->rpd; ++ int aer = pdev->aer_cap; + u32 reg32; +- int pos; + + /* + * Disable error reporting for the root port device and downstream port +@@ -1403,15 +1392,14 @@ static void aer_disable_rootport(struct aer_rpc *rpc) + */ + set_downstream_devices_error_reporting(pdev, false); + 
+- pos = pdev->aer_cap; + /* Disable Root's interrupt in response to error messages */ +- pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, &reg32); ++ pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32); + reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; +- pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, reg32); ++ pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32); + + /* Clear Root's error status reg */ +- pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, &reg32); +- pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, reg32); ++ pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, &reg32); ++ pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, reg32); + } + + /** +@@ -1470,28 +1458,27 @@ static int aer_probe(struct pcie_device *dev) + */ + static pci_ers_result_t aer_root_reset(struct pci_dev *dev) + { ++ int aer = dev->aer_cap; + u32 reg32; +- int pos; + int rc; + +- pos = dev->aer_cap; + + /* Disable Root's interrupt in response to error messages */ +- pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, &reg32); ++ pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32); + reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; +- pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32); ++ pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32); + + rc = pci_bus_error_reset(dev); + pci_printk(KERN_DEBUG, dev, "Root Port link has been reset\n"); + + /* Clear Root Error Status */ +- pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &reg32); +- pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, reg32); ++ pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32); ++ pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32); + + /* Enable Root Port's interrupt in response to error messages */ +- pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, &reg32); ++ pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32); + reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; +- pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, 
reg32); ++ pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32); + + return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; + } +-- +2.27.0 + diff --git a/patches/0759-PCI-ERR-Rename-pci_aer_clear_device_status-to-pcie_c.patch b/patches/0759-PCI-ERR-Rename-pci_aer_clear_device_status-to-pcie_c.patch new file mode 100644 index 00000000..d50f087f --- /dev/null +++ b/patches/0759-PCI-ERR-Rename-pci_aer_clear_device_status-to-pcie_c.patch @@ -0,0 +1,130 @@ +From c3623d39a19138ef735d6b968e8e764e9c9134ea Mon Sep 17 00:00:00 2001 +From: Bjorn Helgaas +Date: Thu, 16 Jul 2020 17:34:30 -0500 +Subject: [PATCH 03/21] PCI/ERR: Rename pci_aer_clear_device_status() to + pcie_clear_device_status() + +mainline inclusion +from mainline-v5.9-rc1 +commit 600a5b4fc8e8f6dad098cd50e0e727cb2a16be46 +category: bugfix +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EAHA +CVE: NA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=600a5b4fc8e8f6dad098cd50e0e727cb2a16be46 + +---------------------------------------------------------------------------- + +pci_aer_clear_device_status() clears the error bits in the PCIe Device +Status Register (PCI_EXP_DEVSTA). Every PCIe device has this register, +regardless of whether it supports AER. + +Rename pci_aer_clear_device_status() to pcie_clear_device_status() to make +clear that it is PCIe-specific but not AER-specific. Move it to +drivers/pci/pci.c, again since it's not AER-specific. No functional change +intended. 
+ +Link: https://lore.kernel.org/r/20200717195619.766662-1-helgaas@kernel.org +Signed-off-by: Bjorn Helgaas +Signed-off-by: YunYi Yang + + Conflicts: + drivers/pci/pci.h + drivers/pci/pcie/aer.c + drivers/pci/pcie/err.c +--- + drivers/pci/pci.c | 8 ++++++++ + drivers/pci/pci.h | 3 +-- + drivers/pci/pcie/aer.c | 10 +--------- + drivers/pci/pcie/err.c | 2 +- + 4 files changed, 11 insertions(+), 12 deletions(-) + +diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c +index e58fe13c9..a1abe33cc 100644 +--- a/drivers/pci/pci.c ++++ b/drivers/pci/pci.c +@@ -1966,6 +1966,14 @@ int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) + } + EXPORT_SYMBOL_GPL(pci_set_pcie_reset_state); + ++void pcie_clear_device_status(struct pci_dev *dev) ++{ ++ u16 sta; ++ ++ pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta); ++ pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta); ++} ++ + /** + * pcie_clear_root_pme_status - Clear root port PME interrupt status. + * @dev: PCIe root port or event collector. 
+diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h +index 8ccf608c7..a29e7fb4a 100644 +--- a/drivers/pci/pci.h ++++ b/drivers/pci/pci.h +@@ -81,6 +81,7 @@ void pci_refresh_power_state(struct pci_dev *dev); + void pci_power_up(struct pci_dev *dev); + void pci_disable_enabled_device(struct pci_dev *dev); + int pci_finish_runtime_suspend(struct pci_dev *dev); ++void pcie_clear_device_status(struct pci_dev *dev); + void pcie_clear_root_pme_status(struct pci_dev *dev); + int __pci_pme_wakeup(struct pci_dev *dev, void *ign); + void pci_pme_restore(struct pci_dev *dev); +@@ -615,13 +616,11 @@ void pci_aer_init(struct pci_dev *dev); + void pci_aer_exit(struct pci_dev *dev); + extern const struct attribute_group aer_stats_attr_group; + void pci_aer_clear_fatal_status(struct pci_dev *dev); +-void pci_aer_clear_device_status(struct pci_dev *dev); + #else + static inline void pci_no_aer(void) { } + static inline void pci_aer_init(struct pci_dev *d) { } + static inline void pci_aer_exit(struct pci_dev *d) { } + static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { } +-static inline void pci_aer_clear_device_status(struct pci_dev *dev) { } + #endif + + #endif /* DRIVERS_PCI_H */ +diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c +index 71feb30a2..a4146f7db 100644 +--- a/drivers/pci/pcie/aer.c ++++ b/drivers/pci/pcie/aer.c +@@ -366,14 +366,6 @@ int pci_disable_pcie_error_reporting(struct pci_dev *dev) + } + EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting); + +-void pci_aer_clear_device_status(struct pci_dev *dev) +-{ +- u16 sta; +- +- pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta); +- pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta); +-} +- + int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) + { + int aer = dev->aer_cap; +@@ -1042,7 +1034,7 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) + if (aer) + pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS, + info->status); +- 
pci_aer_clear_device_status(dev); ++ pcie_clear_device_status(dev); + } else if (info->severity == AER_NONFATAL) + pcie_do_recovery(dev, pci_channel_io_normal, + PCIE_PORT_SERVICE_AER); +diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c +index 68093505a..567e16724 100644 +--- a/drivers/pci/pcie/err.c ++++ b/drivers/pci/pcie/err.c +@@ -233,7 +233,7 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state st + pci_dbg(dev, "broadcast resume message\n"); + pci_walk_bus(bus, report_resume, &status); + +- pci_aer_clear_device_status(dev); ++ pcie_clear_device_status(dev); + pci_cleanup_aer_uncorrect_error_status(dev); + pci_info(dev, "AER: Device recovery successful\n"); + return status; +-- +2.27.0 + diff --git a/patches/0760-PCI-ERR-Clear-PCIe-Device-Status-errors-only-if-OS-o.patch b/patches/0760-PCI-ERR-Clear-PCIe-Device-Status-errors-only-if-OS-o.patch new file mode 100644 index 00000000..7fb7714a --- /dev/null +++ b/patches/0760-PCI-ERR-Clear-PCIe-Device-Status-errors-only-if-OS-o.patch @@ -0,0 +1,134 @@ +From 222d54f9162afa5ec231f408b167aa16558bd031 Mon Sep 17 00:00:00 2001 +From: Jonathan Cameron +Date: Mon, 22 Jun 2020 19:35:23 +0800 +Subject: [PATCH 04/21] PCI/ERR: Clear PCIe Device Status errors only if OS + owns AER + +mainline inclusion +from mainline-v5.9-rc1 +commit 068c29a248b6ddbfdf7bb150b547569759620d36 +category: bugfix +bugzilla: https://e.gitee.com/open_euler/dashboard?issue=I8EAHA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=068c29a248b6ddbfdf7bb150b547569759620d36 + +---------------------------------------------------------------------- + +pcie_clear_device_status() resets the error bits in the PCIe Device Status +Register (PCI_EXP_DEVSTA). + +Previously we did this unconditionally, but on ACPI systems, the _OSC AER +bit negotiates control of the AER capability. 
Per sec 4.5.1 of the System +Firmware Intermediary _OSC and DPC Updates ECN [1], this bit also covers +other error enable/status bits including the following: + + Correctable Error Reporting Enable + Non-Fatal Error Reporting Enable + Fatal Error Reporting Enable + Unsupported Request Reporting Enable + +These bits are all in the PCIe Device Control register (the ECN omitted +"Reporting", but I think that's a typo), so by implication the _OSC AER bit +also applies to the error status bits in the PCIe Device Status register: + + Correctable Error Detected + Non-Fatal Error Detected + Fatal Error Detected + Unsupported Request Detected + +Clear the PCIe Device Status error bits only when the OS controls the AER +capability and related error enable/status bits. If platform firmware +controls the AER capability, firmware is responsible for clearing these +bits. + +One call path leading here is: + + ghes_do_proc + ghes_handle_aer + aer_recover_queue + schedule_work(&aer_recover_work) + ... + aer_recover_work_func + pcie_do_recovery + pcie_clear_device_status + +[1] System Firmware Intermediary (SFI) _OSC and DPC Updates ECN, Feb 24, + 2020, affecting PCI Firmware Specification, Rev. 
3.2 + https://members.pcisig.com/wg/PCI-SIG/document/14076 +[bhelgaas: commit log, move test from pcie_clear_device_status() to callers] +Link: https://lore.kernel.org/r/20200622113523.891666-1-Jonathan.Cameron@huawei.com +Signed-off-by: Jonathan Cameron +Signed-off-by: Bjorn Helgaas +Signed-off-by: YunYi Yang + + Conflicts: + drivers/pci/pcie/err.c +--- + drivers/pci/pcie/aer.c | 13 ++++++++++++- + drivers/pci/pcie/err.c | 3 ++- + drivers/pci/pcie/portdrv.h | 2 ++ + 3 files changed, 16 insertions(+), 2 deletions(-) + +diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c +index a4146f7db..2b6c79bbe 100644 +--- a/drivers/pci/pcie/aer.c ++++ b/drivers/pci/pcie/aer.c +@@ -344,6 +344,16 @@ bool aer_acpi_firmware_first(void) + #define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \ + PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE) + ++int pcie_aer_is_native(struct pci_dev *dev) ++{ ++ struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); ++ ++ if (!dev->aer_cap) ++ return 0; ++ ++ return pcie_ports_native || host->native_aer; ++} ++ + int pci_enable_pcie_error_reporting(struct pci_dev *dev) + { + if (pcie_aer_get_firmware_first(dev)) +@@ -1034,7 +1044,8 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) + if (aer) + pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS, + info->status); +- pcie_clear_device_status(dev); ++ if (pcie_aer_is_native(dev)) ++ pcie_clear_device_status(dev); + } else if (info->severity == AER_NONFATAL) + pcie_do_recovery(dev, pci_channel_io_normal, + PCIE_PORT_SERVICE_AER); +diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c +index 567e16724..055b45386 100644 +--- a/drivers/pci/pcie/err.c ++++ b/drivers/pci/pcie/err.c +@@ -233,7 +233,8 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state st + pci_dbg(dev, "broadcast resume message\n"); + pci_walk_bus(bus, report_resume, &status); + +- pcie_clear_device_status(dev); ++ if (pcie_aer_is_native(dev)) 
++ pcie_clear_device_status(dev); + pci_cleanup_aer_uncorrect_error_status(dev); + pci_info(dev, "AER: Device recovery successful\n"); + return status; +diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h +index c42cb76fa..4798047ed 100644 +--- a/drivers/pci/pcie/portdrv.h ++++ b/drivers/pci/pcie/portdrv.h +@@ -27,8 +27,10 @@ extern bool pcie_ports_dpc_native; + + #ifdef CONFIG_PCIEAER + int pcie_aer_init(void); ++int pcie_aer_is_native(struct pci_dev *dev); + #else + static inline int pcie_aer_init(void) { return 0; } ++static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } + #endif + + #ifdef CONFIG_HOTPLUG_PCI_PCIE +-- +2.27.0 + diff --git a/patches/0761-PCI-AER-Write-AER-Capability-only-when-we-control-it.patch b/patches/0761-PCI-AER-Write-AER-Capability-only-when-we-control-it.patch new file mode 100644 index 00000000..9972ea94 --- /dev/null +++ b/patches/0761-PCI-AER-Write-AER-Capability-only-when-we-control-it.patch @@ -0,0 +1,97 @@ +From 37366f8fbea5d31659abb1a62c679862ae326ab2 Mon Sep 17 00:00:00 2001 +From: Sean V Kelley +Date: Thu, 21 Apr 2022 21:49:40 +0800 +Subject: [PATCH 05/21] PCI/AER: Write AER Capability only when we control it + +mainline inclusion +from mainline-v5.11-rc1 +commit 50cc18fcd3053fb46a09db5a39e6516e9560f765 +category: feature +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EAHA +CVE: NA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=50cc18fcd3053fb46a09db5a39e6516e9560f765 + +---------------------------------------------------------------------------- + +If an OS has not been granted AER control via _OSC, it should not make +changes to PCI_ERR_ROOT_COMMAND and PCI_ERR_ROOT_STATUS related registers. +Per section 4.5.1 of the System Firmware Intermediary (SFI) _OSC and DPC +Updates ECN [1], this bit also covers these aspects of the PCI Express +Advanced Error Reporting. 
Based on the above and earlier discussion [2], +make the following changes: + +Add a check for the native case (i.e., AER control via _OSC) + +Note that the previous "clear, reset, enable" order suggests that the reset +might cause errors that we should ignore. After this commit, those errors +(if any) will remain logged in the PCI_ERR_ROOT_STATUS register. + +[1] System Firmware Intermediary (SFI) _OSC and DPC Updates ECN, Feb 24, + 2020, affecting PCI Firmware Specification, Rev. 3.2 + https://members.pcisig.com/wg/PCI-SIG/document/14076 +[2] https://lore.kernel.org/linux-pci/20201020162820.GA370938@bjorn-Precision-5520/ + +Link: https://lore.kernel.org/r/20201121001036.8560-2-sean.v.kelley@intel.com +Tested-by: Jonathan Cameron # non-native/no RCEC +Signed-off-by: Sean V Kelley +Signed-off-by: Bjorn Helgaas +Signed-off-by: Jiefeng Ou +Reviewed-by: Jay Fang +Reviewed-by: Xiongfeng Wang +Signed-off-by: Zheng Zengkai +Signed-off-by: YunYi Yang + + Conflicts: + drivers/pci/pcie/aer.c +--- + drivers/pci/pcie/aer.c | 29 ++++++++++++++++------------- + 1 file changed, 16 insertions(+), 13 deletions(-) + +diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c +index 2b6c79bbe..caa4c07a5 100644 +--- a/drivers/pci/pcie/aer.c ++++ b/drivers/pci/pcie/aer.c +@@ -1465,23 +1465,26 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) + u32 reg32; + int rc; + +- +- /* Disable Root's interrupt in response to error messages */ +- pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32); +- reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; +- pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32); ++ if (pcie_aer_is_native(dev)) { ++ /* Disable Root's interrupt in response to error messages */ ++ pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32); ++ reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; ++ pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32); ++ } + + rc = pci_bus_error_reset(dev); +- pci_printk(KERN_DEBUG, dev, "Root Port link has been 
reset\n"); ++ pci_printk(KERN_DEBUG, dev, "Root Port link has been reset (%d)\n", rc); + +- /* Clear Root Error Status */ +- pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32); +- pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32); ++ if (pcie_aer_is_native(dev)) { ++ /* Clear Root Error Status */ ++ pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32); ++ pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32); + +- /* Enable Root Port's interrupt in response to error messages */ +- pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32); +- reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; +- pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32); ++ /* Enable Root Port's interrupt in response to error messages */ ++ pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32); ++ reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; ++ pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32); ++ } + + return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; + } +-- +2.27.0 + diff --git a/patches/0762-PCI-ERR-Bind-RCEC-devices-to-the-Root-Port-driver.patch b/patches/0762-PCI-ERR-Bind-RCEC-devices-to-the-Root-Port-driver.patch new file mode 100644 index 00000000..41fa6655 --- /dev/null +++ b/patches/0762-PCI-ERR-Bind-RCEC-devices-to-the-Root-Port-driver.patch @@ -0,0 +1,106 @@ +From 9fc895d79673e004f7023fddf370800a68adcb84 Mon Sep 17 00:00:00 2001 +From: Qiuxu Zhuo +Date: Thu, 21 Apr 2022 21:49:41 +0800 +Subject: [PATCH 06/21] PCI/ERR: Bind RCEC devices to the Root Port driver + +mainline inclusion +from mainline-v5.11-rc1 +commit c9d659b60770db94b898f94947192a94bbf95c5c +category: feature +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EAHA +CVE: NA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c9d659b60770db94b898f94947192a94bbf95c5c + +---------------------------------------------------------------------------- + +If a Root Complex Integrated Endpoint (RCiEP) is implemented, 
it may signal +errors through a Root Complex Event Collector (RCEC). Each RCiEP must be +associated with no more than one RCEC. + +For an RCEC (which is technically not a Bridge), error messages "received" +from associated RCiEPs must be enabled for "transmission" in order to cause +a System Error via the Root Control register or (when the Advanced Error +Reporting Capability is present) reporting via the Root Error Command +register and logging in the Root Error Status register and Error Source +Identification register. + +Given the commonality with Root Ports and the need to also support AER and +PME services for RCECs, extend the Root Port driver to support RCEC devices +by adding the RCEC Class ID to the driver structure. + +Co-developed-by: Sean V Kelley +Link: https://lore.kernel.org/r/20201121001036.8560-3-sean.v.kelley@intel.com +Tested-by: Jonathan Cameron # non-native/no RCEC +Signed-off-by: Sean V Kelley +Signed-off-by: Qiuxu Zhuo +Signed-off-by: Bjorn Helgaas +Signed-off-by: Jiefeng Ou +Reviewed-by: Jonathan Cameron +Reviewed-by: Kuppuswamy Sathyanarayanan +Reviewed-by: Jay Fang +Reviewed-by: Xiongfeng Wang +Signed-off-by: Zheng Zengkai +Signed-off-by: YunYi Yang +--- + drivers/pci/pcie/portdrv_pci.c | 5 ++++- + include/linux/pci_ids.h | 1 + + include/uapi/linux/pci_regs.h | 7 +++++++ + 3 files changed, 12 insertions(+), 1 deletion(-) + +diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c +index 94c2fd71c..557545410 100644 +--- a/drivers/pci/pcie/portdrv_pci.c ++++ b/drivers/pci/pcie/portdrv_pci.c +@@ -108,7 +108,8 @@ static int pcie_portdrv_probe(struct pci_dev *dev, + if (!pci_is_pcie(dev) || + ((pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT) && + (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM) && +- (pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM))) ++ (pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM) && ++ (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC))) + return -ENODEV; + + status = pcie_port_device_register(dev); +@@ -197,6 +198,8 
@@ static const struct pci_device_id port_pci_ids[] = { + { PCI_DEVICE_CLASS(((PCI_CLASS_BRIDGE_PCI << 8) | 0x00), ~0) }, + /* subtractive decode PCI-to-PCI bridge, class type is 060401h */ + { PCI_DEVICE_CLASS(((PCI_CLASS_BRIDGE_PCI << 8) | 0x01), ~0) }, ++ /* handle any Root Complex Event Collector */ ++ { PCI_DEVICE_CLASS(((PCI_CLASS_SYSTEM_RCEC << 8) | 0x00), ~0) }, + { }, + }; + +diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h +index 37b493192..952f56a4f 100644 +--- a/include/linux/pci_ids.h ++++ b/include/linux/pci_ids.h +@@ -81,6 +81,7 @@ + #define PCI_CLASS_SYSTEM_RTC 0x0803 + #define PCI_CLASS_SYSTEM_PCI_HOTPLUG 0x0804 + #define PCI_CLASS_SYSTEM_SDHCI 0x0805 ++#define PCI_CLASS_SYSTEM_RCEC 0x0807 + #define PCI_CLASS_SYSTEM_OTHER 0x0880 + + #define PCI_BASE_CLASS_INPUT 0x09 +diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h +index c209c4d17..514ac4398 100644 +--- a/include/uapi/linux/pci_regs.h ++++ b/include/uapi/linux/pci_regs.h +@@ -823,6 +823,13 @@ + #define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */ + #define PCI_EXT_CAP_PWR_SIZEOF 16 + ++/* Root Complex Event Collector Endpoint Association */ ++#define PCI_RCEC_RCIEP_BITMAP 4 /* Associated Bitmap for RCiEPs */ ++#define PCI_RCEC_BUSN 8 /* RCEC Associated Bus Numbers */ ++#define PCI_RCEC_BUSN_REG_VER 0x02 /* Least version with BUSN present */ ++#define PCI_RCEC_BUSN_NEXT(x) (((x) >> 8) & 0xff) ++#define PCI_RCEC_BUSN_LAST(x) (((x) >> 16) & 0xff) ++ + /* Vendor-Specific (VSEC, PCI_EXT_CAP_ID_VNDR) */ + #define PCI_VNDR_HEADER 4 /* Vendor-Specific Header */ + #define PCI_VNDR_HEADER_ID(x) ((x) & 0xffff) +-- +2.27.0 + diff --git a/patches/0763-PCI-ERR-Cache-RCEC-EA-Capability-offset-in-pci_init_.patch b/patches/0763-PCI-ERR-Cache-RCEC-EA-Capability-offset-in-pci_init_.patch new file mode 100644 index 00000000..88317499 --- /dev/null +++ b/patches/0763-PCI-ERR-Cache-RCEC-EA-Capability-offset-in-pci_init_.patch @@ -0,0 +1,207 @@ +From 
3774ed9c8bdd0381085b91e178bf7cd3ab298018 Mon Sep 17 00:00:00 2001 +From: Sean V Kelley +Date: Thu, 21 Apr 2022 21:49:42 +0800 +Subject: [PATCH 07/21] PCI/ERR: Cache RCEC EA Capability offset in + pci_init_capabilities() + +mainline inclusion +from mainline-v5.11-rc1 +commit 90655631988f8f501529e6de5f13614389717ead +category: feature +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EAHA +CVE: NA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=90655631988f8f501529e6de5f13614389717ead + +---------------------------------------------------------------------------- + +Extend support for Root Complex Event Collectors by decoding and caching +the RCEC Endpoint Association Extended Capabilities when enumerating. Use +that cached information for later error source reporting. See PCIe r5.0, +sec 7.9.10. + +Co-developed-by: Qiuxu Zhuo +Link: https://lore.kernel.org/r/20201121001036.8560-4-sean.v.kelley@intel.com +Tested-by: Jonathan Cameron # non-native/no RCEC +Signed-off-by: Qiuxu Zhuo +Signed-off-by: Sean V Kelley +Signed-off-by: Bjorn Helgaas +Signed-off-by: Jiefeng Ou +Reviewed-by: Jonathan Cameron +Reviewed-by: Jay Fang +Reviewed-by: Xiongfeng Wang +Signed-off-by: Zheng Zengkai +Signed-off-by: YunYi Yang + + Conflicts: + drivers/pci/probe.c + include/linux/pci.h +--- + drivers/pci/pci.h | 17 +++++++++++ + drivers/pci/pcie/Makefile | 2 +- + drivers/pci/pcie/rcec.c | 59 +++++++++++++++++++++++++++++++++++++++ + drivers/pci/probe.c | 4 +++ + include/linux/pci.h | 4 +++ + 5 files changed, 85 insertions(+), 1 deletion(-) + create mode 100644 drivers/pci/pcie/rcec.c + +diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h +index a29e7fb4a..3b52c3b57 100644 +--- a/drivers/pci/pci.h ++++ b/drivers/pci/pci.h +@@ -427,6 +427,15 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info); + void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); + #endif /* CONFIG_PCIEAER */ + ++#ifdef 
CONFIG_PCIEPORTBUS ++/* Cached RCEC Endpoint Association */ ++struct rcec_ea { ++ u8 nextbusn; ++ u8 lastbusn; ++ u32 bitmap; ++}; ++#endif ++ + #ifdef CONFIG_PCIE_DPC + void pci_save_dpc_state(struct pci_dev *dev); + void pci_restore_dpc_state(struct pci_dev *dev); +@@ -435,6 +444,14 @@ static inline void pci_save_dpc_state(struct pci_dev *dev) {} + static inline void pci_restore_dpc_state(struct pci_dev *dev) {} + #endif + ++#ifdef CONFIG_PCIEPORTBUS ++void pci_rcec_init(struct pci_dev *dev); ++void pci_rcec_exit(struct pci_dev *dev); ++#else ++static inline void pci_rcec_init(struct pci_dev *dev) {} ++static inline void pci_rcec_exit(struct pci_dev *dev) {} ++#endif ++ + #ifdef CONFIG_PCI_ATS + void pci_restore_ats_state(struct pci_dev *dev); + #else +diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile +index ab514083d..2d7c290fd 100644 +--- a/drivers/pci/pcie/Makefile ++++ b/drivers/pci/pcie/Makefile +@@ -2,7 +2,7 @@ + # + # Makefile for PCI Express features and port driver + +-pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o ++pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o rcec.o + + obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o + +diff --git a/drivers/pci/pcie/rcec.c b/drivers/pci/pcie/rcec.c +new file mode 100644 +index 000000000..038e9d706 +--- /dev/null ++++ b/drivers/pci/pcie/rcec.c +@@ -0,0 +1,59 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Root Complex Event Collector Support ++ * ++ * Authors: ++ * Sean V Kelley ++ * Qiuxu Zhuo ++ * ++ * Copyright (C) 2020 Intel Corp. 
++ */ ++ ++#include <linux/kernel.h> ++#include <linux/pci.h> ++#include <linux/pci_regs.h> ++ ++#include "../pci.h" ++ ++void pci_rcec_init(struct pci_dev *dev) ++{ ++ struct rcec_ea *rcec_ea; ++ u32 rcec, hdr, busn; ++ u8 ver; ++ ++ /* Only for Root Complex Event Collectors */ ++ if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) ++ return; ++ ++ rcec = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_RCEC); ++ if (!rcec) ++ return; ++ ++ rcec_ea = kzalloc(sizeof(*rcec_ea), GFP_KERNEL); ++ if (!rcec_ea) ++ return; ++ ++ pci_read_config_dword(dev, rcec + PCI_RCEC_RCIEP_BITMAP, ++ &rcec_ea->bitmap); ++ ++ /* Check whether RCEC BUSN register is present */ ++ pci_read_config_dword(dev, rcec, &hdr); ++ ver = PCI_EXT_CAP_VER(hdr); ++ if (ver >= PCI_RCEC_BUSN_REG_VER) { ++ pci_read_config_dword(dev, rcec + PCI_RCEC_BUSN, &busn); ++ rcec_ea->nextbusn = PCI_RCEC_BUSN_NEXT(busn); ++ rcec_ea->lastbusn = PCI_RCEC_BUSN_LAST(busn); ++ } else { ++ /* Avoid later ver check by setting nextbusn */ ++ rcec_ea->nextbusn = 0xff; ++ rcec_ea->lastbusn = 0x00; ++ } ++ ++ dev->rcec_ea = rcec_ea; ++} ++ ++void pci_rcec_exit(struct pci_dev *dev) ++{ ++ kfree(dev->rcec_ea); ++ dev->rcec_ea = NULL; ++} +diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c +index 055a2c47a..bfa383add 100644 +--- a/drivers/pci/probe.c ++++ b/drivers/pci/probe.c +@@ -2314,6 +2314,7 @@ static void pci_configure_device(struct pci_dev *dev) + static void pci_release_capabilities(struct pci_dev *dev) + { + pci_aer_exit(dev); ++ pci_rcec_exit(dev); + pci_vpd_release(dev); + pci_iov_release(dev); + pci_free_cap_save_buffers(dev); +@@ -2963,6 +2964,9 @@ static void pci_init_capabilities(struct pci_dev *dev) + /* Advanced Error Reporting */ + pci_aer_init(dev); + ++ /* Root Complex Event Collector */ ++ pci_rcec_init(dev); ++ + pcie_report_downtraining(dev); + + if (pci_probe_reset_function(dev) == 0) +diff --git a/include/linux/pci.h b/include/linux/pci.h +index bc49349fc..73ea8ea4a 100644 +--- a/include/linux/pci.h ++++ b/include/linux/pci.h +@@ -287,6 +287,7 @@ struct 
pcie_link_state; + struct pci_vpd; + struct pci_sriov; + struct pci_ats; ++struct rcec_ea; + + /* The pci_dev structure describes PCI devices */ + struct pci_dev { +@@ -309,6 +310,9 @@ struct pci_dev { + #ifdef CONFIG_PCIEAER + u16 aer_cap; /* AER capability offset */ + struct aer_stats *aer_stats; /* AER stats for this device */ ++#endif ++#ifdef CONFIG_PCIEPORTBUS ++ struct rcec_ea *rcec_ea; /* RCEC cached endpoint association */ + #endif + u8 pcie_cap; /* PCIe capability offset */ + u8 msi_cap; /* MSI capability offset */ +-- +2.27.0 + diff --git a/patches/0764-PCI-ERR-Simplify-by-using-pci_upstream_bridge.patch b/patches/0764-PCI-ERR-Simplify-by-using-pci_upstream_bridge.patch new file mode 100644 index 00000000..e07c5287 --- /dev/null +++ b/patches/0764-PCI-ERR-Simplify-by-using-pci_upstream_bridge.patch @@ -0,0 +1,50 @@ +From 4d4f42e16a747aec76fcb93530cd67fecd7c8bd0 Mon Sep 17 00:00:00 2001 +From: Sean V Kelley +Date: Thu, 21 Apr 2022 21:49:44 +0800 +Subject: [PATCH 08/21] PCI/ERR: Simplify by using pci_upstream_bridge() + +mainline inclusion +from mainline-v5.11-rc1 +commit 5d69dcc9f839bd2d5cac7a098712f52149e1673f +category: feature +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EAHA +CVE: NA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5d69dcc9f839bd2d5cac7a098712f52149e1673f + +---------------------------------------------------------------------------- + +Use pci_upstream_bridge() in place of dev->bus->self. No functional change +intended. 
+ +Link: https://lore.kernel.org/r/20201121001036.8560-6-sean.v.kelley@intel.com +Tested-by: Jonathan Cameron # non-native/no RCEC +Signed-off-by: Sean V Kelley +Signed-off-by: Bjorn Helgaas +Signed-off-by: Jiefeng Ou +Reviewed-by: Kuppuswamy Sathyanarayanan +Acked-by: Jonathan Cameron +Reviewed-by: Jay Fang +Reviewed-by: Xiongfeng Wang +Signed-off-by: Zheng Zengkai +Signed-off-by: YunYi Yang +--- + drivers/pci/pcie/err.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c +index 055b45386..c776629be 100644 +--- a/drivers/pci/pcie/err.c ++++ b/drivers/pci/pcie/err.c +@@ -197,7 +197,7 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state st + */ + if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)) +- dev = dev->bus->self; ++ dev = pci_upstream_bridge(dev); + bus = dev->subordinate; + + pci_dbg(dev, "broadcast error_detected message\n"); +-- +2.27.0 + diff --git a/patches/0765-PCI-ERR-Simplify-by-computing-pci_pcie_type-once.patch b/patches/0765-PCI-ERR-Simplify-by-computing-pci_pcie_type-once.patch new file mode 100644 index 00000000..d9895fe9 --- /dev/null +++ b/patches/0765-PCI-ERR-Simplify-by-computing-pci_pcie_type-once.patch @@ -0,0 +1,107 @@ +From 31574109bca2927a7d284b3ae42f4b1795c372b6 Mon Sep 17 00:00:00 2001 +From: Sean V Kelley +Date: Thu, 21 Apr 2022 21:49:45 +0800 +Subject: [PATCH 09/21] PCI/ERR: Simplify by computing pci_pcie_type() once + +mainline inclusion +from mainline-v5.11-rc1 +commit 480ef7cb9fcebda7b28cbed4f6cdcf0a02f4a6ca +category: feature +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EAHA +CVE: NA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=480ef7cb9fcebda7b28cbed4f6cdcf0a02f4a6ca + +---------------------------------------------------------------------------- + +Instead of calling pci_pcie_type(dev) twice, call it once and save the 
+result. No functional change intended. + +Link: https://lore.kernel.org/r/20201121001036.8560-7-sean.v.kelley@intel.com +Tested-by: Jonathan Cameron # non-native/no RCEC +Signed-off-by: Sean V Kelley +Signed-off-by: Bjorn Helgaas +Signed-off-by: Jiefeng Ou +Acked-by: Jonathan Cameron +Reviewed-by: Jay Fang +Reviewed-by: Xiongfeng Wang +Signed-off-by: Zheng Zengkai +Signed-off-by: YunYi Yang +--- + drivers/pci/pcie/aer.c | 5 +++-- + drivers/pci/pcie/err.c | 5 +++-- + drivers/pci/pcie/portdrv_pci.c | 9 +++++---- + 3 files changed, 11 insertions(+), 8 deletions(-) + +diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c +index caa4c07a5..788308f9d 100644 +--- a/drivers/pci/pcie/aer.c ++++ b/drivers/pci/pcie/aer.c +@@ -1136,6 +1136,7 @@ EXPORT_SYMBOL_GPL(aer_recover_queue); + */ + int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) + { ++ int type = pci_pcie_type(dev); + int aer = dev->aer_cap; + int temp; + +@@ -1154,8 +1155,8 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) + &info->mask); + if (!(info->status & ~info->mask)) + return 0; +- } else if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || +- pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM || ++ } else if (type == PCI_EXP_TYPE_ROOT_PORT || ++ type == PCI_EXP_TYPE_DOWNSTREAM || + info->severity == AER_NONFATAL) { + + /* Link is still healthy for IO reads */ +diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c +index c776629be..0c3493708 100644 +--- a/drivers/pci/pcie/err.c ++++ b/drivers/pci/pcie/err.c +@@ -188,6 +188,7 @@ static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) + pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, + u32 service) + { ++ int type = pci_pcie_type(dev); + pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; + struct pci_bus *bus; + +@@ -195,8 +196,8 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state st + * Error recovery runs on all 
subordinates of the first downstream port. + * If the downstream port detected the error, it is cleared at the end. + */ +- if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || +- pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)) ++ if (!(type == PCI_EXP_TYPE_ROOT_PORT || ++ type == PCI_EXP_TYPE_DOWNSTREAM)) + dev = pci_upstream_bridge(dev); + bus = dev->subordinate; + +diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c +index 557545410..6e20c5fb0 100644 +--- a/drivers/pci/pcie/portdrv_pci.c ++++ b/drivers/pci/pcie/portdrv_pci.c +@@ -103,13 +103,14 @@ static const struct dev_pm_ops pcie_portdrv_pm_ops = { + static int pcie_portdrv_probe(struct pci_dev *dev, + const struct pci_device_id *id) + { ++ int type = pci_pcie_type(dev); + int status; + + if (!pci_is_pcie(dev) || +- ((pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT) && +- (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM) && +- (pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM) && +- (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC))) ++ ((type != PCI_EXP_TYPE_ROOT_PORT) && ++ (type != PCI_EXP_TYPE_UPSTREAM) && ++ (type != PCI_EXP_TYPE_DOWNSTREAM) && ++ (type != PCI_EXP_TYPE_RC_EC))) + return -ENODEV; + + status = pcie_port_device_register(dev); +-- +2.27.0 + diff --git a/patches/0766-PCI-ERR-Use-bridge-for-clarity-in-pcie_do_recovery.patch b/patches/0766-PCI-ERR-Use-bridge-for-clarity-in-pcie_do_recovery.patch new file mode 100644 index 00000000..3ed5c359 --- /dev/null +++ b/patches/0766-PCI-ERR-Use-bridge-for-clarity-in-pcie_do_recovery.patch @@ -0,0 +1,127 @@ +From 3d982d1ae467ffb661ac2434ab2153a459c69f69 Mon Sep 17 00:00:00 2001 +From: Sean V Kelley +Date: Thu, 21 Apr 2022 21:49:46 +0800 +Subject: [PATCH 10/21] PCI/ERR: Use "bridge" for clarity in pcie_do_recovery() + +mainline inclusion +from mainline-v5.11-rc1 +commit 0791721d800790e6e533bd8467df67f0dc4f2fec +category: feature +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EAHA +CVE: NA + +Reference: 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0791721d800790e6e533bd8467df67f0dc4f2fec + +---------------------------------------------------------------------------- + +pcie_do_recovery() may be called with "dev" being either a bridge (Root +Port or Switch Downstream Port) or an Endpoint. The bulk of the function +deals with the bridge, so if we start with an Endpoint, we reset "dev" to +be the bridge leading to it. + +For clarity, replace "dev" in the body of the function with "bridge". No +functional change intended. + +Link: https://lore.kernel.org/r/20201121001036.8560-8-sean.v.kelley@intel.com +Tested-by: Jonathan Cameron # non-native/no RCEC +Signed-off-by: Sean V Kelley +Signed-off-by: Bjorn Helgaas +Signed-off-by: Jiefeng Ou +Reviewed-by: Kuppuswamy Sathyanarayanan +Acked-by: Jonathan Cameron +Reviewed-by: Jay Fang +Reviewed-by: Xiongfeng Wang +Signed-off-by: Zheng Zengkai +Signed-off-by: YunYi Yang + + Conflicts: + drivers/pci/pcie/err.c +--- + drivers/pci/pcie/err.c | 35 +++++++++++++++++++---------------- + 1 file changed, 19 insertions(+), 16 deletions(-) + +diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c +index 0c3493708..f8cb3ab73 100644 +--- a/drivers/pci/pcie/err.c ++++ b/drivers/pci/pcie/err.c +@@ -189,22 +189,25 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state st + u32 service) + { + int type = pci_pcie_type(dev); +- pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; ++ struct pci_dev *bridge; + struct pci_bus *bus; ++ pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; + + /* +- * Error recovery runs on all subordinates of the first downstream port. +- * If the downstream port detected the error, it is cleared at the end. ++ * Error recovery runs on all subordinates of the bridge. If the ++ * bridge detected the error, it is cleared at the end. 
+ */ + if (!(type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_DOWNSTREAM)) +- dev = pci_upstream_bridge(dev); +- bus = dev->subordinate; ++ bridge = pci_upstream_bridge(dev); ++ else ++ bridge = dev; + +- pci_dbg(dev, "broadcast error_detected message\n"); ++ bus = bridge->subordinate; ++ pci_dbg(bridge, "broadcast error_detected message\n"); + if (state == pci_channel_io_frozen) { + pci_walk_bus(bus, report_frozen_detected, &status); +- status = reset_link(dev, service); ++ status = reset_link(bridge, service); + if (status != PCI_ERS_RESULT_RECOVERED) + goto failed; + } else { +@@ -213,7 +216,7 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state st + + if (status == PCI_ERS_RESULT_CAN_RECOVER) { + status = PCI_ERS_RESULT_RECOVERED; +- pci_dbg(dev, "broadcast mmio_enabled message\n"); ++ pci_dbg(bridge, "broadcast mmio_enabled message\n"); + pci_walk_bus(bus, report_mmio_enabled, &status); + } + +@@ -224,27 +227,27 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state st + * drivers' slot_reset callbacks? 
+ */ + status = PCI_ERS_RESULT_RECOVERED; +- pci_dbg(dev, "broadcast slot_reset message\n"); ++ pci_dbg(bridge, "broadcast slot_reset message\n"); + pci_walk_bus(bus, report_slot_reset, &status); + } + + if (status != PCI_ERS_RESULT_RECOVERED) + goto failed; + +- pci_dbg(dev, "broadcast resume message\n"); ++ pci_dbg(bridge, "broadcast resume message\n"); + pci_walk_bus(bus, report_resume, &status); + +- if (pcie_aer_is_native(dev)) +- pcie_clear_device_status(dev); +- pci_cleanup_aer_uncorrect_error_status(dev); +- pci_info(dev, "AER: Device recovery successful\n"); ++ if (pcie_aer_is_native(bridge)) ++ pcie_clear_device_status(bridge); ++ pci_cleanup_aer_uncorrect_error_status(bridge); ++ pci_info(bridge, "AER: Device recovery successful\n"); + return status; + + failed: +- pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); ++ pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT); + + /* TODO: Should kernel panic here? */ +- pci_info(dev, "AER: Device recovery failed\n"); ++ pci_info(bridge, "AER: Device recovery failed\n"); + + return status; + } +-- +2.27.0 + diff --git a/patches/0767-PCI-ERR-Avoid-negated-conditional-for-clarity.patch b/patches/0767-PCI-ERR-Avoid-negated-conditional-for-clarity.patch new file mode 100644 index 00000000..071d79ab --- /dev/null +++ b/patches/0767-PCI-ERR-Avoid-negated-conditional-for-clarity.patch @@ -0,0 +1,57 @@ +From a5d8c7f3131f3a42604f96f80c2d3eeac53d35bb Mon Sep 17 00:00:00 2001 +From: Sean V Kelley +Date: Thu, 21 Apr 2022 21:49:47 +0800 +Subject: [PATCH 11/21] PCI/ERR: Avoid negated conditional for clarity + +mainline inclusion +from mainline-v5.11-rc1 +commit 3d7d8fc78f4b504819882278fcfe10784eb985fa +category: feature +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EAHA +CVE: NA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3d7d8fc78f4b504819882278fcfe10784eb985fa + +---------------------------------------------------------------------------- + +Reverse the sense of the 
Root Port/Downstream Port conditional for clarity. +No functional change intended. + +Link: https://lore.kernel.org/r/20201121001036.8560-9-sean.v.kelley@intel.com +Tested-by: Jonathan Cameron # non-native/no RCEC +Signed-off-by: Sean V Kelley +Signed-off-by: Bjorn Helgaas +Signed-off-by: Jiefeng Ou +Reviewed-by: Kuppuswamy Sathyanarayanan +Acked-by: Jonathan Cameron +Reviewed-by: Jay Fang +Reviewed-by: Xiongfeng Wang +Signed-off-by: Zheng Zengkai +Signed-off-by: YunYi Yang +--- + drivers/pci/pcie/err.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c +index f8cb3ab73..35d9dcbdb 100644 +--- a/drivers/pci/pcie/err.c ++++ b/drivers/pci/pcie/err.c +@@ -197,11 +197,11 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state st + * Error recovery runs on all subordinates of the bridge. If the + * bridge detected the error, it is cleared at the end. + */ +- if (!(type == PCI_EXP_TYPE_ROOT_PORT || +- type == PCI_EXP_TYPE_DOWNSTREAM)) +- bridge = pci_upstream_bridge(dev); +- else ++ if (type == PCI_EXP_TYPE_ROOT_PORT || ++ type == PCI_EXP_TYPE_DOWNSTREAM) + bridge = dev; ++ else ++ bridge = pci_upstream_bridge(dev); + + bus = bridge->subordinate; + pci_dbg(bridge, "broadcast error_detected message\n"); +-- +2.27.0 + diff --git a/patches/0768-PCI-ERR-Add-pci_walk_bridge-to-pcie_do_recovery.patch b/patches/0768-PCI-ERR-Add-pci_walk_bridge-to-pcie_do_recovery.patch new file mode 100644 index 00000000..53dbc876 --- /dev/null +++ b/patches/0768-PCI-ERR-Add-pci_walk_bridge-to-pcie_do_recovery.patch @@ -0,0 +1,117 @@ +From 3fd3737daa485e4fbcded63b58cacbec553a49c3 Mon Sep 17 00:00:00 2001 +From: Sean V Kelley +Date: Thu, 21 Apr 2022 21:49:48 +0800 +Subject: [PATCH 12/21] PCI/ERR: Add pci_walk_bridge() to pcie_do_recovery() + +mainline inclusion +from mainline-v5.11-rc1 +commit 05e9ae19ab83881a0f33025bd1288e41e552a34b +category: feature +bugzilla: 
https://gitee.com/src-openeuler/kernel/issues/I8EAHA +CVE: NA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=05e9ae19ab83881a0f33025bd1288e41e552a34b + +---------------------------------------------------------------------------- + +Consolidate subordinate bus checks with pci_walk_bus() into +pci_walk_bridge() for walking below potentially AER affected bridges. + +Link: https://lore.kernel.org/r/20201121001036.8560-10-sean.v.kelley@intel.com +Tested-by: Jonathan Cameron # non-native/no RCEC +Signed-off-by: Sean V Kelley +Signed-off-by: Bjorn Helgaas +Signed-off-by: Jiefeng Ou +Reviewed-by: Jay Fang +Reviewed-by: Xiongfeng Wang +Signed-off-by: Zheng Zengkai +Signed-off-by: YunYi Yang + + Conflicts: + drivers/pci/pcie/err.c +--- + drivers/pci/pcie/err.c | 30 +++++++++++++++++++++++------- + 1 file changed, 23 insertions(+), 7 deletions(-) + +diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c +index 35d9dcbdb..dcfb949c3 100644 +--- a/drivers/pci/pcie/err.c ++++ b/drivers/pci/pcie/err.c +@@ -144,6 +144,24 @@ static int report_resume(struct pci_dev *dev, void *data) + return 0; + } + ++/** ++ * pci_walk_bridge - walk bridges potentially AER affected ++ * @bridge: bridge which may be a Port ++ * @cb: callback to be called for each device found ++ * @userdata: arbitrary pointer to be passed to callback ++ * ++ * If the device provided is a bridge, walk the subordinate bus, including ++ * any bridged devices on buses under this bus. Call the provided callback ++ * on each device found. 
++ */ ++static void pci_walk_bridge(struct pci_dev *bridge, ++ int (*cb)(struct pci_dev *, void *), ++ void *userdata) ++{ ++ if (bridge->subordinate) ++ pci_walk_bus(bridge->subordinate, cb, userdata); ++} ++ + /** + * default_reset_link - default reset function + * @dev: pointer to pci_dev data structure +@@ -190,7 +208,6 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state st + { + int type = pci_pcie_type(dev); + struct pci_dev *bridge; +- struct pci_bus *bus; + pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; + + /* +@@ -203,21 +220,20 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state st + else + bridge = pci_upstream_bridge(dev); + +- bus = bridge->subordinate; + pci_dbg(bridge, "broadcast error_detected message\n"); + if (state == pci_channel_io_frozen) { +- pci_walk_bus(bus, report_frozen_detected, &status); ++ pci_walk_bridge(bridge, report_frozen_detected, &status); + status = reset_link(bridge, service); + if (status != PCI_ERS_RESULT_RECOVERED) + goto failed; + } else { +- pci_walk_bus(bus, report_normal_detected, &status); ++ pci_walk_bridge(bridge, report_normal_detected, &status); + } + + if (status == PCI_ERS_RESULT_CAN_RECOVER) { + status = PCI_ERS_RESULT_RECOVERED; + pci_dbg(bridge, "broadcast mmio_enabled message\n"); +- pci_walk_bus(bus, report_mmio_enabled, &status); ++ pci_walk_bridge(bridge, report_mmio_enabled, &status); + } + + if (status == PCI_ERS_RESULT_NEED_RESET) { +@@ -228,14 +244,14 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state st + */ + status = PCI_ERS_RESULT_RECOVERED; + pci_dbg(bridge, "broadcast slot_reset message\n"); +- pci_walk_bus(bus, report_slot_reset, &status); ++ pci_walk_bridge(bridge, report_slot_reset, &status); + } + + if (status != PCI_ERS_RESULT_RECOVERED) + goto failed; + + pci_dbg(bridge, "broadcast resume message\n"); +- pci_walk_bus(bus, report_resume, &status); ++ pci_walk_bridge(bridge, report_resume, &status); 
+ + if (pcie_aer_is_native(bridge)) + pcie_clear_device_status(bridge); +-- +2.27.0 + diff --git a/patches/0769-PCI-ERR-Clear-AER-status-only-when-we-control-AER.patch b/patches/0769-PCI-ERR-Clear-AER-status-only-when-we-control-AER.patch new file mode 100644 index 00000000..f02c62da --- /dev/null +++ b/patches/0769-PCI-ERR-Clear-AER-status-only-when-we-control-AER.patch @@ -0,0 +1,73 @@ +From e3bd8459b8451e090625de5918d6e3168aac2cb6 Mon Sep 17 00:00:00 2001 +From: Sean V Kelley +Date: Thu, 21 Apr 2022 21:49:49 +0800 +Subject: [PATCH 13/21] PCI/ERR: Clear AER status only when we control AER + +mainline inclusion +from mainline-v5.11-rc1 +commit aa344bc8b727b47b4350b59d8166216a3f351e55 +category: feature +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EAHA +CVE: NA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=aa344bc8b727b47b4350b59d8166216a3f351e55 + +---------------------------------------------------------------------------- + +In some cases a bridge may not exist as the hardware controlling may be +handled only by firmware and so is not visible to the OS. This scenario is +also possible in future use cases involving non-native use of RCECs by +firmware. In this scenario, we expect the platform to retain control of the +bridge and to clear error status itself. + +Clear error status only when the OS has native control of AER. 
+ +Signed-off-by: Sean V Kelley +Signed-off-by: Bjorn Helgaas +Signed-off-by: Jiefeng Ou +Reviewed-by: Jay Fang +Reviewed-by: Xiongfeng Wang +Signed-off-by: Zheng Zengkai +Signed-off-by: YunYi Yang + + Conflicts: + drivers/pci/pcie/err.c +--- + drivers/pci/pcie/err.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c +index dcfb949c3..4637685d1 100644 +--- a/drivers/pci/pcie/err.c ++++ b/drivers/pci/pcie/err.c +@@ -209,6 +209,7 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state st + int type = pci_pcie_type(dev); + struct pci_dev *bridge; + pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; ++ struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); + + /* + * Error recovery runs on all subordinates of the bridge. If the +@@ -253,9 +254,17 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state st + pci_dbg(bridge, "broadcast resume message\n"); + pci_walk_bridge(bridge, report_resume, &status); + +- if (pcie_aer_is_native(bridge)) ++ /* ++ * If we have native control of AER, clear error status in the Root ++ * Port or Downstream Port that signaled the error. If the ++ * platform retained control of AER, it is responsible for clearing ++ * this status. In that case, the signaling device may not even be ++ * visible to the OS. 
++ */ ++ if (host->native_aer || pcie_ports_native) { + pcie_clear_device_status(bridge); +- pci_cleanup_aer_uncorrect_error_status(bridge); ++ pci_cleanup_aer_uncorrect_error_status(bridge); ++ } + pci_info(bridge, "AER: Device recovery successful\n"); + return status; + +-- +2.27.0 + diff --git a/patches/0770-PCI-ERR-Recover-from-RCEC-AER-errors.patch b/patches/0770-PCI-ERR-Recover-from-RCEC-AER-errors.patch new file mode 100644 index 00000000..5566c122 --- /dev/null +++ b/patches/0770-PCI-ERR-Recover-from-RCEC-AER-errors.patch @@ -0,0 +1,220 @@ +From 7bff41c61463287d4833b9100b1fec1337433920 Mon Sep 17 00:00:00 2001 +From: Sean V Kelley +Date: Thu, 21 Apr 2022 21:49:50 +0800 +Subject: [PATCH 14/21] PCI/ERR: Recover from RCEC AER errors + +mainline inclusion +from mainline-v5.11-rc1 +commit a175102b0a82fc57853a9e611c42d1d6172e5180 +category: feature +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EAHA +CVE: NA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a175102b0a82fc57853a9e611c42d1d6172e5180 + +---------------------------------------------------------------------------- + +A Root Complex Event Collector (RCEC) collects and signals AER errors that +were detected by Root Complex Integrated Endpoints (RCiEPs), but it may +also signal errors it detects itself. This is analogous to errors detected +and signaled by a Root Port. + +Update the AER service driver to claim RCECs in addition to Root Ports. +Add support for handling RCEC-detected AER errors. This does not +include handling RCiEP-detected errors that are signaled by the RCEC. + +Note that we expect these errors only from the native AER and APEI paths, +not from DPC or EDR. 
+ +[bhelgaas: split from combined RCEC/RCiEP patch, commit log] +Signed-off-by: Sean V Kelley +Signed-off-by: Bjorn Helgaas +Signed-off-by: Jiefeng Ou +Reviewed-by: Jay Fang +Reviewed-by: Xiongfeng Wang +Signed-off-by: Zheng Zengkai +Signed-off-by: YunYi Yang + + Conflicts: + drivers/pci/pcie/aer.c + drivers/pci/pcie/err.c +--- + drivers/pci/pcie/aer.c | 58 +++++++++++++++++++++++++++++------------- + drivers/pci/pcie/err.c | 19 +++++++++++--- + 2 files changed, 56 insertions(+), 21 deletions(-) + +diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c +index 788308f9d..5628b943c 100644 +--- a/drivers/pci/pcie/aer.c ++++ b/drivers/pci/pcie/aer.c +@@ -433,7 +433,8 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) + return -EIO; + + port_type = pci_pcie_type(dev); +- if (port_type == PCI_EXP_TYPE_ROOT_PORT) { ++ if (port_type == PCI_EXP_TYPE_ROOT_PORT || ++ port_type == PCI_EXP_TYPE_RC_EC) { + pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &status); + pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, status); + } +@@ -698,7 +699,8 @@ static umode_t aer_stats_attrs_are_visible(struct kobject *kobj, + if ((a == &dev_attr_aer_rootport_total_err_cor.attr || + a == &dev_attr_aer_rootport_total_err_fatal.attr || + a == &dev_attr_aer_rootport_total_err_nonfatal.attr) && +- pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) ++ ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) && ++ (pci_pcie_type(pdev) != PCI_EXP_TYPE_RC_EC))) + return 0; + + return a->mode; +@@ -1308,6 +1310,7 @@ static int set_device_error_reporting(struct pci_dev *dev, void *data) + int type = pci_pcie_type(dev); + + if ((type == PCI_EXP_TYPE_ROOT_PORT) || ++ (type == PCI_EXP_TYPE_RC_EC) || + (type == PCI_EXP_TYPE_UPSTREAM) || + (type == PCI_EXP_TYPE_DOWNSTREAM)) { + if (enable) +@@ -1432,6 +1435,11 @@ static int aer_probe(struct pcie_device *dev) + struct device *device = &dev->device; + struct pci_dev *port = dev->port; + ++ /* Limit to Root Ports or Root Complex Event 
Collectors */ ++ if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) && ++ (pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT)) ++ return -ENODEV; ++ + rpc = devm_kzalloc(device, sizeof(struct aer_rpc), GFP_KERNEL); + if (!rpc) { + dev_printk(KERN_DEBUG, device, "alloc AER rpc failed\n"); +@@ -1455,36 +1463,52 @@ static int aer_probe(struct pcie_device *dev) + } + + /** +- * aer_root_reset - reset link on Root Port +- * @dev: pointer to Root Port's pci_dev data structure ++ * aer_root_reset - reset Root Port hierarchy or RCEC ++ * @dev: pointer to Root Port or RCEC + * +- * Invoked by Port Bus driver when performing link reset at Root Port. ++ * Invoked by Port Bus driver when performing reset. + */ + static pci_ers_result_t aer_root_reset(struct pci_dev *dev) + { +- int aer = dev->aer_cap; ++ int type = pci_pcie_type(dev); ++ struct pci_dev *root; ++ int aer; ++ struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); + u32 reg32; + int rc; + +- if (pcie_aer_is_native(dev)) { ++ root = dev; /* device with Root Error registers */ ++ aer = root->aer_cap; ++ ++ if ((host->native_aer || pcie_ports_native) && aer) { + /* Disable Root's interrupt in response to error messages */ +- pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32); ++ pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32); + reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; +- pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32); ++ pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32); + } + +- rc = pci_bus_error_reset(dev); +- pci_printk(KERN_DEBUG, dev, "Root Port link has been reset (%d)\n", rc); ++ if (type == PCI_EXP_TYPE_RC_EC) { ++ if (pcie_has_flr(dev)) { ++ rc = pcie_flr(dev); ++ pci_info(dev, "has been reset (%d)\n", rc); ++ } else { ++ pci_info(dev, "not reset (no FLR support)\n"); ++ rc = -ENOTTY; ++ } ++ } else { ++ rc = pci_bus_error_reset(dev); ++ pci_info(dev, "Root Port link has been reset (%d)\n", rc); ++ } + +- if (pcie_aer_is_native(dev)) { ++ if
((host->native_aer || pcie_ports_native) && aer) { + /* Clear Root Error Status */ +- pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32); +- pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32); ++ pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, &reg32); ++ pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32); + + /* Enable Root Port's interrupt in response to error messages */ +- pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32); ++ pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32); + reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; +- pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32); ++ pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32); + } + + return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; +@@ -1492,7 +1516,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) + + static struct pcie_port_service_driver aerdriver = { + .name = "aer", +- .port_type = PCI_EXP_TYPE_ROOT_PORT, ++ .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_AER, + + .probe = aer_probe, +diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c +index 4637685d1..47d10f828 100644 +--- a/drivers/pci/pcie/err.c ++++ b/drivers/pci/pcie/err.c +@@ -146,13 +146,16 @@ static int report_resume(struct pci_dev *dev, void *data) + + /** + * pci_walk_bridge - walk bridges potentially AER affected +- * @bridge: bridge which may be a Port ++ * @bridge: bridge which may be a Port or an RCEC + * @cb: callback to be called for each device found + * @userdata: arbitrary pointer to be passed to callback + * + * If the device provided is a bridge, walk the subordinate bus, including + * any bridged devices on buses under this bus. Call the provided callback + * on each device found. ++ * ++ * If the device provided has no subordinate bus, e.g., an RCEC, call the ++ * callback on the device itself.
+ */ + static void pci_walk_bridge(struct pci_dev *bridge, + int (*cb)(struct pci_dev *, void *), +@@ -160,6 +163,8 @@ static void pci_walk_bridge(struct pci_dev *bridge, + { + if (bridge->subordinate) + pci_walk_bus(bridge->subordinate, cb, userdata); ++ else ++ cb(bridge, userdata); + } + + /** +@@ -212,11 +217,17 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state st + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); + + /* +- * Error recovery runs on all subordinates of the bridge. If the +- * bridge detected the error, it is cleared at the end. ++ * If the error was detected by a Root Port, Downstream Port, or ++ * RCEC, recovery runs on the device itself. For Ports, that also ++ * includes any subordinate devices. ++ * ++ * If it was detected by another device (Endpoint, etc), recovery ++ * runs on the device and anything else under the same Port, i.e., ++ * everything under "bridge". + */ + if (type == PCI_EXP_TYPE_ROOT_PORT || +- type == PCI_EXP_TYPE_DOWNSTREAM) ++ type == PCI_EXP_TYPE_DOWNSTREAM || ++ type == PCI_EXP_TYPE_RC_EC) + bridge = dev; + else + bridge = pci_upstream_bridge(dev); +-- +2.27.0 + diff --git a/patches/0771-PCI-ERR-Add-pcie_link_rcec-to-associate-RCiEPs.patch b/patches/0771-PCI-ERR-Add-pcie_link_rcec-to-associate-RCiEPs.patch new file mode 100644 index 00000000..99ac8f32 --- /dev/null +++ b/patches/0771-PCI-ERR-Add-pcie_link_rcec-to-associate-RCiEPs.patch @@ -0,0 +1,191 @@ +From 93fc1869d67cb22e8e64981e92b18d6fdaed6824 Mon Sep 17 00:00:00 2001 +From: Sean V Kelley +Date: Thu, 21 Apr 2022 21:49:51 +0800 +Subject: [PATCH 15/21] PCI/ERR: Add pcie_link_rcec() to associate RCiEPs + +mainline inclusion +from mainline-v5.11-rc1 +commit 507b460f814458605c47b0ed03c11e49a712fc08 +category: feature +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EAHA +CVE: NA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=507b460f814458605c47b0ed03c11e49a712fc08 + 
+---------------------------------------------------------------------------- + +A Root Complex Event Collector terminates error and PME messages from +associated RCiEPs. + +Use the RCEC Endpoint Association Extended Capability to identify +associated RCiEPs. Link the associated RCiEPs as the RCECs are enumerated. + +Co-developed-by: Qiuxu Zhuo +Link: https://lore.kernel.org/r/20201121001036.8560-12-sean.v.kelley@intel.com +Tested-by: Jonathan Cameron # non-native/no RCEC +Signed-off-by: Qiuxu Zhuo +Signed-off-by: Sean V Kelley +Signed-off-by: Bjorn Helgaas +Signed-off-by: Jiefeng Ou +Reviewed-by: Jonathan Cameron +Reviewed-by: Jay Fang +Reviewed-by: Xiongfeng Wang +Signed-off-by: Zheng Zengkai +Signed-off-by: YunYi Yang +--- + drivers/pci/pci.h | 2 + + drivers/pci/pcie/portdrv_pci.c | 3 ++ + drivers/pci/pcie/rcec.c | 94 ++++++++++++++++++++++++++++++++++ + include/linux/pci.h | 1 + + 4 files changed, 100 insertions(+) + +diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h +index 3b52c3b57..65f9902ed 100644 +--- a/drivers/pci/pci.h ++++ b/drivers/pci/pci.h +@@ -447,9 +447,11 @@ static inline void pci_restore_dpc_state(struct pci_dev *dev) {} + #ifdef CONFIG_PCIEPORTBUS + void pci_rcec_init(struct pci_dev *dev); + void pci_rcec_exit(struct pci_dev *dev); ++void pcie_link_rcec(struct pci_dev *rcec); + #else + static inline void pci_rcec_init(struct pci_dev *dev) {} + static inline void pci_rcec_exit(struct pci_dev *dev) {} ++static inline void pcie_link_rcec(struct pci_dev *rcec) {} + #endif + + #ifdef CONFIG_PCI_ATS +diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c +index 6e20c5fb0..665ffd6f2 100644 +--- a/drivers/pci/pcie/portdrv_pci.c ++++ b/drivers/pci/pcie/portdrv_pci.c +@@ -113,6 +113,9 @@ static int pcie_portdrv_probe(struct pci_dev *dev, + (type != PCI_EXP_TYPE_RC_EC))) + return -ENODEV; + ++ if (type == PCI_EXP_TYPE_RC_EC) ++ pcie_link_rcec(dev); ++ + status = pcie_port_device_register(dev); + if (status) + return status; +diff 
--git a/drivers/pci/pcie/rcec.c b/drivers/pci/pcie/rcec.c +index 038e9d706..cdec277cb 100644 +--- a/drivers/pci/pcie/rcec.c ++++ b/drivers/pci/pcie/rcec.c +@@ -15,6 +15,100 @@ + + #include "../pci.h" + ++struct walk_rcec_data { ++ struct pci_dev *rcec; ++ int (*user_callback)(struct pci_dev *dev, void *data); ++ void *user_data; ++}; ++ ++static bool rcec_assoc_rciep(struct pci_dev *rcec, struct pci_dev *rciep) ++{ ++ unsigned long bitmap = rcec->rcec_ea->bitmap; ++ unsigned int devn; ++ ++ /* An RCiEP found on a different bus in range */ ++ if (rcec->bus->number != rciep->bus->number) ++ return true; ++ ++ /* Same bus, so check bitmap */ ++ for_each_set_bit(devn, &bitmap, 32) ++ if (devn == rciep->devfn) ++ return true; ++ ++ return false; ++} ++ ++static int link_rcec_helper(struct pci_dev *dev, void *data) ++{ ++ struct walk_rcec_data *rcec_data = data; ++ struct pci_dev *rcec = rcec_data->rcec; ++ ++ if ((pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) && ++ rcec_assoc_rciep(rcec, dev)) { ++ dev->rcec = rcec; ++ pci_dbg(dev, "PME & error events signaled via %s\n", ++ pci_name(rcec)); ++ } ++ ++ return 0; ++} ++ ++static void walk_rcec(int (*cb)(struct pci_dev *dev, void *data), ++ void *userdata) ++{ ++ struct walk_rcec_data *rcec_data = userdata; ++ struct pci_dev *rcec = rcec_data->rcec; ++ u8 nextbusn, lastbusn; ++ struct pci_bus *bus; ++ unsigned int bnr; ++ ++ if (!rcec->rcec_ea) ++ return; ++ ++ /* Walk own bus for bitmap based association */ ++ pci_walk_bus(rcec->bus, cb, rcec_data); ++ ++ nextbusn = rcec->rcec_ea->nextbusn; ++ lastbusn = rcec->rcec_ea->lastbusn; ++ ++ /* All RCiEP devices are on the same bus as the RCEC */ ++ if (nextbusn == 0xff && lastbusn == 0x00) ++ return; ++ ++ for (bnr = nextbusn; bnr <= lastbusn; bnr++) { ++ /* No association indicated (PCIe 5.0-1, 7.9.10.3) */ ++ if (bnr == rcec->bus->number) ++ continue; ++ ++ bus = pci_find_bus(pci_domain_nr(rcec->bus), bnr); ++ if (!bus) ++ continue; ++ ++ /* Find RCiEP devices on the given bus 
ranges */ ++ pci_walk_bus(bus, cb, rcec_data); ++ } ++} ++ ++/** ++ * pcie_link_rcec - Link RCiEP devices associated with RCEC. ++ * @rcec: RCEC whose RCiEP devices should be linked. ++ * ++ * Link the given RCEC to each RCiEP device found. ++ */ ++void pcie_link_rcec(struct pci_dev *rcec) ++{ ++ struct walk_rcec_data rcec_data; ++ ++ if (!rcec->rcec_ea) ++ return; ++ ++ rcec_data.rcec = rcec; ++ rcec_data.user_callback = NULL; ++ rcec_data.user_data = NULL; ++ ++ walk_rcec(link_rcec_helper, &rcec_data); ++} ++ + void pci_rcec_init(struct pci_dev *dev) + { + struct rcec_ea *rcec_ea; +diff --git a/include/linux/pci.h b/include/linux/pci.h +index 73ea8ea4a..d66f38789 100644 +--- a/include/linux/pci.h ++++ b/include/linux/pci.h +@@ -313,6 +313,7 @@ struct pci_dev { + #endif + #ifdef CONFIG_PCIEPORTBUS + struct rcec_ea *rcec_ea; /* RCEC cached endpoint association */ ++ struct pci_dev *rcec; /* Associated RCEC device */ + #endif + u8 pcie_cap; /* PCIe capability offset */ + u8 msi_cap; /* MSI capability offset */ +-- +2.27.0 + diff --git a/patches/0772-PCI-ERR-Recover-from-RCiEP-AER-errors.patch b/patches/0772-PCI-ERR-Recover-from-RCiEP-AER-errors.patch new file mode 100644 index 00000000..12c84f29 --- /dev/null +++ b/patches/0772-PCI-ERR-Recover-from-RCiEP-AER-errors.patch @@ -0,0 +1,141 @@ +From af03f12294505c308b8ee499fc24c1f209a19a1d Mon Sep 17 00:00:00 2001 +From: Qiuxu Zhuo +Date: Thu, 21 Apr 2022 21:49:52 +0800 +Subject: [PATCH 16/21] PCI/ERR: Recover from RCiEP AER errors + +mainline inclusion +from mainline-v5.11-rc1 +commit 5790862255028c831761e13014ee87a06df828f1 +category: feature +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EAHA +CVE: NA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5790862255028c831761e13014ee87a06df828f1 + +---------------------------------------------------------------------------- + +Add support for handling AER errors detected by Root Complex Integrated +Endpoints (RCiEPs). 
These errors are signaled to software natively via a +Root Complex Event Collector (RCEC) or non-natively via ACPI APEI if the +platform retains control of AER or uses a non-standard RCEC-like device. + +When recovering from RCiEP errors, the Root Error Command and Status +registers are in the AER Capability of an associated RCEC (if any), not in +a Root Port. In the non-native case, the platform is responsible for those +registers and we can't touch them. + +[bhelgaas: commit log, etc] +Co-developed-by: Sean V Kelley +Link: https://lore.kernel.org/r/20201121001036.8560-13-sean.v.kelley@intel.com +Signed-off-by: Sean V Kelley +Signed-off-by: Qiuxu Zhuo +Signed-off-by: Bjorn Helgaas +Signed-off-by: Jiefeng Ou +Reviewed-by: Jay Fang +Reviewed-by: Xiongfeng Wang +Signed-off-by: Zheng Zengkai +Signed-off-by: YunYi Yang +--- + drivers/pci/pcie/aer.c | 24 +++++++++++++++++++----- + drivers/pci/pcie/err.c | 15 ++++++++------- + 2 files changed, 27 insertions(+), 12 deletions(-) + +diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c +index 5628b943c..8421e21f6 100644 +--- a/drivers/pci/pcie/aer.c ++++ b/drivers/pci/pcie/aer.c +@@ -1463,8 +1463,8 @@ static int aer_probe(struct pcie_device *dev) + } + + /** +- * aer_root_reset - reset Root Port hierarchy or RCEC +- * @dev: pointer to Root Port or RCEC ++ * aer_root_reset - reset Root Port hierarchy, RCEC, or RCiEP ++ * @dev: pointer to Root Port, RCEC, or RCiEP + * + * Invoked by Port Bus driver when performing reset. + */ +@@ -1477,8 +1477,22 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) + u32 reg32; + int rc; + +- root = dev; /* device with Root Error registers */ +- aer = root->aer_cap; ++ /* ++ * Only Root Ports and RCECs have AER Root Command and Root Status ++ * registers. If "dev" is an RCiEP, the relevant registers are in ++ * the RCEC. 
++ */ ++ if (type == PCI_EXP_TYPE_RC_END) ++ root = dev->rcec; ++ else ++ root = dev; ++ ++ /* ++ * If the platform retained control of AER, an RCiEP may not have ++ * an RCEC visible to us, so dev->rcec ("root") may be NULL. In ++ * that case, firmware is responsible for these registers. ++ */ ++ aer = root ? root->aer_cap : 0; + + if ((host->native_aer || pcie_ports_native) && aer) { + /* Disable Root's interrupt in response to error messages */ +@@ -1487,7 +1501,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) + pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32); + } + +- if (type == PCI_EXP_TYPE_RC_EC) { ++ if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) { + if (pcie_has_flr(dev)) { + rc = pcie_flr(dev); + pci_info(dev, "has been reset (%d)\n", rc); +diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c +index 47d10f828..6e5b3be62 100644 +--- a/drivers/pci/pcie/err.c ++++ b/drivers/pci/pcie/err.c +@@ -146,7 +146,7 @@ static int report_resume(struct pci_dev *dev, void *data) + + /** + * pci_walk_bridge - walk bridges potentially AER affected +- * @bridge: bridge which may be a Port or an RCEC ++ * @bridge: bridge which may be a Port, an RCEC, or an RCiEP + * @cb: callback to be called for each device found + * @userdata: arbitrary pointer to be passed to callback + * +@@ -154,8 +154,8 @@ static int report_resume(struct pci_dev *dev, void *data) + * any bridged devices on buses under this bus. Call the provided callback + * on each device found. + * +- * If the device provided has no subordinate bus, e.g., an RCEC, call the +- * callback on the device itself. ++ * If the device provided has no subordinate bus, e.g., an RCEC or RCiEP, ++ * call the callback on the device itself. 
+ */ + static void pci_walk_bridge(struct pci_dev *bridge, + int (*cb)(struct pci_dev *, void *), +@@ -217,9 +217,9 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state st + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); + + /* +- * If the error was detected by a Root Port, Downstream Port, or +- * RCEC, recovery runs on the device itself. For Ports, that also +- * includes any subordinate devices. ++ * If the error was detected by a Root Port, Downstream Port, RCEC, ++ * or RCiEP, recovery runs on the device itself. For Ports, that ++ * also includes any subordinate devices. + * + * If it was detected by another device (Endpoint, etc), recovery + * runs on the device and anything else under the same Port, i.e., +@@ -227,7 +227,8 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state st + */ + if (type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_DOWNSTREAM || +- type == PCI_EXP_TYPE_RC_EC) ++ type == PCI_EXP_TYPE_RC_EC || ++ type == PCI_EXP_TYPE_RC_END) + bridge = dev; + else + bridge = pci_upstream_bridge(dev); +-- +2.27.0 + diff --git a/patches/0773-PCI-AER-Add-pcie_walk_rcec-to-RCEC-AER-handling.patch b/patches/0773-PCI-AER-Add-pcie_walk_rcec-to-RCEC-AER-handling.patch new file mode 100644 index 00000000..00e1d2a7 --- /dev/null +++ b/patches/0773-PCI-AER-Add-pcie_walk_rcec-to-RCEC-AER-handling.patch @@ -0,0 +1,163 @@ +From b323cba9ca9441ddab8e61c30505ad181edc0492 Mon Sep 17 00:00:00 2001 +From: Sean V Kelley +Date: Thu, 21 Apr 2022 21:49:53 +0800 +Subject: [PATCH 17/21] PCI/AER: Add pcie_walk_rcec() to RCEC AER handling + +mainline inclusion +from mainline-v5.11-rc1 +commit af113553d9610b2d811d05da96263b4f666f44f0 +category: feature +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EAHA +CVE: NA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=af113553d9610b2d811d05da96263b4f666f44f0 + 
+---------------------------------------------------------------------------- + +Root Complex Event Collectors (RCEC) appear as peers to Root Ports and also +have the AER capability. In addition, actions need to be taken for +associated RCiEPs. In such cases the RCECs will need to be walked in order +to find and act upon their respective RCiEPs. + +Extend the existing ability to link the RCECs with a walking function +pcie_walk_rcec(). Add RCEC support to the current AER service driver and +attach the AER service driver to the RCEC device. + +Co-developed-by: Qiuxu Zhuo +Link: https://lore.kernel.org/r/20201121001036.8560-14-sean.v.kelley@intel.com +Tested-by: Jonathan Cameron # non-native/no RCEC +Signed-off-by: Qiuxu Zhuo +Signed-off-by: Sean V Kelley +Signed-off-by: Bjorn Helgaas +Signed-off-by: Jiefeng Ou +Reviewed-by: Jonathan Cameron +Reviewed-by: Kuppuswamy Sathyanarayanan +Reviewed-by: Jay Fang +Reviewed-by: Xiongfeng Wang +Signed-off-by: Zheng Zengkai +Signed-off-by: YunYi Yang +--- + drivers/pci/pci.h | 6 ++++++ + drivers/pci/pcie/aer.c | 15 +++++++++++---- + drivers/pci/pcie/rcec.c | 37 +++++++++++++++++++++++++++++++++++++ + 3 files changed, 54 insertions(+), 4 deletions(-) + +diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h +index 65f9902ed..7b45150ca 100644 +--- a/drivers/pci/pci.h ++++ b/drivers/pci/pci.h +@@ -448,10 +448,16 @@ static inline void pci_restore_dpc_state(struct pci_dev *dev) {} + void pci_rcec_init(struct pci_dev *dev); + void pci_rcec_exit(struct pci_dev *dev); + void pcie_link_rcec(struct pci_dev *rcec); ++void pcie_walk_rcec(struct pci_dev *rcec, ++ int (*cb)(struct pci_dev *, void *), ++ void *userdata); + #else + static inline void pci_rcec_init(struct pci_dev *dev) {} + static inline void pci_rcec_exit(struct pci_dev *dev) {} + static inline void pcie_link_rcec(struct pci_dev *rcec) {} ++static inline void pcie_walk_rcec(struct pci_dev *rcec, ++ int (*cb)(struct pci_dev *, void *), ++ void *userdata) {} + #endif + + #ifdef 
CONFIG_PCI_ATS +diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c +index 8421e21f6..959ea2bf8 100644 +--- a/drivers/pci/pcie/aer.c ++++ b/drivers/pci/pcie/aer.c +@@ -1017,7 +1017,10 @@ static bool find_source_device(struct pci_dev *parent, + if (result) + return true; + +- pci_walk_bus(parent->subordinate, find_device_iter, e_info); ++ if (pci_pcie_type(parent) == PCI_EXP_TYPE_RC_EC) ++ pcie_walk_rcec(parent, find_device_iter, e_info); ++ else ++ pci_walk_bus(parent->subordinate, find_device_iter, e_info); + + if (!e_info->error_dev_num) { + pci_printk(KERN_DEBUG, parent, "can't find device of ID%04x\n", +@@ -1158,6 +1161,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) + if (!(info->status & ~info->mask)) + return 0; + } else if (type == PCI_EXP_TYPE_ROOT_PORT || ++ type == PCI_EXP_TYPE_RC_EC || + type == PCI_EXP_TYPE_DOWNSTREAM || + info->severity == AER_NONFATAL) { + +@@ -1335,9 +1339,12 @@ static void set_downstream_devices_error_reporting(struct pci_dev *dev, + { + set_device_error_reporting(dev, &enable); + +- if (!dev->subordinate) +- return; +- pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable); ++ if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC) ++ pcie_walk_rcec(dev, set_device_error_reporting, &enable); ++ else if (dev->subordinate) ++ pci_walk_bus(dev->subordinate, set_device_error_reporting, ++ &enable); ++ + } + + /** +diff --git a/drivers/pci/pcie/rcec.c b/drivers/pci/pcie/rcec.c +index cdec277cb..2c5c55299 100644 +--- a/drivers/pci/pcie/rcec.c ++++ b/drivers/pci/pcie/rcec.c +@@ -53,6 +53,18 @@ static int link_rcec_helper(struct pci_dev *dev, void *data) + return 0; + } + ++static int walk_rcec_helper(struct pci_dev *dev, void *data) ++{ ++ struct walk_rcec_data *rcec_data = data; ++ struct pci_dev *rcec = rcec_data->rcec; ++ ++ if ((pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) && ++ rcec_assoc_rciep(rcec, dev)) ++ rcec_data->user_callback(dev, rcec_data->user_data); ++ ++ return 0; ++} ++ + 
static void walk_rcec(int (*cb)(struct pci_dev *dev, void *data), + void *userdata) + { +@@ -109,6 +121,31 @@ void pcie_link_rcec(struct pci_dev *rcec) + walk_rcec(link_rcec_helper, &rcec_data); + } + ++/** ++ * pcie_walk_rcec - Walk RCiEP devices associated with an RCEC and call callback. ++ * @rcec: RCEC whose RCiEP devices should be walked ++ * @cb: Callback to be called for each RCiEP device found ++ * @userdata: Arbitrary pointer to be passed to callback ++ * ++ * Walk the given RCEC. Call the callback on each RCiEP found. ++ * ++ * The return value of @cb is ignored; a non-zero return does not stop the walk. ++ */ ++void pcie_walk_rcec(struct pci_dev *rcec, int (*cb)(struct pci_dev *, void *), ++ void *userdata) ++{ ++ struct walk_rcec_data rcec_data; ++ ++ if (!rcec->rcec_ea) ++ return; ++ ++ rcec_data.rcec = rcec; ++ rcec_data.user_callback = cb; ++ rcec_data.user_data = userdata; ++ ++ walk_rcec(walk_rcec_helper, &rcec_data); ++} ++ + void pci_rcec_init(struct pci_dev *dev) + { + struct rcec_ea *rcec_ea; +-- +2.27.0 + diff --git a/patches/0774-PCI-PME-Add-pcie_walk_rcec-to-RCEC-PME-handling.patch b/patches/0774-PCI-PME-Add-pcie_walk_rcec-to-RCEC-PME-handling.patch new file mode 100644 index 00000000..5d3bd28d --- /dev/null +++ b/patches/0774-PCI-PME-Add-pcie_walk_rcec-to-RCEC-PME-handling.patch @@ -0,0 +1,114 @@ +From 45c6eb19f8f1f57ca232ff1ffe5f3ab7b54e316c Mon Sep 17 00:00:00 2001 +From: Sean V Kelley +Date: Thu, 21 Apr 2022 21:49:54 +0800 +Subject: [PATCH 18/21] PCI/PME: Add pcie_walk_rcec() to RCEC PME handling + +mainline inclusion +from mainline-v5.11-rc1 +commit 9a2f604f44979e0effa8cf067e5a8ecda729f23b +category: feature +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EAHA +CVE: NA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9a2f604f44979e0effa8cf067e5a8ecda729f23b + +---------------------------------------------------------------------------- + +Root Complex Event Collectors (RCEC) appear as peers of Root Ports and also
+have the PME capability. As with AER, there is a need to be able to walk +the RCiEPs associated with their RCEC for purposes of acting upon them with +callbacks. + +Add RCEC support through the use of pcie_walk_rcec() to the current PME +service driver and attach the PME service driver to the RCEC device. + +Co-developed-by: Qiuxu Zhuo +Link: https://lore.kernel.org/r/20201121001036.8560-15-sean.v.kelley@intel.com +Tested-by: Jonathan Cameron # non-native/no RCEC +Signed-off-by: Qiuxu Zhuo +Signed-off-by: Sean V Kelley +Signed-off-by: Bjorn Helgaas +Signed-off-by: Jiefeng Ou +Reviewed-by: Jay Fang +Reviewed-by: Xiongfeng Wang +Signed-off-by: Zheng Zengkai +Signed-off-by: YunYi Yang +--- + drivers/pci/pcie/pme.c | 16 ++++++++++++---- + drivers/pci/pcie/portdrv_core.c | 9 +++------ + 2 files changed, 15 insertions(+), 10 deletions(-) + +diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c +index 94479ec04..89c561bca 100644 +--- a/drivers/pci/pcie/pme.c ++++ b/drivers/pci/pcie/pme.c +@@ -308,7 +308,10 @@ static int pcie_pme_can_wakeup(struct pci_dev *dev, void *ign) + static void pcie_pme_mark_devices(struct pci_dev *port) + { + pcie_pme_can_wakeup(port, NULL); +- if (port->subordinate) ++ ++ if (pci_pcie_type(port) == PCI_EXP_TYPE_RC_EC) ++ pcie_walk_rcec(port, pcie_pme_can_wakeup, NULL); ++ else if (port->subordinate) + pci_walk_bus(port->subordinate, pcie_pme_can_wakeup, NULL); + } + +@@ -318,10 +321,16 @@ static void pcie_pme_mark_devices(struct pci_dev *port) + */ + static int pcie_pme_probe(struct pcie_device *srv) + { +- struct pci_dev *port; ++ struct pci_dev *port = srv->port; + struct pcie_pme_service_data *data; ++ int type = pci_pcie_type(port); + int ret; + ++ /* Limit to Root Ports or Root Complex Event Collectors */ ++ if (type != PCI_EXP_TYPE_RC_EC && ++ type != PCI_EXP_TYPE_ROOT_PORT) ++ return -ENODEV; ++ + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; +@@ -331,7 +340,6 @@ static int pcie_pme_probe(struct 
pcie_device *srv) + data->srv = srv; + set_service_data(srv, data); + +- port = srv->port; + pcie_pme_interrupt_enable(port, false); + pcie_clear_root_pme_status(port); + +@@ -443,7 +451,7 @@ static void pcie_pme_remove(struct pcie_device *srv) + + static struct pcie_port_service_driver pcie_pme_driver = { + .name = "pcie_pme", +- .port_type = PCI_EXP_TYPE_ROOT_PORT, ++ .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_PME, + + .probe = pcie_pme_probe, +diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c +index 54772e5b7..055d32c36 100644 +--- a/drivers/pci/pcie/portdrv_core.c ++++ b/drivers/pci/pcie/portdrv_core.c +@@ -231,12 +231,9 @@ static int get_port_device_capability(struct pci_dev *dev) + } + #endif + +- /* +- * Root ports are capable of generating PME too. Root Complex +- * Event Collectors can also generate PMEs, but we don't handle +- * those yet. +- */ +- if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT && ++ /* Root Ports and Root Complex Event Collectors may generate PMEs */ ++ if ((pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || ++ pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC) && + (pcie_ports_native || host->native_pme)) { + services |= PCIE_PORT_SERVICE_PME; + +-- +2.27.0 + diff --git a/patches/0775-PCI-AER-Add-RCEC-AER-error-injection-support.patch b/patches/0775-PCI-AER-Add-RCEC-AER-error-injection-support.patch new file mode 100644 index 00000000..a54fbc65 --- /dev/null +++ b/patches/0775-PCI-AER-Add-RCEC-AER-error-injection-support.patch @@ -0,0 +1,57 @@ +From a02cadfe66814892cba755142b44bbfb80da3575 Mon Sep 17 00:00:00 2001 +From: Qiuxu Zhuo +Date: Thu, 21 Apr 2022 21:49:55 +0800 +Subject: [PATCH 19/21] PCI/AER: Add RCEC AER error injection support + +mainline inclusion +from mainline-v5.11-rc1 +commit d292dd0eb3ac6ce6ea66715bb9f6b8e2ae70747c +category: feature +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EAHA +CVE: NA + +Reference: 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d292dd0eb3ac6ce6ea66715bb9f6b8e2ae70747c + +---------------------------------------------------------------------------- + +Root Complex Event Collectors (RCEC) appear as peers to Root Ports and may +also have the AER capability. + +Add RCEC support to the AER error injection driver. + +Co-developed-by: Sean V Kelley +Link: https://lore.kernel.org/r/20201121001036.8560-16-sean.v.kelley@intel.com +Tested-by: Jonathan Cameron # non-native/no RCEC +Signed-off-by: Qiuxu Zhuo +Signed-off-by: Sean V Kelley +Signed-off-by: Bjorn Helgaas +Signed-off-by: Jiefeng Ou +Reviewed-by: Kuppuswamy Sathyanarayanan +Reviewed-by: Jay Fang +Reviewed-by: Xiongfeng Wang +Signed-off-by: Zheng Zengkai +Signed-off-by: YunYi Yang +--- + drivers/pci/pcie/aer_inject.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/drivers/pci/pcie/aer_inject.c b/drivers/pci/pcie/aer_inject.c +index 5a1a82b2a..e0dde7419 100644 +--- a/drivers/pci/pcie/aer_inject.c ++++ b/drivers/pci/pcie/aer_inject.c +@@ -340,8 +340,11 @@ static int aer_inject(struct aer_error_inj *einj) + if (!dev) + return -ENODEV; + rpdev = pcie_find_root_port(dev); ++ /* If Root Port not found, try to find an RCEC */ ++ if (!rpdev) ++ rpdev = dev->rcec; + if (!rpdev) { +- pci_err(dev, "Root port not found\n"); ++ pci_err(dev, "Neither Root Port nor RCEC found\n"); + ret = -ENODEV; + goto out_put; + } +-- +2.27.0 + diff --git a/patches/0776-PCI-RCEC-Fix-RCiEP-device-to-RCEC-association.patch b/patches/0776-PCI-RCEC-Fix-RCiEP-device-to-RCEC-association.patch new file mode 100644 index 00000000..7db4da70 --- /dev/null +++ b/patches/0776-PCI-RCEC-Fix-RCiEP-device-to-RCEC-association.patch @@ -0,0 +1,58 @@ +From 1f93ce8b888ffc4af83445dddf5a4c974b00a51d Mon Sep 17 00:00:00 2001 +From: Qiuxu Zhuo +Date: Thu, 21 Apr 2022 21:49:56 +0800 +Subject: [PATCH 20/21] PCI/RCEC: Fix RCiEP device to RCEC association + +mainline inclusion +from 
mainline-v5.13-rc1 +commit d9b7eae8e3424c3480fe9f40ebafbb0c96426e4c +category: bugfix +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EAHA +CVE: NA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d9b7eae8e3424c3480fe9f40ebafbb0c96426e4c + +---------------------------------------------------------------------------- + +rcec_assoc_rciep() used "rciep->devfn" (a single byte encoding both the +device and function number) as the device number to check whether the +corresponding bit was set in the RCEC's Association Bitmap for RCiEPs. + +But per PCIe r5.0, sec 7.9.10.2, "Association Bitmap for RCiEPs", the +32-bit bitmap contains one bit per device. That bit applies to all +functions of the device. + +Fix rcec_assoc_rciep() to convert the value of "rciep->devfn" to a device +number to ensure that RCiEP devices are correctly associated with the RCEC. + +Reported-and-tested-by: Wen Jin +Fixes: 507b460f8144 ("PCI/ERR: Add pcie_link_rcec() to associate RCiEPs") +Link: https://lore.kernel.org/r/20210222011717.43266-1-qiuxu.zhuo@intel.com +Signed-off-by: Qiuxu Zhuo +Signed-off-by: Bjorn Helgaas +Signed-off-by: Jiefeng Ou +Reviewed-by: Sean V Kelley +Reviewed-by: Jay Fang +Reviewed-by: Xiongfeng Wang +Signed-off-by: Zheng Zengkai +Signed-off-by: YunYi Yang +--- + drivers/pci/pcie/rcec.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/pci/pcie/rcec.c b/drivers/pci/pcie/rcec.c +index 2c5c55299..d0bcd141a 100644 +--- a/drivers/pci/pcie/rcec.c ++++ b/drivers/pci/pcie/rcec.c +@@ -32,7 +32,7 @@ static bool rcec_assoc_rciep(struct pci_dev *rcec, struct pci_dev *rciep) + + /* Same bus, so check bitmap */ + for_each_set_bit(devn, &bitmap, 32) +- if (devn == rciep->devfn) ++ if (devn == PCI_SLOT(rciep->devfn)) + return true; + + return false; +-- +2.27.0 + diff --git a/patches/0777-PCI-fix-kabi-change-in-struct-pci_dev.patch b/patches/0777-PCI-fix-kabi-change-in-struct-pci_dev.patch new file mode 100644 
index 00000000..845c741d --- /dev/null +++ b/patches/0777-PCI-fix-kabi-change-in-struct-pci_dev.patch @@ -0,0 +1,66 @@ +From 35612209872a1bf5958588e5d4bdb0a47ca99581 Mon Sep 17 00:00:00 2001 +From: Jiefeng Ou +Date: Thu, 21 Apr 2022 21:49:57 +0800 +Subject: [PATCH 21/21] PCI: fix kabi change in struct pci_dev + +driver inclusion +category: bugfix +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8EAHA + +---------------------------------------------------------------------------- + +Fix kabi change in struct pci_dev since the following patches: +- commit 8eb7b6ca203f ("PCI/ERR: Cache RCEC EA Capability offset in + pci_init_capabilities()") +- commit 1345ecf47242 ("PCI/ERR: Add pcie_link_rcec() to associate RCiEPs") + +Signed-off-by: Jiefeng Ou +Reviewed-by: Xiongfeng Wang +Reviewed-by: Jay Fang +Signed-off-by: Zheng Zengkai +Signed-off-by: YunYi Yang + + Conflicts: + include/linux/pci.h +--- + include/linux/pci.h | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +diff --git a/include/linux/pci.h b/include/linux/pci.h +index d66f38789..8c9b1e74d 100644 +--- a/include/linux/pci.h ++++ b/include/linux/pci.h +@@ -310,10 +310,6 @@ struct pci_dev { + #ifdef CONFIG_PCIEAER + u16 aer_cap; /* AER capability offset */ + struct aer_stats *aer_stats; /* AER stats for this device */ +-#endif +-#ifdef CONFIG_PCIEPORTBUS +- struct rcec_ea *rcec_ea; /* RCEC cached endpoint association */ +- struct pci_dev *rcec; /* Associated RCEC device */ + #endif + u8 pcie_cap; /* PCIe capability offset */ + u8 msi_cap; /* MSI capability offset */ +@@ -479,8 +475,19 @@ struct pci_dev { + #else + KABI_RESERVE(1) + #endif ++ ++#ifndef __GENKSYMS__ ++#ifdef CONFIG_PCIEPORTBUS ++ struct rcec_ea *rcec_ea; /* RCEC cached endpoint association */ ++ struct pci_dev *rcec; /* Associated RCEC device */ ++#else ++ KABI_RESERVE(2) ++ KABI_RESERVE(3) ++#endif ++#else + KABI_RESERVE(2) + KABI_RESERVE(3) ++#endif + KABI_RESERVE(4) + KABI_RESERVE(5) + KABI_RESERVE(6) +-- +2.27.0 
+ diff --git a/series.conf b/series.conf index 04be6c99..b62a6a07 100644 --- a/series.conf +++ b/series.conf @@ -756,4 +756,25 @@ patches/0752-scsi-hisi_sas-Check-usage-count-only-when-the-runtim.patch patches/0753-scsi-hisi_sas_v3_hw-Don-t-use-PCI-helper-functions.patch patches/0754-scsi-hisi_sas_v3_hw-Remove-extra-function-calls-for-.patch patches/0755-config-arm64-Enable-dubugfs-config-of-hisi-sas.patch -patches/0756-crypto-hisilicon-Add-value-profile-support-for-kerne.patch \ No newline at end of file +patches/0756-crypto-hisilicon-Add-value-profile-support-for-kerne.patch +patches/0757-PCI-ERR-Return-status-of-pcie_do_recovery.patch +patches/0758-PCI-AER-Use-aer-variable-for-capability-offset.patch +patches/0759-PCI-ERR-Rename-pci_aer_clear_device_status-to-pcie_c.patch +patches/0760-PCI-ERR-Clear-PCIe-Device-Status-errors-only-if-OS-o.patch +patches/0761-PCI-AER-Write-AER-Capability-only-when-we-control-it.patch +patches/0762-PCI-ERR-Bind-RCEC-devices-to-the-Root-Port-driver.patch +patches/0763-PCI-ERR-Cache-RCEC-EA-Capability-offset-in-pci_init_.patch +patches/0764-PCI-ERR-Simplify-by-using-pci_upstream_bridge.patch +patches/0765-PCI-ERR-Simplify-by-computing-pci_pcie_type-once.patch +patches/0766-PCI-ERR-Use-bridge-for-clarity-in-pcie_do_recovery.patch +patches/0767-PCI-ERR-Avoid-negated-conditional-for-clarity.patch +patches/0768-PCI-ERR-Add-pci_walk_bridge-to-pcie_do_recovery.patch +patches/0769-PCI-ERR-Clear-AER-status-only-when-we-control-AER.patch +patches/0770-PCI-ERR-Recover-from-RCEC-AER-errors.patch +patches/0771-PCI-ERR-Add-pcie_link_rcec-to-associate-RCiEPs.patch +patches/0772-PCI-ERR-Recover-from-RCiEP-AER-errors.patch +patches/0773-PCI-AER-Add-pcie_walk_rcec-to-RCEC-AER-handling.patch +patches/0774-PCI-PME-Add-pcie_walk_rcec-to-RCEC-PME-handling.patch +patches/0775-PCI-AER-Add-RCEC-AER-error-injection-support.patch +patches/0776-PCI-RCEC-Fix-RCiEP-device-to-RCEC-association.patch +patches/0777-PCI-fix-kabi-change-in-struct-pci_dev.patch -- Gitee