diff options
Diffstat (limited to 'drivers/pci/pcie/err.c')
| -rw-r--r-- | drivers/pci/pcie/err.c | 388 | 
1 files changed, 388 insertions, 0 deletions
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c new file mode 100644 index 000000000000..f7ce0cb0b0b7 --- /dev/null +++ b/drivers/pci/pcie/err.c @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file implements the error recovery as a core part of PCIe error + * reporting. When a PCIe error is delivered, an error message will be + * collected and printed to console, then, an error recovery procedure + * will be executed by following the PCI error recovery rules. + * + * Copyright (C) 2006 Intel Corp. + *	Tom Long Nguyen (tom.l.nguyen@intel.com) + *	Zhang Yanmin (yanmin.zhang@intel.com) + */ + +#include <linux/pci.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/aer.h> +#include "portdrv.h" +#include "../pci.h" + +struct aer_broadcast_data { +	enum pci_channel_state state; +	enum pci_ers_result result; +}; + +static pci_ers_result_t merge_result(enum pci_ers_result orig, +				  enum pci_ers_result new) +{ +	if (new == PCI_ERS_RESULT_NO_AER_DRIVER) +		return PCI_ERS_RESULT_NO_AER_DRIVER; + +	if (new == PCI_ERS_RESULT_NONE) +		return orig; + +	switch (orig) { +	case PCI_ERS_RESULT_CAN_RECOVER: +	case PCI_ERS_RESULT_RECOVERED: +		orig = new; +		break; +	case PCI_ERS_RESULT_DISCONNECT: +		if (new == PCI_ERS_RESULT_NEED_RESET) +			orig = PCI_ERS_RESULT_NEED_RESET; +		break; +	default: +		break; +	} + +	return orig; +} + +static int report_error_detected(struct pci_dev *dev, void *data) +{ +	pci_ers_result_t vote; +	const struct pci_error_handlers *err_handler; +	struct aer_broadcast_data *result_data; + +	result_data = (struct aer_broadcast_data *) data; + +	device_lock(&dev->dev); +	dev->error_state = result_data->state; + +	if (!dev->driver || +		!dev->driver->err_handler || +		!dev->driver->err_handler->error_detected) { +		if (result_data->state == pci_channel_io_frozen && +			dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { +			/* +			 * In case of fatal recovery, if one of down- +			 * stream device has no driver. We might be +			 * unable to recover because a later insmod +			 * of a driver for this device is unaware of +			 * its hw state. +			 */ +			pci_printk(KERN_DEBUG, dev, "device has %s\n", +				   dev->driver ? +				   "no AER-aware driver" : "no driver"); +		} + +		/* +		 * If there's any device in the subtree that does not +		 * have an error_detected callback, returning +		 * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of +		 * the subsequent mmio_enabled/slot_reset/resume +		 * callbacks of "any" device in the subtree. All the +		 * devices in the subtree are left in the error state +		 * without recovery. +		 */ + +		if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) +			vote = PCI_ERS_RESULT_NO_AER_DRIVER; +		else +			vote = PCI_ERS_RESULT_NONE; +	} else { +		err_handler = dev->driver->err_handler; +		vote = err_handler->error_detected(dev, result_data->state); +		pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); +	} + +	result_data->result = merge_result(result_data->result, vote); +	device_unlock(&dev->dev); +	return 0; +} + +static int report_mmio_enabled(struct pci_dev *dev, void *data) +{ +	pci_ers_result_t vote; +	const struct pci_error_handlers *err_handler; +	struct aer_broadcast_data *result_data; + +	result_data = (struct aer_broadcast_data *) data; + +	device_lock(&dev->dev); +	if (!dev->driver || +		!dev->driver->err_handler || +		!dev->driver->err_handler->mmio_enabled) +		goto out; + +	err_handler = dev->driver->err_handler; +	vote = err_handler->mmio_enabled(dev); +	result_data->result = merge_result(result_data->result, vote); +out: +	device_unlock(&dev->dev); +	return 0; +} + +static int report_slot_reset(struct pci_dev *dev, void *data) +{ +	pci_ers_result_t vote; +	const struct pci_error_handlers *err_handler; +	struct aer_broadcast_data *result_data; + +	result_data = (struct aer_broadcast_data *) data; + +	device_lock(&dev->dev); +	if (!dev->driver || +		!dev->driver->err_handler || +		!dev->driver->err_handler->slot_reset) +		goto out; + +	err_handler = dev->driver->err_handler; +	vote = err_handler->slot_reset(dev); +	result_data->result = merge_result(result_data->result, vote); +out: +	device_unlock(&dev->dev); +	return 0; +} + +static int report_resume(struct pci_dev *dev, void *data) +{ +	const struct pci_error_handlers *err_handler; + +	device_lock(&dev->dev); +	dev->error_state = pci_channel_io_normal; + +	if (!dev->driver || +		!dev->driver->err_handler || +		!dev->driver->err_handler->resume) +		goto out; + +	err_handler = dev->driver->err_handler; +	err_handler->resume(dev); +	pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); +out: +	device_unlock(&dev->dev); +	return 0; +} + +/** + * default_reset_link - default reset function + * @dev: pointer to pci_dev data structure + * + * Invoked when performing link reset on a Downstream Port or a + * Root Port with no aer driver. + */ +static pci_ers_result_t default_reset_link(struct pci_dev *dev) +{ +	pci_reset_bridge_secondary_bus(dev); +	pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n"); +	return PCI_ERS_RESULT_RECOVERED; +} + +static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) +{ +	struct pci_dev *udev; +	pci_ers_result_t status; +	struct pcie_port_service_driver *driver = NULL; + +	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { +		/* Reset this port for all subordinates */ +		udev = dev; +	} else { +		/* Reset the upstream component (likely downstream port) */ +		udev = dev->bus->self; +	} + +	/* Use the aer driver of the component firstly */ +	driver = pcie_port_find_service(udev, service); + +	if (driver && driver->reset_link) { +		status = driver->reset_link(udev); +	} else if (udev->has_secondary_link) { +		status = default_reset_link(udev); +	} else { +		pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n", +			pci_name(udev)); +		return PCI_ERS_RESULT_DISCONNECT; +	} + +	if (status != PCI_ERS_RESULT_RECOVERED) { +		pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n", +			pci_name(udev)); +		return PCI_ERS_RESULT_DISCONNECT; +	} + +	return status; +} + +/** + * broadcast_error_message - handle message broadcast to downstream drivers + * @dev: pointer to from where in a hierarchy message is broadcasted down + * @state: error state + * @error_mesg: message to print + * @cb: callback to be broadcasted + * + * Invoked during error recovery process. Once being invoked, the content + * of error severity will be broadcasted to all downstream drivers in a + * hierarchy in question. + */ +static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, +	enum pci_channel_state state, +	char *error_mesg, +	int (*cb)(struct pci_dev *, void *)) +{ +	struct aer_broadcast_data result_data; + +	pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg); +	result_data.state = state; +	if (cb == report_error_detected) +		result_data.result = PCI_ERS_RESULT_CAN_RECOVER; +	else +		result_data.result = PCI_ERS_RESULT_RECOVERED; + +	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { +		/* +		 * If the error is reported by a bridge, we think this error +		 * is related to the downstream link of the bridge, so we +		 * do error recovery on all subordinates of the bridge instead +		 * of the bridge and clear the error status of the bridge. +		 */ +		if (cb == report_error_detected) +			dev->error_state = state; +		pci_walk_bus(dev->subordinate, cb, &result_data); +		if (cb == report_resume) { +			pci_cleanup_aer_uncorrect_error_status(dev); +			dev->error_state = pci_channel_io_normal; +		} +	} else { +		/* +		 * If the error is reported by an end point, we think this +		 * error is related to the upstream link of the end point. +		 */ +		if (state == pci_channel_io_normal) +			/* +			 * the error is non fatal so the bus is ok, just invoke +			 * the callback for the function that logged the error. +			 */ +			cb(dev, &result_data); +		else +			pci_walk_bus(dev->bus, cb, &result_data); +	} + +	return result_data.result; +} + +/** + * pcie_do_fatal_recovery - handle fatal error recovery process + * @dev: pointer to a pci_dev data structure of agent detecting an error + * + * Invoked when an error is fatal. Once being invoked, removes the devices + * beneath this AER agent, followed by reset link e.g. secondary bus reset + * followed by re-enumeration of devices. + */ +void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service) +{ +	struct pci_dev *udev; +	struct pci_bus *parent; +	struct pci_dev *pdev, *temp; +	pci_ers_result_t result; + +	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) +		udev = dev; +	else +		udev = dev->bus->self; + +	parent = udev->subordinate; +	pci_lock_rescan_remove(); +	list_for_each_entry_safe_reverse(pdev, temp, &parent->devices, +					 bus_list) { +		pci_dev_get(pdev); +		pci_dev_set_disconnected(pdev, NULL); +		if (pci_has_subordinate(pdev)) +			pci_walk_bus(pdev->subordinate, +				     pci_dev_set_disconnected, NULL); +		pci_stop_and_remove_bus_device(pdev); +		pci_dev_put(pdev); +	} + +	result = reset_link(udev, service); + +	if ((service == PCIE_PORT_SERVICE_AER) && +	    (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) { +		/* +		 * If the error is reported by a bridge, we think this error +		 * is related to the downstream link of the bridge, so we +		 * do error recovery on all subordinates of the bridge instead +		 * of the bridge and clear the error status of the bridge. +		 */ +		pci_cleanup_aer_uncorrect_error_status(dev); +	} + +	if (result == PCI_ERS_RESULT_RECOVERED) { +		if (pcie_wait_for_link(udev, true)) +			pci_rescan_bus(udev->bus); +		pci_info(dev, "Device recovery from fatal error successful\n"); +	} else { +		pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); +		pci_info(dev, "Device recovery from fatal error failed\n"); +	} + +	pci_unlock_rescan_remove(); +} + +/** + * pcie_do_nonfatal_recovery - handle nonfatal error recovery process + * @dev: pointer to a pci_dev data structure of agent detecting an error + * + * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast + * error detected message to all downstream drivers within a hierarchy in + * question and return the returned code. + */ +void pcie_do_nonfatal_recovery(struct pci_dev *dev) +{ +	pci_ers_result_t status; +	enum pci_channel_state state; + +	state = pci_channel_io_normal; + +	status = broadcast_error_message(dev, +			state, +			"error_detected", +			report_error_detected); + +	if (status == PCI_ERS_RESULT_CAN_RECOVER) +		status = broadcast_error_message(dev, +				state, +				"mmio_enabled", +				report_mmio_enabled); + +	if (status == PCI_ERS_RESULT_NEED_RESET) { +		/* +		 * TODO: Should call platform-specific +		 * functions to reset slot before calling +		 * drivers' slot_reset callbacks? +		 */ +		status = broadcast_error_message(dev, +				state, +				"slot_reset", +				report_slot_reset); +	} + +	if (status != PCI_ERS_RESULT_RECOVERED) +		goto failed; + +	broadcast_error_message(dev, +				state, +				"resume", +				report_resume); + +	pci_info(dev, "AER: Device recovery successful\n"); +	return; + +failed: +	pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); + +	/* TODO: Should kernel panic here? */ +	pci_info(dev, "AER: Device recovery failed\n"); +}  | 
