Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r--  arch/powerpc/platforms/44x/Kconfig              1
-rw-r--r--  arch/powerpc/platforms/44x/gpio.c             108
-rw-r--r--  arch/powerpc/platforms/8xx/Kconfig              1
-rw-r--r--  arch/powerpc/platforms/Kconfig                  1
-rw-r--r--  arch/powerpc/platforms/cell/spufs/syscalls.c    4
-rw-r--r--  arch/powerpc/platforms/powernv/Kconfig          1
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c      98
-rw-r--r--  arch/powerpc/platforms/powernv/subcore.h        4
-rw-r--r--  arch/powerpc/platforms/pseries/Kconfig          1
-rw-r--r--  arch/powerpc/platforms/pseries/Makefile         1
-rw-r--r--  arch/powerpc/platforms/pseries/mobility.c       3
-rw-r--r--  arch/powerpc/platforms/pseries/msi.c          134
-rw-r--r--  arch/powerpc/platforms/pseries/papr-hvpipe.c  818
-rw-r--r--  arch/powerpc/platforms/pseries/papr-hvpipe.h   42
14 files changed, 1017 insertions, 200 deletions
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index 35a1f4b9f827..fc79f8466933 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -231,7 +231,6 @@ config PPC4xx_GPIO
bool "PPC4xx GPIO support"
depends on 44x
select GPIOLIB
- select OF_GPIO_MM_GPIOCHIP
help
Enable gpiolib support for ppc440 based boards
diff --git a/arch/powerpc/platforms/44x/gpio.c b/arch/powerpc/platforms/44x/gpio.c
index 08ab76582568..aea0d913b59d 100644
--- a/arch/powerpc/platforms/44x/gpio.c
+++ b/arch/powerpc/platforms/44x/gpio.c
@@ -14,10 +14,10 @@
#include <linux/spinlock.h>
#include <linux/io.h>
#include <linux/of.h>
-#include <linux/gpio/legacy-of-mm-gpiochip.h>
#include <linux/gpio/driver.h>
#include <linux/types.h>
#include <linux/slab.h>
+#include <linux/platform_device.h>
#define GPIO_MASK(gpio) (0x80000000 >> (gpio))
#define GPIO_MASK2(gpio) (0xc0000000 >> ((gpio) * 2))
@@ -45,7 +45,8 @@ struct ppc4xx_gpio {
};
struct ppc4xx_gpio_chip {
- struct of_mm_gpio_chip mm_gc;
+ struct gpio_chip gc;
+ void __iomem *regs;
spinlock_t lock;
};
@@ -57,8 +58,8 @@ struct ppc4xx_gpio_chip {
static int ppc4xx_gpio_get(struct gpio_chip *gc, unsigned int gpio)
{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- struct ppc4xx_gpio __iomem *regs = mm_gc->regs;
+ struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc);
+ struct ppc4xx_gpio __iomem *regs = chip->regs;
return !!(in_be32(&regs->ir) & GPIO_MASK(gpio));
}
@@ -66,8 +67,8 @@ static int ppc4xx_gpio_get(struct gpio_chip *gc, unsigned int gpio)
static inline void
__ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- struct ppc4xx_gpio __iomem *regs = mm_gc->regs;
+ struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc);
+ struct ppc4xx_gpio __iomem *regs = chip->regs;
if (val)
setbits32(&regs->or, GPIO_MASK(gpio));
@@ -93,9 +94,8 @@ static int ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
static int ppc4xx_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc);
- struct ppc4xx_gpio __iomem *regs = mm_gc->regs;
+ struct ppc4xx_gpio __iomem *regs = chip->regs;
unsigned long flags;
spin_lock_irqsave(&chip->lock, flags);
@@ -123,9 +123,8 @@ static int ppc4xx_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
static int
ppc4xx_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc);
- struct ppc4xx_gpio __iomem *regs = mm_gc->regs;
+ struct ppc4xx_gpio __iomem *regs = chip->regs;
unsigned long flags;
spin_lock_irqsave(&chip->lock, flags);
@@ -155,42 +154,57 @@ ppc4xx_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
return 0;
}
-static int __init ppc4xx_add_gpiochips(void)
+static int ppc4xx_gpio_probe(struct platform_device *ofdev)
{
- struct device_node *np;
-
- for_each_compatible_node(np, NULL, "ibm,ppc4xx-gpio") {
- int ret;
- struct ppc4xx_gpio_chip *ppc4xx_gc;
- struct of_mm_gpio_chip *mm_gc;
- struct gpio_chip *gc;
-
- ppc4xx_gc = kzalloc(sizeof(*ppc4xx_gc), GFP_KERNEL);
- if (!ppc4xx_gc) {
- ret = -ENOMEM;
- goto err;
- }
-
- spin_lock_init(&ppc4xx_gc->lock);
-
- mm_gc = &ppc4xx_gc->mm_gc;
- gc = &mm_gc->gc;
-
- gc->ngpio = 32;
- gc->direction_input = ppc4xx_gpio_dir_in;
- gc->direction_output = ppc4xx_gpio_dir_out;
- gc->get = ppc4xx_gpio_get;
- gc->set = ppc4xx_gpio_set;
-
- ret = of_mm_gpiochip_add_data(np, mm_gc, ppc4xx_gc);
- if (ret)
- goto err;
- continue;
-err:
- pr_err("%pOF: registration failed with status %d\n", np, ret);
- kfree(ppc4xx_gc);
- /* try others anyway */
- }
- return 0;
+ struct device *dev = &ofdev->dev;
+ struct device_node *np = dev->of_node;
+ struct ppc4xx_gpio_chip *chip;
+ struct gpio_chip *gc;
+
+ chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+
+ spin_lock_init(&chip->lock);
+
+ gc = &chip->gc;
+
+ gc->base = -1;
+ gc->ngpio = 32;
+ gc->direction_input = ppc4xx_gpio_dir_in;
+ gc->direction_output = ppc4xx_gpio_dir_out;
+ gc->get = ppc4xx_gpio_get;
+ gc->set = ppc4xx_gpio_set;
+
+ gc->label = devm_kasprintf(dev, GFP_KERNEL, "%pOF", np);
+ if (!gc->label)
+ return -ENOMEM;
+
+ chip->regs = devm_of_iomap(dev, np, 0, NULL);
+ if (IS_ERR(chip->regs))
+ return PTR_ERR(chip->regs);
+
+ return devm_gpiochip_add_data(dev, gc, chip);
+}
+
+static const struct of_device_id ppc4xx_gpio_match[] = {
+ {
+ .compatible = "ibm,ppc4xx-gpio",
+ },
+ {},
+};
+MODULE_DEVICE_TABLE(of, ppc4xx_gpio_match);
+
+static struct platform_driver ppc4xx_gpio_driver = {
+ .probe = ppc4xx_gpio_probe,
+ .driver = {
+ .name = "ppc4xx-gpio",
+ .of_match_table = ppc4xx_gpio_match,
+ },
+};
+
+static int __init ppc4xx_gpio_init(void)
+{
+ return platform_driver_register(&ppc4xx_gpio_driver);
}
-arch_initcall(ppc4xx_add_gpiochips);
+arch_initcall(ppc4xx_gpio_init);
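With the conversion to a platform driver, gc->base = -1 gives the chip dynamic GPIO numbering, so consumers are expected to use the descriptor-based gpiolib API rather than legacy global numbers. A minimal consumer sketch; the "reset" con_id and demo_reset() are hypothetical, not part of this patch:

#include <linux/device.h>
#include <linux/gpio/consumer.h>

/* hypothetical consumer: "reset" maps to a reset-gpios DT property */
static int demo_reset(struct device *dev)
{
	struct gpio_desc *reset;

	/* request the line, driven low initially */
	reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
	if (IS_ERR(reset))
		return PTR_ERR(reset);

	gpiod_set_value(reset, 1);	/* assert reset */
	gpiod_set_value(reset, 0);	/* release reset */
	return 0;
}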
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
index 8623aebfac48..abb2b45b2789 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -101,7 +101,6 @@ comment "Generic MPC8xx Options"
config 8xx_GPIO
bool "GPIO API Support"
select GPIOLIB
- select OF_GPIO_MM_GPIOCHIP
help
Saying Y here will cause the ports on an MPC8xx processor to be used
with the GPIO API. If you say N here, the kernel needs less memory.
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index fea3766eac0f..364eef32ddcc 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -243,7 +243,6 @@ config CPM2
select CPM
select HAVE_PCI
select GPIOLIB
- select OF_GPIO_MM_GPIOCHIP
help
The CPM2 (Communications Processor Module) is a coprocessor on
embedded CPUs made by Freescale. Selecting this option means that
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index 157e046e6e93..ea4ba1b6ce6a 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -67,11 +67,11 @@ static long do_spu_create(const char __user *pathname, unsigned int flags,
struct dentry *dentry;
int ret;
- dentry = user_path_create(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY);
+ dentry = start_creating_user_path(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY);
ret = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
ret = spufs_create(&path, dentry, flags, mode, neighbor);
- done_path_create(&path, dentry);
+ end_creating_path(&path, dentry);
}
return ret;
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 95d7ba73d43d..b5ad7c173ef0 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -9,6 +9,7 @@ config PPC_POWERNV
select PPC_P7_NAP
select FORCE_PCI
select PCI_MSI
+ select IRQ_MSI_LIB
select EPAPR_BOOT
select PPC_INDIRECT_PIO
select PPC_UDBG_16550
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index d8ccf2c9b98a..b0c1d9d16fb5 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -15,6 +15,7 @@
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/irq.h>
+#include <linux/irqchip/irq-msi-lib.h>
#include <linux/io.h>
#include <linux/msi.h>
#include <linux/iommu.h>
@@ -37,7 +38,6 @@
#include <asm/firmware.h>
#include <asm/pnv-pci.h>
#include <asm/mmzone.h>
-#include <asm/xive.h>
#include "powernv.h"
#include "pci.h"
@@ -1707,23 +1707,6 @@ static int __pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
return 0;
}
-/*
- * The msi_free() op is called before irq_domain_free_irqs_top() when
- * the handler data is still available. Use that to clear the XIVE
- * controller.
- */
-static void pnv_msi_ops_msi_free(struct irq_domain *domain,
- struct msi_domain_info *info,
- unsigned int irq)
-{
- if (xive_enabled())
- xive_irq_free_data(irq);
-}
-
-static struct msi_domain_ops pnv_pci_msi_domain_ops = {
- .msi_free = pnv_msi_ops_msi_free,
-};
-
static void pnv_msi_shutdown(struct irq_data *d)
{
d = d->parent_data;
@@ -1731,31 +1714,33 @@ static void pnv_msi_shutdown(struct irq_data *d)
d->chip->irq_shutdown(d);
}
-static void pnv_msi_mask(struct irq_data *d)
+static bool pnv_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
+ struct irq_domain *real_parent, struct msi_domain_info *info)
{
- pci_msi_mask_irq(d);
- irq_chip_mask_parent(d);
-}
+ struct irq_chip *chip = info->chip;
-static void pnv_msi_unmask(struct irq_data *d)
-{
- pci_msi_unmask_irq(d);
- irq_chip_unmask_parent(d);
-}
+ if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info))
+ return false;
-static struct irq_chip pnv_pci_msi_irq_chip = {
- .name = "PNV-PCI-MSI",
- .irq_shutdown = pnv_msi_shutdown,
- .irq_mask = pnv_msi_mask,
- .irq_unmask = pnv_msi_unmask,
- .irq_eoi = irq_chip_eoi_parent,
-};
+ chip->irq_shutdown = pnv_msi_shutdown;
+ return true;
+}
-static struct msi_domain_info pnv_msi_domain_info = {
- .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
- MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX),
- .ops = &pnv_pci_msi_domain_ops,
- .chip = &pnv_pci_msi_irq_chip,
+#define PNV_PCI_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \
+ MSI_FLAG_USE_DEF_CHIP_OPS | \
+ MSI_FLAG_PCI_MSI_MASK_PARENT)
+#define PNV_PCI_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \
+ MSI_FLAG_PCI_MSIX | \
+ MSI_FLAG_MULTI_PCI_MSI)
+
+static const struct msi_parent_ops pnv_msi_parent_ops = {
+ .required_flags = PNV_PCI_MSI_FLAGS_REQUIRED,
+ .supported_flags = PNV_PCI_MSI_FLAGS_SUPPORTED,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI,
+ .bus_select_token = DOMAIN_BUS_NEXUS,
+ .bus_select_mask = MATCH_PCI_MSI,
+ .prefix = "PNV-",
+ .init_dev_msi_info = pnv_init_dev_msi_info,
};
static void pnv_msi_compose_msg(struct irq_data *d, struct msi_msg *msg)
@@ -1854,7 +1839,7 @@ static int pnv_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
return 0;
out:
- irq_domain_free_irqs_parent(domain, virq, i - 1);
+ irq_domain_free_irqs_parent(domain, virq, i);
msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, nr_irqs);
return ret;
}
@@ -1870,41 +1855,30 @@ static void pnv_irq_domain_free(struct irq_domain *domain, unsigned int virq,
virq, d->hwirq, nr_irqs);
msi_bitmap_free_hwirqs(&phb->msi_bmp, d->hwirq, nr_irqs);
- /* XIVE domain is cleared through ->msi_free() */
+ irq_domain_free_irqs_parent(domain, virq, nr_irqs);
}
static const struct irq_domain_ops pnv_irq_domain_ops = {
+ .select = msi_lib_irq_domain_select,
.alloc = pnv_irq_domain_alloc,
.free = pnv_irq_domain_free,
};
static int __init pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int count)
{
- struct pnv_phb *phb = hose->private_data;
struct irq_domain *parent = irq_get_default_domain();
-
- hose->fwnode = irq_domain_alloc_named_id_fwnode("PNV-MSI", phb->opal_id);
- if (!hose->fwnode)
- return -ENOMEM;
-
- hose->dev_domain = irq_domain_create_hierarchy(parent, 0, count,
- hose->fwnode,
- &pnv_irq_domain_ops, hose);
+ struct irq_domain_info info = {
+ .fwnode = of_fwnode_handle(hose->dn),
+ .ops = &pnv_irq_domain_ops,
+ .host_data = hose,
+ .size = count,
+ .parent = parent,
+ };
+
+ hose->dev_domain = msi_create_parent_irq_domain(&info, &pnv_msi_parent_ops);
if (!hose->dev_domain) {
- pr_err("PCI: failed to create IRQ domain bridge %pOF (domain %d)\n",
- hose->dn, hose->global_number);
- irq_domain_free_fwnode(hose->fwnode);
- return -ENOMEM;
- }
-
- hose->msi_domain = pci_msi_create_irq_domain(of_fwnode_handle(hose->dn),
- &pnv_msi_domain_info,
- hose->dev_domain);
- if (!hose->msi_domain) {
pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n",
hose->dn, hose->global_number);
- irq_domain_free_fwnode(hose->fwnode);
- irq_domain_remove(hose->dev_domain);
return -ENOMEM;
}
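This powernv conversion and the pseries one further below follow the same msi-lib pattern: the per-controller MSI domain and fwnode go away, and a single parent domain carries msi_parent_ops. A condensed sketch of the shape, with hypothetical demo_* names (not part of this patch):

#include <linux/irqchip/irq-msi-lib.h>
#include <linux/msi.h>

static const struct msi_parent_ops demo_msi_parent_ops = {
	.required_flags		= MSI_FLAG_USE_DEF_DOM_OPS |
				  MSI_FLAG_USE_DEF_CHIP_OPS |
				  MSI_FLAG_PCI_MSI_MASK_PARENT,
	.supported_flags	= MSI_GENERIC_FLAGS_MASK |
				  MSI_FLAG_PCI_MSIX,
	.chip_flags		= MSI_CHIP_FLAG_SET_EOI,
	.bus_select_token	= DOMAIN_BUS_NEXUS,
	.bus_select_mask	= MATCH_PCI_MSI,
	.prefix			= "DEMO-",
	/* no per-device chip overrides needed: use the library helper */
	.init_dev_msi_info	= msi_lib_init_dev_msi_info,
};

static int demo_create_domain(struct fwnode_handle *fwnode,
			      struct irq_domain *parent,
			      const struct irq_domain_ops *ops,
			      void *host_data, unsigned int count)
{
	struct irq_domain_info info = {
		.fwnode		= fwnode,
		.ops		= ops,
		.host_data	= host_data,
		.size		= count,
		.parent		= parent,
	};

	/* one parent domain replaces dev_domain + msi_domain + fwnode */
	return msi_create_parent_irq_domain(&info, &demo_msi_parent_ops) ?
		0 : -ENOMEM;
}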
diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h
index 77feee8436d4..413fd85d9bc2 100644
--- a/arch/powerpc/platforms/powernv/subcore.h
+++ b/arch/powerpc/platforms/powernv/subcore.h
@@ -9,7 +9,7 @@
#define SYNC_STEP_REAL_MODE 2 /* Set by secondary when in real mode */
#define SYNC_STEP_FINISHED 3 /* Set by secondary when split/unsplit is done */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_SMP
void split_core_secondary_loop(u8 *state);
@@ -18,4 +18,4 @@ extern void update_subcore_sibling_mask(void);
static inline void update_subcore_sibling_mask(void) { }
#endif /* CONFIG_SMP */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index fa3c2fff082a..3e042218d6cd 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -7,6 +7,7 @@ config PPC_PSERIES
select OF_DYNAMIC
select FORCE_PCI
select PCI_MSI
+ select IRQ_MSI_LIB
select GENERIC_ALLOCATOR
select PPC_XICS
select PPC_XIVE_SPAPR
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 57222678bb3f..931ebaa474c8 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -5,6 +5,7 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \
of_helpers.o rtas-work-area.o papr-sysparm.o \
papr-rtas-common.o papr-vpd.o papr-indices.o \
papr-platform-dump.o papr-phy-attest.o \
+ papr-hvpipe.o \
setup.o iommu.o event_sources.o ras.o \
firmware.o power.o dlpar.o mobility.o rng.o \
pci.o pci_dlpar.o eeh_pseries.o msi.o \
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 62bd8e2d5d4c..95fe802ccdfd 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -28,6 +28,7 @@
#include <asm/rtas.h>
#include "pseries.h"
#include "vas.h" /* vas_migration_handler() */
+#include "papr-hvpipe.h" /* hvpipe_migration_handler() */
#include "../../kernel/cacheinfo.h"
static struct kobject *mobility_kobj;
@@ -744,6 +745,7 @@ static int pseries_migrate_partition(u64 handle)
* by closing VAS windows at the beginning of this function.
*/
vas_migration_handler(VAS_SUSPEND);
+ hvpipe_migration_handler(HVPIPE_SUSPEND);
ret = wait_for_vasi_session_suspending(handle);
if (ret)
@@ -770,6 +772,7 @@ static int pseries_migrate_partition(u64 handle)
out:
vas_migration_handler(VAS_RESUME);
+ hvpipe_migration_handler(HVPIPE_RESUME);
return ret;
}
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index ee1c8c6898a3..825f9432e03d 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -7,6 +7,7 @@
#include <linux/crash_dump.h>
#include <linux/device.h>
#include <linux/irq.h>
+#include <linux/irqchip/irq-msi-lib.h>
#include <linux/irqdomain.h>
#include <linux/msi.h>
#include <linux/seq_file.h>
@@ -15,7 +16,6 @@
#include <asm/hw_irq.h>
#include <asm/ppc-pci.h>
#include <asm/machdep.h>
-#include <asm/xive.h>
#include "pseries.h"
@@ -430,43 +430,25 @@ again:
static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev,
int nvec, msi_alloc_info_t *arg)
{
+ struct msi_domain_info *info = domain->host_data;
struct pci_dev *pdev = to_pci_dev(dev);
- int type = pdev->msix_enabled ? PCI_CAP_ID_MSIX : PCI_CAP_ID_MSI;
+ int type = (info->flags & MSI_FLAG_PCI_MSIX) ? PCI_CAP_ID_MSIX : PCI_CAP_ID_MSI;
return rtas_prepare_msi_irqs(pdev, nvec, type, arg);
}
/*
- * ->msi_free() is called before irq_domain_free_irqs_top() when the
- * handler data is still available. Use that to clear the XIVE
- * controller data.
- */
-static void pseries_msi_ops_msi_free(struct irq_domain *domain,
- struct msi_domain_info *info,
- unsigned int irq)
-{
- if (xive_enabled())
- xive_irq_free_data(irq);
-}
-
-/*
* RTAS can not disable one MSI at a time. It's all or nothing. Do it
* at the end after all IRQs have been freed.
*/
-static void pseries_msi_post_free(struct irq_domain *domain, struct device *dev)
+static void pseries_msi_ops_teardown(struct irq_domain *domain, msi_alloc_info_t *arg)
{
- if (WARN_ON_ONCE(!dev_is_pci(dev)))
- return;
+ struct msi_desc *desc = arg->desc;
+ struct pci_dev *pdev = msi_desc_to_pci_dev(desc);
- rtas_disable_msi(to_pci_dev(dev));
+ rtas_disable_msi(pdev);
}
-static struct msi_domain_ops pseries_pci_msi_domain_ops = {
- .msi_prepare = pseries_msi_ops_prepare,
- .msi_free = pseries_msi_ops_msi_free,
- .msi_post_free = pseries_msi_post_free,
-};
-
static void pseries_msi_shutdown(struct irq_data *d)
{
d = d->parent_data;
@@ -474,18 +456,6 @@ static void pseries_msi_shutdown(struct irq_data *d)
d->chip->irq_shutdown(d);
}
-static void pseries_msi_mask(struct irq_data *d)
-{
- pci_msi_mask_irq(d);
- irq_chip_mask_parent(d);
-}
-
-static void pseries_msi_unmask(struct irq_data *d)
-{
- pci_msi_unmask_irq(d);
- irq_chip_unmask_parent(d);
-}
-
static void pseries_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
{
struct msi_desc *entry = irq_data_get_msi_desc(data);
@@ -500,27 +470,39 @@ static void pseries_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
entry->msg = *msg;
}
-static struct irq_chip pseries_pci_msi_irq_chip = {
- .name = "pSeries-PCI-MSI",
- .irq_shutdown = pseries_msi_shutdown,
- .irq_mask = pseries_msi_mask,
- .irq_unmask = pseries_msi_unmask,
- .irq_eoi = irq_chip_eoi_parent,
- .irq_write_msi_msg = pseries_msi_write_msg,
-};
+static bool pseries_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
+ struct irq_domain *real_parent, struct msi_domain_info *info)
+{
+ struct irq_chip *chip = info->chip;
+ if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info))
+ return false;
-/*
- * Set MSI_FLAG_MSIX_CONTIGUOUS as there is no way to express to
- * firmware to request a discontiguous or non-zero based range of
- * MSI-X entries. Core code will reject such setup attempts.
- */
-static struct msi_domain_info pseries_msi_domain_info = {
- .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
- MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX |
- MSI_FLAG_MSIX_CONTIGUOUS),
- .ops = &pseries_pci_msi_domain_ops,
- .chip = &pseries_pci_msi_irq_chip,
+ chip->irq_shutdown = pseries_msi_shutdown;
+ chip->irq_write_msi_msg = pseries_msi_write_msg;
+
+ info->ops->msi_prepare = pseries_msi_ops_prepare;
+ info->ops->msi_teardown = pseries_msi_ops_teardown;
+
+ return true;
+}
+
+#define PSERIES_PCI_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \
+ MSI_FLAG_USE_DEF_CHIP_OPS | \
+ MSI_FLAG_PCI_MSI_MASK_PARENT)
+#define PSERIES_PCI_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \
+ MSI_FLAG_PCI_MSIX | \
+ MSI_FLAG_MSIX_CONTIGUOUS | \
+ MSI_FLAG_MULTI_PCI_MSI)
+
+static const struct msi_parent_ops pseries_msi_parent_ops = {
+ .required_flags = PSERIES_PCI_MSI_FLAGS_REQUIRED,
+ .supported_flags = PSERIES_PCI_MSI_FLAGS_SUPPORTED,
+ .chip_flags = MSI_CHIP_FLAG_SET_EOI,
+ .bus_select_token = DOMAIN_BUS_NEXUS,
+ .bus_select_mask = MATCH_PCI_MSI,
+ .prefix = "pSeries-",
+ .init_dev_msi_info = pseries_init_dev_msi_info,
};
static void pseries_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
@@ -593,7 +575,7 @@ static int pseries_irq_domain_alloc(struct irq_domain *domain, unsigned int virq
out:
/* TODO: handle RTAS cleanup in ->msi_finish() ? */
- irq_domain_free_irqs_parent(domain, virq, i - 1);
+ irq_domain_free_irqs_parent(domain, virq, i);
return ret;
}
@@ -604,11 +586,11 @@ static void pseries_irq_domain_free(struct irq_domain *domain, unsigned int virq
struct pci_controller *phb = irq_data_get_irq_chip_data(d);
pr_debug("%s bridge %pOF %d #%d\n", __func__, phb->dn, virq, nr_irqs);
-
- /* XIVE domain data is cleared through ->msi_free() */
+ irq_domain_free_irqs_parent(domain, virq, nr_irqs);
}
static const struct irq_domain_ops pseries_irq_domain_ops = {
+ .select = msi_lib_irq_domain_select,
.alloc = pseries_irq_domain_alloc,
.free = pseries_irq_domain_free,
};
@@ -617,30 +599,18 @@ static int __pseries_msi_allocate_domains(struct pci_controller *phb,
unsigned int count)
{
struct irq_domain *parent = irq_get_default_domain();
-
- phb->fwnode = irq_domain_alloc_named_id_fwnode("pSeries-MSI",
- phb->global_number);
- if (!phb->fwnode)
- return -ENOMEM;
-
- phb->dev_domain = irq_domain_create_hierarchy(parent, 0, count,
- phb->fwnode,
- &pseries_irq_domain_ops, phb);
+ struct irq_domain_info info = {
+ .fwnode = of_fwnode_handle(phb->dn),
+ .ops = &pseries_irq_domain_ops,
+ .host_data = phb,
+ .size = count,
+ .parent = parent,
+ };
+
+ phb->dev_domain = msi_create_parent_irq_domain(&info, &pseries_msi_parent_ops);
if (!phb->dev_domain) {
- pr_err("PCI: failed to create IRQ domain bridge %pOF (domain %d)\n",
- phb->dn, phb->global_number);
- irq_domain_free_fwnode(phb->fwnode);
- return -ENOMEM;
- }
-
- phb->msi_domain = pci_msi_create_irq_domain(of_fwnode_handle(phb->dn),
- &pseries_msi_domain_info,
- phb->dev_domain);
- if (!phb->msi_domain) {
pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n",
phb->dn, phb->global_number);
- irq_domain_free_fwnode(phb->fwnode);
- irq_domain_remove(phb->dev_domain);
return -ENOMEM;
}
@@ -662,12 +632,8 @@ int pseries_msi_allocate_domains(struct pci_controller *phb)
void pseries_msi_free_domains(struct pci_controller *phb)
{
- if (phb->msi_domain)
- irq_domain_remove(phb->msi_domain);
if (phb->dev_domain)
irq_domain_remove(phb->dev_domain);
- if (phb->fwnode)
- irq_domain_free_fwnode(phb->fwnode);
}
static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev)
diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.c b/arch/powerpc/platforms/pseries/papr-hvpipe.c
new file mode 100644
index 000000000000..21a2f447c43f
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr-hvpipe.c
@@ -0,0 +1,818 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define pr_fmt(fmt) "papr-hvpipe: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/anon_inodes.h>
+#include <linux/miscdevice.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/of.h>
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/rtas-work-area.h>
+#include <asm/papr-sysparm.h>
+#include <uapi/asm/papr-hvpipe.h>
+#include "pseries.h"
+#include "papr-hvpipe.h"
+
+static DEFINE_SPINLOCK(hvpipe_src_list_lock);
+static LIST_HEAD(hvpipe_src_list);
+
+static unsigned char hvpipe_ras_buf[RTAS_ERROR_LOG_MAX];
+static struct workqueue_struct *papr_hvpipe_wq;
+static struct work_struct *papr_hvpipe_work;
+static int hvpipe_check_exception_token;
+static bool hvpipe_feature;
+
+/*
+ * Recent PowerPC FW provides support for partitions and various
+ * sources (e.g. a remote hardware management console (HMC)) to
+ * exchange information through an inband hypervisor channel
+ * called HVPIPE. Only HMC sources are supported right now; a
+ * partition can communicate with multiple HMCs, and each source
+ * is identified by a source ID.
+ *
+ * FW introduces send HVPIPE and recv HVPIPE RTAS calls for
+ * partitions to send and receive payloads respectively.
+ *
+ * These RTAS functions have the following requirements /
+ * limitations:
+ * - One hvpipe per partition for all sources.
+ * - A successful return from send HVPIPE is treated as delivery
+ *   to the source.
+ * - A successful return from recv HVPIPE is treated as an ACK
+ *   to the source.
+ * - An HVPIPE event message is generated when a payload is
+ *   ready for the partition. The hypervisor will not deliver
+ *   another event until the partition reads the previous
+ *   payload, which means the pipe is blocked for all sources.
+ *
+ * Linux implementation:
+ * Follow interfaces similar to the ones the OS provides for
+ * other RTAS calls, e.g. /dev/papr-indices, /dev/papr-vpd, etc.
+ * - /dev/papr-hvpipe is available to user space.
+ * - devfd = open("/dev/papr-hvpipe", ..)
+ * - fd = ioctl(devfd, PAPR_HVPIPE_IOC_CREATE_HANDLE, &srcID) -
+ *   one FD per source.
+ * - write(fd, buf, size) --> Issues the send HVPIPE RTAS call
+ *   and returns size on success, or the error corresponding to
+ *   the RTAS return code on failure.
+ * - poll(fd, ..) --> Wakes up the FD when a payload is
+ *   available to read. The HVPIPE event message handler wakes
+ *   up the FD based on the source ID in the event message.
+ * - read(fd, buf, size) --> Issues the recv HVPIPE RTAS call
+ *   and returns size on success, or the error corresponding to
+ *   the RTAS return code on failure.
+ */
+
+/*
+ * ibm,receive-hvpipe-msg RTAS call.
+ * @area: Caller-provided work area buffer for results.
+ * @srcID: Source ID returned by the RTAS call.
+ * @bytesw: Bytes written by RTAS call to @area.
+ */
+static int rtas_ibm_receive_hvpipe_msg(struct rtas_work_area *area,
+ u32 *srcID, u32 *bytesw)
+{
+ const s32 token = rtas_function_token(RTAS_FN_IBM_RECEIVE_HVPIPE_MSG);
+ u32 rets[2];
+ s32 fwrc;
+ int ret;
+
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -ENOENT;
+
+ do {
+ fwrc = rtas_call(token, 2, 3, rets,
+ rtas_work_area_phys(area),
+ rtas_work_area_size(area));
+
+ } while (rtas_busy_delay(fwrc));
+
+ switch (fwrc) {
+ case RTAS_SUCCESS:
+ *srcID = rets[0];
+ *bytesw = rets[1];
+ ret = 0;
+ break;
+ case RTAS_HARDWARE_ERROR:
+ ret = -EIO;
+ break;
+ case RTAS_INVALID_PARAMETER:
+ ret = -EINVAL;
+ break;
+ case RTAS_FUNC_NOT_SUPPORTED:
+ ret = -EOPNOTSUPP;
+ break;
+ default:
+ ret = -EIO;
+ pr_err_ratelimited("unexpected ibm,receive-hvpipe-msg status %d\n", fwrc);
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * ibm,send-hvpipe-msg RTAS call
+ * @area: Caller-provided work area buffer to send.
+ * @srcID: Target source for the send pipe message.
+ */
+static int rtas_ibm_send_hvpipe_msg(struct rtas_work_area *area, u32 srcID)
+{
+ const s32 token = rtas_function_token(RTAS_FN_IBM_SEND_HVPIPE_MSG);
+ s32 fwrc;
+ int ret;
+
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -ENOENT;
+
+ do {
+ fwrc = rtas_call(token, 2, 1, NULL, srcID,
+ rtas_work_area_phys(area));
+
+ } while (rtas_busy_delay(fwrc));
+
+ switch (fwrc) {
+ case RTAS_SUCCESS:
+ ret = 0;
+ break;
+ case RTAS_HARDWARE_ERROR:
+ ret = -EIO;
+ break;
+ case RTAS_INVALID_PARAMETER:
+ ret = -EINVAL;
+ break;
+ case RTAS_HVPIPE_CLOSED:
+ ret = -EPIPE;
+ break;
+ case RTAS_FUNC_NOT_SUPPORTED:
+ ret = -EOPNOTSUPP;
+ break;
+ default:
+ ret = -EIO;
+		pr_err_ratelimited("unexpected ibm,send-hvpipe-msg status %d\n", fwrc);
+ break;
+ }
+
+ return ret;
+}
+
+static struct hvpipe_source_info *hvpipe_find_source(u32 srcID)
+{
+ struct hvpipe_source_info *src_info;
+
+ list_for_each_entry(src_info, &hvpipe_src_list, list)
+ if (src_info->srcID == srcID)
+ return src_info;
+
+ return NULL;
+}
+
+/*
+ * Collect the receive buffer with the recv HVPIPE RTAS call.
+ * Called from read(), release() and the workqueue function.
+ * @buf: User specified buffer to copy the payload returned by
+ * the recv HVPIPE RTAS call, or NULL to just drain the pipe.
+ * @size: Size of the buffer the user passed.
+ */
+static int hvpipe_rtas_recv_msg(char __user *buf, int size)
+{
+ struct rtas_work_area *work_area;
+ u32 srcID, bytes_written;
+ int ret;
+
+ work_area = rtas_work_area_alloc(SZ_4K);
+ if (!work_area) {
+ pr_err("Could not allocate RTAS buffer for recv pipe\n");
+ return -ENOMEM;
+ }
+
+ ret = rtas_ibm_receive_hvpipe_msg(work_area, &srcID,
+ &bytes_written);
+ if (!ret) {
+		/*
+		 * The recv HVPIPE RTAS call succeeded. When the FD
+		 * is being released or no one is waiting on the
+		 * specific source, this function is called with a
+		 * NULL buf just to read the payload so that the
+		 * pipe is not blocked.
+		 */
+ if (buf) {
+ if (size < bytes_written) {
+ pr_err("Received the payload size = %d, but the buffer size = %d\n",
+ bytes_written, size);
+ bytes_written = size;
+ }
+			if (copy_to_user(buf,
+					 rtas_work_area_raw_buf(work_area),
+					 bytes_written))
+				ret = -EFAULT;
+			else
+				ret = bytes_written;
+ }
+ } else {
+ pr_err("ibm,receive-hvpipe-msg failed with %d\n",
+ ret);
+ }
+
+ rtas_work_area_free(work_area);
+ return ret;
+}
+
+/*
+ * papr_hvpipe_handle_write - Issue the send HVPIPE RTAS call and
+ * return the size (payload + HVPIPE_HDR_LEN) on RTAS success.
+ * Otherwise return the error corresponding to the RTAS status
+ * to user space.
+ */
+static ssize_t papr_hvpipe_handle_write(struct file *file,
+ const char __user *buf, size_t size, loff_t *off)
+{
+ struct hvpipe_source_info *src_info = file->private_data;
+ struct rtas_work_area *work_area, *work_buf;
+ unsigned long ret, len;
+ __be64 *area_be;
+
+ /*
+ * Return -ENXIO during migration
+ */
+ if (!hvpipe_feature)
+ return -ENXIO;
+
+ if (!src_info)
+ return -EIO;
+
+ /*
+ * Send HVPIPE RTAS is used to send payload to the specific
+ * source with the input parameters source ID and the payload
+ * as buffer list. Each entry in the buffer list contains
+ * address/length pair of the buffer.
+ *
+ * The buffer list format is as follows:
+ *
+ * Header (length of address/length pairs and the header length)
+ * Address of 4K buffer 1
+ * Length of 4K buffer 1 used
+ * ...
+ * Address of 4K buffer n
+ * Length of 4K buffer n used
+ *
+ * See PAPR 7.3.32.2 ibm,send-hvpipe-msg
+ *
+	 * Even though the format can support a max 1MB payload,
+	 * the hypervisor supports only a 4048 byte payload at
+	 * present, and also just one address/length entry.
+	 *
+	 * A writev() interface can be added in the future when
+	 * the hypervisor supports multiple buffer list entries.
+	 */
+ /* HVPIPE_MAX_WRITE_BUFFER_SIZE = 4048 bytes */
+ if ((size > (HVPIPE_HDR_LEN + HVPIPE_MAX_WRITE_BUFFER_SIZE)) ||
+ (size <= HVPIPE_HDR_LEN))
+ return -EINVAL;
+
+ /*
+	 * The length of one (address, length) pair plus the header length
+ */
+ len = (2 * sizeof(u64)) + sizeof(u64);
+ size -= HVPIPE_HDR_LEN;
+ buf += HVPIPE_HDR_LEN;
+ mutex_lock(&rtas_ibm_send_hvpipe_msg_lock);
+ work_area = rtas_work_area_alloc(SZ_4K);
+ if (!work_area) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ area_be = (__be64 *)rtas_work_area_raw_buf(work_area);
+ /* header */
+ area_be[0] = cpu_to_be64(len);
+
+ work_buf = rtas_work_area_alloc(SZ_4K);
+ if (!work_buf) {
+ ret = -ENOMEM;
+ goto out_work;
+ }
+ /* First buffer address */
+ area_be[1] = cpu_to_be64(rtas_work_area_phys(work_buf));
+ /* First buffer address length */
+ area_be[2] = cpu_to_be64(size);
+
+ if (!copy_from_user(rtas_work_area_raw_buf(work_buf), buf, size)) {
+ ret = rtas_ibm_send_hvpipe_msg(work_area, src_info->srcID);
+ if (!ret)
+ ret = size + HVPIPE_HDR_LEN;
+	} else
+		ret = -EFAULT;
+
+ rtas_work_area_free(work_buf);
+out_work:
+ rtas_work_area_free(work_area);
+out:
+ mutex_unlock(&rtas_ibm_send_hvpipe_msg_lock);
+ return ret;
+}
+
+/*
+ * papr_hvpipe_handle_read - If a payload for the specific
+ * source is pending in the hypervisor, issue the recv HVPIPE
+ * RTAS call and return the payload to user space.
+ *
+ * When a payload is available for the partition, the
+ * hypervisor notifies an HVPIPE event with the source ID,
+ * and the event handler wakes up the FD(s) that are waiting.
+ */
+static ssize_t papr_hvpipe_handle_read(struct file *file,
+ char __user *buf, size_t size, loff_t *off)
+{
+
+ struct hvpipe_source_info *src_info = file->private_data;
+ struct papr_hvpipe_hdr hdr;
+ long ret;
+
+ /*
+ * Return -ENXIO during migration
+ */
+ if (!hvpipe_feature)
+ return -ENXIO;
+
+ if (!src_info)
+ return -EIO;
+
+ /*
+ * Max payload is 4048 (HVPIPE_MAX_WRITE_BUFFER_SIZE)
+ */
+ if ((size > (HVPIPE_HDR_LEN + HVPIPE_MAX_WRITE_BUFFER_SIZE)) ||
+ (size < HVPIPE_HDR_LEN))
+ return -EINVAL;
+
+	/*
+	 * No payload is available to receive and the source
+	 * pipe has not been closed.
+	 */
+ if (!src_info->hvpipe_status)
+ return 0;
+
+ hdr.version = 0;
+ hdr.flags = 0;
+
+	/*
+	 * If the hvpipe has a payload and the hypervisor has
+	 * also closed the pipe to the source, retrieve the
+	 * payload and return it to user space first, and then
+	 * notify user space about the hvpipe close on the
+	 * next read().
+	 */
+ if (src_info->hvpipe_status & HVPIPE_MSG_AVAILABLE)
+ hdr.flags = HVPIPE_MSG_AVAILABLE;
+ else if (src_info->hvpipe_status & HVPIPE_LOST_CONNECTION)
+ hdr.flags = HVPIPE_LOST_CONNECTION;
+ else
+ /*
+ * Should not be here without one of the above
+ * flags set
+ */
+ return -EIO;
+
+	if (copy_to_user(buf, &hdr, HVPIPE_HDR_LEN))
+		return -EFAULT;
+
+ /*
+ * Message event has payload, so get the payload with
+ * recv HVPIPE RTAS.
+ */
+ if (hdr.flags & HVPIPE_MSG_AVAILABLE) {
+ ret = hvpipe_rtas_recv_msg(buf + HVPIPE_HDR_LEN,
+ size - HVPIPE_HDR_LEN);
+ if (ret > 0) {
+ src_info->hvpipe_status &= ~HVPIPE_MSG_AVAILABLE;
+ ret += HVPIPE_HDR_LEN;
+ }
+ } else if (hdr.flags & HVPIPE_LOST_CONNECTION) {
+ /*
+ * Hypervisor is closing the pipe for the specific
+ * source. So notify user space.
+ */
+ src_info->hvpipe_status &= ~HVPIPE_LOST_CONNECTION;
+ ret = HVPIPE_HDR_LEN;
+ }
+
+ return ret;
+}
+
+/*
+ * User space waits for a payload to arrive.
+ * The hypervisor sends an HVPIPE event message to the partition
+ * when a payload is available, and the event handler wakes up
+ * the FD based on the source ID in the event message.
+ */
+static __poll_t papr_hvpipe_handle_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ struct hvpipe_source_info *src_info = filp->private_data;
+
+ /*
+ * HVPIPE is disabled during SUSPEND and enabled after migration.
+ * So return POLLRDHUP during migration
+ */
+ if (!hvpipe_feature)
+ return POLLRDHUP;
+
+ if (!src_info)
+ return POLLNVAL;
+
+ /*
+ * If hvpipe already has pending payload, return so that
+ * the user space can issue read().
+ */
+ if (src_info->hvpipe_status)
+ return POLLIN | POLLRDNORM;
+
+ /*
+ * Wait for the message event
+ * hvpipe_event_interrupt() wakes up this wait_queue
+ */
+ poll_wait(filp, &src_info->recv_wqh, wait);
+ if (src_info->hvpipe_status)
+ return POLLIN | POLLRDNORM;
+
+ return 0;
+}
+
+static int papr_hvpipe_handle_release(struct inode *inode,
+ struct file *file)
+{
+ struct hvpipe_source_info *src_info;
+
+	/*
+	 * Hold the lock, remove the source from src_list, reset
+	 * the hvpipe status and release the lock to prevent any
+	 * race with the message event IRQ.
+	 */
+ spin_lock(&hvpipe_src_list_lock);
+ src_info = file->private_data;
+ list_del(&src_info->list);
+ file->private_data = NULL;
+	/*
+	 * If the pipe for this specific source has any pending
+	 * payload, issue the recv HVPIPE RTAS call so that the
+	 * pipe will not be blocked.
+	 */
+ if (src_info->hvpipe_status & HVPIPE_MSG_AVAILABLE) {
+ src_info->hvpipe_status = 0;
+ spin_unlock(&hvpipe_src_list_lock);
+ hvpipe_rtas_recv_msg(NULL, 0);
+ } else
+ spin_unlock(&hvpipe_src_list_lock);
+
+ kfree(src_info);
+ return 0;
+}
+
+static const struct file_operations papr_hvpipe_handle_ops = {
+ .read = papr_hvpipe_handle_read,
+ .write = papr_hvpipe_handle_write,
+ .release = papr_hvpipe_handle_release,
+ .poll = papr_hvpipe_handle_poll,
+};
+
+static int papr_hvpipe_dev_create_handle(u32 srcID)
+{
+ struct hvpipe_source_info *src_info;
+ struct file *file;
+ long err;
+ int fd;
+
+ spin_lock(&hvpipe_src_list_lock);
+	/*
+	 * Do not allow more than one process to communicate
+	 * with each source.
+	 */
+ src_info = hvpipe_find_source(srcID);
+ if (src_info) {
+ spin_unlock(&hvpipe_src_list_lock);
+ pr_err("pid(%d) is already using the source(%d)\n",
+ src_info->tsk->pid, srcID);
+ return -EALREADY;
+ }
+ spin_unlock(&hvpipe_src_list_lock);
+
+ src_info = kzalloc(sizeof(*src_info), GFP_KERNEL_ACCOUNT);
+ if (!src_info)
+ return -ENOMEM;
+
+ src_info->srcID = srcID;
+ src_info->tsk = current;
+ init_waitqueue_head(&src_info->recv_wqh);
+
+ fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ err = fd;
+ goto free_buf;
+ }
+
+ file = anon_inode_getfile("[papr-hvpipe]",
+ &papr_hvpipe_handle_ops, (void *)src_info,
+ O_RDWR);
+ if (IS_ERR(file)) {
+ err = PTR_ERR(file);
+ goto free_fd;
+ }
+
+ spin_lock(&hvpipe_src_list_lock);
+	/*
+	 * If two processes are executing ioctl() for the same
+	 * source ID concurrently, prevent the second process
+	 * from acquiring an FD.
+	 */
+ if (hvpipe_find_source(srcID)) {
+ spin_unlock(&hvpipe_src_list_lock);
+ err = -EALREADY;
+ goto free_file;
+ }
+ list_add(&src_info->list, &hvpipe_src_list);
+ spin_unlock(&hvpipe_src_list_lock);
+
+ fd_install(fd, file);
+ return fd;
+
+free_file:
+ fput(file);
+free_fd:
+ put_unused_fd(fd);
+free_buf:
+ kfree(src_info);
+ return err;
+}
+
+/*
+ * Top-level ioctl handler for /dev/papr-hvpipe
+ *
+ * A separate FD is used for each source (e.g. HMC), so the
+ * ioctl is called with a source ID and returns an FD.
+ */
+static long papr_hvpipe_dev_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg)
+{
+ u32 __user *argp = (void __user *)arg;
+ u32 srcID;
+ long ret;
+
+ /*
+ * Return -ENXIO during migration
+ */
+ if (!hvpipe_feature)
+ return -ENXIO;
+
+ if (get_user(srcID, argp))
+ return -EFAULT;
+
+ /*
+ * Support only HMC source right now
+ */
+ if (!(srcID & HVPIPE_HMC_ID_MASK))
+ return -EINVAL;
+
+ switch (ioctl) {
+ case PAPR_HVPIPE_IOC_CREATE_HANDLE:
+ ret = papr_hvpipe_dev_create_handle(srcID);
+ break;
+ default:
+ ret = -ENOIOCTLCMD;
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * papr_hvpipe_work_fn - called to issue the recv HVPIPE RTAS
+ * call for sources that are not monitored by user space, so
+ * that the pipe will not be blocked.
+ */
+static void papr_hvpipe_work_fn(struct work_struct *work)
+{
+ hvpipe_rtas_recv_msg(NULL, 0);
+}
+
+/*
+ * HVPIPE event message IRQ handler.
+ * The hypervisor sends event IRQ if the partition has payload
+ * and generates another event only after payload is read with
+ * recv HVPIPE RTAS.
+ */
+static irqreturn_t hvpipe_event_interrupt(int irq, void *dev_id)
+{
+ struct hvpipe_event_buf *hvpipe_event;
+ struct pseries_errorlog *pseries_log;
+ struct hvpipe_source_info *src_info;
+ struct rtas_error_log *elog;
+ int rc;
+
+ rc = rtas_call(hvpipe_check_exception_token, 6, 1, NULL,
+ RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq),
+ RTAS_HVPIPE_MSG_EVENTS, 1, __pa(&hvpipe_ras_buf),
+ rtas_get_error_log_max());
+
+ if (rc != 0) {
+ pr_err_ratelimited("unexpected hvpipe-event-notification failed %d\n", rc);
+ return IRQ_HANDLED;
+ }
+
+ elog = (struct rtas_error_log *)hvpipe_ras_buf;
+ if (unlikely(rtas_error_type(elog) != RTAS_TYPE_HVPIPE)) {
+ pr_warn_ratelimited("Unexpected event type %d\n",
+ rtas_error_type(elog));
+ return IRQ_HANDLED;
+ }
+
+ pseries_log = get_pseries_errorlog(elog,
+ PSERIES_ELOG_SECT_ID_HVPIPE_EVENT);
+ hvpipe_event = (struct hvpipe_event_buf *)pseries_log->data;
+
+	/*
+	 * The hypervisor notifies the partition when a payload is
+	 * available to read with the recv HVPIPE RTAS call, and it
+	 * will not notify another event for any source until the
+	 * previous payload is read. This means the pipe is blocked
+	 * in the hypervisor until the payload is read.
+	 *
+	 * If a user process is waiting on the source, update
+	 * hvpipe_status and wake up the corresponding FD. The lock
+	 * is held while doing so in case the user space process is
+	 * releasing the FD instead of polling it, so that release()
+	 * reads the payload to unblock the pipe before closing the
+	 * FD.
+	 *
+	 * Otherwise (no user process is waiting for the payload),
+	 * issue the recv HVPIPE RTAS call (papr_hvpipe_work_fn())
+	 * to unblock the pipe.
+	 */
+ spin_lock(&hvpipe_src_list_lock);
+ src_info = hvpipe_find_source(be32_to_cpu(hvpipe_event->srcID));
+ if (src_info) {
+ u32 flags = 0;
+
+ if (hvpipe_event->event_type & HVPIPE_LOST_CONNECTION)
+ flags = HVPIPE_LOST_CONNECTION;
+ else if (hvpipe_event->event_type & HVPIPE_MSG_AVAILABLE)
+ flags = HVPIPE_MSG_AVAILABLE;
+
+ src_info->hvpipe_status |= flags;
+ wake_up(&src_info->recv_wqh);
+ spin_unlock(&hvpipe_src_list_lock);
+ } else {
+ spin_unlock(&hvpipe_src_list_lock);
+		/*
+		 * User space is not waiting on this source, so
+		 * issue the recv HVPIPE RTAS call so that the
+		 * pipe will not be blocked.
+		 */
+ if (hvpipe_event->event_type & HVPIPE_MSG_AVAILABLE)
+ queue_work(papr_hvpipe_wq, papr_hvpipe_work);
+ }
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Enable or disable hvpipe via the system parameter set call
+ * with parameter token = 64 and 1 byte of buffer data:
+ * 0 = hvpipe not in use/disabled
+ * 1 = hvpipe in use/enabled
+ */
+static int set_hvpipe_sys_param(u8 val)
+{
+ struct papr_sysparm_buf *buf;
+ int ret;
+
+ buf = papr_sysparm_buf_alloc();
+ if (!buf)
+ return -ENOMEM;
+
+ buf->len = cpu_to_be16(1);
+ buf->val[0] = val;
+ ret = papr_sysparm_set(PAPR_SYSPARM_HVPIPE_ENABLE, buf);
+	if (ret)
+		pr_err("Can not set hvpipe system parameter %d\n", ret);
+
+ papr_sysparm_buf_free(buf);
+
+ return ret;
+}
+
+static int __init enable_hvpipe_IRQ(void)
+{
+ struct device_node *np;
+
+ hvpipe_check_exception_token = rtas_function_token(RTAS_FN_CHECK_EXCEPTION);
+ if (hvpipe_check_exception_token == RTAS_UNKNOWN_SERVICE)
+ return -ENODEV;
+
+ /* hvpipe events */
+ np = of_find_node_by_path("/event-sources/ibm,hvpipe-msg-events");
+ if (np != NULL) {
+ request_event_sources_irqs(np, hvpipe_event_interrupt,
+					"HVPIPE_EVENT");
+ of_node_put(np);
+ } else {
+ pr_err("Can not enable hvpipe event IRQ\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+void hvpipe_migration_handler(int action)
+{
+ pr_info("hvpipe migration event %d\n", action);
+
+	/*
+	 * HVPIPE is not in use (creating /dev/papr-hvpipe failed),
+	 * so there is nothing to do for migration.
+	 */
+ if (!papr_hvpipe_work)
+ return;
+
+ switch (action) {
+ case HVPIPE_SUSPEND:
+ if (hvpipe_feature) {
+			/*
+			 * Disable the hvpipe feature for user space.
+			 * It will be re-enabled on the RESUME event.
+			 */
+ hvpipe_feature = false;
+ /*
+ * set system parameter hvpipe 'disable'
+ */
+ set_hvpipe_sys_param(0);
+ }
+ break;
+ case HVPIPE_RESUME:
+ /*
+ * set system parameter hvpipe 'enable'
+ */
+ if (!set_hvpipe_sys_param(1))
+ hvpipe_feature = true;
+ else
+ pr_err("hvpipe is not enabled after migration\n");
+
+ break;
+ }
+}
+
+static const struct file_operations papr_hvpipe_ops = {
+ .unlocked_ioctl = papr_hvpipe_dev_ioctl,
+};
+
+static struct miscdevice papr_hvpipe_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "papr-hvpipe",
+ .fops = &papr_hvpipe_ops,
+};
+
+static int __init papr_hvpipe_init(void)
+{
+ int ret;
+
+ if (!of_find_property(rtas.dev, "ibm,hypervisor-pipe-capable",
+ NULL))
+ return -ENODEV;
+
+ if (!rtas_function_implemented(RTAS_FN_IBM_SEND_HVPIPE_MSG) ||
+ !rtas_function_implemented(RTAS_FN_IBM_RECEIVE_HVPIPE_MSG))
+ return -ENODEV;
+
+ papr_hvpipe_work = kzalloc(sizeof(struct work_struct), GFP_ATOMIC);
+ if (!papr_hvpipe_work)
+ return -ENOMEM;
+
+ INIT_WORK(papr_hvpipe_work, papr_hvpipe_work_fn);
+
+ papr_hvpipe_wq = alloc_ordered_workqueue("papr hvpipe workqueue", 0);
+ if (!papr_hvpipe_wq) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = enable_hvpipe_IRQ();
+ if (!ret) {
+ ret = set_hvpipe_sys_param(1);
+ if (!ret)
+ ret = misc_register(&papr_hvpipe_dev);
+ }
+
+ if (!ret) {
+ pr_info("hvpipe feature is enabled\n");
+ hvpipe_feature = true;
+ return 0;
+ }
+
+ pr_err("hvpipe feature is not enabled %d\n", ret);
+ destroy_workqueue(papr_hvpipe_wq);
+out:
+ kfree(papr_hvpipe_work);
+ papr_hvpipe_work = NULL;
+ return ret;
+}
+machine_device_initcall(pseries, papr_hvpipe_init);
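A minimal user-space sketch of the flow described in the comment at the top of this file, using only names from the uapi header; hvpipe_read_one() is hypothetical and error handling is elided:

#include <fcntl.h>
#include <poll.h>
#include <stddef.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <asm/papr-hvpipe.h>

/* hypothetical helper: wait for and read one payload from src_id */
static ssize_t hvpipe_read_one(unsigned int src_id, char *buf, size_t len)
{
	struct pollfd pfd;
	int devfd, fd;

	devfd = open("/dev/papr-hvpipe", O_RDWR);
	/* one FD per source */
	fd = ioctl(devfd, PAPR_HVPIPE_IOC_CREATE_HANDLE, &src_id);

	pfd.fd = fd;
	pfd.events = POLLIN;
	poll(&pfd, 1, -1);	/* woken by the HVPIPE event handler */

	/* buf starts with struct papr_hvpipe_hdr, payload follows */
	return read(fd, buf, len);
}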
diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.h b/arch/powerpc/platforms/pseries/papr-hvpipe.h
new file mode 100644
index 000000000000..c343f4230865
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr-hvpipe.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _PAPR_HVPIPE_H
+#define _PAPR_HVPIPE_H
+
+#define HVPIPE_HMC_ID_MASK	0x02000000	/* source type 0x02 = HMC */
+#define HVPIPE_MAX_WRITE_BUFFER_SIZE 4048
+/*
+ * hvpipe specific RTAS return values
+ */
+#define RTAS_HVPIPE_CLOSED -4
+
+#define HVPIPE_HDR_LEN sizeof(struct papr_hvpipe_hdr)
+
+enum hvpipe_migrate_action {
+ HVPIPE_SUSPEND,
+ HVPIPE_RESUME,
+};
+
+struct hvpipe_source_info {
+ struct list_head list; /* list of sources */
+ u32 srcID;
+ u32 hvpipe_status;
+ wait_queue_head_t recv_wqh; /* wake up poll() waitq */
+ struct task_struct *tsk;
+};
+
+/*
+ * Source ID format: 0xCCRRQQQQ
+ * CC = source type (e.g. 0x02 for HMC)
+ * RR = 0x00 (reserved)
+ * QQQQ = 0x0000 - 0xFFFF, the source index identifier
+ */
+struct hvpipe_event_buf {
+ __be32 srcID; /* Source ID */
+ u8 event_type; /* 0x01 for hvpipe message available */
+ /* from specified src ID */
+ /* 0x02 for loss of pipe connection */
+ /* with specified src ID */
+};
+
+void hvpipe_migration_handler(int action);
+#endif /* _PAPR_HVPIPE_H */
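For illustration, the 0xCCRRQQQQ layout above could be unpacked with helpers like these (hypothetical, not part of this patch):

/* hypothetical decoders for the 0xCCRRQQQQ source ID layout */
static inline u8 hvpipe_src_type(u32 srcID)
{
	return srcID >> 24;		/* CC: 0x02 for HMC */
}

static inline u16 hvpipe_src_index(u32 srcID)
{
	return srcID & 0xffff;		/* QQQQ: source index identifier */
}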