summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/cper.h12
-rw-r--r--include/linux/dma-buf-mapping.h17
-rw-r--r--include/linux/dma-buf.h11
-rw-r--r--include/linux/efi.h6
-rw-r--r--include/linux/generic_pt/common.h191
-rw-r--r--include/linux/generic_pt/iommu.h293
-rw-r--r--include/linux/hisi_acc_qm.h3
-rw-r--r--include/linux/io-pgtable.h2
-rw-r--r--include/linux/iommu.h3
-rw-r--r--include/linux/irqchip/riscv-imsic.h3
-rw-r--r--include/linux/pci-epf.h12
-rw-r--r--include/linux/pci-p2pdma.h120
-rw-r--r--include/linux/pci.h27
-rw-r--r--include/linux/power/max77705_charger.h2
-rw-r--r--include/linux/ras.h16
-rw-r--r--include/linux/sizes.h1
-rw-r--r--include/linux/vfio.h6
-rw-r--r--include/linux/vfio_pci_core.h73
-rw-r--r--include/linux/virtio.h2
-rw-r--r--include/linux/virtio_config.h24
-rw-r--r--include/linux/virtio_features.h29
-rw-r--r--include/linux/virtio_pci_modern.h8
22 files changed, 755 insertions, 106 deletions
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 0ed60a91eca9..5b1236d8c65b 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -297,11 +297,11 @@ enum {
#define CPER_ARM_INFO_FLAGS_PROPAGATED BIT(2)
#define CPER_ARM_INFO_FLAGS_OVERFLOW BIT(3)
-#define CPER_ARM_CACHE_ERROR 0
-#define CPER_ARM_TLB_ERROR 1
-#define CPER_ARM_BUS_ERROR 2
-#define CPER_ARM_VENDOR_ERROR 3
-#define CPER_ARM_MAX_TYPE CPER_ARM_VENDOR_ERROR
+#define CPER_ARM_ERR_TYPE_MASK GENMASK(4,1)
+#define CPER_ARM_CACHE_ERROR BIT(1)
+#define CPER_ARM_TLB_ERROR BIT(2)
+#define CPER_ARM_BUS_ERROR BIT(3)
+#define CPER_ARM_VENDOR_ERROR BIT(4)
#define CPER_ARM_ERR_VALID_TRANSACTION_TYPE BIT(0)
#define CPER_ARM_ERR_VALID_OPERATION_TYPE BIT(1)
@@ -588,6 +588,8 @@ const char *cper_mem_err_type_str(unsigned int);
const char *cper_mem_err_status_str(u64 status);
void cper_print_bits(const char *prefix, unsigned int bits,
const char * const strs[], unsigned int strs_size);
+int cper_bits_to_str(char *buf, int buf_size, unsigned long bits,
+ const char * const strs[], unsigned int strs_size);
void cper_mem_err_pack(const struct cper_sec_mem_err *,
struct cper_mem_err_compact *);
const char *cper_mem_err_unpack(struct trace_seq *,
diff --git a/include/linux/dma-buf-mapping.h b/include/linux/dma-buf-mapping.h
new file mode 100644
index 000000000000..a3c0ce2d3a42
--- /dev/null
+++ b/include/linux/dma-buf-mapping.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * DMA BUF Mapping Helpers
+ *
+ */
+#ifndef __DMA_BUF_MAPPING_H__
+#define __DMA_BUF_MAPPING_H__
+#include <linux/dma-buf.h>
+
+struct sg_table *dma_buf_phys_vec_to_sgt(struct dma_buf_attachment *attach,
+ struct p2pdma_provider *provider,
+ struct dma_buf_phys_vec *phys_vec,
+ size_t nr_ranges, size_t size,
+ enum dma_data_direction dir);
+void dma_buf_free_sgt(struct dma_buf_attachment *attach, struct sg_table *sgt,
+ enum dma_data_direction dir);
+#endif
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index d58e329ac0e7..0bc492090237 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -22,6 +22,7 @@
#include <linux/fs.h>
#include <linux/dma-fence.h>
#include <linux/wait.h>
+#include <linux/pci-p2pdma.h>
struct device;
struct dma_buf;
@@ -531,6 +532,16 @@ struct dma_buf_export_info {
};
/**
+ * struct dma_buf_phys_vec - describe a contiguous chunk of memory
+ * @paddr: physical address of that chunk
+ * @len: Length of this chunk
+ */
+struct dma_buf_phys_vec {
+ phys_addr_t paddr;
+ size_t len;
+};
+
+/**
* DEFINE_DMA_BUF_EXPORT_INFO - helper macro for exporters
* @name: export-info name
*
diff --git a/include/linux/efi.h b/include/linux/efi.h
index b23ff8b83219..2a43094e23f7 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -290,7 +290,7 @@ typedef efi_status_t efi_get_variable_t (efi_char16_t *name, efi_guid_t *vendor,
unsigned long *data_size, void *data);
typedef efi_status_t efi_get_next_variable_t (unsigned long *name_size, efi_char16_t *name,
efi_guid_t *vendor);
-typedef efi_status_t efi_set_variable_t (efi_char16_t *name, efi_guid_t *vendor,
+typedef efi_status_t efi_set_variable_t (efi_char16_t *name, efi_guid_t *vendor,
u32 attr, unsigned long data_size,
void *data);
typedef efi_status_t efi_get_next_high_mono_count_t (u32 *count);
@@ -373,6 +373,8 @@ void efi_native_runtime_setup(void);
#define EFI_DEVICE_PATH_TO_TEXT_PROTOCOL_GUID EFI_GUID(0x8b843e20, 0x8132, 0x4852, 0x90, 0xcc, 0x55, 0x1a, 0x4e, 0x4a, 0x7f, 0x1c)
#define EFI_DEVICE_PATH_FROM_TEXT_PROTOCOL_GUID EFI_GUID(0x05c99a21, 0xc70f, 0x4ad2, 0x8a, 0x5f, 0x35, 0xdf, 0x33, 0x43, 0xf5, 0x1e)
#define EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID EFI_GUID(0x9042a9de, 0x23dc, 0x4a38, 0x96, 0xfb, 0x7a, 0xde, 0xd0, 0x80, 0x51, 0x6a)
+#define EFI_EDID_DISCOVERED_PROTOCOL_GUID EFI_GUID(0x1c0c34f6, 0xd380, 0x41fa, 0xa0, 0x49, 0x8a, 0xd0, 0x6c, 0x1a, 0x66, 0xaa)
+#define EFI_EDID_ACTIVE_PROTOCOL_GUID EFI_GUID(0xbd8c1056, 0x9f36, 0x44ec, 0x92, 0xa8, 0xa6, 0x33, 0x7f, 0x81, 0x79, 0x86)
#define EFI_PCI_IO_PROTOCOL_GUID EFI_GUID(0x4cf5b200, 0x68b8, 0x4ca5, 0x9e, 0xec, 0xb2, 0x3e, 0x3f, 0x50, 0x02, 0x9a)
#define EFI_FILE_INFO_ID EFI_GUID(0x09576e92, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b)
#define EFI_SYSTEM_RESOURCE_TABLE_GUID EFI_GUID(0xb122a263, 0x3661, 0x4f68, 0x99, 0x29, 0x78, 0xf8, 0xb0, 0xd6, 0x21, 0x80)
@@ -772,7 +774,7 @@ extern unsigned long efi_mem_attr_table;
*/
typedef int (*efi_memattr_perm_setter)(struct mm_struct *, efi_memory_desc_t *, bool);
-extern int efi_memattr_init(void);
+extern void efi_memattr_init(void);
extern int efi_memattr_apply_permissions(struct mm_struct *mm,
efi_memattr_perm_setter fn);
diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h
new file mode 100644
index 000000000000..6a9a1acb5aad
--- /dev/null
+++ b/include/linux/generic_pt/common.h
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
+ */
+#ifndef __GENERIC_PT_COMMON_H
+#define __GENERIC_PT_COMMON_H
+
+#include <linux/types.h>
+#include <linux/build_bug.h>
+#include <linux/bits.h>
+
+/**
+ * DOC: Generic Radix Page Table
+ *
+ * Generic Radix Page Table is a set of functions and helpers to efficiently
+ * parse radix style page tables typically seen in HW implementations. The
+ * interface is built to deliver similar code generation as the mm's pte/pmd/etc
+ * system by fully inlining the exact code required to handle each table level.
+ *
+ * Like the mm subsystem each format contributes its parsing implementation
+ * under common names and the common code implements the required algorithms.
+ *
+ * The system is divided into three logical levels:
+ *
+ * - The page table format and its manipulation functions
+ * - Generic helpers to give a consistent API regardless of underlying format
+ * - An algorithm implementation (e.g. IOMMU/DRM/KVM/MM)
+ *
+ * Multiple implementations are supported. The intention is to have the generic
+ * format code be re-usable for whatever specialized implementation is required.
+ * The generic code is solely about the format of the radix tree; it does not
+ * include memory allocation or higher level decisions that are left for the
+ * implementation.
+ *
+ * The generic framework supports a superset of functions across many HW
+ * implementations:
+ *
+ * - Entries comprised of contiguous blocks of IO PTEs for larger page sizes
+ * - Multi-level tables, up to 6 levels. Runtime selected top level
+ * - Runtime variable table level size (ARM's concatenated tables)
+ * - Expandable top level allowing dynamic sizing of table levels
+ * - Optional leaf entries at any level
+ * - 32-bit/64-bit virtual and output addresses, using every address bit
+ * - Dirty tracking
+ * - Sign extended addressing
+ */
+
+/**
+ * struct pt_common - struct for all page table implementations
+ */
+struct pt_common {
+ /**
+ * @top_of_table: Encodes the table top pointer and the top level in a
+ * single value. Must use READ_ONCE/WRITE_ONCE to access it. The lower
+ * bits of the aligned table pointer are used for the level.
+ */
+ uintptr_t top_of_table;
+ /**
+ * @max_oasz_lg2: Maximum number of bits the OA can contain. Upper bits
+ * must be zero. This may be less than what the page table format
+ * supports, but must not be more.
+ */
+ u8 max_oasz_lg2;
+ /**
+ * @max_vasz_lg2: Maximum number of bits the VA can contain. Upper bits
+ * are 0 or 1 depending on pt_full_va_prefix(). This may be less than
+ * what the page table format supports, but must not be more. When
+ * PT_FEAT_DYNAMIC_TOP is set this reflects the maximum VA capability.
+ */
+ u8 max_vasz_lg2;
+ /**
+ * @features: Bitmap of `enum pt_features`
+ */
+ unsigned int features;
+};
+
+/* Encoding parameters for top_of_table */
+enum {
+ PT_TOP_LEVEL_BITS = 3,
+ PT_TOP_LEVEL_MASK = GENMASK(PT_TOP_LEVEL_BITS - 1, 0),
+};
+
+/**
+ * enum pt_features - Features turned on in the table. Each symbol is a bit
+ * position.
+ */
+enum pt_features {
+ /**
+ * @PT_FEAT_DMA_INCOHERENT: Cache flush page table memory before
+ * assuming the HW can read it. Otherwise a SMP release is sufficient
+ * for HW to read it.
+ */
+ PT_FEAT_DMA_INCOHERENT,
+ /**
+ * @PT_FEAT_FULL_VA: The table can span the full VA range from 0 to
+ * PT_VADDR_MAX.
+ */
+ PT_FEAT_FULL_VA,
+ /**
+ * @PT_FEAT_DYNAMIC_TOP: The table's top level can be increased
+ * dynamically during map. This requires HW support for atomically
+ * setting both the table top pointer and the starting table level.
+ */
+ PT_FEAT_DYNAMIC_TOP,
+ /**
+ * @PT_FEAT_SIGN_EXTEND: The top most bit of the valid VA range sign
+ * extends up to the full pt_vaddr_t. This divides the page table into
+ * three VA ranges::
+ *
+ * 0 -> 2^N - 1 Lower
+ * 2^N -> (MAX - 2^N - 1) Non-Canonical
+ * MAX - 2^N -> MAX Upper
+ *
+ * In this mode pt_common::max_vasz_lg2 includes the sign bit and the
+ * upper bits that don't fall within the translation are just validated.
+ *
+ * If not set there is no sign extension and valid VA goes from 0 to 2^N
+ * - 1.
+ */
+ PT_FEAT_SIGN_EXTEND,
+ /**
+ * @PT_FEAT_FLUSH_RANGE: IOTLB maintenance is done by flushing IOVA
+ * ranges which will clean out any walk cache or any IOPTE fully
+ * contained by the range. The optimization objective is to minimize the
+ * number of flushes even if ranges include IOVA gaps that do not need
+ * to be flushed.
+ */
+ PT_FEAT_FLUSH_RANGE,
+ /**
+ * @PT_FEAT_FLUSH_RANGE_NO_GAPS: Like PT_FEAT_FLUSH_RANGE except that
+ * the optimization objective is to only flush IOVA that has been
+ * changed. This mode is suitable for cases like hypervisor shadowing
+ * where flushing unchanged ranges may cause the hypervisor to reparse
+ * a significant amount of the page table.
+ */
+ PT_FEAT_FLUSH_RANGE_NO_GAPS,
+ /* private: */
+ PT_FEAT_FMT_START,
+};
+
+struct pt_amdv1 {
+ struct pt_common common;
+};
+
+enum {
+ /*
+ * The memory backing the tables is encrypted. Use __sme_set() to adjust
+ * the page table pointers in the tree. This only works with
+ * CONFIG_AMD_MEM_ENCRYPT.
+ */
+ PT_FEAT_AMDV1_ENCRYPT_TABLES = PT_FEAT_FMT_START,
+ /*
+ * The PTEs are set to prevent cache incoherent traffic, such as PCI no
+ * snoop. This is set either at creation time or before the first map
+ * operation.
+ */
+ PT_FEAT_AMDV1_FORCE_COHERENCE,
+};
+
+struct pt_vtdss {
+ struct pt_common common;
+};
+
+enum {
+ /*
+ * The PTEs are set to prevent cache incoherent traffic, such as PCI no
+ * snoop. This is set either at creation time or before the first map
+ * operation.
+ */
+ PT_FEAT_VTDSS_FORCE_COHERENCE = PT_FEAT_FMT_START,
+ /*
+ * Prevent creating read-only PTEs. Used to work around HW errata
+ * ERRATA_772415_SPR17.
+ */
+ PT_FEAT_VTDSS_FORCE_WRITEABLE,
+};
+
+struct pt_x86_64 {
+ struct pt_common common;
+};
+
+enum {
+ /*
+ * The memory backing the tables is encrypted. Use __sme_set() to adjust
+ * the page table pointers in the tree. This only works with
+ * CONFIG_AMD_MEM_ENCRYPT.
+ */
+ PT_FEAT_X86_64_AMD_ENCRYPT_TABLES = PT_FEAT_FMT_START,
+};
+
+#endif
diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h
new file mode 100644
index 000000000000..9eefbb74efd0
--- /dev/null
+++ b/include/linux/generic_pt/iommu.h
@@ -0,0 +1,293 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
+ */
+#ifndef __GENERIC_PT_IOMMU_H
+#define __GENERIC_PT_IOMMU_H
+
+#include <linux/generic_pt/common.h>
+#include <linux/iommu.h>
+#include <linux/mm_types.h>
+
+struct iommu_iotlb_gather;
+struct pt_iommu_ops;
+struct pt_iommu_driver_ops;
+struct iommu_dirty_bitmap;
+
+/**
+ * DOC: IOMMU Radix Page Table
+ *
+ * The IOMMU implementation of the Generic Page Table provides an ops struct
+ * that is useful to go with an iommu_domain to serve the DMA API, IOMMUFD and
+ * the generic map/unmap interface.
+ *
+ * This interface uses a caller provided locking approach. The caller must have
+ * a VA range lock concept that prevents concurrent threads from calling ops on
+ * the same VA. Generally the range lock must be at least as large as a single
+ * map call.
+ */
+
+/**
+ * struct pt_iommu - Base structure for IOMMU page tables
+ *
+ * The format-specific struct will include this as the first member.
+ */
+struct pt_iommu {
+ /**
+ * @domain: The core IOMMU domain. The driver should use a union to
+ * overlay this memory with its previously existing domain struct to
+ * create an alias.
+ */
+ struct iommu_domain domain;
+
+ /**
+ * @ops: Function pointers to access the API
+ */
+ const struct pt_iommu_ops *ops;
+
+ /**
+ * @driver_ops: Function pointers provided by the HW driver to help
+ * manage HW details like caches.
+ */
+ const struct pt_iommu_driver_ops *driver_ops;
+
+ /**
+ * @nid: Node ID to use for table memory allocations. The IOMMU driver
+ * may want to set the NID to the device's NID, if there are multiple
+ * table walkers.
+ */
+ int nid;
+
+ /**
+ * @iommu_device: Device pointer used for any DMA cache flushing when
+ * PT_FEAT_DMA_INCOHERENT. This is the iommu device that created the
+ * page table which must have dma ops that perform cache flushing.
+ */
+ struct device *iommu_device;
+};
+
+/**
+ * struct pt_iommu_info - Details about the IOMMU page table
+ *
+ * Returned from pt_iommu_ops->get_info()
+ */
+struct pt_iommu_info {
+ /**
+ * @pgsize_bitmap: A bitmask where each set bit indicates
+ * a page size that can be natively stored in the page table.
+ */
+ u64 pgsize_bitmap;
+};
+
+struct pt_iommu_ops {
+ /**
+ * @set_dirty: Make the iova write dirty
+ * @iommu_table: Table to manipulate
+ * @iova: IO virtual address to start
+ *
+ * This is only used by iommufd testing. It makes the iova dirty so that
+ * read_and_clear_dirty() will see it as dirty. Unlike all the other ops
+ * this one is safe to call without holding any locking. It may return
+ * -EAGAIN if there is a race.
+ */
+ int (*set_dirty)(struct pt_iommu *iommu_table, dma_addr_t iova);
+
+ /**
+ * @get_info: Return the pt_iommu_info structure
+ * @iommu_table: Table to query
+ *
+ * Return some basic static information about the page table.
+ */
+ void (*get_info)(struct pt_iommu *iommu_table,
+ struct pt_iommu_info *info);
+
+ /**
+ * @deinit: Undo a format specific init operation
+ * @iommu_table: Table to destroy
+ *
+ * Release all of the memory. The caller must have already removed the
+ * table from all HW access and all caches.
+ */
+ void (*deinit)(struct pt_iommu *iommu_table);
+};
+
+/**
+ * struct pt_iommu_driver_ops - HW IOTLB cache flushing operations
+ *
+ * The IOMMU driver should implement these using container_of(iommu_table) to
+ * get to its iommu_domain derived structure. All ops can be called in atomic
+ * contexts as they are buried under DMA API calls.
+ */
+struct pt_iommu_driver_ops {
+ /**
+ * @change_top: Update the top of table pointer
+ * @iommu_table: Table to operate on
+ * @top_paddr: New CPU physical address of the top pointer
+ * @top_level: IOMMU PT level of the new top
+ *
+ * Called under the get_top_lock() spinlock. The driver must update all
+ * HW references to this domain with a new top address and
+ * configuration. On return mappings placed in the new top must be
+ * reachable by the HW.
+ *
+ * top_level encodes the level in IOMMU PT format, level 0 is the
+ * smallest page size increasing from there. This has to be translated
+ * to any HW specific format. During this call the new top will not be
+ * visible to any other API.
+ *
+ * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if
+ * enabled.
+ */
+ void (*change_top)(struct pt_iommu *iommu_table, phys_addr_t top_paddr,
+ unsigned int top_level);
+
+ /**
+ * @get_top_lock: lock to hold when changing the table top
+ * @iommu_table: Table to operate on
+ *
+ * Return a lock to hold when changing the table top page table from
+ * being stored in HW. The lock will be held prior to calling
+ * change_top() and released once the top is fully visible.
+ *
+ * Typically this would be a lock that protects the iommu_domain's
+ * attachment list.
+ *
+ * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if
+ * enabled.
+ */
+ spinlock_t *(*get_top_lock)(struct pt_iommu *iommu_table);
+};
+
+static inline void pt_iommu_deinit(struct pt_iommu *iommu_table)
+{
+ /*
+ * It is safe to call pt_iommu_deinit() before an init, or if init
+ * fails. The ops pointer will only become non-NULL if deinit needs to be
+ * run.
+ */
+ if (iommu_table->ops)
+ iommu_table->ops->deinit(iommu_table);
+}
+
+/**
+ * struct pt_iommu_cfg - Common configuration values for all formats
+ */
+struct pt_iommu_cfg {
+ /**
+ * @features: Features required. Only these features will be turned on.
+ * The feature list should reflect what the IOMMU HW is capable of.
+ */
+ unsigned int features;
+ /**
+ * @hw_max_vasz_lg2: Maximum VA the IOMMU HW can support. This will
+ * imply the top level of the table.
+ */
+ u8 hw_max_vasz_lg2;
+ /**
+ * @hw_max_oasz_lg2: Maximum OA the IOMMU HW can support. The format
+ * might select a lower maximum OA.
+ */
+ u8 hw_max_oasz_lg2;
+};
+
+/* Generate the exported function signatures from iommu_pt.h */
+#define IOMMU_PROTOTYPES(fmt) \
+ phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \
+ dma_addr_t iova); \
+ int pt_iommu_##fmt##_map_pages(struct iommu_domain *domain, \
+ unsigned long iova, phys_addr_t paddr, \
+ size_t pgsize, size_t pgcount, \
+ int prot, gfp_t gfp, size_t *mapped); \
+ size_t pt_iommu_##fmt##_unmap_pages( \
+ struct iommu_domain *domain, unsigned long iova, \
+ size_t pgsize, size_t pgcount, \
+ struct iommu_iotlb_gather *iotlb_gather); \
+ int pt_iommu_##fmt##_read_and_clear_dirty( \
+ struct iommu_domain *domain, unsigned long iova, size_t size, \
+ unsigned long flags, struct iommu_dirty_bitmap *dirty); \
+ int pt_iommu_##fmt##_init(struct pt_iommu_##fmt *table, \
+ const struct pt_iommu_##fmt##_cfg *cfg, \
+ gfp_t gfp); \
+ void pt_iommu_##fmt##_hw_info(struct pt_iommu_##fmt *table, \
+ struct pt_iommu_##fmt##_hw_info *info)
+#define IOMMU_FORMAT(fmt, member) \
+ struct pt_iommu_##fmt { \
+ struct pt_iommu iommu; \
+ struct pt_##fmt member; \
+ }; \
+ IOMMU_PROTOTYPES(fmt)
+
+/*
+ * A driver uses IOMMU_PT_DOMAIN_OPS to populate the iommu_domain_ops for the
+ * iommu_pt
+ */
+#define IOMMU_PT_DOMAIN_OPS(fmt) \
+ .iova_to_phys = &pt_iommu_##fmt##_iova_to_phys, \
+ .map_pages = &pt_iommu_##fmt##_map_pages, \
+ .unmap_pages = &pt_iommu_##fmt##_unmap_pages
+#define IOMMU_PT_DIRTY_OPS(fmt) \
+ .read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty
+
+/*
+ * The driver should set up its domain struct like
+ * union {
+ * struct iommu_domain domain;
+ * struct pt_iommu_xxx xx;
+ * };
+ * PT_IOMMU_CHECK_DOMAIN(struct mock_iommu_domain, xx.iommu, domain);
+ *
+ * Which creates an alias between driver_domain.domain and
+ * driver_domain.xx.iommu.domain. This is to avoid a mass rename of existing
+ * driver_domain.domain users.
+ */
+#define PT_IOMMU_CHECK_DOMAIN(s, pt_iommu_memb, domain_memb) \
+ static_assert(offsetof(s, pt_iommu_memb.domain) == \
+ offsetof(s, domain_memb))
+
+struct pt_iommu_amdv1_cfg {
+ struct pt_iommu_cfg common;
+ unsigned int starting_level;
+};
+
+struct pt_iommu_amdv1_hw_info {
+ u64 host_pt_root;
+ u8 mode;
+};
+
+IOMMU_FORMAT(amdv1, amdpt);
+
+/* amdv1_mock is used by the iommufd selftest */
+#define pt_iommu_amdv1_mock pt_iommu_amdv1
+#define pt_iommu_amdv1_mock_cfg pt_iommu_amdv1_cfg
+struct pt_iommu_amdv1_mock_hw_info;
+IOMMU_PROTOTYPES(amdv1_mock);
+
+struct pt_iommu_vtdss_cfg {
+ struct pt_iommu_cfg common;
+ /* 4 is a 57 bit 5 level table */
+ unsigned int top_level;
+};
+
+struct pt_iommu_vtdss_hw_info {
+ u64 ssptptr;
+ u8 aw;
+};
+
+IOMMU_FORMAT(vtdss, vtdss_pt);
+
+struct pt_iommu_x86_64_cfg {
+ struct pt_iommu_cfg common;
+ /* 4 is a 57 bit 5 level table */
+ unsigned int top_level;
+};
+
+struct pt_iommu_x86_64_hw_info {
+ u64 gcr3_pt;
+ u8 levels;
+};
+
+IOMMU_FORMAT(x86_64, x86_64_pt);
+
+#undef IOMMU_PROTOTYPES
+#undef IOMMU_FORMAT
+#endif
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index c4690e365ade..ca1ec437a3ca 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -99,6 +99,9 @@
#define QM_DEV_ALG_MAX_LEN 256
+#define QM_MIG_REGION_SEL 0x100198
+#define QM_MIG_REGION_EN BIT(0)
+
/* uacce mode of the driver */
#define UACCE_MODE_NOUACCE 0 /* don't use uacce */
#define UACCE_MODE_SVA 1 /* use uacce sva mode */
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 8a823c6f2b4a..7a1516011ccf 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -15,8 +15,6 @@ enum io_pgtable_fmt {
ARM_64_LPAE_S2,
ARM_V7S,
ARM_MALI_LPAE,
- AMD_IOMMU_V1,
- AMD_IOMMU_V2,
APPLE_DART,
APPLE_DART2,
IO_PGTABLE_NUM_FMTS,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index c30d12e16473..801b2bd9e8d4 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -751,7 +751,8 @@ struct iommu_ops {
* @free: Release the domain after use.
*/
struct iommu_domain_ops {
- int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
+ int (*attach_dev)(struct iommu_domain *domain, struct device *dev,
+ struct iommu_domain *old);
int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev,
ioasid_t pasid, struct iommu_domain *old);
diff --git a/include/linux/irqchip/riscv-imsic.h b/include/linux/irqchip/riscv-imsic.h
index 7494952c5518..7f3ff5c5ea53 100644
--- a/include/linux/irqchip/riscv-imsic.h
+++ b/include/linux/irqchip/riscv-imsic.h
@@ -10,7 +10,6 @@
#include <linux/bitops.h>
#include <linux/device.h>
#include <linux/fwnode.h>
-#include <asm/csr.h>
#define IMSIC_MMIO_PAGE_SHIFT 12
#define IMSIC_MMIO_PAGE_SZ BIT(IMSIC_MMIO_PAGE_SHIFT)
@@ -86,7 +85,7 @@ static inline const struct imsic_global_config *imsic_get_global_config(void)
#endif
-#ifdef CONFIG_ACPI
+#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_RISCV_IMSIC)
int imsic_platform_acpi_probe(struct fwnode_handle *fwnode);
struct fwnode_handle *imsic_acpi_get_fwnode(struct device *dev);
#else
diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h
index 2e85504ba2ba..48f68c4dcfa5 100644
--- a/include/linux/pci-epf.h
+++ b/include/linux/pci-epf.h
@@ -115,8 +115,8 @@ struct pci_epf_driver {
* @phys_addr: physical address that should be mapped to the BAR
* @addr: virtual address corresponding to the @phys_addr
* @size: the size of the address space present in BAR
- * @aligned_size: the size actually allocated to accommodate the iATU alignment
- * requirement
+ * @mem_size: the size actually allocated to accommodate the iATU alignment
+ * requirement
* @barno: BAR number
* @flags: flags that are set for the BAR
*/
@@ -124,7 +124,7 @@ struct pci_epf_bar {
dma_addr_t phys_addr;
void *addr;
size_t size;
- size_t aligned_size;
+ size_t mem_size;
enum pci_barno barno;
int flags;
};
@@ -242,6 +242,12 @@ void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar,
void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar,
enum pci_epc_interface_type type);
+int pci_epf_assign_bar_space(struct pci_epf *epf, size_t size,
+ enum pci_barno bar,
+ const struct pci_epc_features *epc_features,
+ enum pci_epc_interface_type type,
+ dma_addr_t bar_addr);
+
int pci_epf_align_inbound_addr(struct pci_epf *epf, enum pci_barno bar,
u64 addr, dma_addr_t *base, size_t *off);
int pci_epf_bind(struct pci_epf *epf);
diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h
index 951f81a38f3a..517e121d2598 100644
--- a/include/linux/pci-p2pdma.h
+++ b/include/linux/pci-p2pdma.h
@@ -16,7 +16,58 @@
struct block_device;
struct scatterlist;
+/**
+ * struct p2pdma_provider
+ *
+ * A p2pdma provider is a range of MMIO address space available to the CPU.
+ */
+struct p2pdma_provider {
+ struct device *owner;
+ u64 bus_offset;
+};
+
+enum pci_p2pdma_map_type {
+ /*
+ * PCI_P2PDMA_MAP_UNKNOWN: Used internally as an initial state before
+ * the mapping type has been calculated. Exported routines for the API
+ * will never return this value.
+ */
+ PCI_P2PDMA_MAP_UNKNOWN = 0,
+
+ /*
+ * Not a PCI P2PDMA transfer.
+ */
+ PCI_P2PDMA_MAP_NONE,
+
+ /*
+ * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will
+ * traverse the host bridge and the host bridge is not in the
+ * allowlist. DMA Mapping routines should return an error when
+ * this is returned.
+ */
+ PCI_P2PDMA_MAP_NOT_SUPPORTED,
+
+ /*
+ * PCI_P2PDMA_MAP_BUS_ADDR: Indicates that two devices can talk to
+ * each other directly through a PCI switch and the transaction will
+ * not traverse the host bridge. Such a mapping should program
+ * the DMA engine with PCI bus addresses.
+ */
+ PCI_P2PDMA_MAP_BUS_ADDR,
+
+ /*
+ * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk
+ * to each other, but the transaction traverses a host bridge on the
+ * allowlist. In this case, a normal mapping either with CPU physical
+ * addresses (in the case of dma-direct) or IOVA addresses (in the
+ * case of IOMMUs) should be used to program the DMA engine.
+ */
+ PCI_P2PDMA_MAP_THRU_HOST_BRIDGE,
+};
+
#ifdef CONFIG_PCI_P2PDMA
+int pcim_p2pdma_init(struct pci_dev *pdev);
+struct p2pdma_provider *pcim_p2pdma_provider(struct pci_dev *pdev, int bar);
int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
u64 offset);
int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients,
@@ -33,7 +84,18 @@ int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev,
bool *use_p2pdma);
ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev,
bool use_p2pdma);
+enum pci_p2pdma_map_type pci_p2pdma_map_type(struct p2pdma_provider *provider,
+ struct device *dev);
#else /* CONFIG_PCI_P2PDMA */
+static inline int pcim_p2pdma_init(struct pci_dev *pdev)
+{
+ return -EOPNOTSUPP;
+}
+static inline struct p2pdma_provider *pcim_p2pdma_provider(struct pci_dev *pdev,
+ int bar)
+{
+ return NULL;
+}
static inline int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar,
size_t size, u64 offset)
{
@@ -85,6 +147,11 @@ static inline ssize_t pci_p2pdma_enable_show(char *page,
{
return sprintf(page, "none\n");
}
+static inline enum pci_p2pdma_map_type
+pci_p2pdma_map_type(struct p2pdma_provider *provider, struct device *dev)
+{
+ return PCI_P2PDMA_MAP_NOT_SUPPORTED;
+}
#endif /* CONFIG_PCI_P2PDMA */
@@ -99,51 +166,12 @@ static inline struct pci_dev *pci_p2pmem_find(struct device *client)
return pci_p2pmem_find_many(&client, 1);
}
-enum pci_p2pdma_map_type {
- /*
- * PCI_P2PDMA_MAP_UNKNOWN: Used internally as an initial state before
- * the mapping type has been calculated. Exported routines for the API
- * will never return this value.
- */
- PCI_P2PDMA_MAP_UNKNOWN = 0,
-
- /*
- * Not a PCI P2PDMA transfer.
- */
- PCI_P2PDMA_MAP_NONE,
-
- /*
- * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will
- * traverse the host bridge and the host bridge is not in the
- * allowlist. DMA Mapping routines should return an error when
- * this is returned.
- */
- PCI_P2PDMA_MAP_NOT_SUPPORTED,
-
- /*
- * PCI_P2PDMA_MAP_BUS_ADDR: Indicates that two devices can talk to
- * each other directly through a PCI switch and the transaction will
- * not traverse the host bridge. Such a mapping should program
- * the DMA engine with PCI bus addresses.
- */
- PCI_P2PDMA_MAP_BUS_ADDR,
-
- /*
- * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk
- * to each other, but the transaction traverses a host bridge on the
- * allowlist. In this case, a normal mapping either with CPU physical
- * addresses (in the case of dma-direct) or IOVA addresses (in the
- * case of IOMMUs) should be used to program the DMA engine.
- */
- PCI_P2PDMA_MAP_THRU_HOST_BRIDGE,
-};
-
struct pci_p2pdma_map_state {
- struct dev_pagemap *pgmap;
+ struct p2pdma_provider *mem;
enum pci_p2pdma_map_type map;
- u64 bus_off;
};
+
/* helper for pci_p2pdma_state(), do not use directly */
void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state,
struct device *dev, struct page *page);
@@ -162,8 +190,7 @@ pci_p2pdma_state(struct pci_p2pdma_map_state *state, struct device *dev,
struct page *page)
{
if (IS_ENABLED(CONFIG_PCI_P2PDMA) && is_pci_p2pdma_page(page)) {
- if (state->pgmap != page_pgmap(page))
- __pci_p2pdma_update_state(state, dev, page);
+ __pci_p2pdma_update_state(state, dev, page);
return state->map;
}
return PCI_P2PDMA_MAP_NONE;
@@ -172,16 +199,15 @@ pci_p2pdma_state(struct pci_p2pdma_map_state *state, struct device *dev,
/**
* pci_p2pdma_bus_addr_map - Translate a physical address to a bus address
* for a PCI_P2PDMA_MAP_BUS_ADDR transfer.
- * @state: P2P state structure
+ * @provider: P2P provider structure
* @paddr: physical address to map
*
* Map a physically contiguous PCI_P2PDMA_MAP_BUS_ADDR transfer.
*/
static inline dma_addr_t
-pci_p2pdma_bus_addr_map(struct pci_p2pdma_map_state *state, phys_addr_t paddr)
+pci_p2pdma_bus_addr_map(struct p2pdma_provider *provider, phys_addr_t paddr)
{
- WARN_ON_ONCE(state->map != PCI_P2PDMA_MAP_BUS_ADDR);
- return paddr + state->bus_off;
+ return paddr + provider->bus_offset;
}
#endif /* _LINUX_PCI_P2P_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index bf97d49c23cf..b16127c6a7b4 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -502,6 +502,8 @@ struct pci_dev {
#ifdef CONFIG_PCIE_PTM
u16 ptm_cap; /* PTM Capability */
unsigned int ptm_root:1;
+ unsigned int ptm_responder:1;
+ unsigned int ptm_requester:1;
unsigned int ptm_enabled:1;
u8 ptm_granularity;
#endif
@@ -648,6 +650,7 @@ struct pci_host_bridge *pci_alloc_host_bridge(size_t priv);
struct pci_host_bridge *devm_pci_alloc_host_bridge(struct device *dev,
size_t priv);
void pci_free_host_bridge(struct pci_host_bridge *bridge);
+struct device *pci_get_host_bridge_device(struct pci_dev *dev);
struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus);
void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
@@ -831,6 +834,7 @@ struct pci_ops {
void __iomem *(*map_bus)(struct pci_bus *bus, unsigned int devfn, int where);
int (*read)(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val);
int (*write)(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val);
+ int (*assert_perst)(struct pci_bus *bus, bool assert);
};
/*
@@ -1421,16 +1425,16 @@ void pcibios_reset_secondary_bus(struct pci_dev *dev);
void pci_update_resource(struct pci_dev *dev, int resno);
int __must_check pci_assign_resource(struct pci_dev *dev, int i);
int pci_release_resource(struct pci_dev *dev, int resno);
-static inline int pci_rebar_bytes_to_size(u64 bytes)
-{
- bytes = roundup_pow_of_two(bytes);
- /* Return BAR size as defined in the resizable BAR specification */
- return max(ilog2(bytes), 20) - 20;
-}
+/* Resizable BAR related routines */
+int pci_rebar_bytes_to_size(u64 bytes);
+resource_size_t pci_rebar_size_to_bytes(int size);
+u64 pci_rebar_get_possible_sizes(struct pci_dev *pdev, int bar);
+bool pci_rebar_size_supported(struct pci_dev *pdev, int bar, int size);
+int pci_rebar_get_max_size(struct pci_dev *pdev, int bar);
+int __must_check pci_resize_resource(struct pci_dev *dev, int i, int size,
+ int exclude_bars);
-u32 pci_rebar_get_possible_sizes(struct pci_dev *pdev, int bar);
-int __must_check pci_resize_resource(struct pci_dev *dev, int i, int size);
int pci_select_bars(struct pci_dev *dev, unsigned long flags);
bool pci_device_is_present(struct pci_dev *pdev);
void pci_ignore_hotplug(struct pci_dev *dev);
@@ -1958,10 +1962,17 @@ DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T))
*/
#ifdef CONFIG_PCI_DOMAINS
extern int pci_domains_supported;
+int pci_bus_find_emul_domain_nr(u32 hint, u32 min, u32 max);
+void pci_bus_release_emul_domain_nr(int domain_nr);
#else
enum { pci_domains_supported = 0 };
static inline int pci_domain_nr(struct pci_bus *bus) { return 0; }
static inline int pci_proc_domain(struct pci_bus *bus) { return 0; }
+/*
+ * Without CONFIG_PCI_DOMAINS the emulated-domain finder always returns 0
+ * and the matching release hook does nothing.
+ */
+static inline int
+pci_bus_find_emul_domain_nr(u32 hint, u32 min, u32 max) { return 0; }
+static inline void
+pci_bus_release_emul_domain_nr(int domain_nr) { }
#endif /* CONFIG_PCI_DOMAINS */
/*
diff --git a/include/linux/power/max77705_charger.h b/include/linux/power/max77705_charger.h
index 6653abfdf747..b3950ce0625e 100644
--- a/include/linux/power/max77705_charger.h
+++ b/include/linux/power/max77705_charger.h
@@ -123,6 +123,8 @@
#define MAX77705_DISABLE_SKIP 1
#define MAX77705_AUTO_SKIP 0
+#define AICL_WORK_DELAY_MS 100
+
/* uA */
#define MAX77705_CURRENT_CHGIN_STEP 25000
#define MAX77705_CURRENT_CHG_STEP 50000
diff --git a/include/linux/ras.h b/include/linux/ras.h
index a64182bc72ad..468941bfe855 100644
--- a/include/linux/ras.h
+++ b/include/linux/ras.h
@@ -24,8 +24,7 @@ int __init parse_cec_param(char *str);
void log_non_standard_event(const guid_t *sec_type,
const guid_t *fru_id, const char *fru_text,
const u8 sev, const u8 *err, const u32 len);
-void log_arm_hw_error(struct cper_sec_proc_arm *err);
-
+void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev);
#else
static inline void
log_non_standard_event(const guid_t *sec_type,
@@ -33,7 +32,7 @@ log_non_standard_event(const guid_t *sec_type,
const u8 sev, const u8 *err, const u32 len)
{ return; }
static inline void
-log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
+log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev) { return; }
#endif
struct atl_err {
@@ -53,4 +52,15 @@ static inline unsigned long
amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; }
#endif /* CONFIG_AMD_ATL */
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+/*
+ * The ARM-specific SMP header provides get_logical_index(), which maps an
+ * MPIDR value to a CPU logical index.
+ */
+#include <asm/smp_plat.h>
+/*
+ * The argument is parenthesized so that an expression such as "a | b" is
+ * masked as a whole rather than having only "b" ANDed with the bitmask.
+ */
+#define GET_LOGICAL_INDEX(mpidr) \
+	get_logical_index((mpidr) & MPIDR_HWID_BITMASK)
+#else
+/* No MPIDR on other architectures: the lookup always yields -EINVAL. */
+#define GET_LOGICAL_INDEX(mpidr) (-EINVAL)
+#endif /* CONFIG_ARM || CONFIG_ARM64 */
+
#endif /* __RAS_H__ */
diff --git a/include/linux/sizes.h b/include/linux/sizes.h
index 49039494076f..f1f1a055b047 100644
--- a/include/linux/sizes.h
+++ b/include/linux/sizes.h
@@ -67,5 +67,6 @@
#define SZ_16T _AC(0x100000000000, ULL)
#define SZ_32T _AC(0x200000000000, ULL)
#define SZ_64T _AC(0x400000000000, ULL)
+#define SZ_128T _AC(0x800000000000, ULL)
#endif /* __LINUX_SIZES_H__ */
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index eb563f538dee..e90859956514 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -21,6 +21,7 @@ struct kvm;
struct iommufd_ctx;
struct iommufd_device;
struct iommufd_access;
+struct vfio_info_cap;
/*
* VFIO devices can be placed in a set, this allows all devices to share this
@@ -132,6 +133,9 @@ struct vfio_device_ops {
size_t count, loff_t *size);
long (*ioctl)(struct vfio_device *vdev, unsigned int cmd,
unsigned long arg);
+ int (*get_region_info_caps)(struct vfio_device *vdev,
+ struct vfio_region_info *info,
+ struct vfio_info_cap *caps);
int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma);
void (*request)(struct vfio_device *vdev, unsigned int count);
int (*match)(struct vfio_device *vdev, char *buf);
@@ -297,6 +301,8 @@ static inline void vfio_put_device(struct vfio_device *device)
int vfio_register_group_dev(struct vfio_device *device);
int vfio_register_emulated_iommu_dev(struct vfio_device *device);
void vfio_unregister_group_dev(struct vfio_device *device);
+bool vfio_device_try_get_registration(struct vfio_device *device);
+void vfio_device_put_registration(struct vfio_device *device);
int vfio_assign_device_set(struct vfio_device *device, void *set_id);
unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set);
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index f541044e42a2..706877f998ff 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -12,6 +12,7 @@
#include <linux/pci.h>
#include <linux/vfio.h>
#include <linux/irqbypass.h>
+#include <linux/rcupdate.h>
#include <linux/types.h>
#include <linux/uuid.h>
#include <linux/notifier.h>
@@ -26,6 +27,14 @@
struct vfio_pci_core_device;
struct vfio_pci_region;
+struct p2pdma_provider;
+struct dma_buf_phys_vec;
+struct dma_buf_attachment;
+
+/*
+ * Pairs an eventfd context with an rcu_head.  struct vfio_pci_core_device
+ * keeps its err_trigger and req_trigger as __rcu pointers to this type.
+ * NOTE(review): presumably @rcu lets the object be freed only after RCU
+ * readers are done with @ctx -- confirm against the implementation.
+ */
+struct vfio_pci_eventfd {
+ struct eventfd_ctx *ctx;
+ struct rcu_head rcu;
+};
struct vfio_pci_regops {
ssize_t (*rw)(struct vfio_pci_core_device *vdev, char __user *buf,
@@ -49,9 +58,48 @@ struct vfio_pci_region {
u32 flags;
};
+/*
+ * Table of PCI-specific callbacks; struct vfio_pci_core_device refers to one
+ * through its pci_ops pointer.
+ *
+ * @get_dmabuf_phys: takes the same arguments and returns the same type as
+ *	vfio_pci_core_get_dmabuf_phys().
+ *	NOTE(review): presumably that function is the default implementation
+ *	for this hook -- confirm against the callers.
+ */
+struct vfio_pci_device_ops {
+ int (*get_dmabuf_phys)(struct vfio_pci_core_device *vdev,
+ struct p2pdma_provider **provider,
+ unsigned int region_index,
+ struct dma_buf_phys_vec *phys_vec,
+ struct vfio_region_dma_range *dma_ranges,
+ size_t nr_ranges);
+};
+
+#if IS_ENABLED(CONFIG_VFIO_PCI_DMABUF)
+int vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec,
+ struct vfio_region_dma_range *dma_ranges,
+ size_t nr_ranges, phys_addr_t start,
+ phys_addr_t len);
+int vfio_pci_core_get_dmabuf_phys(struct vfio_pci_core_device *vdev,
+ struct p2pdma_provider **provider,
+ unsigned int region_index,
+ struct dma_buf_phys_vec *phys_vec,
+ struct vfio_region_dma_range *dma_ranges,
+ size_t nr_ranges);
+#else
+/*
+ * Stub selected when IS_ENABLED(CONFIG_VFIO_PCI_DMABUF) is false: ignores
+ * its arguments and always fails with -EINVAL.
+ */
+static inline int vfio_pci_core_fill_phys_vec(
+	struct dma_buf_phys_vec *phys_vec,
+	struct vfio_region_dma_range *dma_ranges, size_t nr_ranges,
+	phys_addr_t start, phys_addr_t len)
+{
+	return -EINVAL;
+}
+/*
+ * Stub selected when IS_ENABLED(CONFIG_VFIO_PCI_DMABUF) is false: ignores
+ * its arguments and always fails with -EOPNOTSUPP.
+ */
+static inline int
+vfio_pci_core_get_dmabuf_phys(struct vfio_pci_core_device *vdev,
+			      struct p2pdma_provider **provider,
+			      unsigned int region_index,
+			      struct dma_buf_phys_vec *phys_vec,
+			      struct vfio_region_dma_range *dma_ranges,
+			      size_t nr_ranges)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
struct vfio_pci_core_device {
struct vfio_device vdev;
struct pci_dev *pdev;
+ const struct vfio_pci_device_ops *pci_ops;
void __iomem *barmap[PCI_STD_NUM_BARS];
bool bar_mmap_supported[PCI_STD_NUM_BARS];
u8 *pci_config_map;
@@ -83,8 +131,8 @@ struct vfio_pci_core_device {
struct pci_saved_state *pci_saved_state;
struct pci_saved_state *pm_save;
int ioeventfds_nr;
- struct eventfd_ctx *err_trigger;
- struct eventfd_ctx *req_trigger;
+ struct vfio_pci_eventfd __rcu *err_trigger;
+ struct vfio_pci_eventfd __rcu *req_trigger;
struct eventfd_ctx *pm_wake_eventfd_ctx;
struct list_head dummy_resources_list;
struct mutex ioeventfds_lock;
@@ -94,6 +142,7 @@ struct vfio_pci_core_device {
struct vfio_pci_core_device *sriov_pf_core_dev;
struct notifier_block nb;
struct rw_semaphore memory_lock;
+ struct list_head dmabufs;
};
/* Will be exported for vfio pci drivers usage */
@@ -115,10 +164,16 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
unsigned long arg);
int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
void __user *arg, size_t argsz);
+int vfio_pci_ioctl_get_region_info(struct vfio_device *core_vdev,
+ struct vfio_region_info *info,
+ struct vfio_info_cap *caps);
ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf,
size_t count, loff_t *ppos);
ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf,
size_t count, loff_t *ppos);
+vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
+ struct vm_fault *vmf, unsigned long pfn,
+ unsigned int order);
int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma);
void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count);
int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf);
@@ -134,6 +189,7 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
void __iomem *io, char __user *buf,
loff_t off, size_t count, size_t x_start,
size_t x_end, bool iswrite);
+bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev);
bool vfio_pci_core_range_intersect_range(loff_t buf_start, size_t buf_cnt,
loff_t reg_start, size_t reg_cnt,
loff_t *buf_offset,
@@ -161,4 +217,17 @@ VFIO_IOREAD_DECLARATION(32)
VFIO_IOREAD_DECLARATION(64)
#endif
+/*
+ * Decide whether a mapping of the given @order may be used at @addr.
+ *
+ * Order 0 is always accepted.  For any higher order the function requires
+ * that @addr is not below vma->vm_start, that @addr plus
+ * (PAGE_SIZE << order) does not run past vma->vm_end, and that @pfn is
+ * aligned to (1 << order).
+ */
+static inline bool is_aligned_for_order(struct vm_area_struct *vma,
+					unsigned long addr,
+					unsigned long pfn,
+					unsigned int order)
+{
+	if (!order)
+		return true;
+
+	if (addr < vma->vm_start)
+		return false;
+
+	if (addr + (PAGE_SIZE << order) > vma->vm_end)
+		return false;
+
+	return IS_ALIGNED(pfn, 1 << order);
+}
+
+int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
+ struct dma_buf_phys_vec *phys);
+
#endif /* VFIO_PCI_CORE_H */
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 96c66126c074..132a474e5914 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -177,7 +177,7 @@ struct virtio_device {
union virtio_map vmap;
#ifdef CONFIG_VIRTIO_DEBUG
struct dentry *debugfs_dir;
- u64 debugfs_filter_features[VIRTIO_FEATURES_DWORDS];
+ u64 debugfs_filter_features[VIRTIO_FEATURES_U64S];
#endif
};
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 16001e9f9b39..69f84ea85d71 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -24,7 +24,7 @@ typedef void vq_callback_t(struct virtqueue *);
* a virtqueue unused by the driver.
* @callback: A callback to invoke on a used buffer notification.
* NULL for a virtqueue that does not need a callback.
- * @ctx: A flag to indicate to maintain an extra context per virtqueue.
+ * @ctx: whether to maintain an extra context per virtqueue.
*/
struct virtqueue_info {
const char *name;
@@ -80,13 +80,13 @@ struct virtqueue_info {
* Returns the first 64 feature bits.
* @get_extended_features:
* vdev: the virtio_device
- * Returns the first VIRTIO_FEATURES_MAX feature bits (all we currently
+ * Returns the first VIRTIO_FEATURES_BITS feature bits (all we currently
* need).
* @finalize_features: confirm what device features we'll be using.
* vdev: the virtio_device
* This sends the driver feature bits to the device: it can change
* the dev->feature bits if it wants.
- * Note that despite the name this can be called any number of
+ * Note that despite the name this can be called any number of
* times.
* Returns 0 on success or error status
* @bus_name: return the bus name associated with the device (optional)
@@ -141,8 +141,8 @@ struct virtio_config_ops {
/**
* struct virtio_map_ops - operations for mapping buffer for a virtio device
- * Note: For transport that has its own mapping logic it must
- * implements all of the operations
+ * Note: For a transport that has its own mapping logic it must
+ * implement all of the operations
* @map_page: map a buffer to the device
* map: metadata for performing mapping
* page: the page that will be mapped by the device
@@ -150,7 +150,7 @@ struct virtio_config_ops {
* size: the buffer size
* dir: mapping direction
* attrs: mapping attributes
- * Returns: the mapped address
+ * Returns the mapped address
* @unmap_page: unmap a buffer from the device
* map: device specific mapping map
* map_handle: the mapped address
@@ -172,23 +172,23 @@ struct virtio_config_ops {
* size: the size of the buffer
* map_handle: the mapping address to sync
* gfp: allocation flag (GFP_XXX)
- * Returns: virtual address of the allocated buffer
+ * Returns virtual address of the allocated buffer
* @free: free a coherent buffer mapping
* map: metadata for performing mapping
* size: the size of the buffer
* vaddr: virtual address of the buffer
- * map_handle: the mapping address to sync
+ * map_handle: the mapping address that needs to be freed
* attrs: unmapping attributes
* @need_sync: if the buffer needs synchronization
* map: metadata for performing mapping
* map_handle: the mapped address
- * Returns: whether the buffer needs synchronization
+ * Returns whether the buffer needs synchronization
* @mapping_error: if the mapping address is error
* map: metadata for performing mapping
* map_handle: the mapped address
* @max_mapping_size: get the maximum buffer size that can be mapped
* map: metadata for performing mapping
- * Returns: the maximum buffer size that can be mapped
+ * Returns the maximum buffer size that can be mapped
*/
struct virtio_map_ops {
dma_addr_t (*map_page)(union virtio_map map, struct page *page,
@@ -362,7 +362,7 @@ void virtio_device_ready(struct virtio_device *dev)
* specific set_status() method.
*
* A well behaved device will only notify a virtqueue after
- * DRIVER_OK, this means the device should "see" the coherenct
+ * DRIVER_OK, this means the device should "see" the coherent
* memory write that set vq->broken as false which is done by
* the driver when it sees DRIVER_OK, then the following
* driver's vring_interrupt() will see vq->broken as false so
@@ -384,7 +384,7 @@ const char *virtio_bus_name(struct virtio_device *vdev)
* @vq: the virtqueue
* @cpu_mask: the cpu mask
*
- * Pay attention the function are best-effort: the affinity hint may not be set
+ * Note that this function is best-effort: the affinity hint may not be set
* due to config support, irq type and sharing.
*
*/
diff --git a/include/linux/virtio_features.h b/include/linux/virtio_features.h
index f748f2f87de8..ea2ad8717882 100644
--- a/include/linux/virtio_features.h
+++ b/include/linux/virtio_features.h
@@ -4,15 +4,16 @@
#include <linux/bits.h>
-#define VIRTIO_FEATURES_DWORDS 2
-#define VIRTIO_FEATURES_MAX (VIRTIO_FEATURES_DWORDS * 64)
-#define VIRTIO_FEATURES_WORDS (VIRTIO_FEATURES_DWORDS * 2)
+#define VIRTIO_FEATURES_U64S 2
+#define VIRTIO_FEATURES_BITS (VIRTIO_FEATURES_U64S * 64)
+
#define VIRTIO_BIT(b) BIT_ULL((b) & 0x3f)
-#define VIRTIO_DWORD(b) ((b) >> 6)
+#define VIRTIO_U64(b) ((b) >> 6)
+
#define VIRTIO_DECLARE_FEATURES(name) \
union { \
u64 name; \
- u64 name##_array[VIRTIO_FEATURES_DWORDS];\
+ u64 name##_array[VIRTIO_FEATURES_U64S];\
}
static inline bool virtio_features_chk_bit(unsigned int bit)
@@ -22,9 +23,9 @@ static inline bool virtio_features_chk_bit(unsigned int bit)
* Don't care returning the correct value: the build
* will fail before any bad features access
*/
- BUILD_BUG_ON(bit >= VIRTIO_FEATURES_MAX);
+ BUILD_BUG_ON(bit >= VIRTIO_FEATURES_BITS);
} else {
- if (WARN_ON_ONCE(bit >= VIRTIO_FEATURES_MAX))
+ if (WARN_ON_ONCE(bit >= VIRTIO_FEATURES_BITS))
return false;
}
return true;
@@ -34,26 +35,26 @@ static inline bool virtio_features_test_bit(const u64 *features,
unsigned int bit)
{
return virtio_features_chk_bit(bit) &&
- !!(features[VIRTIO_DWORD(bit)] & VIRTIO_BIT(bit));
+ !!(features[VIRTIO_U64(bit)] & VIRTIO_BIT(bit));
}
static inline void virtio_features_set_bit(u64 *features,
unsigned int bit)
{
if (virtio_features_chk_bit(bit))
- features[VIRTIO_DWORD(bit)] |= VIRTIO_BIT(bit);
+ features[VIRTIO_U64(bit)] |= VIRTIO_BIT(bit);
}
static inline void virtio_features_clear_bit(u64 *features,
unsigned int bit)
{
if (virtio_features_chk_bit(bit))
- features[VIRTIO_DWORD(bit)] &= ~VIRTIO_BIT(bit);
+ features[VIRTIO_U64(bit)] &= ~VIRTIO_BIT(bit);
}
static inline void virtio_features_zero(u64 *features)
{
- memset(features, 0, sizeof(features[0]) * VIRTIO_FEATURES_DWORDS);
+ memset(features, 0, sizeof(features[0]) * VIRTIO_FEATURES_U64S);
}
static inline void virtio_features_from_u64(u64 *features, u64 from)
@@ -66,7 +67,7 @@ static inline bool virtio_features_equal(const u64 *f1, const u64 *f2)
{
int i;
- for (i = 0; i < VIRTIO_FEATURES_DWORDS; ++i)
+ for (i = 0; i < VIRTIO_FEATURES_U64S; ++i)
if (f1[i] != f2[i])
return false;
return true;
@@ -74,14 +75,14 @@ static inline bool virtio_features_equal(const u64 *f1, const u64 *f2)
static inline void virtio_features_copy(u64 *to, const u64 *from)
{
- memcpy(to, from, sizeof(to[0]) * VIRTIO_FEATURES_DWORDS);
+ memcpy(to, from, sizeof(to[0]) * VIRTIO_FEATURES_U64S);
}
static inline void virtio_features_andnot(u64 *to, const u64 *f1, const u64 *f2)
{
int i;
- for (i = 0; i < VIRTIO_FEATURES_DWORDS; i++)
+ for (i = 0; i < VIRTIO_FEATURES_U64S; i++)
to[i] = f1[i] & ~f2[i];
}
diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h
index 48bc12d1045b..9a3f2fc53bd6 100644
--- a/include/linux/virtio_pci_modern.h
+++ b/include/linux/virtio_pci_modern.h
@@ -107,7 +107,7 @@ void vp_modern_set_extended_features(struct virtio_pci_modern_device *mdev,
static inline u64
vp_modern_get_features(struct virtio_pci_modern_device *mdev)
{
- u64 features_array[VIRTIO_FEATURES_DWORDS];
+ u64 features_array[VIRTIO_FEATURES_U64S];
vp_modern_get_extended_features(mdev, features_array);
return features_array[0];
@@ -116,11 +116,11 @@ vp_modern_get_features(struct virtio_pci_modern_device *mdev)
static inline u64
vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev)
{
- u64 features_array[VIRTIO_FEATURES_DWORDS];
+ u64 features_array[VIRTIO_FEATURES_U64S];
int i;
vp_modern_get_driver_extended_features(mdev, features_array);
- for (i = 1; i < VIRTIO_FEATURES_DWORDS; ++i)
+ for (i = 1; i < VIRTIO_FEATURES_U64S; ++i)
WARN_ON_ONCE(features_array[i]);
return features_array[0];
}
@@ -128,7 +128,7 @@ vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev)
static inline void
vp_modern_set_features(struct virtio_pci_modern_device *mdev, u64 features)
{
- u64 features_array[VIRTIO_FEATURES_DWORDS];
+ u64 features_array[VIRTIO_FEATURES_U64S];
virtio_features_from_u64(features_array, features);
vp_modern_set_extended_features(mdev, features_array);