diff options
Diffstat (limited to 'drivers/iommu/generic_pt/fmt/amdv1.h')
| -rw-r--r-- | drivers/iommu/generic_pt/fmt/amdv1.h | 411 |
1 files changed, 411 insertions, 0 deletions
diff --git a/drivers/iommu/generic_pt/fmt/amdv1.h b/drivers/iommu/generic_pt/fmt/amdv1.h new file mode 100644 index 000000000000..aa8e1a8ec95f --- /dev/null +++ b/drivers/iommu/generic_pt/fmt/amdv1.h @@ -0,0 +1,411 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES + * + * AMD IOMMU v1 page table + * + * This is described in Section "2.2.3 I/O Page Tables for Host Translations" + * of the "AMD I/O Virtualization Technology (IOMMU) Specification" + * + * Note the level numbering here matches the core code, so level 0 is the same + * as mode 1. + * + */ +#ifndef __GENERIC_PT_FMT_AMDV1_H +#define __GENERIC_PT_FMT_AMDV1_H + +#include "defs_amdv1.h" +#include "../pt_defs.h" + +#include <asm/page.h> +#include <linux/bitfield.h> +#include <linux/container_of.h> +#include <linux/mem_encrypt.h> +#include <linux/minmax.h> +#include <linux/sizes.h> +#include <linux/string.h> + +enum { + PT_ITEM_WORD_SIZE = sizeof(u64), + /* + * The IOMMUFD selftest uses the AMDv1 format with some alterations It + * uses a 2k page size to test cases where the CPU page size is not the + * same. + */ +#ifdef AMDV1_IOMMUFD_SELFTEST + PT_MAX_VA_ADDRESS_LG2 = 56, + PT_MAX_OUTPUT_ADDRESS_LG2 = 51, + PT_MAX_TOP_LEVEL = 4, + PT_GRANULE_LG2SZ = 11, +#else + PT_MAX_VA_ADDRESS_LG2 = 64, + PT_MAX_OUTPUT_ADDRESS_LG2 = 52, + PT_MAX_TOP_LEVEL = 5, + PT_GRANULE_LG2SZ = 12, +#endif + PT_TABLEMEM_LG2SZ = 12, + + /* The DTE only has these bits for the top phyiscal address */ + PT_TOP_PHYS_MASK = GENMASK_ULL(51, 12), +}; + +/* PTE bits */ +enum { + AMDV1PT_FMT_PR = BIT(0), + AMDV1PT_FMT_D = BIT(6), + AMDV1PT_FMT_NEXT_LEVEL = GENMASK_ULL(11, 9), + AMDV1PT_FMT_OA = GENMASK_ULL(51, 12), + AMDV1PT_FMT_FC = BIT_ULL(60), + AMDV1PT_FMT_IR = BIT_ULL(61), + AMDV1PT_FMT_IW = BIT_ULL(62), +}; + +/* + * gcc 13 has a bug where it thinks the output of FIELD_GET() is an enum, make + * these defines to avoid it. + */ +#define AMDV1PT_FMT_NL_DEFAULT 0 +#define AMDV1PT_FMT_NL_SIZE 7 + +static inline pt_oaddr_t amdv1pt_table_pa(const struct pt_state *pts) +{ + u64 entry = pts->entry; + + if (pts_feature(pts, PT_FEAT_AMDV1_ENCRYPT_TABLES)) + entry = __sme_clr(entry); + return oalog2_mul(FIELD_GET(AMDV1PT_FMT_OA, entry), PT_GRANULE_LG2SZ); +} +#define pt_table_pa amdv1pt_table_pa + +/* Returns the oa for the start of the contiguous entry */ +static inline pt_oaddr_t amdv1pt_entry_oa(const struct pt_state *pts) +{ + u64 entry = pts->entry; + pt_oaddr_t oa; + + if (pts_feature(pts, PT_FEAT_AMDV1_ENCRYPT_TABLES)) + entry = __sme_clr(entry); + oa = FIELD_GET(AMDV1PT_FMT_OA, entry); + + if (FIELD_GET(AMDV1PT_FMT_NEXT_LEVEL, entry) == AMDV1PT_FMT_NL_SIZE) { + unsigned int sz_bits = oaffz(oa); + + oa = oalog2_set_mod(oa, 0, sz_bits); + } else if (PT_WARN_ON(FIELD_GET(AMDV1PT_FMT_NEXT_LEVEL, entry) != + AMDV1PT_FMT_NL_DEFAULT)) + return 0; + return oalog2_mul(oa, PT_GRANULE_LG2SZ); +} +#define pt_entry_oa amdv1pt_entry_oa + +static inline bool amdv1pt_can_have_leaf(const struct pt_state *pts) +{ + /* + * Table 15: Page Table Level Parameters + * The top most level cannot have translation entries + */ + return pts->level < PT_MAX_TOP_LEVEL; +} +#define pt_can_have_leaf amdv1pt_can_have_leaf + +/* Body in pt_fmt_defaults.h */ +static inline unsigned int pt_table_item_lg2sz(const struct pt_state *pts); + +static inline unsigned int +amdv1pt_entry_num_contig_lg2(const struct pt_state *pts) +{ + u32 code; + + if (FIELD_GET(AMDV1PT_FMT_NEXT_LEVEL, pts->entry) == + AMDV1PT_FMT_NL_DEFAULT) + return ilog2(1); + + PT_WARN_ON(FIELD_GET(AMDV1PT_FMT_NEXT_LEVEL, pts->entry) != + AMDV1PT_FMT_NL_SIZE); + + /* + * The contiguous size is encoded in the length of a string of 1's in + * the low bits of the OA. Reverse the equation: + * code = log2_to_int(num_contig_lg2 + item_lg2sz - + * PT_GRANULE_LG2SZ - 1) - 1 + * Which can be expressed as: + * num_contig_lg2 = oalog2_ffz(code) + 1 - + * item_lg2sz - PT_GRANULE_LG2SZ + * + * Assume the bit layout is correct and remove the masking. Reorganize + * the equation to move all the arithmetic before the ffz. + */ + code = pts->entry >> (__bf_shf(AMDV1PT_FMT_OA) - 1 + + pt_table_item_lg2sz(pts) - PT_GRANULE_LG2SZ); + return ffz_t(u32, code); +} +#define pt_entry_num_contig_lg2 amdv1pt_entry_num_contig_lg2 + +static inline unsigned int amdv1pt_num_items_lg2(const struct pt_state *pts) +{ + /* + * Top entry covers bits [63:57] only, this is handled through + * max_vasz_lg2. + */ + if (PT_WARN_ON(pts->level == 5)) + return 7; + return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64)); +} +#define pt_num_items_lg2 amdv1pt_num_items_lg2 + +static inline pt_vaddr_t amdv1pt_possible_sizes(const struct pt_state *pts) +{ + unsigned int isz_lg2 = pt_table_item_lg2sz(pts); + + if (!amdv1pt_can_have_leaf(pts)) + return 0; + + /* + * Table 14: Example Page Size Encodings + * Address bits 51:32 can be used to encode page sizes greater than 4 + * Gbytes. Address bits 63:52 are zero-extended. + * + * 512GB Pages are not supported due to a hardware bug. + * Otherwise every power of two size is supported. + */ + return GENMASK_ULL(min(51, isz_lg2 + amdv1pt_num_items_lg2(pts) - 1), + isz_lg2) & ~SZ_512G; +} +#define pt_possible_sizes amdv1pt_possible_sizes + +static inline enum pt_entry_type amdv1pt_load_entry_raw(struct pt_state *pts) +{ + const u64 *tablep = pt_cur_table(pts, u64) + pts->index; + unsigned int next_level; + u64 entry; + + pts->entry = entry = READ_ONCE(*tablep); + if (!(entry & AMDV1PT_FMT_PR)) + return PT_ENTRY_EMPTY; + + next_level = FIELD_GET(AMDV1PT_FMT_NEXT_LEVEL, pts->entry); + if (pts->level == 0 || next_level == AMDV1PT_FMT_NL_DEFAULT || + next_level == AMDV1PT_FMT_NL_SIZE) + return PT_ENTRY_OA; + return PT_ENTRY_TABLE; +} +#define pt_load_entry_raw amdv1pt_load_entry_raw + +static inline void +amdv1pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa, + unsigned int oasz_lg2, + const struct pt_write_attrs *attrs) +{ + unsigned int isz_lg2 = pt_table_item_lg2sz(pts); + u64 *tablep = pt_cur_table(pts, u64) + pts->index; + u64 entry; + + if (!pt_check_install_leaf_args(pts, oa, oasz_lg2)) + return; + + entry = AMDV1PT_FMT_PR | + FIELD_PREP(AMDV1PT_FMT_OA, log2_div(oa, PT_GRANULE_LG2SZ)) | + attrs->descriptor_bits; + + if (oasz_lg2 == isz_lg2) { + entry |= FIELD_PREP(AMDV1PT_FMT_NEXT_LEVEL, + AMDV1PT_FMT_NL_DEFAULT); + WRITE_ONCE(*tablep, entry); + } else { + unsigned int num_contig_lg2 = oasz_lg2 - isz_lg2; + u64 *end = tablep + log2_to_int(num_contig_lg2); + + entry |= FIELD_PREP(AMDV1PT_FMT_NEXT_LEVEL, + AMDV1PT_FMT_NL_SIZE) | + FIELD_PREP(AMDV1PT_FMT_OA, + oalog2_to_int(oasz_lg2 - PT_GRANULE_LG2SZ - + 1) - + 1); + + /* See amdv1pt_clear_entries() */ + if (num_contig_lg2 <= ilog2(32)) { + for (; tablep != end; tablep++) + WRITE_ONCE(*tablep, entry); + } else { + memset64(tablep, entry, log2_to_int(num_contig_lg2)); + } + } + pts->entry = entry; +} +#define pt_install_leaf_entry amdv1pt_install_leaf_entry + +static inline bool amdv1pt_install_table(struct pt_state *pts, + pt_oaddr_t table_pa, + const struct pt_write_attrs *attrs) +{ + u64 entry; + + /* + * IR and IW are ANDed from the table levels along with the PTE. We + * always control permissions from the PTE, so always set IR and IW for + * tables. + */ + entry = AMDV1PT_FMT_PR | + FIELD_PREP(AMDV1PT_FMT_NEXT_LEVEL, pts->level) | + FIELD_PREP(AMDV1PT_FMT_OA, + log2_div(table_pa, PT_GRANULE_LG2SZ)) | + AMDV1PT_FMT_IR | AMDV1PT_FMT_IW; + if (pts_feature(pts, PT_FEAT_AMDV1_ENCRYPT_TABLES)) + entry = __sme_set(entry); + return pt_table_install64(pts, entry); +} +#define pt_install_table amdv1pt_install_table + +static inline void amdv1pt_attr_from_entry(const struct pt_state *pts, + struct pt_write_attrs *attrs) +{ + attrs->descriptor_bits = + pts->entry & (AMDV1PT_FMT_FC | AMDV1PT_FMT_IR | AMDV1PT_FMT_IW); +} +#define pt_attr_from_entry amdv1pt_attr_from_entry + +static inline void amdv1pt_clear_entries(struct pt_state *pts, + unsigned int num_contig_lg2) +{ + u64 *tablep = pt_cur_table(pts, u64) + pts->index; + u64 *end = tablep + log2_to_int(num_contig_lg2); + + /* + * gcc generates rep stos for the io-pgtable code, and this difference + * can show in microbenchmarks with larger contiguous page sizes. + * rep is slower for small cases. + */ + if (num_contig_lg2 <= ilog2(32)) { + for (; tablep != end; tablep++) + WRITE_ONCE(*tablep, 0); + } else { + memset64(tablep, 0, log2_to_int(num_contig_lg2)); + } +} +#define pt_clear_entries amdv1pt_clear_entries + +static inline bool amdv1pt_entry_is_write_dirty(const struct pt_state *pts) +{ + unsigned int num_contig_lg2 = amdv1pt_entry_num_contig_lg2(pts); + u64 *tablep = pt_cur_table(pts, u64) + + log2_set_mod(pts->index, 0, num_contig_lg2); + u64 *end = tablep + log2_to_int(num_contig_lg2); + + for (; tablep != end; tablep++) + if (READ_ONCE(*tablep) & AMDV1PT_FMT_D) + return true; + return false; +} +#define pt_entry_is_write_dirty amdv1pt_entry_is_write_dirty + +static inline void amdv1pt_entry_make_write_clean(struct pt_state *pts) +{ + unsigned int num_contig_lg2 = amdv1pt_entry_num_contig_lg2(pts); + u64 *tablep = pt_cur_table(pts, u64) + + log2_set_mod(pts->index, 0, num_contig_lg2); + u64 *end = tablep + log2_to_int(num_contig_lg2); + + for (; tablep != end; tablep++) + WRITE_ONCE(*tablep, READ_ONCE(*tablep) & ~(u64)AMDV1PT_FMT_D); +} +#define pt_entry_make_write_clean amdv1pt_entry_make_write_clean + +static inline bool amdv1pt_entry_make_write_dirty(struct pt_state *pts) +{ + u64 *tablep = pt_cur_table(pts, u64) + pts->index; + u64 new = pts->entry | AMDV1PT_FMT_D; + + return try_cmpxchg64(tablep, &pts->entry, new); +} +#define pt_entry_make_write_dirty amdv1pt_entry_make_write_dirty + +/* --- iommu */ +#include <linux/generic_pt/iommu.h> +#include <linux/iommu.h> + +#define pt_iommu_table pt_iommu_amdv1 + +/* The common struct is in the per-format common struct */ +static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table) +{ + return &container_of(iommu_table, struct pt_iommu_amdv1, iommu) + ->amdpt.common; +} + +static inline struct pt_iommu *iommu_from_common(struct pt_common *common) +{ + return &container_of(common, struct pt_iommu_amdv1, amdpt.common)->iommu; +} + +static inline int amdv1pt_iommu_set_prot(struct pt_common *common, + struct pt_write_attrs *attrs, + unsigned int iommu_prot) +{ + u64 pte = 0; + + if (pt_feature(common, PT_FEAT_AMDV1_FORCE_COHERENCE)) + pte |= AMDV1PT_FMT_FC; + if (iommu_prot & IOMMU_READ) + pte |= AMDV1PT_FMT_IR; + if (iommu_prot & IOMMU_WRITE) + pte |= AMDV1PT_FMT_IW; + + /* + * Ideally we'd have an IOMMU_ENCRYPTED flag set by higher levels to + * control this. For now if the tables use sme_set then so do the ptes. + */ + if (pt_feature(common, PT_FEAT_AMDV1_ENCRYPT_TABLES)) + pte = __sme_set(pte); + + attrs->descriptor_bits = pte; + return 0; +} +#define pt_iommu_set_prot amdv1pt_iommu_set_prot + +static inline int amdv1pt_iommu_fmt_init(struct pt_iommu_amdv1 *iommu_table, + const struct pt_iommu_amdv1_cfg *cfg) +{ + struct pt_amdv1 *table = &iommu_table->amdpt; + unsigned int max_vasz_lg2 = PT_MAX_VA_ADDRESS_LG2; + + if (cfg->starting_level == 0 || cfg->starting_level > PT_MAX_TOP_LEVEL) + return -EINVAL; + + if (!pt_feature(&table->common, PT_FEAT_DYNAMIC_TOP) && + cfg->starting_level != PT_MAX_TOP_LEVEL) + max_vasz_lg2 = PT_GRANULE_LG2SZ + + (PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64))) * + (cfg->starting_level + 1); + + table->common.max_vasz_lg2 = + min(max_vasz_lg2, cfg->common.hw_max_vasz_lg2); + table->common.max_oasz_lg2 = + min(PT_MAX_OUTPUT_ADDRESS_LG2, cfg->common.hw_max_oasz_lg2); + pt_top_set_level(&table->common, cfg->starting_level); + return 0; +} +#define pt_iommu_fmt_init amdv1pt_iommu_fmt_init + +#ifndef PT_FMT_VARIANT +static inline void +amdv1pt_iommu_fmt_hw_info(struct pt_iommu_amdv1 *table, + const struct pt_range *top_range, + struct pt_iommu_amdv1_hw_info *info) +{ + info->host_pt_root = virt_to_phys(top_range->top_table); + PT_WARN_ON(info->host_pt_root & ~PT_TOP_PHYS_MASK); + info->mode = top_range->top_level + 1; +} +#define pt_iommu_fmt_hw_info amdv1pt_iommu_fmt_hw_info +#endif + +#if defined(GENERIC_PT_KUNIT) +static const struct pt_iommu_amdv1_cfg amdv1_kunit_fmt_cfgs[] = { + /* Matches what io_pgtable does */ + [0] = { .starting_level = 2 }, +}; +#define kunit_fmt_cfgs amdv1_kunit_fmt_cfgs +enum { KUNIT_FMT_FEATURES = 0 }; +#endif + +#endif |
