diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
203 files changed, 12739 insertions, 8081 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 1a11cab741ac..1acfed2f92ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -2,7 +2,7 @@ config DRM_AMDGPU tristate "AMD GPU" - depends on DRM && PCI && MMU + depends on DRM && PCI depends on !UML select FW_LOADER select DRM_CLIENT @@ -68,7 +68,6 @@ config DRM_AMDGPU_CIK config DRM_AMDGPU_USERPTR bool "Always enable userptr write support" depends on DRM_AMDGPU - depends on MMU select HMM_MIRROR select MMU_NOTIFIER help @@ -77,7 +76,7 @@ config DRM_AMDGPU_USERPTR config DRM_AMD_ISP bool "Enable AMD Image Signal Processor IP support" - depends on DRM_AMDGPU + depends on DRM_AMDGPU && ACPI select MFD_CORE select PM_GENERIC_DOMAINS if PM help diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index aacc810cabb3..930de203d533 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -66,7 +66,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \ - amdgpu_cper.o + amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu_ip.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o @@ -174,7 +174,10 @@ amdgpu-y += \ amdgpu-y += \ amdgpu_mes.o \ mes_v11_0.o \ - mes_v12_0.o + mes_v12_0.o \ + +# add GFX userqueue support +amdgpu-y += mes_userqueue.o # add UVD block amdgpu-y += \ @@ -253,6 +256,8 @@ amdgpu-y += \ # add amdkfd interfaces amdgpu-y += amdgpu_amdkfd.o +# add gfx usermode queue +amdgpu-y += amdgpu_userq.o ifneq ($(CONFIG_HSA_AMD),) AMDKFD_PATH := ../amdkfd diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index e13fbd974141..9569dc16dd3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -71,18 +71,29 @@ aldebaran_get_reset_handler(struct amdgpu_reset_control *reset_ctl, return NULL; } +static inline uint32_t aldebaran_get_ip_block_mask(struct amdgpu_device *adev) +{ + uint32_t ip_block_mask = BIT(AMD_IP_BLOCK_TYPE_GFX) | + BIT(AMD_IP_BLOCK_TYPE_SDMA); + + if (adev->aid_mask) + ip_block_mask |= BIT(AMD_IP_BLOCK_TYPE_IH); + + return ip_block_mask; +} + static int aldebaran_mode2_suspend_ip(struct amdgpu_device *adev) { + uint32_t ip_block_mask = aldebaran_get_ip_block_mask(adev); + uint32_t ip_block; int r, i; amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { - if (!(adev->ip_blocks[i].version->type == - AMD_IP_BLOCK_TYPE_GFX || - adev->ip_blocks[i].version->type == - AMD_IP_BLOCK_TYPE_SDMA)) + ip_block = BIT(adev->ip_blocks[i].version->type); + if (!(ip_block_mask & ip_block)) continue; r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]); @@ -200,8 +211,10 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev) { struct amdgpu_firmware_info *ucode_list[AMDGPU_UCODE_ID_MAXIMUM]; + uint32_t ip_block_mask = aldebaran_get_ip_block_mask(adev); struct amdgpu_firmware_info *ucode; struct amdgpu_ip_block *cmn_block; + struct amdgpu_ip_block *ih_block; int ucode_count = 0; int i, r; @@ -243,6 +256,18 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev) if (r) return r; + if (ip_block_mask & BIT(AMD_IP_BLOCK_TYPE_IH)) { + ih_block = amdgpu_device_ip_get_ip_block(adev, + AMD_IP_BLOCK_TYPE_IH); + if (unlikely(!ih_block)) { + dev_err(adev->dev, "Failed to get IH handle\n"); + return -EINVAL; + } + r = amdgpu_ip_block_resume(ih_block); + if (r) + return r; + } + /* Reinit GFXHUB */ adev->gfxhub.funcs->init(adev); r = adev->gfxhub.funcs->gart_enable(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6d83ccfa42ee..ef3af170dda4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -113,6 +113,8 @@ #include "amdgpu_xcp.h" #include "amdgpu_seq64.h" #include "amdgpu_reg_state.h" +#include "amdgpu_userq.h" +#include "amdgpu_eviction_fence.h" #if defined(CONFIG_DRM_AMD_ISP) #include "amdgpu_isp.h" #endif @@ -228,7 +230,7 @@ extern int amdgpu_force_asic_type; extern int amdgpu_smartshift_bias; extern int amdgpu_use_xgmi_p2p; extern int amdgpu_mtype_local; -extern bool enforce_isolation; +extern int amdgpu_enforce_isolation; #ifdef CONFIG_HSA_AMD extern int sched_policy; extern bool debug_evictions; @@ -266,8 +268,10 @@ extern int amdgpu_umsch_mm_fwlog; extern int amdgpu_user_partt_mode; extern int amdgpu_agp; +extern int amdgpu_rebar; extern int amdgpu_wbrf; +extern int amdgpu_user_queue; #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) @@ -353,7 +357,6 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, AMDGPU_CP_KIQ_IRQ_LAST }; -#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ #define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ #define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ #define MAX_KIQ_REG_TRY 1000 @@ -467,9 +470,6 @@ struct amdgpu_sa_manager { void *cpu_ptr; }; -int amdgpu_fence_slab_init(void); -void amdgpu_fence_slab_fini(void); - /* * IRQS. */ @@ -489,7 +489,6 @@ struct amdgpu_flip_work { bool async; }; - /* * file private structure */ @@ -502,6 +501,11 @@ struct amdgpu_fpriv { struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; + struct amdgpu_userq_mgr userq_mgr; + + /* Eviction fence infra */ + struct amdgpu_eviction_fence_mgr evf_mgr; + /** GPU partition selection */ uint32_t xcp_id; }; @@ -513,12 +517,62 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); */ #define AMDGPU_MAX_WB 1024 /* Reserve at most 1024 WB slots for amdgpu-owned rings. */ +/** + * amdgpu_wb - This struct is used for small GPU memory allocation. + * + * This struct is used to allocate a small amount of GPU memory that can be + * used to shadow certain states into the memory. This is especially useful for + * providing easy CPU access to some states without requiring register access + * (e.g., if some block is power gated, reading register may be problematic). + * + * Note: the term writeback was initially used because many of the amdgpu + * components had some level of writeback memory, and this struct initially + * described those components. + */ struct amdgpu_wb { + + /** + * @wb_obj: + * + * Buffer Object used for the writeback memory. + */ struct amdgpu_bo *wb_obj; + + /** + * @wb: + * + * Pointer to the first writeback slot. In terms of CPU address + * this value can be accessed directly by using the offset as an index. + * For the GPU address, it is necessary to use gpu_addr and the offset. + */ volatile uint32_t *wb; + + /** + * @gpu_addr: + * + * Writeback base address in the GPU. + */ uint64_t gpu_addr; - u32 num_wb; /* Number of wb slots actually reserved for amdgpu. */ + + /** + * @num_wb: + * + * Number of writeback slots reserved for amdgpu. + */ + u32 num_wb; + + /** + * @used: + * + * Track the writeback slot already used. + */ unsigned long used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)]; + + /** + * @lock: + * + * Protects read and write of the used field array. + */ spinlock_t lock; }; @@ -552,6 +606,7 @@ struct amdgpu_allowed_register_entry { * are reset depends on the ASIC. Notably doesn't reset IPs * shared with the CPU on APUs or the memory controllers (so * VRAM is not lost). Not available on all ASICs. + * @AMD_RESET_LINK: Triggers SW-UP link reset on other GPUs * @AMD_RESET_BACO: BACO (Bus Alive, Chip Off) method powers off and on the card * but without powering off the PCI bus. Suitable only for * discrete GPUs. @@ -569,6 +624,7 @@ enum amd_reset_method { AMD_RESET_METHOD_MODE0, AMD_RESET_METHOD_MODE1, AMD_RESET_METHOD_MODE2, + AMD_RESET_METHOD_LINK, AMD_RESET_METHOD_BACO, AMD_RESET_METHOD_PCI, AMD_RESET_METHOD_ON_INIT, @@ -822,6 +878,12 @@ struct amdgpu_mqd_prop { uint32_t hqd_queue_priority; bool allow_tunneling; bool hqd_active; + uint64_t shadow_addr; + uint64_t gds_bkup_addr; + uint64_t csa_addr; + uint64_t fence_address; + bool tmz_queue; + bool kernel_queue; }; struct amdgpu_mqd { @@ -830,6 +892,12 @@ struct amdgpu_mqd { struct amdgpu_mqd_prop *p); }; +struct amdgpu_pcie_reset_ctx { + bool in_link_reset; + bool occurs_dpc; + bool audio_suspended; +}; + /* * Custom Init levels could be defined for different situations where a full * initialization of all hardware blocks are not expected. Sample cases are @@ -854,6 +922,14 @@ struct amdgpu_init_level { struct amdgpu_reset_domain; struct amdgpu_fru_info; +enum amdgpu_enforce_isolation_mode { + AMDGPU_ENFORCE_ISOLATION_DISABLE = 0, + AMDGPU_ENFORCE_ISOLATION_ENABLE = 1, + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY = 2, + AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER = 3, +}; + + /* * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise. */ @@ -1082,6 +1158,13 @@ struct amdgpu_device { bool enable_uni_mes; struct amdgpu_mes mes; struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM]; + const struct amdgpu_userq_funcs *userq_funcs[AMDGPU_HW_IP_NUM]; + + /* xarray used to retrieve the user queue fence driver reference + * in the EOP interrupt handler to signal the particular user + * queue fence. + */ + struct xarray userq_xa; /* df */ struct amdgpu_df df; @@ -1124,6 +1207,7 @@ struct amdgpu_device { bool in_s3; bool in_s4; bool in_s0ix; + suspend_state_t last_suspend_state; enum pp_mp1_state mp1_state; struct amdgpu_doorbell_index doorbell_index; @@ -1160,6 +1244,8 @@ struct amdgpu_device { struct pci_saved_state *pci_state; pci_channel_state_t pci_channel_state; + struct amdgpu_pcie_reset_ctx pcie_reset_ctx; + /* Track auto wait count on s_barrier settings */ bool barrier_has_auto_waitcnt; @@ -1193,10 +1279,12 @@ struct amdgpu_device { bool debug_enable_ras_aca; bool debug_exp_resets; bool debug_disable_gpu_ring_reset; + bool debug_vm_userptr; + bool debug_disable_ce_logs; /* Protection for the following isolation structure */ struct mutex enforce_isolation_mutex; - bool enforce_isolation[MAX_XCP]; + enum amdgpu_enforce_isolation_mode enforce_isolation[MAX_XCP]; struct amdgpu_isolation { void *owner; struct dma_fence *spearhead; @@ -1210,6 +1298,10 @@ struct amdgpu_device { * in KFD: VRAM or GTT. */ bool apu_prefer_gtt; + + struct list_head userq_mgr_list; + struct mutex userq_mutex; + bool userq_halt_for_enforce_isolation; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, @@ -1243,6 +1335,11 @@ static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_device *bdev) return container_of(bdev, struct amdgpu_device, mman.bdev); } +static inline bool amdgpu_is_multi_aid(struct amdgpu_device *adev) +{ + return !!adev->aid_mask; +} + int amdgpu_device_init(struct amdgpu_device *adev, uint32_t flags); void amdgpu_device_fini_hw(struct amdgpu_device *adev); @@ -1294,7 +1391,8 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev, u64 reg_addr, u64 reg_data); u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev); -bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type); +bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev, + enum amd_asic_type asic_type); bool amdgpu_device_has_dc_support(struct amdgpu_device *adev); void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev); @@ -1464,16 +1562,17 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, const u32 array_size); int amdgpu_device_mode1_reset(struct amdgpu_device *adev); -bool amdgpu_device_supports_atpx(struct drm_device *dev); -bool amdgpu_device_supports_px(struct drm_device *dev); -bool amdgpu_device_supports_boco(struct drm_device *dev); -bool amdgpu_device_supports_smart_shift(struct drm_device *dev); -int amdgpu_device_supports_baco(struct drm_device *dev); +int amdgpu_device_link_reset(struct amdgpu_device *adev); +bool amdgpu_device_supports_atpx(struct amdgpu_device *adev); +bool amdgpu_device_supports_px(struct amdgpu_device *adev); +bool amdgpu_device_supports_boco(struct amdgpu_device *adev); +bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev); +int amdgpu_device_supports_baco(struct amdgpu_device *adev); void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev); bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); -int amdgpu_device_baco_enter(struct drm_device *dev); -int amdgpu_device_baco_exit(struct drm_device *dev); +int amdgpu_device_baco_enter(struct amdgpu_device *adev); +int amdgpu_device_baco_exit(struct amdgpu_device *adev); void amdgpu_device_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring); @@ -1525,6 +1624,7 @@ void amdgpu_driver_release_kms(struct drm_device *dev); int amdgpu_device_ip_suspend(struct amdgpu_device *adev); int amdgpu_device_prepare(struct drm_device *dev); +void amdgpu_device_complete(struct drm_device *dev); int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); int amdgpu_device_resume(struct drm_device *dev, bool fbcon); u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); @@ -1575,7 +1675,8 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev, u8 perf_req, bool advertise); int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, u8 dev_state, bool drv_state); -int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state); +int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev, + enum amdgpu_ss ss_state); int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev); int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset, u64 *tmr_size); @@ -1606,19 +1707,24 @@ static inline void amdgpu_acpi_release(void) { } static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; } static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, u8 dev_state, bool drv_state) { return 0; } -static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev, - enum amdgpu_ss ss_state) { return 0; } +static inline int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev, + enum amdgpu_ss ss_state) +{ + return 0; +} static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps) { } #endif #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev); bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); -void amdgpu_choose_low_power_state(struct amdgpu_device *adev); #else static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } -static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { } +#endif + +#if defined(CONFIG_DRM_AMD_ISP) +int amdgpu_acpi_get_isp4_dev(struct acpi_device **dev); #endif void amdgpu_register_gpu_instance(struct amdgpu_device *adev); @@ -1664,4 +1770,19 @@ extern const struct attribute_group amdgpu_flash_attr_group; void amdgpu_set_init_level(struct amdgpu_device *adev, enum amdgpu_init_lvl_id lvl); + +static inline int amdgpu_device_bus_status_check(struct amdgpu_device *adev) +{ + u32 status; + int r; + + r = pci_read_config_dword(adev->pdev, PCI_COMMAND, &status); + if (r || PCI_POSSIBLE_ERROR(status)) { + dev_err(adev->dev, "device lost from bus!"); + return -ENODEV; + } + + return 0; +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index dc47f5fd4ea1..cbc40cad581b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -115,11 +115,19 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st u64 event_id = qctx ? qctx->evid.event_id : RAS_EVENT_INVALID_ID; int i; + if (adev->debug_disable_ce_logs && + bank->smu_err_type == ACA_SMU_TYPE_CE && + !ACA_BANK_ERR_IS_DEFFERED(bank)) + return; + RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n"); /* plus 1 for output format, e.g: ACA[08/08]: xxxx */ for (i = 0; i < ARRAY_SIZE(aca_regs); i++) RAS_EVENT_LOG(adev, event_id, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n", idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]); + + if (ACA_REG__STATUS__SCRUB(bank->regs[ACA_REG_IDX_STATUS])) + RAS_EVENT_LOG(adev, event_id, HW_ERR "hardware error logged by the scrubber\n"); } static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type, @@ -195,6 +203,10 @@ static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, { const struct aca_bank_ops *bank_ops = handle->bank_ops; + /* Parse all deferred errors with UMC aca handle */ + if (ACA_BANK_ERR_IS_DEFFERED(bank)) + return handle->hwip == ACA_HWIP_TYPE_UMC; + if (!aca_bank_hwip_is_matched(bank, handle->hwip)) return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index 6b180f1b33fd..38c88897e1ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -80,14 +80,6 @@ struct ras_query_context; (ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \ ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS])) -#define ACA_BANK_ERR_CE_DE_DECODE(bank) \ - (ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \ - ACA_ERROR_TYPE_CE) - -#define ACA_BANK_ERR_UE_DE_DECODE(bank) \ - (ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \ - ACA_ERROR_TYPE_UE) - enum aca_reg_idx { ACA_REG_IDX_CTL = 0, ACA_REG_IDX_STATUS = 1, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index b7f8f2ff143d..6c62e27b9800 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -811,18 +811,18 @@ int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, /** * amdgpu_acpi_smart_shift_update - update dGPU device state to SBIOS * - * @dev: drm_device pointer + * @adev: amdgpu device pointer * @ss_state: current smart shift event * * returns 0 on success, * otherwise return error number. */ -int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state) +int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev, + enum amdgpu_ss ss_state) { - struct amdgpu_device *adev = drm_to_adev(dev); int r; - if (!amdgpu_device_supports_smart_shift(dev)) + if (!amdgpu_device_supports_smart_shift(adev)) return 0; switch (ss_state) { @@ -1532,23 +1532,35 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) return true; #endif /* CONFIG_AMD_PMC */ } +#endif /* CONFIG_SUSPEND */ -/** - * amdgpu_choose_low_power_state - * - * @adev: amdgpu_device_pointer - * - * Choose the target low power state for the GPU - */ -void amdgpu_choose_low_power_state(struct amdgpu_device *adev) -{ - if (adev->in_runpm) - return; +#if IS_ENABLED(CONFIG_DRM_AMD_ISP) +static const struct acpi_device_id isp_sensor_ids[] = { + { "OMNI5C10" }, + { } +}; - if (amdgpu_acpi_is_s0ix_active(adev)) - adev->in_s0ix = true; - else if (amdgpu_acpi_is_s3_active(adev)) - adev->in_s3 = true; +static int isp_match_acpi_device_ids(struct device *dev, const void *data) +{ + return acpi_match_device(data, dev) ? 1 : 0; } -#endif /* CONFIG_SUSPEND */ +int amdgpu_acpi_get_isp4_dev(struct acpi_device **dev) +{ + struct device *pdev __free(put_device) = NULL; + struct acpi_device *acpi_pdev; + + pdev = bus_find_device(&platform_bus_type, NULL, isp_sensor_ids, + isp_match_acpi_device_ids); + if (!pdev) + return -EINVAL; + + acpi_pdev = ACPI_COMPANION(pdev); + if (!acpi_pdev) + return -ENODEV; + + *dev = acpi_pdev; + + return 0; +} +#endif /* CONFIG_DRM_AMD_ISP */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 4cec3a873995..fbe7616555c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -248,18 +248,34 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry); } -void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm) +void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc) { if (adev->kfd.dev) - kgd2kfd_suspend(adev->kfd.dev, run_pm); + kgd2kfd_suspend(adev->kfd.dev, suspend_proc); } -int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm) +int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc) { int r = 0; if (adev->kfd.dev) - r = kgd2kfd_resume(adev->kfd.dev, run_pm); + r = kgd2kfd_resume(adev->kfd.dev, resume_proc); + + return r; +} + +void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev) +{ + if (adev->kfd.dev) + kgd2kfd_suspend_process(adev->kfd.dev); +} + +int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev) +{ + int r = 0; + + if (adev->kfd.dev) + r = kgd2kfd_resume_process(adev->kfd.dev); return r; } @@ -368,6 +384,9 @@ void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj) { struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj; + if (!bo || !*bo) + return; + (void)amdgpu_bo_reserve(*bo, true); amdgpu_bo_kunmap(*bo); amdgpu_bo_unpin(*bo); @@ -639,7 +658,7 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, goto err; } - ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job); + ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job, 0); if (ret) goto err; @@ -746,12 +765,12 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev) { - return kgd2kfd_check_and_lock_kfd(); + return kgd2kfd_check_and_lock_kfd(adev->kfd.dev); } void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev) { - kgd2kfd_unlock_kfd(); + kgd2kfd_unlock_kfd(adev->kfd.dev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index b6ca41859b53..33eb4826b58b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -154,8 +154,10 @@ struct amdkfd_process_info { int amdgpu_amdkfd_init(void); void amdgpu_amdkfd_fini(void); -void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm); -int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm); +void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc); +int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc); +void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev); +int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev); void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void *ih_ring_entry); void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); @@ -411,16 +413,18 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf); bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources *gpu_resources); void kgd2kfd_device_exit(struct kfd_dev *kfd); -void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm); -int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm); +void kgd2kfd_suspend(struct kfd_dev *kfd, bool suspend_proc); +int kgd2kfd_resume(struct kfd_dev *kfd, bool resume_proc); +void kgd2kfd_suspend_process(struct kfd_dev *kfd); +int kgd2kfd_resume_process(struct kfd_dev *kfd); int kgd2kfd_pre_reset(struct kfd_dev *kfd, struct amdgpu_reset_context *reset_context); int kgd2kfd_post_reset(struct kfd_dev *kfd); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask); -int kgd2kfd_check_and_lock_kfd(void); -void kgd2kfd_unlock_kfd(void); +int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd); +void kgd2kfd_unlock_kfd(struct kfd_dev *kfd); int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id); int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id); bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id); @@ -454,11 +458,20 @@ static inline void kgd2kfd_device_exit(struct kfd_dev *kfd) { } -static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) +static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool suspend_proc) { } -static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) +static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool resume_proc) +{ + return 0; +} + +static inline void kgd2kfd_suspend_process(struct kfd_dev *kfd) +{ +} + +static inline int kgd2kfd_resume_process(struct kfd_dev *kfd) { return 0; } @@ -489,12 +502,12 @@ void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) { } -static inline int kgd2kfd_check_and_lock_kfd(void) +static inline int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd) { return 0; } -static inline void kgd2kfd_unlock_kfd(void) +static inline void kgd2kfd_unlock_kfd(struct kfd_dev *kfd) { } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index ffbaa8bc5eea..1105a09e55dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -320,7 +320,7 @@ static void set_barrier_auto_waitcnt(struct amdgpu_device *adev, bool enable_wai if (!down_read_trylock(&adev->reset_domain->sem)) return; - amdgpu_amdkfd_suspend(adev, false); + amdgpu_amdkfd_suspend(adev, true); if (suspend_resume_compute_scheduler(adev, true)) goto out; @@ -333,7 +333,7 @@ static void set_barrier_auto_waitcnt(struct amdgpu_device *adev, bool enable_wai out: suspend_resume_compute_scheduler(adev, false); - amdgpu_amdkfd_resume(adev, false); + amdgpu_amdkfd_resume(adev, true); up_read(&adev->reset_domain->sem); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index ca4a6b82817f..df77558e03ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -561,6 +561,13 @@ static uint32_t read_vmid_from_vmfault_reg(struct amdgpu_device *adev) return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); } +static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) + +{ + return 0; +} + const struct kfd2kgd_calls gfx_v7_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -578,4 +585,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = { .set_scratch_backing_va = set_scratch_backing_va, .set_vm_context_page_table_base = set_vm_context_page_table_base, .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, + .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 0f3e2944edd7..e68c0fa8d751 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -582,6 +582,13 @@ static void set_vm_context_page_table_base(struct amdgpu_device *adev, lower_32_bits(page_table_base)); } +static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) + +{ + return 0; +} + const struct kfd2kgd_calls gfx_v8_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -599,4 +606,5 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = { get_atc_vmid_pasid_mapping_info, .set_scratch_backing_va = set_scratch_backing_va, .set_vm_context_page_table_base = set_vm_context_page_table_base, + .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d2ec4130a316..260165bbe373 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2559,6 +2559,18 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, if (ret != -EFAULT) return ret; + /* If applications unmap memory before destroying the userptr + * from the KFD, trigger a segmentation fault in VM debug mode. + */ + if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) { + pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n", + pid_nr(process_info->pid), mem->va); + + // Send GPU VM fault to user space + kfd_signal_vm_fault_event_with_userptr(kfd_lookup_process_by_pid(process_info->pid), + mem->va); + } + ret = 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index eb015bdda8a7..c7d32fb216e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -281,6 +281,9 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev, case ATOM_DGPU_VRAM_TYPE_GDDR6: vram_type = AMDGPU_VRAM_TYPE_GDDR6; break; + case ATOM_DGPU_VRAM_TYPE_HBM3E: + vram_type = AMDGPU_VRAM_TYPE_HBM3E; + break; default: vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 75fcc521c171..00e96419fcda 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -447,6 +447,13 @@ success: return true; } +static bool amdgpu_prefer_rom_resource(struct amdgpu_device *adev) +{ + struct resource *res = &adev->pdev->resource[PCI_ROM_RESOURCE]; + + return (res->flags & IORESOURCE_ROM_SHADOW); +} + static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev) { if (amdgpu_atrm_get_bios(adev)) { @@ -465,14 +472,27 @@ static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev) goto success; } - if (amdgpu_read_platform_bios(adev)) { - dev_info(adev->dev, "Fetched VBIOS from platform\n"); - goto success; - } + if (amdgpu_prefer_rom_resource(adev)) { + if (amdgpu_read_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n"); + goto success; + } - if (amdgpu_read_bios(adev)) { - dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n"); - goto success; + if (amdgpu_read_platform_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from platform\n"); + goto success; + } + + } else { + if (amdgpu_read_platform_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from platform\n"); + goto success; + } + + if (amdgpu_read_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n"); + goto success; + } } if (amdgpu_read_bios_from_rom(adev)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 68bce6a6d09d..004a6a9d6b9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -252,83 +252,22 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, if (!adev->pm.fw) { switch (adev->asic_type) { - case CHIP_TAHITI: - strcpy(fw_name, "radeon/tahiti_smc.bin"); - break; - case CHIP_PITCAIRN: - if ((adev->pdev->revision == 0x81) && - ((adev->pdev->device == 0x6810) || - (adev->pdev->device == 0x6811))) { - info->is_kicker = true; - strcpy(fw_name, "radeon/pitcairn_k_smc.bin"); - } else { - strcpy(fw_name, "radeon/pitcairn_smc.bin"); - } - break; - case CHIP_VERDE: - if (((adev->pdev->device == 0x6820) && - ((adev->pdev->revision == 0x81) || - (adev->pdev->revision == 0x83))) || - ((adev->pdev->device == 0x6821) && - ((adev->pdev->revision == 0x83) || - (adev->pdev->revision == 0x87))) || - ((adev->pdev->revision == 0x87) && - ((adev->pdev->device == 0x6823) || - (adev->pdev->device == 0x682b)))) { - info->is_kicker = true; - strcpy(fw_name, "radeon/verde_k_smc.bin"); - } else { - strcpy(fw_name, "radeon/verde_smc.bin"); - } - break; - case CHIP_OLAND: - if (((adev->pdev->revision == 0x81) && - ((adev->pdev->device == 0x6600) || - (adev->pdev->device == 0x6604) || - (adev->pdev->device == 0x6605) || - (adev->pdev->device == 0x6610))) || - ((adev->pdev->revision == 0x83) && - (adev->pdev->device == 0x6610))) { - info->is_kicker = true; - strcpy(fw_name, "radeon/oland_k_smc.bin"); - } else { - strcpy(fw_name, "radeon/oland_smc.bin"); - } - break; - case CHIP_HAINAN: - if (((adev->pdev->revision == 0x81) && - (adev->pdev->device == 0x6660)) || - ((adev->pdev->revision == 0x83) && - ((adev->pdev->device == 0x6660) || - (adev->pdev->device == 0x6663) || - (adev->pdev->device == 0x6665) || - (adev->pdev->device == 0x6667)))) { - info->is_kicker = true; - strcpy(fw_name, "radeon/hainan_k_smc.bin"); - } else if ((adev->pdev->revision == 0xc3) && - (adev->pdev->device == 0x6665)) { - info->is_kicker = true; - strcpy(fw_name, "radeon/banks_k_2_smc.bin"); - } else { - strcpy(fw_name, "radeon/hainan_smc.bin"); - } - break; case CHIP_BONAIRE: if ((adev->pdev->revision == 0x80) || (adev->pdev->revision == 0x81) || (adev->pdev->device == 0x665f)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/bonaire_k_smc.bin"); + strscpy(fw_name, "amdgpu/bonaire_k_smc.bin"); } else { - strcpy(fw_name, "amdgpu/bonaire_smc.bin"); + strscpy(fw_name, "amdgpu/bonaire_smc.bin"); } break; case CHIP_HAWAII: if (adev->pdev->revision == 0x80) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/hawaii_k_smc.bin"); + strscpy(fw_name, "amdgpu/hawaii_k_smc.bin"); } else { - strcpy(fw_name, "amdgpu/hawaii_smc.bin"); + strscpy(fw_name, "amdgpu/hawaii_smc.bin"); } break; case CHIP_TOPAZ: @@ -338,76 +277,76 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD1)) || ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD3))) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/topaz_k_smc.bin"); + strscpy(fw_name, "amdgpu/topaz_k_smc.bin"); } else - strcpy(fw_name, "amdgpu/topaz_smc.bin"); + strscpy(fw_name, "amdgpu/topaz_smc.bin"); break; case CHIP_TONGA: if (((adev->pdev->device == 0x6939) && (adev->pdev->revision == 0xf1)) || ((adev->pdev->device == 0x6938) && (adev->pdev->revision == 0xf1))) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/tonga_k_smc.bin"); + strscpy(fw_name, "amdgpu/tonga_k_smc.bin"); } else - strcpy(fw_name, "amdgpu/tonga_smc.bin"); + strscpy(fw_name, "amdgpu/tonga_smc.bin"); break; case CHIP_FIJI: - strcpy(fw_name, "amdgpu/fiji_smc.bin"); + strscpy(fw_name, "amdgpu/fiji_smc.bin"); break; case CHIP_POLARIS11: if (type == CGS_UCODE_ID_SMU) { if (ASICID_IS_P21(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris11_k_smc.bin"); + strscpy(fw_name, "amdgpu/polaris11_k_smc.bin"); } else if (ASICID_IS_P31(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin"); + strscpy(fw_name, "amdgpu/polaris11_k2_smc.bin"); } else { - strcpy(fw_name, "amdgpu/polaris11_smc.bin"); + strscpy(fw_name, "amdgpu/polaris11_smc.bin"); } } else if (type == CGS_UCODE_ID_SMU_SK) { - strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin"); + strscpy(fw_name, "amdgpu/polaris11_smc_sk.bin"); } break; case CHIP_POLARIS10: if (type == CGS_UCODE_ID_SMU) { if (ASICID_IS_P20(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris10_k_smc.bin"); + strscpy(fw_name, "amdgpu/polaris10_k_smc.bin"); } else if (ASICID_IS_P30(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris10_k2_smc.bin"); + strscpy(fw_name, "amdgpu/polaris10_k2_smc.bin"); } else { - strcpy(fw_name, "amdgpu/polaris10_smc.bin"); + strscpy(fw_name, "amdgpu/polaris10_smc.bin"); } } else if (type == CGS_UCODE_ID_SMU_SK) { - strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin"); + strscpy(fw_name, "amdgpu/polaris10_smc_sk.bin"); } break; case CHIP_POLARIS12: if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris12_k_smc.bin"); + strscpy(fw_name, "amdgpu/polaris12_k_smc.bin"); } else { - strcpy(fw_name, "amdgpu/polaris12_smc.bin"); + strscpy(fw_name, "amdgpu/polaris12_smc.bin"); } break; case CHIP_VEGAM: - strcpy(fw_name, "amdgpu/vegam_smc.bin"); + strscpy(fw_name, "amdgpu/vegam_smc.bin"); break; case CHIP_VEGA10: if ((adev->pdev->device == 0x687f) && ((adev->pdev->revision == 0xc0) || (adev->pdev->revision == 0xc1) || (adev->pdev->revision == 0xc3))) - strcpy(fw_name, "amdgpu/vega10_acg_smc.bin"); + strscpy(fw_name, "amdgpu/vega10_acg_smc.bin"); else - strcpy(fw_name, "amdgpu/vega10_smc.bin"); + strscpy(fw_name, "amdgpu/vega10_smc.bin"); break; case CHIP_VEGA12: - strcpy(fw_name, "amdgpu/vega12_smc.bin"); + strscpy(fw_name, "amdgpu/vega12_smc.bin"); break; case CHIP_VEGA20: - strcpy(fw_name, "amdgpu/vega20_smc.bin"); + strscpy(fw_name, "amdgpu/vega20_smc.bin"); break; default: DRM_ERROR("SMC firmware not supported\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c index 360e07a5c7c1..25252231a68a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c @@ -212,7 +212,7 @@ int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev NONSTD_SEC_OFFSET(hdr->sec_cnt, idx)); amdgpu_cper_entry_fill_section_desc(adev, section_desc, true, false, - CPER_SEV_NUM, RUNTIME, NONSTD_SEC_LEN, + CPER_SEV_FATAL, RUNTIME, NONSTD_SEC_LEN, NONSTD_SEC_OFFSET(hdr->sec_cnt, idx)); section->hdr.valid_bits.err_info_cnt = 1; @@ -326,7 +326,9 @@ int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev) return -ENOMEM; } - amdgpu_cper_entry_fill_hdr(adev, bp_threshold, AMDGPU_CPER_TYPE_BP_THRESHOLD, CPER_SEV_NUM); + amdgpu_cper_entry_fill_hdr(adev, bp_threshold, + AMDGPU_CPER_TYPE_BP_THRESHOLD, + CPER_SEV_FATAL); ret = amdgpu_cper_entry_fill_bad_page_threshold_section(adev, bp_threshold, 0); if (ret) return ret; @@ -457,7 +459,7 @@ calc: void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count) { - u64 pos, wptr_old, rptr = *ring->rptr_cpu_addr & ring->ptr_mask; + u64 pos, wptr_old, rptr; int rec_cnt_dw = count >> 2; u32 chunk, ent_sz; u8 *s = (u8 *)src; @@ -470,9 +472,11 @@ void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count) return; } + mutex_lock(&ring->adev->cper.ring_lock); + wptr_old = ring->wptr; + rptr = *ring->rptr_cpu_addr & ring->ptr_mask; - mutex_lock(&ring->adev->cper.ring_lock); while (count) { ent_sz = amdgpu_cper_ring_get_ent_sz(ring, ring->wptr); chunk = umin(ent_sz, count); @@ -549,7 +553,7 @@ int amdgpu_cper_init(struct amdgpu_device *adev) { int r; - if (!amdgpu_aca_is_enabled(adev)) + if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev)) return 0; r = amdgpu_cper_ring_init(adev); @@ -568,7 +572,7 @@ int amdgpu_cper_init(struct amdgpu_device *adev) int amdgpu_cper_fini(struct amdgpu_device *adev) { - if (!amdgpu_aca_is_enabled(adev)) + if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev)) return 0; adev->cper.enabled = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 82df06a72ee0..a2adaacf6adb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -293,10 +293,29 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, for (i = 0; i < p->gang_size; ++i) { ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm, - num_ibs[i], &p->jobs[i]); + num_ibs[i], &p->jobs[i], + p->filp->client_id); if (ret) goto free_all_kdata; - p->jobs[i]->enforce_isolation = p->adev->enforce_isolation[fpriv->xcp_id]; + switch (p->adev->enforce_isolation[fpriv->xcp_id]) { + case AMDGPU_ENFORCE_ISOLATION_DISABLE: + default: + p->jobs[i]->enforce_isolation = false; + p->jobs[i]->run_cleaner_shader = false; + break; + case AMDGPU_ENFORCE_ISOLATION_ENABLE: + p->jobs[i]->enforce_isolation = true; + p->jobs[i]->run_cleaner_shader = true; + break; + case AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY: + p->jobs[i]->enforce_isolation = true; + p->jobs[i]->run_cleaner_shader = false; + break; + case AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER: + p->jobs[i]->enforce_isolation = true; + p->jobs[i]->run_cleaner_shader = false; + break; + } } p->gang_leader = p->jobs[p->gang_leader_idx]; @@ -349,6 +368,10 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, ring = amdgpu_job_ring(job); ib = &job->ibs[job->num_ibs++]; + /* submissions to kernel queues are disabled */ + if (ring->no_user_submission) + return -EINVAL; + /* MM engine doesn't support user fences */ if (p->uf_bo && ring->funcs->no_user_fence) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index cfdf558b48b6..02138aa55793 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -109,7 +109,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; int r; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = amdgpu_vm_lock_pd(vm, &exec, 0); if (likely(!r)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index c43d1b6e5d66..f5d5c45ddc0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -919,7 +919,7 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout) return timeout; } -void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) +static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) { struct amdgpu_ctx *ctx; struct idr *idp; @@ -944,24 +944,13 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) drm_sched_entity_fini(entity); } } + kref_put(&ctx->refcount, amdgpu_ctx_fini); } } void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr) { - struct amdgpu_ctx *ctx; - struct idr *idp; - uint32_t id; - amdgpu_ctx_mgr_entity_fini(mgr); - - idp = &mgr->ctx_handles; - - idr_for_each_entry(idp, ctx, id) { - if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1) - DRM_ERROR("ctx %p is still alive\n", ctx); - } - idr_destroy(&mgr->ctx_handles); mutex_destroy(&mgr->lock); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index 85376baaa92f..090dfe86f75b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -92,7 +92,6 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr, struct amdgpu_device *adev); -void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index a1450f13d963..0e6e2e2acf5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1786,7 +1786,7 @@ static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused) ti = amdgpu_vm_get_task_info_vm(vm); if (ti) { - seq_printf(m, "pid:%d\tProcess:%s ----------\n", ti->pid, ti->process_name); + seq_printf(m, "pid:%d\tProcess:%s ----------\n", ti->task.pid, ti->process_name); amdgpu_vm_put_task_info(ti); } @@ -1902,7 +1902,7 @@ no_preempt: continue; } job = to_amdgpu_job(s_job); - if (preempted && (&job->hw_fence) == fence) + if (preempted && (&job->hw_fence.base) == fence) /* mark the job as preempted */ job->preemption_status |= AMDGPU_IB_PREEMPTED; } @@ -2105,6 +2105,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_rap_debugfs_init(adev); amdgpu_securedisplay_debugfs_init(adev); amdgpu_fw_attestation_debugfs_init(adev); + amdgpu_psp_debugfs_init(adev); debugfs_create_file("amdgpu_evict_vram", 0400, root, adev, &amdgpu_evict_vram_fops); @@ -2130,6 +2131,55 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) return 0; } +static int amdgpu_pt_info_read(struct seq_file *m, void *unused) +{ + struct drm_file *file; + struct amdgpu_fpriv *fpriv; + struct amdgpu_bo *root_bo; + int r; + + file = m->private; + if (!file) + return -EINVAL; + + fpriv = file->driver_priv; + if (!fpriv || !fpriv->vm.root.bo) + return -ENODEV; + + root_bo = amdgpu_bo_ref(fpriv->vm.root.bo); + r = amdgpu_bo_reserve(root_bo, true); + if (r) { + amdgpu_bo_unref(&root_bo); + return -EINVAL; + } + + seq_printf(m, "gpu_address: 0x%llx\n", amdgpu_bo_gpu_offset(fpriv->vm.root.bo)); + + amdgpu_bo_unreserve(root_bo); + amdgpu_bo_unref(&root_bo); + + return 0; +} + +static int amdgpu_pt_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, amdgpu_pt_info_read, inode->i_private); +} + +static const struct file_operations amdgpu_pt_info_fops = { + .owner = THIS_MODULE, + .open = amdgpu_pt_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void amdgpu_debugfs_vm_init(struct drm_file *file) +{ + debugfs_create_file("vm_pagetable_info", 0444, file->debugfs_client, file, + &amdgpu_pt_info_fops); +} + #else int amdgpu_debugfs_init(struct amdgpu_device *adev) { @@ -2139,4 +2189,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) { return 0; } +void amdgpu_debugfs_vm_init(struct drm_file *file) +{ +} #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h index 0425432d8659..e7b3c38e5186 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h @@ -33,4 +33,5 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev); void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); void amdgpu_debugfs_gem_init(struct amdgpu_device *adev); void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev); +void amdgpu_debugfs_vm_init(struct drm_file *file); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index 7b50741dc097..8a026bc9ea44 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -220,10 +220,10 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, coredump->reset_time.tv_nsec); - if (coredump->reset_task_info.pid) + if (coredump->reset_task_info.task.pid) drm_printf(&p, "process_name: %s PID: %d\n", coredump->reset_task_info.process_name, - coredump->reset_task_info.pid); + coredump->reset_task_info.task.pid); /* SOC Information */ drm_printf(&p, "\nSOC Information\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a30111d2c3ea..6f93473436be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -85,6 +85,7 @@ #if IS_ENABLED(CONFIG_X86) #include <asm/intel-family.h> +#include <asm/cpu_device_id.h> #endif MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); @@ -231,7 +232,7 @@ static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev) { int ret = 0; - if (!amdgpu_sriov_vf(adev)) + if (amdgpu_nbio_is_replay_cnt_supported(adev)) ret = sysfs_create_file(&adev->dev->kobj, &dev_attr_pcie_replay_count.attr); @@ -240,7 +241,7 @@ static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev) static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev) { - if (!amdgpu_sriov_vf(adev)) + if (amdgpu_nbio_is_replay_cnt_supported(adev)) sysfs_remove_file(&adev->dev->kobj, &dev_attr_pcie_replay_count.attr); } @@ -410,19 +411,16 @@ static const struct attribute_group amdgpu_board_attrs_group = { static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); - /** * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control * - * @dev: drm_device pointer + * @adev: amdgpu device pointer * * Returns true if the device is a dGPU with ATPX power control, * otherwise return false. */ -bool amdgpu_device_supports_px(struct drm_device *dev) +bool amdgpu_device_supports_px(struct amdgpu_device *adev) { - struct amdgpu_device *adev = drm_to_adev(dev); - if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid()) return true; return false; @@ -431,15 +429,13 @@ bool amdgpu_device_supports_px(struct drm_device *dev) /** * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources * - * @dev: drm_device pointer + * @adev: amdgpu device pointer * * Returns true if the device is a dGPU with ACPI power control, * otherwise return false. */ -bool amdgpu_device_supports_boco(struct drm_device *dev) +bool amdgpu_device_supports_boco(struct amdgpu_device *adev) { - struct amdgpu_device *adev = drm_to_adev(dev); - if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) return false; @@ -452,29 +448,24 @@ bool amdgpu_device_supports_boco(struct drm_device *dev) /** * amdgpu_device_supports_baco - Does the device support BACO * - * @dev: drm_device pointer + * @adev: amdgpu device pointer * * Return: * 1 if the device supports BACO; * 3 if the device supports MACO (only works if BACO is supported) * otherwise return 0. */ -int amdgpu_device_supports_baco(struct drm_device *dev) +int amdgpu_device_supports_baco(struct amdgpu_device *adev) { - struct amdgpu_device *adev = drm_to_adev(dev); - return amdgpu_asic_supports_baco(adev); } void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev) { - struct drm_device *dev; int bamaco_support; - dev = adev_to_drm(adev); - adev->pm.rpm_mode = AMDGPU_RUNPM_NONE; - bamaco_support = amdgpu_device_supports_baco(dev); + bamaco_support = amdgpu_device_supports_baco(adev); switch (amdgpu_runtime_pm) { case 2: @@ -494,10 +485,12 @@ void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev) break; case -1: case -2: - if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */ + if (amdgpu_device_supports_px(adev)) { + /* enable PX as runtime mode */ adev->pm.rpm_mode = AMDGPU_RUNPM_PX; dev_info(adev->dev, "Using ATPX for runtime pm\n"); - } else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */ + } else if (amdgpu_device_supports_boco(adev)) { + /* enable boco as runtime mode */ adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; dev_info(adev->dev, "Using BOCO for runtime pm\n"); } else { @@ -511,12 +504,13 @@ void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev) break; case CHIP_VEGA10: /* enable BACO as runpm mode if noretry=0 */ - if (!adev->gmc.noretry) + if (!adev->gmc.noretry && !amdgpu_passthrough(adev)) adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; break; default: /* enable BACO as runpm mode on CI+ */ - adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + if (!amdgpu_passthrough(adev)) + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; break; } @@ -545,14 +539,14 @@ no_runtime_pm: * amdgpu_device_supports_smart_shift - Is the device dGPU with * smart shift support * - * @dev: drm_device pointer + * @adev: amdgpu device pointer * * Returns true if the device is a dGPU with Smart Shift support, * otherwise returns false. */ -bool amdgpu_device_supports_smart_shift(struct drm_device *dev) +bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev) { - return (amdgpu_device_supports_boco(dev) && + return (amdgpu_device_supports_boco(adev) && amdgpu_acpi_is_power_shift_control_supported()); } @@ -1286,14 +1280,14 @@ u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev) */ static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg) { - DRM_ERROR("Invalid callback to read register 0x%04X\n", reg); + dev_err(adev->dev, "Invalid callback to read register 0x%04X\n", reg); BUG(); return 0; } static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg) { - DRM_ERROR("Invalid callback to read register 0x%llX\n", reg); + dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg); BUG(); return 0; } @@ -1310,15 +1304,17 @@ static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg */ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) { - DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n", - reg, v); + dev_err(adev->dev, + "Invalid callback to write register 0x%04X with 0x%08X\n", reg, + v); BUG(); } static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v) { - DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n", - reg, v); + dev_err(adev->dev, + "Invalid callback to write register 0x%llX with 0x%08X\n", reg, + v); BUG(); } @@ -1334,14 +1330,15 @@ static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, ui */ static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg) { - DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg); + dev_err(adev->dev, "Invalid callback to read 64 bit register 0x%04X\n", + reg); BUG(); return 0; } static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg) { - DRM_ERROR("Invalid callback to read register 0x%llX\n", reg); + dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg); BUG(); return 0; } @@ -1358,15 +1355,17 @@ static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t r */ static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v) { - DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n", - reg, v); + dev_err(adev->dev, + "Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n", + reg, v); BUG(); } static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v) { - DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n", - reg, v); + dev_err(adev->dev, + "Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n", + reg, v); BUG(); } @@ -1384,8 +1383,9 @@ static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev, uint32_t block, uint32_t reg) { - DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n", - reg, block); + dev_err(adev->dev, + "Invalid callback to read register 0x%04X in block 0x%04X\n", + reg, block); BUG(); return 0; } @@ -1405,8 +1405,9 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev, uint32_t block, uint32_t reg, uint32_t v) { - DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n", - reg, block, v); + dev_err(adev->dev, + "Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n", + reg, block, v); BUG(); } @@ -1680,6 +1681,9 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + if (!amdgpu_rebar) + return 0; + /* resizing on Dell G5 SE platforms causes problems with runtime pm */ if ((amdgpu_runtime_pm != 0) && adev->pdev->vendor == PCI_VENDOR_ID_ATI && @@ -1689,7 +1693,9 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */ if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR)) - DRM_WARN("System can't access extended configuration space, please check!!\n"); + dev_warn( + adev->dev, + "System can't access extended configuration space, please check!!\n"); /* skip if the bios has already enabled large BAR */ if (adev->gmc.real_vram_size && @@ -1729,9 +1735,10 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) r = pci_resize_resource(adev->pdev, 0, rbar_size); if (r == -ENOSPC) - DRM_INFO("Not enough PCI address space for a large BAR."); + dev_info(adev->dev, + "Not enough PCI address space for a large BAR."); else if (r && r != -ENOTSUPP) - DRM_ERROR("Problem resizing BAR0 (%d).", r); + dev_err(adev->dev, "Problem resizing BAR0 (%d).", r); pci_assign_unassigned_bus_resources(adev->pdev->bus); @@ -1833,8 +1840,8 @@ bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev) case 0: return false; default: - DRM_ERROR("Invalid value for amdgpu.seamless: %d\n", - amdgpu_seamless); + dev_err(adev->dev, "Invalid value for amdgpu.seamless: %d\n", + amdgpu_seamless); return false; } @@ -1870,6 +1877,35 @@ static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device return true; } +static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev) +{ +#if IS_ENABLED(CONFIG_X86) + struct cpuinfo_x86 *c = &cpu_data(0); + + if (!(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) + return false; + + if (c->x86 == 6 && + adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) { + switch (c->x86_model) { + case VFM_MODEL(INTEL_ALDERLAKE): + case VFM_MODEL(INTEL_ALDERLAKE_L): + case VFM_MODEL(INTEL_RAPTORLAKE): + case VFM_MODEL(INTEL_RAPTORLAKE_P): + case VFM_MODEL(INTEL_RAPTORLAKE_S): + return true; + default: + return false; + } + } else { + return false; + } +#else + return false; +#endif +} + /** * amdgpu_device_should_use_aspm - check if the device should program ASPM * @@ -1894,7 +1930,7 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) } if (adev->flags & AMD_IS_APU) return false; - if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK)) + if (amdgpu_device_aspm_support_quirk(adev)) return false; return pcie_aspm_enabled(adev->pdev); } @@ -1981,7 +2017,7 @@ static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) return; if (!is_os_64) { - DRM_WARN("Not 64-bit OS, feature not supported\n"); + dev_warn(adev->dev, "Not 64-bit OS, feature not supported\n"); goto def_value; } si_meminfo(&si); @@ -1996,7 +2032,7 @@ static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) if (total_memory < dram_size_seven_GB) goto def_value1; } else { - DRM_WARN("Smu memory pool size not supported\n"); + dev_warn(adev->dev, "Smu memory pool size not supported\n"); goto def_value; } adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28; @@ -2004,7 +2040,7 @@ static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) return; def_value1: - DRM_WARN("No enough system memory\n"); + dev_warn(adev->dev, "No enough system memory\n"); def_value: adev->pm.smu_prv_buffer_size = 0; } @@ -2112,8 +2148,31 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - for (i = 0; i < MAX_XCP; i++) - adev->enforce_isolation[i] = !!enforce_isolation; + for (i = 0; i < MAX_XCP; i++) { + switch (amdgpu_enforce_isolation) { + case -1: + case 0: + default: + /* disable */ + adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE; + break; + case 1: + /* enable */ + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE; + break; + case 2: + /* enable legacy mode */ + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY; + break; + case 3: + /* enable only process isolation without submitting cleaner shader */ + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER; + break; + } + } return 0; } @@ -2133,7 +2192,8 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, struct drm_device *dev = pci_get_drvdata(pdev); int r; - if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF) + if (amdgpu_device_supports_px(drm_to_adev(dev)) && + state == VGA_SWITCHEROO_OFF) return; if (state == VGA_SWITCHEROO_ON) { @@ -2145,12 +2205,13 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, amdgpu_device_load_pci_state(pdev); r = pci_enable_device(pdev); if (r) - DRM_WARN("pci_enable_device failed (%d)\n", r); + dev_warn(&pdev->dev, "pci_enable_device failed (%d)\n", + r); amdgpu_device_resume(dev, true); dev->switch_power_state = DRM_SWITCH_POWER_ON; } else { - pr_info("switched off\n"); + dev_info(&pdev->dev, "switched off\n"); dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; amdgpu_device_prepare(dev); amdgpu_device_suspend(dev, true); @@ -2217,8 +2278,9 @@ int amdgpu_device_ip_set_clockgating_state(void *dev, r = adev->ip_blocks[i].version->funcs->set_clockgating_state( &adev->ip_blocks[i], state); if (r) - DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "set_clockgating_state of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); } return r; } @@ -2251,8 +2313,9 @@ int amdgpu_device_ip_set_powergating_state(void *dev, r = adev->ip_blocks[i].version->funcs->set_powergating_state( &adev->ip_blocks[i], state); if (r) - DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "set_powergating_state of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); } return r; } @@ -2468,9 +2531,11 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) } } - DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n", - amdgpu_virtual_display, pci_address_name, - adev->enable_virtual_display, adev->mode_info.num_crtc); + dev_info( + adev->dev, + "virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n", + amdgpu_virtual_display, pci_address_name, + adev->enable_virtual_display, adev->mode_info.num_crtc); kfree(pciaddstr); } @@ -2481,8 +2546,9 @@ void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) { adev->mode_info.num_crtc = 1; adev->enable_virtual_display = true; - DRM_INFO("virtual_display:%d, num_crtc:%d\n", - adev->enable_virtual_display, adev->mode_info.num_crtc); + dev_info(adev->dev, "virtual_display:%d, num_crtc:%d\n", + adev->enable_virtual_display, + adev->mode_info.num_crtc); } } @@ -2689,6 +2755,13 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) break; } + /* Check for IP version 9.4.3 with A0 hardware */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) && + !amdgpu_device_get_rev_id(adev)) { + dev_err(adev->dev, "Unsupported A0 hardware\n"); + return -ENODEV; /* device unsupported - no device error */ + } + if (amdgpu_has_atpx() && (amdgpu_is_atpx_hybrid() || amdgpu_has_atpx_dgpu_power_cntl()) && @@ -2701,7 +2774,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->has_pr3 = parent ? pci_pr3_present(parent) : false; } - adev->pm.pp_feature = amdgpu_pp_feature_mask; if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; @@ -2710,21 +2782,29 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (!amdgpu_device_pcie_dynamic_switching_supported(adev)) adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK; + adev->virt.is_xgmi_node_migrate_enabled = false; + if (amdgpu_sriov_vf(adev)) { + adev->virt.is_xgmi_node_migrate_enabled = + amdgpu_ip_version((adev), GC_HWIP, 0) == IP_VERSION(9, 4, 4); + } + total = true; for (i = 0; i < adev->num_ip_blocks; i++) { ip_block = &adev->ip_blocks[i]; if ((amdgpu_ip_block_mask & (1 << i)) == 0) { - DRM_WARN("disabled ip block: %d <%s>\n", - i, adev->ip_blocks[i].version->funcs->name); + dev_warn(adev->dev, "disabled ip block: %d <%s>\n", i, + adev->ip_blocks[i].version->funcs->name); adev->ip_blocks[i].status.valid = false; } else if (ip_block->version->funcs->early_init) { r = ip_block->version->funcs->early_init(ip_block); if (r == -ENOENT) { adev->ip_blocks[i].status.valid = false; } else if (r) { - DRM_ERROR("early_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "early_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, + r); total = false; } else { adev->ip_blocks[i].status.valid = true; @@ -2805,8 +2885,10 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev) adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { - DRM_ERROR("hw_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "hw_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, + r); return r; } adev->ip_blocks[i].status.hw = true; @@ -2830,8 +2912,9 @@ static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev) continue; r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { - DRM_ERROR("hw_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "hw_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); return r; } adev->ip_blocks[i].status.hw = true; @@ -2869,8 +2952,11 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) } else { r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { - DRM_ERROR("hw_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "hw_init of IP block <%s> failed %d\n", + adev->ip_blocks[i] + .version->funcs->name, + r); return r; } adev->ip_blocks[i].status.hw = true; @@ -2925,25 +3011,29 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) r = drm_sched_init(&ring->sched, &args); if (r) { - DRM_ERROR("Failed to create scheduler on ring %s.\n", - ring->name); + dev_err(adev->dev, + "Failed to create scheduler on ring %s.\n", + ring->name); return r; } r = amdgpu_uvd_entity_init(adev, ring); if (r) { - DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n", - ring->name); + dev_err(adev->dev, + "Failed to create UVD scheduling entity on ring %s.\n", + ring->name); return r; } r = amdgpu_vce_entity_init(adev, ring); if (r) { - DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n", - ring->name); + dev_err(adev->dev, + "Failed to create VCE scheduling entity on ring %s.\n", + ring->name); return r; } } - amdgpu_xcp_update_partition_sched_list(adev); + if (adev->xcp_mgr) + amdgpu_xcp_update_partition_sched_list(adev); return 0; } @@ -2975,8 +3065,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->funcs->sw_init) { r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]); if (r) { - DRM_ERROR("sw_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "sw_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, + r); goto init_failed; } } @@ -2990,7 +3082,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) /* need to do common hw init early so everything is set up for gmc */ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { - DRM_ERROR("hw_init %d failed %d\n", i, r); + dev_err(adev->dev, "hw_init %d failed %d\n", i, + r); goto init_failed; } adev->ip_blocks[i].status.hw = true; @@ -3002,17 +3095,21 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) r = amdgpu_device_mem_scratch_init(adev); if (r) { - DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r); + dev_err(adev->dev, + "amdgpu_mem_scratch_init failed %d\n", + r); goto init_failed; } r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { - DRM_ERROR("hw_init %d failed %d\n", i, r); + dev_err(adev->dev, "hw_init %d failed %d\n", i, + r); goto init_failed; } r = amdgpu_device_wb_init(adev); if (r) { - DRM_ERROR("amdgpu_device_wb_init failed %d\n", r); + dev_err(adev->dev, + "amdgpu_device_wb_init failed %d\n", r); goto init_failed; } adev->ip_blocks[i].status.hw = true; @@ -3024,14 +3121,16 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) AMDGPU_GEM_DOMAIN_GTT, AMDGPU_CSA_SIZE); if (r) { - DRM_ERROR("allocate CSA failed %d\n", r); + dev_err(adev->dev, + "allocate CSA failed %d\n", r); goto init_failed; } } r = amdgpu_seq64_init(adev); if (r) { - DRM_ERROR("allocate seq64 failed %d\n", r); + dev_err(adev->dev, "allocate seq64 failed %d\n", + r); goto init_failed; } } @@ -3172,6 +3271,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) * always assumed to be lost. */ switch (amdgpu_asic_reset_method(adev)) { + case AMD_RESET_METHOD_LINK: case AMD_RESET_METHOD_BACO: case AMD_RESET_METHOD_MODE1: return true; @@ -3220,8 +3320,10 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev, r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i], state); if (r) { - DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "set_clockgating_state(gate) of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, + r); return r; } } @@ -3257,8 +3359,10 @@ int amdgpu_device_set_pg_state(struct amdgpu_device *adev, r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i], state); if (r) { - DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "set_powergating_state(gate) of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, + r); return r; } } @@ -3324,8 +3428,10 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->funcs->late_init) { r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]); if (r) { - DRM_ERROR("late_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "late_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, + r); return r; } } @@ -3334,7 +3440,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) r = amdgpu_ras_late_init(adev); if (r) { - DRM_ERROR("amdgpu_ras_late_init failed %d", r); + dev_err(adev->dev, "amdgpu_ras_late_init failed %d", r); return r; } @@ -3348,7 +3454,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) r = amdgpu_device_enable_mgpu_fan_boost(); if (r) - DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); + dev_err(adev->dev, "enable mgpu fan boost failed (%d).\n", r); /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */ if (amdgpu_passthrough(adev) && @@ -3381,7 +3487,9 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) r = amdgpu_xgmi_set_pstate(gpu_instance->adev, AMDGPU_XGMI_PSTATE_MIN); if (r) { - DRM_ERROR("pstate setting failed (%d).\n", r); + dev_err(adev->dev, + "pstate setting failed (%d).\n", + r); break; } } @@ -3395,17 +3503,19 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block) { + struct amdgpu_device *adev = ip_block->adev; int r; if (!ip_block->version->funcs->hw_fini) { - DRM_ERROR("hw_fini of IP block <%s> not defined\n", - ip_block->version->funcs->name); + dev_err(adev->dev, "hw_fini of IP block <%s> not defined\n", + ip_block->version->funcs->name); } else { r = ip_block->version->funcs->hw_fini(ip_block); /* XXX handle errors */ if (r) { - DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", - ip_block->version->funcs->name, r); + dev_dbg(adev->dev, + "hw_fini of IP block <%s> failed %d\n", + ip_block->version->funcs->name, r); } } @@ -3446,15 +3556,17 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]); if (r) { - DRM_DEBUG("early_fini of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_dbg(adev->dev, + "early_fini of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); } } amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); - amdgpu_amdkfd_suspend(adev, false); + amdgpu_amdkfd_suspend(adev, true); + amdgpu_userq_suspend(adev); /* Workaround for ASICs need to disable SMC first */ amdgpu_device_smu_fini_early(adev); @@ -3468,7 +3580,8 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) { if (amdgpu_virt_release_full_gpu(adev, false)) - DRM_ERROR("failed to release exclusive mode on fini\n"); + dev_err(adev->dev, + "failed to release exclusive mode on fini\n"); } return 0; @@ -3510,13 +3623,16 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) amdgpu_device_mem_scratch_fini(adev); amdgpu_ib_pool_fini(adev); amdgpu_seq64_fini(adev); + amdgpu_doorbell_fini(adev); } if (adev->ip_blocks[i].version->funcs->sw_fini) { r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]); /* XXX handle errors */ if (r) { - DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_dbg(adev->dev, + "sw_fini of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, + r); } } adev->ip_blocks[i].status.sw = false; @@ -3549,7 +3665,7 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work) r = amdgpu_ib_ring_tests(adev); if (r) - DRM_ERROR("ib ring test failed (%d).\n", r); + dev_err(adev->dev, "ib ring test failed (%d).\n", r); } static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) @@ -3643,6 +3759,13 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) adev, adev->ip_blocks[i].version->type)) continue; + /* Since we skip suspend for S0i3, we need to cancel the delayed + * idle work here as the suspend callback never gets called. + */ + if (adev->in_s0ix && + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX && + amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0)) + cancel_delayed_work_sync(&adev->gfx.idle_work); /* skip suspend of gfx/mes and psp for S0ix * gfx is in gfxoff state, so on resume it will exit gfxoff just * like at runtime. PSP is also part of the always on hardware @@ -3683,8 +3806,9 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state); if (r) { - DRM_ERROR("SMC failed to set mp1 state %d, %d\n", - adev->mp1_state, r); + dev_err(adev->dev, + "SMC failed to set mp1 state %d, %d\n", + adev->mp1_state, r); return r; } } @@ -3968,12 +4092,14 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) /** * amdgpu_device_asic_has_dc_support - determine if DC supports the asic * + * @pdev : pci device context * @asic_type: AMD asic type * * Check if there is DC (new modesetting infrastructre) support for an asic. * returns true if DC has support, false if not. */ -bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) +bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev, + enum amd_asic_type asic_type) { switch (asic_type) { #ifdef CONFIG_DRM_AMDGPU_SI @@ -4016,7 +4142,9 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) #else default: if (amdgpu_dc > 0) - DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n"); + dev_info_once( + &pdev->dev, + "Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n"); return false; #endif } @@ -4035,7 +4163,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)) return false; - return amdgpu_device_asic_has_dc_support(adev->asic_type); + return amdgpu_device_asic_has_dc_support(adev->pdev, adev->asic_type); } static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) @@ -4057,13 +4185,13 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { task_barrier_enter(&hive->tb); - adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev)); + adev->asic_reset_res = amdgpu_device_baco_enter(adev); if (adev->asic_reset_res) goto fail; task_barrier_exit(&hive->tb); - adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev)); + adev->asic_reset_res = amdgpu_device_baco_exit(adev); if (adev->asic_reset_res) goto fail; @@ -4077,7 +4205,8 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) fail: if (adev->asic_reset_res) - DRM_WARN("ASIC reset failed with error, %d for drm dev, %s", + dev_warn(adev->dev, + "ASIC reset failed with error, %d for drm dev, %s", adev->asic_reset_res, adev_to_drm(adev)->unique); amdgpu_put_xgmi_hive(hive); } @@ -4091,18 +4220,10 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) int ret = 0; /* - * By default timeout for non compute jobs is 10000 - * and 60000 for compute jobs. - * In SR-IOV or passthrough mode, timeout for compute - * jobs are 60000 by default. + * By default timeout for jobs is 10 sec */ - adev->gfx_timeout = msecs_to_jiffies(10000); + adev->compute_timeout = adev->gfx_timeout = msecs_to_jiffies(10000); adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; - if (amdgpu_sriov_vf(adev)) - adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ? - msecs_to_jiffies(60000) : msecs_to_jiffies(10000); - else - adev->compute_timeout = msecs_to_jiffies(60000); if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { while ((timeout_setting = strsep(&input, ",")) && @@ -4201,7 +4322,7 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) adev->gfx.mcbp = true; if (adev->gfx.mcbp) - DRM_INFO("MCBP is enabled\n"); + dev_info(adev->dev, "MCBP is enabled\n"); } /** @@ -4217,7 +4338,6 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) int amdgpu_device_init(struct amdgpu_device *adev, uint32_t flags) { - struct drm_device *ddev = adev_to_drm(adev); struct pci_dev *pdev = adev->pdev; int r, i; bool px = false; @@ -4269,9 +4389,11 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg; adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg; - DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", - amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device, - pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); + dev_info( + adev->dev, + "initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", + amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device, + pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); /* mutex initialization are all done here so we * can recall function without having locking issues @@ -4299,9 +4421,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_sync_create(&adev->isolation[i].active); amdgpu_sync_create(&adev->isolation[i].prev); } - mutex_init(&adev->gfx.kfd_sch_mutex); + mutex_init(&adev->gfx.userq_sch_mutex); mutex_init(&adev->gfx.workload_profile_mutex); mutex_init(&adev->vcn.workload_profile_mutex); + mutex_init(&adev->userq_mutex); amdgpu_device_init_apu_flags(adev); @@ -4321,12 +4444,16 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->virt.rlcg_reg_lock); spin_lock_init(&adev->wb.lock); + xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ); + INIT_LIST_HEAD(&adev->reset_list); INIT_LIST_HEAD(&adev->ras_list); INIT_LIST_HEAD(&adev->pm.od_kobj_list); + INIT_LIST_HEAD(&adev->userq_mgr_list); + INIT_DELAYED_WORK(&adev->delayed_init_work, amdgpu_device_delayed_init_work_handler); INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, @@ -4383,8 +4510,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (!adev->rmmio) return -ENOMEM; - DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); - DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size); + dev_info(adev->dev, "register mmio base: 0x%08X\n", + (uint32_t)adev->rmmio_base); + dev_info(adev->dev, "register mmio size: %u\n", + (unsigned int)adev->rmmio_size); /* * Reset domain needs to be present early, before XGMI hive discovered @@ -4521,7 +4650,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = -EINVAL; goto failed; } - DRM_INFO("GPU posting now...\n"); + dev_info(adev->dev, "GPU posting now...\n"); r = amdgpu_device_asic_init(adev); if (r) { dev_err(adev->dev, "gpu post error!\n"); @@ -4631,12 +4760,12 @@ fence_driver_init: r = amdgpu_pm_sysfs_init(adev); if (r) - DRM_ERROR("registering pm sysfs failed (%d).\n", r); + dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r); r = amdgpu_ucode_sysfs_init(adev); if (r) { adev->ucode_sysfs_en = false; - DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); + dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r); } else adev->ucode_sysfs_en = true; @@ -4651,7 +4780,7 @@ fence_driver_init: amdgpu_fru_sysfs_init(adev); amdgpu_reg_state_sysfs_init(adev); - amdgpu_xcp_cfg_sysfs_init(adev); + amdgpu_xcp_sysfs_init(adev); if (IS_ENABLED(CONFIG_PERF_EVENTS)) r = amdgpu_pmu_init(adev); @@ -4669,7 +4798,7 @@ fence_driver_init: if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) vga_client_register(adev->pdev, amdgpu_device_vga_set_decode); - px = amdgpu_device_supports_px(ddev); + px = amdgpu_device_supports_px(adev); if (px || (!dev_is_removable(&adev->pdev->dev) && apple_gmux_detect(NULL, NULL))) @@ -4781,7 +4910,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) amdgpu_fru_sysfs_fini(adev); amdgpu_reg_state_sysfs_fini(adev); - amdgpu_xcp_cfg_sysfs_fini(adev); + amdgpu_xcp_sysfs_fini(adev); /* disable ras feature must before hw fini */ amdgpu_ras_pre_fini(adev); @@ -4835,7 +4964,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) kfree(adev->xcp_mgr); adev->xcp_mgr = NULL; - px = amdgpu_device_supports_px(adev_to_drm(adev)); + px = amdgpu_device_supports_px(adev); if (px || (!dev_is_removable(&adev->pdev->dev) && apple_gmux_detect(NULL, NULL))) @@ -4851,7 +4980,6 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) iounmap(adev->rmmio); adev->rmmio = NULL; - amdgpu_doorbell_fini(adev); drm_dev_exit(idx); } @@ -4885,8 +5013,16 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) return 0; ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); - if (ret) - DRM_WARN("evicting device resources failed\n"); + if (ret) { + dev_warn(adev->dev, "evicting device resources failed\n"); + return ret; + } + + if (adev->in_s4) { + ret = ttm_device_prepare_hibernation(&adev->mman.bdev); + if (ret) + dev_err(adev->dev, "prepare hibernation failed, %d\n", ret); + } return ret; } @@ -4900,28 +5036,20 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) * @data: data * * This function is called when the system is about to suspend or hibernate. - * It is used to evict resources from the device before the system goes to - * sleep while there is still access to swap. + * It is used to set the appropriate flags so that eviction can be optimized + * in the pm prepare callback. */ static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, void *data) { struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb); - int r; switch (mode) { case PM_HIBERNATION_PREPARE: adev->in_s4 = true; - fallthrough; - case PM_SUSPEND_PREPARE: - r = amdgpu_device_evict_resources(adev); - /* - * This is considered non-fatal at this time because - * amdgpu_device_prepare() will also fatally evict resources. - * See https://gitlab.freedesktop.org/drm/amd/-/issues/3781 - */ - if (r) - drm_warn(adev_to_drm(adev), "Failed to evict resources, freeze active processes if problems occur: %d\n", r); + break; + case PM_POST_HIBERNATION: + adev->in_s4 = false; break; } @@ -4942,15 +5070,13 @@ int amdgpu_device_prepare(struct drm_device *dev) struct amdgpu_device *adev = drm_to_adev(dev); int i, r; - amdgpu_choose_low_power_state(adev); - if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; /* Evict the majority of BOs before starting suspend sequence */ r = amdgpu_device_evict_resources(adev); if (r) - goto unprepare; + return r; flush_delayed_work(&adev->gfx.gfx_off_delay_work); @@ -4961,15 +5087,32 @@ int amdgpu_device_prepare(struct drm_device *dev) continue; r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]); if (r) - goto unprepare; + return r; } return 0; +} -unprepare: - adev->in_s0ix = adev->in_s3 = adev->in_s4 = false; +/** + * amdgpu_device_complete - complete power state transition + * + * @dev: drm dev pointer + * + * Undo the changes from amdgpu_device_prepare. This will be + * called on all resume transitions, including those that failed. + */ +void amdgpu_device_complete(struct drm_device *dev) +{ + struct amdgpu_device *adev = drm_to_adev(dev); + int i; - return r; + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + if (!adev->ip_blocks[i].version->funcs->complete) + continue; + adev->ip_blocks[i].version->funcs->complete(&adev->ip_blocks[i]); + } } /** @@ -4993,14 +5136,16 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) adev->in_suspend = true; if (amdgpu_sriov_vf(adev)) { + if (!adev->in_s0ix && !adev->in_runpm) + amdgpu_amdkfd_suspend_process(adev); amdgpu_virt_fini_data_exchange(adev); r = amdgpu_virt_request_full_gpu(adev, false); if (r) return r; } - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3)) - DRM_WARN("smart shift update failed\n"); + if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3)) + dev_warn(adev->dev, "smart shift update failed\n"); if (notify_clients) drm_client_dev_suspend(adev_to_drm(adev), false); @@ -5011,8 +5156,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) amdgpu_device_ip_suspend_phase1(adev); - if (!adev->in_s0ix) - amdgpu_amdkfd_suspend(adev, adev->in_runpm); + if (!adev->in_s0ix) { + amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); + amdgpu_userq_suspend(adev); + } r = amdgpu_device_evict_resources(adev); if (r) @@ -5034,6 +5181,32 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) return 0; } +static inline int amdgpu_virt_resume(struct amdgpu_device *adev) +{ + int r; + unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id; + + /* During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO) + * may not work. The access could be blocked by nBIF protection as VF isn't in + * exclusive access mode. Exclusive access is enabled now, disable/enable MSIX + * so that QEMU reprograms MSIX table. + */ + amdgpu_restore_msix(adev); + + r = adev->gfxhub.funcs->get_xgmi_info(adev); + if (r) + return r; + + dev_info(adev->dev, "xgmi node, old id %d, new id %d\n", + prev_physical_node_id, adev->gmc.xgmi.physical_node_id); + + adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev); + adev->vm_manager.vram_base_offset += + adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; + + return 0; +} + /** * amdgpu_device_resume - initiate device resume * @@ -5055,6 +5228,12 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients) return r; } + if (amdgpu_virt_xgmi_migrate_enabled(adev)) { + r = amdgpu_virt_resume(adev); + if (r) + goto exit; + } + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; @@ -5076,7 +5255,11 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients) } if (!adev->in_s0ix) { - r = amdgpu_amdkfd_resume(adev, adev->in_runpm); + r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); + if (r) + goto exit; + + r = amdgpu_userq_resume(adev); if (r) goto exit; } @@ -5091,6 +5274,9 @@ exit: if (amdgpu_sriov_vf(adev)) { amdgpu_virt_init_data_exchange(adev); amdgpu_virt_release_full_gpu(adev, true); + + if (!adev->in_s0ix && !r && !adev->in_runpm) + r = amdgpu_amdkfd_resume_process(adev); } if (r) @@ -5125,13 +5311,12 @@ exit: dev->dev->power.disable_depth--; #endif } - adev->in_suspend = false; - if (adev->enable_mes) - amdgpu_mes_self_test(adev); + amdgpu_vram_mgr_clear_reset_blocks(adev); + adev->in_suspend = false; - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0)) - DRM_WARN("smart shift update failed\n"); + if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0)) + dev_warn(adev->dev, "smart shift update failed\n"); return 0; } @@ -5510,6 +5695,29 @@ mode1_reset_failed: return ret; } +int amdgpu_device_link_reset(struct amdgpu_device *adev) +{ + int ret = 0; + + dev_info(adev->dev, "GPU link reset\n"); + + if (!adev->pcie_reset_ctx.occurs_dpc) + ret = amdgpu_dpm_link_reset(adev); + + if (ret) + goto link_reset_failed; + + ret = amdgpu_psp_wait_for_bootloader(adev); + if (ret) + goto link_reset_failed; + + return 0; + +link_reset_failed: + dev_err(adev->dev, "GPU link reset failed\n"); + return ret; +} + int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { @@ -5639,7 +5847,9 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job); if (vram_lost) { - DRM_INFO("VRAM is lost due to GPU reset!\n"); + dev_info( + tmp_adev->dev, + "VRAM is lost due to GPU reset!\n"); amdgpu_inc_vram_lost(tmp_adev); } @@ -5814,6 +6024,7 @@ static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev) switch (amdgpu_asic_reset_method(adev)) { case AMD_RESET_METHOD_MODE1: + case AMD_RESET_METHOD_LINK: adev->mp1_state = PP_MP1_STATE_SHUTDOWN; break; case AMD_RESET_METHOD_MODE2: @@ -5917,117 +6128,85 @@ static int amdgpu_device_health_check(struct list_head *device_list_handle) { struct amdgpu_device *tmp_adev; int ret = 0; - u32 status; list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status); - if (PCI_POSSIBLE_ERROR(status)) { - dev_err(tmp_adev->dev, "device lost from bus!"); - ret = -ENODEV; - } + ret |= amdgpu_device_bus_status_check(tmp_adev); } return ret; } -/** - * amdgpu_device_gpu_recover - reset the asic and recover scheduler - * - * @adev: amdgpu_device pointer - * @job: which job trigger hang - * @reset_context: amdgpu reset context pointer - * - * Attempt to reset the GPU if it has hung (all asics). - * Attempt to do soft-reset or full-reset and reinitialize Asic - * Returns 0 for success or an error on failure. - */ - -int amdgpu_device_gpu_recover(struct amdgpu_device *adev, - struct amdgpu_job *job, - struct amdgpu_reset_context *reset_context) +static int amdgpu_device_recovery_prepare(struct amdgpu_device *adev, + struct list_head *device_list, + struct amdgpu_hive_info *hive) { - struct list_head device_list, *device_list_handle = NULL; - bool job_signaled = false; - struct amdgpu_hive_info *hive = NULL; struct amdgpu_device *tmp_adev = NULL; - int i, r = 0; - bool need_emergency_restart = false; - bool audio_suspended = false; - int retry_limit = AMDGPU_MAX_RETRY_LIMIT; - - /* - * If it reaches here because of hang/timeout and a RAS error is - * detected at the same time, let RAS recovery take care of it. - */ - if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) && - !amdgpu_sriov_vf(adev) && - reset_context->src != AMDGPU_RESET_SRC_RAS) { - dev_dbg(adev->dev, - "Gpu recovery from source: %d yielding to RAS error recovery handling", - reset_context->src); - return 0; - } - /* - * Special case: RAS triggered and full reset isn't supported - */ - need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); + int r; /* - * Flush RAM to disk so that after reboot - * the user can read log and see why the system rebooted. - */ - if (need_emergency_restart && amdgpu_ras_get_context(adev) && - amdgpu_ras_get_context(adev)->reboot) { - DRM_WARN("Emergency reboot."); - - ksys_sync_helper(); - emergency_restart(); - } - - dev_info(adev->dev, "GPU %s begin!\n", - need_emergency_restart ? "jobs stop":"reset"); - - if (!amdgpu_sriov_vf(adev)) - hive = amdgpu_get_xgmi_hive(adev); - if (hive) - mutex_lock(&hive->hive_lock); - - reset_context->job = job; - reset_context->hive = hive; - /* * Build list of devices to reset. * In case we are in XGMI hive mode, resort the device list * to put adev in the 1st position. */ - INIT_LIST_HEAD(&device_list); if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) { list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - list_add_tail(&tmp_adev->reset_list, &device_list); + list_add_tail(&tmp_adev->reset_list, device_list); if (adev->shutdown) tmp_adev->shutdown = true; + if (adev->pcie_reset_ctx.occurs_dpc) + tmp_adev->pcie_reset_ctx.in_link_reset = true; } - if (!list_is_first(&adev->reset_list, &device_list)) - list_rotate_to_front(&adev->reset_list, &device_list); - device_list_handle = &device_list; + if (!list_is_first(&adev->reset_list, device_list)) + list_rotate_to_front(&adev->reset_list, device_list); } else { - list_add_tail(&adev->reset_list, &device_list); - device_list_handle = &device_list; + list_add_tail(&adev->reset_list, device_list); } - if (!amdgpu_sriov_vf(adev)) { - r = amdgpu_device_health_check(device_list_handle); + if (!amdgpu_sriov_vf(adev) && (!adev->pcie_reset_ctx.occurs_dpc)) { + r = amdgpu_device_health_check(device_list); if (r) - goto end_reset; + return r; } - /* We need to lock reset domain only once both for XGMI and single device */ - tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, - reset_list); + return 0; +} + +static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev, + struct list_head *device_list) +{ + struct amdgpu_device *tmp_adev = NULL; + + if (list_empty(device_list)) + return; + tmp_adev = + list_first_entry(device_list, struct amdgpu_device, reset_list); amdgpu_device_lock_reset_domain(tmp_adev->reset_domain); +} - /* block all schedulers and reset given job's ring */ - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { +static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev, + struct list_head *device_list) +{ + struct amdgpu_device *tmp_adev = NULL; + + if (list_empty(device_list)) + return; + tmp_adev = + list_first_entry(device_list, struct amdgpu_device, reset_list); + amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); +} +static void amdgpu_device_halt_activities(struct amdgpu_device *adev, + struct amdgpu_job *job, + struct amdgpu_reset_context *reset_context, + struct list_head *device_list, + struct amdgpu_hive_info *hive, + bool need_emergency_restart) +{ + struct amdgpu_device *tmp_adev = NULL; + int i; + + /* block all schedulers and reset given job's ring */ + list_for_each_entry(tmp_adev, device_list, reset_list) { amdgpu_device_set_mp1_state(tmp_adev); /* @@ -6041,7 +6220,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * some audio codec errors. */ if (!amdgpu_device_suspend_display_audio(tmp_adev)) - audio_suspended = true; + tmp_adev->pcie_reset_ctx.audio_suspended = true; amdgpu_ras_set_error_query_ready(tmp_adev, false); @@ -6059,6 +6238,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* disable ras on ALL IPs */ if (!need_emergency_restart && + (!adev->pcie_reset_ctx.occurs_dpc) && amdgpu_device_ip_need_full_reset(tmp_adev)) amdgpu_ras_suspend(tmp_adev); @@ -6075,25 +6255,23 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, } atomic_inc(&tmp_adev->gpu_reset_counter); } +} - if (need_emergency_restart) - goto skip_sched_resume; - - /* - * Must check guilty signal here since after this point all old - * HW fences are force signaled. - * - * job->base holds a reference to parent fence - */ - if (job && dma_fence_is_signaled(&job->hw_fence)) { - job_signaled = true; - dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); - goto skip_hw_reset; - } +static int amdgpu_device_asic_reset(struct amdgpu_device *adev, + struct list_head *device_list, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_device *tmp_adev = NULL; + int retry_limit = AMDGPU_MAX_RETRY_LIMIT; + int r = 0; retry: /* Rest of adevs pre asic reset from XGMI hive. */ - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + list_for_each_entry(tmp_adev, device_list, reset_list) { + if (adev->pcie_reset_ctx.occurs_dpc) + tmp_adev->no_hw_access = true; r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); + if (adev->pcie_reset_ctx.occurs_dpc) + tmp_adev->no_hw_access = false; /*TODO Should we stop ?*/ if (r) { dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", @@ -6105,6 +6283,11 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ /* Actual ASIC resets if needed.*/ /* Host driver will handle XGMI hive reset for SRIOV */ if (amdgpu_sriov_vf(adev)) { + + /* Bail out of reset early */ + if (amdgpu_ras_is_rma(adev)) + return -ENODEV; + if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) { dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n"); amdgpu_ras_set_fed(adev, true); @@ -6119,12 +6302,12 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ if (r) adev->asic_reset_res = r; } else { - r = amdgpu_do_asic_reset(device_list_handle, reset_context); + r = amdgpu_do_asic_reset(device_list, reset_context); if (r && r == -EAGAIN) goto retry; } - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + list_for_each_entry(tmp_adev, device_list, reset_list) { /* * Drop any pending non scheduler resets queued before reset is done. * Any reset scheduled after this point would be valid. Scheduler resets @@ -6134,10 +6317,18 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ amdgpu_device_stop_pending_resets(tmp_adev); } -skip_hw_reset: + return r; +} + +static int amdgpu_device_sched_resume(struct list_head *device_list, + struct amdgpu_reset_context *reset_context, + bool job_signaled) +{ + struct amdgpu_device *tmp_adev = NULL; + int i, r = 0; /* Post ASIC reset for all devs .*/ - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + list_for_each_entry(tmp_adev, device_list, reset_list) { for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; @@ -6168,13 +6359,23 @@ skip_hw_reset: amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); } else { dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); - if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0)) - DRM_WARN("smart shift update failed\n"); + if (amdgpu_acpi_smart_shift_update(tmp_adev, + AMDGPU_SS_DEV_D0)) + dev_warn(tmp_adev->dev, + "smart shift update failed\n"); } } -skip_sched_resume: - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + return r; +} + +static void amdgpu_device_gpu_resume(struct amdgpu_device *adev, + struct list_head *device_list, + bool need_emergency_restart) +{ + struct amdgpu_device *tmp_adev = NULL; + + list_for_each_entry(tmp_adev, device_list, reset_list) { /* unlock kfd: SRIOV would do it separately */ if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev)) amdgpu_amdkfd_post_reset(tmp_adev); @@ -6185,18 +6386,114 @@ skip_sched_resume: if (!adev->kfd.init_complete) amdgpu_amdkfd_device_init(adev); - if (audio_suspended) + if (tmp_adev->pcie_reset_ctx.audio_suspended) amdgpu_device_resume_display_audio(tmp_adev); amdgpu_device_unset_mp1_state(tmp_adev); amdgpu_ras_set_error_query_ready(tmp_adev, true); + } +} - tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, - reset_list); - amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); +/** + * amdgpu_device_gpu_recover - reset the asic and recover scheduler + * + * @adev: amdgpu_device pointer + * @job: which job trigger hang + * @reset_context: amdgpu reset context pointer + * + * Attempt to reset the GPU if it has hung (all asics). + * Attempt to do soft-reset or full-reset and reinitialize Asic + * Returns 0 for success or an error on failure. + */ + +int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + struct amdgpu_job *job, + struct amdgpu_reset_context *reset_context) +{ + struct list_head device_list; + bool job_signaled = false; + struct amdgpu_hive_info *hive = NULL; + int r = 0; + bool need_emergency_restart = false; + + /* + * If it reaches here because of hang/timeout and a RAS error is + * detected at the same time, let RAS recovery take care of it. + */ + if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) && + !amdgpu_sriov_vf(adev) && + reset_context->src != AMDGPU_RESET_SRC_RAS) { + dev_dbg(adev->dev, + "Gpu recovery from source: %d yielding to RAS error recovery handling", + reset_context->src); + return 0; + } + + /* + * Special case: RAS triggered and full reset isn't supported + */ + need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); + + /* + * Flush RAM to disk so that after reboot + * the user can read log and see why the system rebooted. + */ + if (need_emergency_restart && amdgpu_ras_get_context(adev) && + amdgpu_ras_get_context(adev)->reboot) { + dev_warn(adev->dev, "Emergency reboot."); + + ksys_sync_helper(); + emergency_restart(); + } + + dev_info(adev->dev, "GPU %s begin!\n", + need_emergency_restart ? "jobs stop":"reset"); + + if (!amdgpu_sriov_vf(adev)) + hive = amdgpu_get_xgmi_hive(adev); + if (hive) + mutex_lock(&hive->hive_lock); + + reset_context->job = job; + reset_context->hive = hive; + INIT_LIST_HEAD(&device_list); + + if (amdgpu_device_recovery_prepare(adev, &device_list, hive)) + goto end_reset; + + /* We need to lock reset domain only once both for XGMI and single device */ + amdgpu_device_recovery_get_reset_lock(adev, &device_list); + + amdgpu_device_halt_activities(adev, job, reset_context, &device_list, + hive, need_emergency_restart); + if (need_emergency_restart) + goto skip_sched_resume; + /* + * Must check guilty signal here since after this point all old + * HW fences are force signaled. + * + * job->base holds a reference to parent fence + */ + if (job && dma_fence_is_signaled(&job->hw_fence.base)) { + job_signaled = true; + dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); + goto skip_hw_reset; + } + + r = amdgpu_device_asic_reset(adev, &device_list, reset_context); + if (r) + goto reset_unlock; +skip_hw_reset: + r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled); + if (r) + goto reset_unlock; +skip_sched_resume: + amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart); +reset_unlock: + amdgpu_device_recovery_put_reset_lock(adev, &device_list); end_reset: if (hive) { mutex_unlock(&hive->hive_lock); @@ -6208,8 +6505,17 @@ end_reset: atomic_set(&adev->reset_domain->reset_res, r); - if (!r) - drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE); + if (!r) { + struct amdgpu_task_info *ti = NULL; + + if (job) + ti = amdgpu_vm_get_task_info_pasid(adev, job->pasid); + + drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, + ti ? &ti->task : NULL); + + amdgpu_vm_put_task_info(ti); + } return r; } @@ -6528,12 +6834,11 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, #endif } -int amdgpu_device_baco_enter(struct drm_device *dev) +int amdgpu_device_baco_enter(struct amdgpu_device *adev) { - struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - if (!amdgpu_device_supports_baco(dev)) + if (!amdgpu_device_supports_baco(adev)) return -ENOTSUPP; if (ras && adev->ras_enabled && @@ -6543,13 +6848,12 @@ int amdgpu_device_baco_enter(struct drm_device *dev) return amdgpu_dpm_baco_enter(adev); } -int amdgpu_device_baco_exit(struct drm_device *dev) +int amdgpu_device_baco_exit(struct amdgpu_device *adev) { - struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); int ret = 0; - if (!amdgpu_device_supports_baco(dev)) + if (!amdgpu_device_supports_baco(adev)) return -ENOTSUPP; ret = amdgpu_dpm_baco_exit(adev); @@ -6580,12 +6884,14 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta { struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); - int i; + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + struct amdgpu_reset_context reset_context; + struct list_head device_list; - DRM_INFO("PCI error: detected callback, state(%d)!!\n", state); + dev_info(adev->dev, "PCI error: detected callback!!\n"); - if (adev->gmc.xgmi.num_physical_nodes > 1) { - DRM_WARN("No support for XGMI hive yet..."); + if (!amdgpu_dpm_is_link_reset_supported(adev)) { + dev_warn(adev->dev, "No support for XGMI hive yet...\n"); return PCI_ERS_RESULT_DISCONNECT; } @@ -6593,32 +6899,30 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta switch (state) { case pci_channel_io_normal: + dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state); return PCI_ERS_RESULT_CAN_RECOVER; - /* Fatal error, prepare for slot reset */ case pci_channel_io_frozen: - /* - * Locking adev->reset_domain->sem will prevent any external access - * to GPU during PCI error recovery - */ - amdgpu_device_lock_reset_domain(adev->reset_domain); - amdgpu_device_set_mp1_state(adev); - - /* - * Block any work scheduling as we do for regular GPU reset - * for the duration of the recovery - */ - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; - - if (!amdgpu_ring_sched_ready(ring)) - continue; - - drm_sched_stop(&ring->sched, NULL); + /* Fatal error, prepare for slot reset */ + dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state); + + if (hive) + mutex_lock(&hive->hive_lock); + adev->pcie_reset_ctx.occurs_dpc = true; + memset(&reset_context, 0, sizeof(reset_context)); + INIT_LIST_HEAD(&device_list); + + amdgpu_device_recovery_prepare(adev, &device_list, hive); + amdgpu_device_recovery_get_reset_lock(adev, &device_list); + amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list, + hive, false); + if (hive) { + mutex_unlock(&hive->hive_lock); + amdgpu_put_xgmi_hive(hive); } - atomic_inc(&adev->gpu_reset_counter); return PCI_ERS_RESULT_NEED_RESET; case pci_channel_io_perm_failure: /* Permanent error, prepare for device removal */ + dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state); return PCI_ERS_RESULT_DISCONNECT; } @@ -6631,8 +6935,10 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta */ pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev) { + struct drm_device *dev = pci_get_drvdata(pdev); + struct amdgpu_device *adev = drm_to_adev(dev); - DRM_INFO("PCI error: mmio enabled callback!!\n"); + dev_info(adev->dev, "PCI error: mmio enabled callback!!\n"); /* TODO - dump whatever for debugging purposes */ @@ -6656,10 +6962,12 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); - int r, i; struct amdgpu_reset_context reset_context; - u32 memsize; + struct amdgpu_device *tmp_adev; + struct amdgpu_hive_info *hive; struct list_head device_list; + int r = 0, i; + u32 memsize; /* PCI error slot reset should be skipped During RAS recovery */ if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || @@ -6667,15 +6975,12 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) amdgpu_ras_in_recovery(adev)) return PCI_ERS_RESULT_RECOVERED; - DRM_INFO("PCI error: slot reset callback!!\n"); + dev_info(adev->dev, "PCI error: slot reset callback!!\n"); memset(&reset_context, 0, sizeof(reset_context)); - INIT_LIST_HEAD(&device_list); - list_add_tail(&adev->reset_list, &device_list); - /* wait for asic to come out of reset */ - msleep(500); + msleep(700); /* Restore PCI confspace */ amdgpu_device_load_pci_state(pdev); @@ -6696,26 +7001,40 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); - - adev->no_hw_access = true; - r = amdgpu_device_pre_asic_reset(adev, &reset_context); - adev->no_hw_access = false; - if (r) - goto out; + set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); + INIT_LIST_HEAD(&device_list); - r = amdgpu_do_asic_reset(&device_list, &reset_context); + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + mutex_lock(&hive->hive_lock); + reset_context.hive = hive; + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + tmp_adev->pcie_reset_ctx.in_link_reset = true; + list_add_tail(&tmp_adev->reset_list, &device_list); + } + } else { + set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); + list_add_tail(&adev->reset_list, &device_list); + } + r = amdgpu_device_asic_reset(adev, &device_list, &reset_context); out: if (!r) { if (amdgpu_device_cache_pci_state(adev->pdev)) pci_restore_state(adev->pdev); - - DRM_INFO("PCIe error recovery succeeded\n"); + dev_info(adev->dev, "PCIe error recovery succeeded\n"); } else { - DRM_ERROR("PCIe error recovery failed, err:%d", r); - amdgpu_device_unset_mp1_state(adev); - amdgpu_device_unlock_reset_domain(adev->reset_domain); + dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r); + if (hive) { + list_for_each_entry(tmp_adev, &device_list, reset_list) + amdgpu_device_unset_mp1_state(tmp_adev); + } + amdgpu_device_recovery_put_reset_lock(adev, &device_list); + } + + if (hive) { + mutex_unlock(&hive->hive_lock); + amdgpu_put_xgmi_hive(hive); } return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; @@ -6732,26 +7051,37 @@ void amdgpu_pci_resume(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); - int i; - + struct list_head device_list; + struct amdgpu_hive_info *hive = NULL; + struct amdgpu_device *tmp_adev = NULL; - DRM_INFO("PCI error: resume callback!!\n"); + dev_info(adev->dev, "PCI error: resume callback!!\n"); /* Only continue execution for the case of pci_channel_io_frozen */ if (adev->pci_channel_state != pci_channel_io_frozen) return; - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; + INIT_LIST_HEAD(&device_list); - if (!amdgpu_ring_sched_ready(ring)) - continue; + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + mutex_lock(&hive->hive_lock); + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + tmp_adev->pcie_reset_ctx.in_link_reset = false; + list_add_tail(&tmp_adev->reset_list, &device_list); + } + } else + list_add_tail(&adev->reset_list, &device_list); - drm_sched_start(&ring->sched, 0); - } + amdgpu_device_sched_resume(&device_list, NULL, NULL); + amdgpu_device_gpu_resume(adev, &device_list, false); + amdgpu_device_recovery_put_reset_lock(adev, &device_list); + adev->pcie_reset_ctx.occurs_dpc = false; - amdgpu_device_unset_mp1_state(adev); - amdgpu_device_unlock_reset_domain(adev->reset_domain); + if (hive) { + mutex_unlock(&hive->hive_lock); + amdgpu_put_xgmi_hive(hive); + } } bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) @@ -6770,11 +7100,11 @@ bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) adev->pci_state = pci_store_saved_state(pdev); if (!adev->pci_state) { - DRM_ERROR("Failed to store PCI saved state"); + dev_err(adev->dev, "Failed to store PCI saved state"); return false; } } else { - DRM_WARN("Failed to save PCI state, err:%d\n", r); + dev_warn(adev->dev, "Failed to save PCI state, err:%d\n", r); return false; } @@ -6795,7 +7125,7 @@ bool amdgpu_device_load_pci_state(struct pci_dev *pdev) if (!r) { pci_restore_state(pdev); } else { - DRM_WARN("Failed to load PCI state, err:%d\n", r); + dev_warn(adev->dev, "Failed to load PCI state, err:%d\n", r); return false; } @@ -7041,7 +7371,7 @@ struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev, dep = amdgpu_sync_peek_fence(&isolation->prev, ring); r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT); if (r) - DRM_WARN("OOM tracking isolation\n"); + dev_warn(adev->dev, "OOM tracking isolation\n"); out_grab_ref: dma_fence_get(dep); @@ -7109,9 +7439,11 @@ uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev, tmp_ = RREG32(reg_addr); loop--; if (!loop) { - DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn", - inst, reg_name, (uint32_t)expected_value, - (uint32_t)(tmp_ & (mask))); + dev_warn( + adev->dev, + "Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn", + inst, reg_name, (uint32_t)expected_value, + (uint32_t)(tmp_ & (mask))); ret = -ETIMEDOUT; break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index dc2713ec95a5..81b3443c8d7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -120,6 +120,8 @@ MODULE_FIRMWARE("amdgpu/vega20_ip_discovery.bin"); MODULE_FIRMWARE("amdgpu/raven_ip_discovery.bin"); MODULE_FIRMWARE("amdgpu/raven2_ip_discovery.bin"); MODULE_FIRMWARE("amdgpu/picasso_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/aldebaran_ip_discovery.bin"); #define mmIP_DISCOVERY_VERSION 0x16A00 #define mmRCC_CONFIG_MEMSIZE 0xde3 @@ -268,9 +270,10 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, uint8_t *binary) { + bool sz_valid = true; uint64_t vram_size; - u32 msg; int i, ret = 0; + u32 msg; if (!amdgpu_sriov_vf(adev)) { /* It can take up to a second for IFWI init to complete on some dGPUs, @@ -289,9 +292,13 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, } } - vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; + vram_size = RREG32(mmRCC_CONFIG_MEMSIZE); + if (!vram_size || vram_size == U32_MAX) + sz_valid = false; + else + vram_size <<= 20; - if (vram_size) { + if (sz_valid) { uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, adev->mman.discovery_tmr_size, false); @@ -299,6 +306,11 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary); } + if (ret) + dev_err(adev->dev, + "failed to read discovery info from memory, vram size read: %llx", + vram_size); + return ret; } @@ -309,10 +321,12 @@ static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, const struct firmware *fw; int r; - r = request_firmware(&fw, fw_name, adev->dev); + r = firmware_request_nowarn(&fw, fw_name, adev->dev); if (r) { - dev_err(adev->dev, "can't load firmware \"%s\"\n", - fw_name); + if (amdgpu_discovery == 2) + dev_err(adev->dev, "can't load firmware \"%s\"\n", fw_name); + else + drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fw_name); return r; } @@ -447,16 +461,12 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) /* Read from file if it is the preferred option */ fw_name = amdgpu_discovery_get_fw_name(adev); if (fw_name != NULL) { - dev_info(adev->dev, "use ip discovery information from file"); + drm_dbg(&adev->ddev, "use ip discovery information from file"); r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin, fw_name); - - if (r) { - dev_err(adev->dev, "failed to read ip discovery binary from file\n"); - r = -EINVAL; + if (r) goto out; - } - } else { + drm_dbg(&adev->ddev, "use ip discovery information from memory"); r = amdgpu_discovery_read_binary_from_mem( adev, adev->mman.discovery_bin); if (r) @@ -1326,10 +1336,8 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) int r; r = amdgpu_discovery_init(adev); - if (r) { - DRM_ERROR("amdgpu_discovery_init failed\n"); + if (r) return r; - } wafl_ver = 0; adev->gfx.xcc_mask = 0; @@ -2567,8 +2575,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; default: r = amdgpu_discovery_reg_base_init(adev); - if (r) - return -EINVAL; + if (r) { + drm_err(&adev->ddev, "discovery failed: %d\n", r); + return r; + } amdgpu_discovery_harvest_ip(adev); amdgpu_discovery_get_gfx_info(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 35c778426a7c..51bab32fd8c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1196,13 +1196,14 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev, struct amdgpu_framebuffer *rfb, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj) { int ret; rfb->base.obj[0] = obj; - drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, &rfb->base, info, mode_cmd); /* Verify that the modifier is supported. */ if (!drm_any_plane_has_format(dev, mode_cmd->pixel_format, mode_cmd->modifier[0])) { @@ -1297,6 +1298,7 @@ static int amdgpu_display_framebuffer_init(struct drm_device *dev, struct drm_framebuffer * amdgpu_display_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct amdgpu_framebuffer *amdgpu_fb; @@ -1317,7 +1319,7 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev, /* Handle is imported dma-buf, so cannot be migrated to VRAM for scanout */ bo = gem_to_amdgpu_bo(obj); domains = amdgpu_display_supported_domains(drm_to_adev(dev), bo->flags); - if (obj->import_attach && !(domains & AMDGPU_GEM_DOMAIN_GTT)) { + if (drm_gem_is_imported(obj) && !(domains & AMDGPU_GEM_DOMAIN_GTT)) { drm_dbg_kms(dev, "Cannot create framebuffer from imported dma_buf\n"); drm_gem_object_put(obj); return ERR_PTR(-EINVAL); @@ -1330,7 +1332,7 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev, } ret = amdgpu_display_gem_fb_verify_and_init(dev, amdgpu_fb, file_priv, - mode_cmd, obj); + info, mode_cmd, obj); if (ret) { kfree(amdgpu_fb); drm_gem_object_put(obj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index dfa0d642ac16..930c171473b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -44,6 +44,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, struct drm_framebuffer * amdgpu_display_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); const struct drm_format_info * amdgpu_lookup_format_info(u32 format, uint64_t modifier); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 9f627caedc3f..ff98c87b2e0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -43,6 +43,29 @@ #include <linux/dma-fence-array.h> #include <linux/pci-p2pdma.h> +static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops; + +/** + * dma_buf_attach_adev - Helper to get adev of an attachment + * + * @attach: attachment + * + * Returns: + * A struct amdgpu_device * if the attaching device is an amdgpu device or + * partition, NULL otherwise. + */ +static struct amdgpu_device *dma_buf_attach_adev(struct dma_buf_attachment *attach) +{ + if (attach->importer_ops == &amdgpu_dma_buf_attach_ops) { + struct drm_gem_object *obj = attach->importer_priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + return amdgpu_ttm_adev(bo->tbo.bdev); + } + + return NULL; +} + /** * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation * @@ -54,11 +77,13 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach) { + struct amdgpu_device *attach_adev = dma_buf_attach_adev(attach); struct drm_gem_object *obj = dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) + if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) && + pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; amdgpu_vm_bo_update_shared(bo); @@ -75,11 +100,35 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, */ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) { - struct drm_gem_object *obj = attach->dmabuf->priv; - struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + struct dma_buf *dmabuf = attach->dmabuf; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(dmabuf->priv); + u32 domains = bo->allowed_domains; + + dma_resv_assert_held(dmabuf->resv); + + /* Try pinning into VRAM to allow P2P with RDMA NICs without ODP + * support if all attachments can do P2P. If any attachment can't do + * P2P just pin into GTT instead. + * + * To avoid with conflicting pinnings between GPUs and RDMA when move + * notifiers are disabled, only allow pinning in VRAM when move + * notiers are enabled. + */ + if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) { + domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + } else { + list_for_each_entry(attach, &dmabuf->attachments, node) + if (!attach->peer2peer) + domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + } - /* pin buffer into GTT */ - return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + if (domains & AMDGPU_GEM_DOMAIN_VRAM) + bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + + if (WARN_ON(!domains)) + return -EINVAL; + + return amdgpu_bo_pin(bo, domains); } /** @@ -134,9 +183,6 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) return ERR_PTR(r); - - } else if (bo->tbo.resource->mem_type != TTM_PL_TT) { - return ERR_PTR(-EBUSY); } switch (bo->tbo.resource->mem_type) { @@ -153,6 +199,11 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, break; case TTM_PL_VRAM: + /* XGMI-accessible memory should never be DMA-mapped */ + if (WARN_ON(amdgpu_dmabuf_is_xgmi_accessible( + dma_buf_attach_adev(attach), bo))) + return ERR_PTR(-EINVAL); + r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0, bo->tbo.base.size, attach->dev, dir, &sgt); @@ -184,7 +235,7 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach, struct sg_table *sgt, enum dma_data_direction dir) { - if (sgt->sgl->page_link) { + if (sg_page(sgt->sgl)) { dma_unmap_sgtable(attach->dev, sgt, dir, 0); sg_free_table(sgt); kfree(sgt); @@ -459,8 +510,11 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev, struct drm_gem_object *obj = &bo->tbo.base; struct drm_gem_object *gobj; - if (obj->import_attach) { - struct dma_buf *dma_buf = obj->import_attach->dmabuf; + if (!adev) + return false; + + if (drm_gem_is_imported(obj)) { + struct dma_buf *dma_buf = obj->dma_buf; if (dma_buf->ops != &amdgpu_dmabuf_ops) /* No XGMI with non AMD GPUs */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c index 3f3662e8b871..3040437d99c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c @@ -41,7 +41,8 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) if (index < adev->doorbell.num_kernel_doorbells) return readl(adev->doorbell.cpu_addr + index); - DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); + dev_err(adev->dev, "reading beyond doorbell aperture: 0x%08x!\n", + index); return 0; } @@ -63,7 +64,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) if (index < adev->doorbell.num_kernel_doorbells) writel(v, adev->doorbell.cpu_addr + index); else - DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); + dev_err(adev->dev, + "writing beyond doorbell aperture: 0x%08x!\n", index); } /** @@ -83,7 +85,8 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) if (index < adev->doorbell.num_kernel_doorbells) return atomic64_read((atomic64_t *)(adev->doorbell.cpu_addr + index)); - DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); + dev_err(adev->dev, "reading beyond doorbell aperture: 0x%08x!\n", + index); return 0; } @@ -105,7 +108,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) if (index < adev->doorbell.num_kernel_doorbells) atomic64_set((atomic64_t *)(adev->doorbell.cpu_addr + index), v); else - DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); + dev_err(adev->dev, + "writing beyond doorbell aperture: 0x%08x!\n", index); } /** @@ -166,7 +170,8 @@ int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev) NULL, (void **)&adev->doorbell.cpu_addr); if (r) { - DRM_ERROR("Failed to allocate kernel doorbells, err=%d\n", r); + dev_err(adev->dev, + "Failed to allocate kernel doorbells, err=%d\n", r); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 23cfce5aa1fc..395c6be901ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -51,6 +51,8 @@ #include "amdgpu_reset.h" #include "amdgpu_sched.h" #include "amdgpu_xgmi.h" +#include "amdgpu_userq.h" +#include "amdgpu_userq_fence.h" #include "../amdxcp/amdgpu_xcp_drv.h" /* @@ -123,9 +125,10 @@ * - 3.61.0 - Contains fix for RV/PCO compute queues * - 3.62.0 - Add AMDGPU_IDS_FLAGS_MODE_PF, AMDGPU_IDS_FLAGS_MODE_VF & AMDGPU_IDS_FLAGS_MODE_PT * - 3.63.0 - GFX12 display DCC supports 256B max compressed block size + * - 3.64.0 - Userq IP support query */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 63 +#define KMS_DRIVER_MINOR 64 #define KMS_DRIVER_PATCHLEVEL 0 /* @@ -140,6 +143,8 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5), AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6), AMDGPU_DEBUG_SMU_POOL = BIT(7), + AMDGPU_DEBUG_VM_USERPTR = BIT(8), + AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9) }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -176,7 +181,7 @@ uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu; char *amdgpu_virtual_display; -bool enforce_isolation; +int amdgpu_enforce_isolation = -1; int amdgpu_modeset = -1; /* Specifies the default granularity for SVM, used in buffer @@ -238,6 +243,8 @@ int amdgpu_agp = -1; /* auto */ int amdgpu_wbrf = -1; int amdgpu_damage_clips = -1; /* auto */ int amdgpu_umsch_mm_fwlog; +int amdgpu_rebar = -1; /* auto */ +int amdgpu_user_queue = -1; DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, "DRM_UT_CORE", @@ -355,12 +362,12 @@ module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint * The second one is for Compute. The third and fourth ones are * for SDMA and Video. * - * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video) - * jobs is 10000. The timeout for compute is 60000. + * By default(with no lockup_timeout settings), the timeout for all jobs is 10000. */ -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and 60000 for compute jobs; " - "for passthrough or sriov, 10000 for all jobs. 0: keep default value. negative: infinity timeout), format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; " - "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video]."); +MODULE_PARM_DESC(lockup_timeout, + "GPU lockup timeout in ms (default: 10000 for all jobs. " + "0: keep default value. negative: infinity timeout), format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; " + "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video]."); module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444); /** @@ -1033,11 +1040,13 @@ module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444); /** - * DOC: enforce_isolation (bool) - * enforce process isolation between graphics and compute via using the same reserved vmid. + * DOC: enforce_isolation (int) + * enforce process isolation between graphics and compute. + * (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader) */ -module_param(enforce_isolation, bool, 0444); -MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on"); +module_param_named(enforce_isolation, amdgpu_enforce_isolation, int, 0444); +MODULE_PARM_DESC(enforce_isolation, +"enforce process isolation between graphics and compute. (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader)"); /** * DOC: modeset (int) @@ -1096,6 +1105,28 @@ MODULE_PARM_DESC(wbrf, "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)"); module_param_named(wbrf, amdgpu_wbrf, int, 0444); +/** + * DOC: rebar (int) + * Allow BAR resizing. Disable this to prevent the driver from attempting + * to resize the BAR if the GPU supports it and there is available MMIO space. + * Note that this just prevents the driver from resizing the BAR. The BIOS + * may have already resized the BAR at boot time. + */ +MODULE_PARM_DESC(rebar, "Resizable BAR (-1 = auto (default), 0 = disable, 1 = enable)"); +module_param_named(rebar, amdgpu_rebar, int, 0444); + +/** + * DOC: user_queue (int) + * Enable user queues on systems that support user queues. Possible values: + * + * - -1 = auto (ASIC specific default) + * - 0 = user queues disabled + * - 1 = user queues enabled and kernel queues enabled (if supported) + * - 2 = user queues enabled and kernel queues disabled + */ +MODULE_PARM_DESC(user_queue, "Enable user queues (-1 = auto (default), 0 = disable, 1 = enable, 2 = enable UQs and disable KQs)"); +module_param_named(user_queue, amdgpu_user_queue, int, 0444); + /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers */ @@ -1809,7 +1840,6 @@ static const u16 amdgpu_unsupported_pciidlist[] = { }; static const struct pci_device_id pciidlist[] = { -#ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, {0x1002, 0x6784, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, {0x1002, 0x6788, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, @@ -1882,8 +1912,6 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x6665, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY}, {0x1002, 0x6667, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY}, {0x1002, 0x666F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY}, -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK /* Kaveri */ {0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU}, {0x1002, 0x1305, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU}, @@ -1966,7 +1994,6 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x985D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU}, {0x1002, 0x985E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU}, {0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU}, -#endif /* topaz */ {0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ}, {0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ}, @@ -2248,6 +2275,15 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: use vram for smu pool\n"); adev->pm.smu_debug_mask |= SMU_DEBUG_POOL_USE_VRAM; } + if (amdgpu_debug_mask & AMDGPU_DEBUG_VM_USERPTR) { + pr_info("debug: VM mode debug for userptr is enabled\n"); + adev->debug_vm_userptr = true; + } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_RAS_CE_LOG) { + pr_info("debug: disable kernel logs of correctable errors\n"); + adev->debug_disable_ce_logs = true; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) @@ -2291,7 +2327,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, amdgpu_aspm = 0; if (amdgpu_virtual_display || - amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK)) + amdgpu_device_asic_has_dc_support(pdev, flags & AMD_ASIC_MASK)) supports_atomic = true; if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) { @@ -2313,14 +2349,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, return -ENOTSUPP; } + switch (flags & AMD_ASIC_MASK) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_HAINAN: #ifdef CONFIG_DRM_AMDGPU_SI - if (!amdgpu_si_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - case CHIP_HAINAN: + if (!amdgpu_si_support) { dev_info(&pdev->dev, "SI support provided by radeon.\n"); dev_info(&pdev->dev, @@ -2328,16 +2364,18 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, ); return -ENODEV; } - } + break; +#else + dev_info(&pdev->dev, "amdgpu is built without SI support.\n"); + return -ENODEV; #endif + case CHIP_KAVERI: + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: #ifdef CONFIG_DRM_AMDGPU_CIK - if (!amdgpu_cik_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_KAVERI: - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KABINI: - case CHIP_MULLINS: + if (!amdgpu_cik_support) { dev_info(&pdev->dev, "CIK support provided by radeon.\n"); dev_info(&pdev->dev, @@ -2345,8 +2383,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, ); return -ENODEV; } - } + break; +#else + dev_info(&pdev->dev, "amdgpu is built without CIK support.\n"); + return -ENODEV; #endif + default: + break; + } adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev); if (IS_ERR(adev)) @@ -2413,10 +2457,10 @@ retry_init: if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { /* only need to skip on ATPX */ - if (amdgpu_device_supports_px(ddev)) + if (amdgpu_device_supports_px(adev)) dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); /* we want direct complete for BOCO */ - if (amdgpu_device_supports_boco(ddev)) + if (amdgpu_device_supports_boco(adev)) dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_SMART_PREPARE | DPM_FLAG_SMART_SUSPEND | DPM_FLAG_MAY_SKIP_RESUME); @@ -2449,9 +2493,9 @@ retry_init: * into D0 state. Then there will be a PMFW-aware D-state * transition(D0->D3) on runpm suspend. */ - if (amdgpu_device_supports_baco(ddev) && + if (amdgpu_device_supports_baco(adev) && !(adev->flags & AMD_IS_APU) && - (adev->asic_type >= CHIP_NAVI10)) + adev->asic_type >= CHIP_NAVI10) amdgpu_get_secondary_funcs(adev); } @@ -2468,6 +2512,7 @@ amdgpu_pci_remove(struct pci_dev *pdev) struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); + amdgpu_ras_eeprom_check_and_recover(adev); amdgpu_xcp_dev_unplug(adev); amdgpu_gmc_prepare_nps_mode_change(adev); drm_dev_unplug(dev); @@ -2497,6 +2542,10 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) if (amdgpu_ras_intr_triggered()) return; + /* device maybe not resumed here, return immediately in this case */ + if (adev->in_s4 && adev->in_suspend) + return; + /* if we are running in a VM, make sure the device * torn down properly on reboot/shutdown. * unfortunately we can't detect certain @@ -2513,11 +2562,14 @@ static int amdgpu_pmops_prepare(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + /* device maybe not resumed here, return immediately in this case */ + if (adev->in_s4 && adev->in_suspend) + return 0; + /* Return a positive number here so * DPM_FLAG_SMART_SUSPEND works properly */ - if (amdgpu_device_supports_boco(drm_dev) && - pm_runtime_suspended(dev)) + if (amdgpu_device_supports_boco(adev) && pm_runtime_suspended(dev)) return 1; /* if we will not support s3 or s2i for the device @@ -2532,7 +2584,7 @@ static int amdgpu_pmops_prepare(struct device *dev) static void amdgpu_pmops_complete(struct device *dev) { - /* nothing to do */ + amdgpu_device_complete(dev_get_drvdata(dev)); } static int amdgpu_pmops_suspend(struct device *dev) @@ -2544,8 +2596,20 @@ static int amdgpu_pmops_suspend(struct device *dev) adev->in_s0ix = true; else if (amdgpu_acpi_is_s3_active(adev)) adev->in_s3 = true; - if (!adev->in_s0ix && !adev->in_s3) + if (!adev->in_s0ix && !adev->in_s3) { + /* don't allow going deep first time followed by s2idle the next time */ + if (adev->last_suspend_state != PM_SUSPEND_ON && + adev->last_suspend_state != pm_suspend_target_state) { + drm_err_once(drm_dev, "Unsupported suspend state %d\n", + pm_suspend_target_state); + return -EINVAL; + } return 0; + } + + /* cache the state last used for suspend */ + adev->last_suspend_state = pm_suspend_target_state; + return amdgpu_device_suspend(drm_dev, true); } @@ -2599,18 +2663,22 @@ static int amdgpu_pmops_freeze(struct device *dev) static int amdgpu_pmops_thaw(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct amdgpu_device *adev = drm_to_adev(drm_dev); - int r; - r = amdgpu_device_resume(drm_dev, true); - adev->in_s4 = false; + /* do not resume device if it's normal hibernation */ + if (!pm_hibernate_is_recovering()) + return 0; - return r; + return amdgpu_device_resume(drm_dev, true); } static int amdgpu_pmops_poweroff(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + + /* device maybe not resumed here, return immediately in this case */ + if (adev->in_s4 && adev->in_suspend) + return 0; return amdgpu_device_suspend(drm_dev, true); } @@ -2618,9 +2686,6 @@ static int amdgpu_pmops_poweroff(struct device *dev) static int amdgpu_pmops_restore(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct amdgpu_device *adev = drm_to_adev(drm_dev); - - adev->in_s4 = false; return amdgpu_device_resume(drm_dev, true); } @@ -2692,6 +2757,29 @@ static int amdgpu_runtime_idle_check_display(struct device *dev) return 0; } +static int amdgpu_runtime_idle_check_userq(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0; + + mutex_lock(&adev->userq_mutex); + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + ret = -EBUSY; + goto done; + } + } +done: + mutex_unlock(&adev->userq_mutex); + + return ret; +} + static int amdgpu_pmops_runtime_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -2707,6 +2795,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) ret = amdgpu_runtime_idle_check_display(dev); if (ret) return ret; + ret = amdgpu_runtime_idle_check_userq(dev); + if (ret) + return ret; /* wait for all rings to drain before suspending */ for (i = 0; i < AMDGPU_MAX_RINGS; i++) { @@ -2760,7 +2851,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) /* nothing to do */ } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) { - amdgpu_device_baco_enter(drm_dev); + amdgpu_device_baco_enter(adev); } dev_dbg(&pdev->dev, "asic/device is runtime suspended\n"); @@ -2801,7 +2892,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) pci_set_master(pdev); } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) { - amdgpu_device_baco_exit(drm_dev); + amdgpu_device_baco_exit(adev); } ret = amdgpu_device_resume(drm_dev, false); if (ret) { @@ -2828,12 +2919,30 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) } ret = amdgpu_runtime_idle_check_display(dev); + if (ret) + goto done; + ret = amdgpu_runtime_idle_check_userq(dev); +done: pm_runtime_mark_last_busy(dev); pm_runtime_autosuspend(dev); return ret; } +static int amdgpu_drm_release(struct inode *inode, struct file *filp) +{ + struct drm_file *file_priv = filp->private_data; + struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + + if (fpriv) { + fpriv->evf_mgr.fd_closing = true; + amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); + amdgpu_userq_mgr_fini(&fpriv->userq_mgr); + } + + return drm_release(inode, filp); +} + long amdgpu_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -2885,7 +2994,7 @@ static const struct file_operations amdgpu_driver_kms_fops = { .owner = THIS_MODULE, .open = drm_open, .flush = amdgpu_flush, - .release = drm_release, + .release = amdgpu_drm_release, .unlocked_ioctl = amdgpu_drm_ioctl, .mmap = drm_gem_mmap, .poll = drm_poll, @@ -2932,6 +3041,9 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), }; static const struct drm_driver amdgpu_kms_driver = { @@ -3018,7 +3130,7 @@ static int __init amdgpu_init(void) if (r) goto error_sync; - r = amdgpu_fence_slab_init(); + r = amdgpu_userq_fence_slab_init(); if (r) goto error_fence; @@ -3052,7 +3164,7 @@ static void __exit amdgpu_exit(void) amdgpu_unregister_atpx_handler(); amdgpu_acpi_release(); amdgpu_sync_fini(); - amdgpu_fence_slab_fini(); + amdgpu_userq_fence_slab_fini(); mmu_notifier_synchronize(); amdgpu_xcp_drv_release(); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c new file mode 100644 index 000000000000..23d7d0b0d625 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -0,0 +1,241 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/sched.h> +#include <drm/drm_exec.h> +#include "amdgpu.h" + +#define work_to_evf_mgr(w, name) container_of(w, struct amdgpu_eviction_fence_mgr, name) +#define evf_mgr_to_fpriv(e) container_of(e, struct amdgpu_fpriv, evf_mgr) + +static const char * +amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence) +{ + return "amdgpu_eviction_fence"; +} + +static const char * +amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_eviction_fence *ef; + + ef = container_of(f, struct amdgpu_eviction_fence, base); + return ef->timeline_name; +} + +int +amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct drm_exec *exec) +{ + struct amdgpu_eviction_fence *old_ef, *new_ef; + struct drm_gem_object *obj; + unsigned long index; + int ret; + + if (evf_mgr->ev_fence && + !dma_fence_is_signaled(&evf_mgr->ev_fence->base)) + return 0; + /* + * Steps to replace eviction fence: + * * lock all objects in exec (caller) + * * create a new eviction fence + * * update new eviction fence in evf_mgr + * * attach the new eviction fence to BOs + * * release the old fence + * * unlock the objects (caller) + */ + new_ef = amdgpu_eviction_fence_create(evf_mgr); + if (!new_ef) { + DRM_ERROR("Failed to create new eviction fence\n"); + return -ENOMEM; + } + + /* Update the eviction fence now */ + spin_lock(&evf_mgr->ev_fence_lock); + old_ef = evf_mgr->ev_fence; + evf_mgr->ev_fence = new_ef; + spin_unlock(&evf_mgr->ev_fence_lock); + + /* Attach the new fence */ + drm_exec_for_each_locked_object(exec, index, obj) { + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + if (!bo) + continue; + ret = amdgpu_eviction_fence_attach(evf_mgr, bo); + if (ret) { + DRM_ERROR("Failed to attch new eviction fence\n"); + goto free_err; + } + } + + /* Free old fence */ + if (old_ef) + dma_fence_put(&old_ef->base); + return 0; + +free_err: + kfree(new_ef); + return ret; +} + +static void +amdgpu_eviction_fence_suspend_worker(struct work_struct *work) +{ + struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work); + struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr); + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; + struct amdgpu_eviction_fence *ev_fence; + + mutex_lock(&uq_mgr->userq_mutex); + spin_lock(&evf_mgr->ev_fence_lock); + ev_fence = evf_mgr->ev_fence; + if (ev_fence) + dma_fence_get(&ev_fence->base); + else + goto unlock; + spin_unlock(&evf_mgr->ev_fence_lock); + + amdgpu_userq_evict(uq_mgr, ev_fence); + + mutex_unlock(&uq_mgr->userq_mutex); + dma_fence_put(&ev_fence->base); + return; + +unlock: + spin_unlock(&evf_mgr->ev_fence_lock); + mutex_unlock(&uq_mgr->userq_mutex); +} + +static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f) +{ + struct amdgpu_eviction_fence_mgr *evf_mgr; + struct amdgpu_eviction_fence *ev_fence; + + if (!f) + return true; + + ev_fence = to_ev_fence(f); + evf_mgr = ev_fence->evf_mgr; + + schedule_delayed_work(&evf_mgr->suspend_work, 0); + return true; +} + +static const struct dma_fence_ops amdgpu_eviction_fence_ops = { + .get_driver_name = amdgpu_eviction_fence_get_driver_name, + .get_timeline_name = amdgpu_eviction_fence_get_timeline_name, + .enable_signaling = amdgpu_eviction_fence_enable_signaling, +}; + +void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_eviction_fence *ev_fence) +{ + spin_lock(&evf_mgr->ev_fence_lock); + dma_fence_signal(&ev_fence->base); + spin_unlock(&evf_mgr->ev_fence_lock); +} + +struct amdgpu_eviction_fence * +amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + + ev_fence = kzalloc(sizeof(*ev_fence), GFP_KERNEL); + if (!ev_fence) + return NULL; + + ev_fence->evf_mgr = evf_mgr; + get_task_comm(ev_fence->timeline_name, current); + spin_lock_init(&ev_fence->lock); + dma_fence_init64(&ev_fence->base, &amdgpu_eviction_fence_ops, + &ev_fence->lock, evf_mgr->ev_fence_ctx, + atomic_inc_return(&evf_mgr->ev_fence_seq)); + return ev_fence; +} + +void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + + /* Wait for any pending work to execute */ + flush_delayed_work(&evf_mgr->suspend_work); + + spin_lock(&evf_mgr->ev_fence_lock); + ev_fence = evf_mgr->ev_fence; + spin_unlock(&evf_mgr->ev_fence_lock); + + if (!ev_fence) + return; + + dma_fence_wait(&ev_fence->base, false); + + /* Last unref of ev_fence */ + dma_fence_put(&ev_fence->base); +} + +int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo) +{ + struct amdgpu_eviction_fence *ev_fence; + struct dma_resv *resv = bo->tbo.base.resv; + int ret; + + if (!resv) + return 0; + + ret = dma_resv_reserve_fences(resv, 1); + if (ret) { + DRM_DEBUG_DRIVER("Failed to resv fence space\n"); + return ret; + } + + spin_lock(&evf_mgr->ev_fence_lock); + ev_fence = evf_mgr->ev_fence; + if (ev_fence) + dma_resv_add_fence(resv, &ev_fence->base, DMA_RESV_USAGE_BOOKKEEP); + spin_unlock(&evf_mgr->ev_fence_lock); + + return 0; +} + +void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo) +{ + struct dma_fence *stub = dma_fence_get_stub(); + + dma_resv_replace_fences(bo->tbo.base.resv, evf_mgr->ev_fence_ctx, + stub, DMA_RESV_USAGE_BOOKKEEP); + dma_fence_put(stub); +} + +int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + /* This needs to be done one time per open */ + atomic_set(&evf_mgr->ev_fence_seq, 0); + evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1); + spin_lock_init(&evf_mgr->ev_fence_lock); + + INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h new file mode 100644 index 000000000000..fcd867b7147d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef AMDGPU_EV_FENCE_H_ +#define AMDGPU_EV_FENCE_H_ + +struct amdgpu_eviction_fence { + struct dma_fence base; + spinlock_t lock; + char timeline_name[TASK_COMM_LEN]; + struct amdgpu_eviction_fence_mgr *evf_mgr; +}; + +struct amdgpu_eviction_fence_mgr { + u64 ev_fence_ctx; + atomic_t ev_fence_seq; + spinlock_t ev_fence_lock; + struct amdgpu_eviction_fence *ev_fence; + struct delayed_work suspend_work; + uint8_t fd_closing; +}; + +/* Eviction fence helper functions */ +struct amdgpu_eviction_fence * +amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr); + +void +amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr); + +int +amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo); + +void +amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo); + +int +amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr); + +void +amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_eviction_fence *ev_fence); + +int +amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct drm_exec *exec); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 2f24a6aa13bf..9e7506965cab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -42,37 +42,6 @@ #include "amdgpu_reset.h" /* - * Fences mark an event in the GPUs pipeline and are used - * for GPU/CPU synchronization. When the fence is written, - * it is expected that all buffers associated with that fence - * are no longer in use by the associated ring on the GPU and - * that the relevant GPU caches have been flushed. - */ - -struct amdgpu_fence { - struct dma_fence base; - - /* RB, DMA, etc. */ - struct amdgpu_ring *ring; - ktime_t start_timestamp; -}; - -static struct kmem_cache *amdgpu_fence_slab; - -int amdgpu_fence_slab_init(void) -{ - amdgpu_fence_slab = KMEM_CACHE(amdgpu_fence, SLAB_HWCACHE_ALIGN); - if (!amdgpu_fence_slab) - return -ENOMEM; - return 0; -} - -void amdgpu_fence_slab_fini(void) -{ - rcu_barrier(); - kmem_cache_destroy(amdgpu_fence_slab); -} -/* * Cast helper */ static const struct dma_fence_ops amdgpu_fence_ops; @@ -130,14 +99,14 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) * * @ring: ring the fence is associated with * @f: resulting fence object - * @job: job the fence is embedded in + * @af: amdgpu fence input * @flags: flags to pass into the subordinate .emit_fence() call * * Emits a fence command on the requested ring (all asics). * Returns 0 on success, -ENOMEM on failure. */ -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amdgpu_job *job, - unsigned int flags) +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, + struct amdgpu_fence *af, unsigned int flags) { struct amdgpu_device *adev = ring->adev; struct dma_fence *fence; @@ -146,40 +115,35 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd uint32_t seq; int r; - if (job == NULL) { - /* create a sperate hw fence */ - am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC); - if (am_fence == NULL) + if (!af) { + /* create a separate hw fence */ + am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL); + if (!am_fence) return -ENOMEM; - fence = &am_fence->base; - am_fence->ring = ring; + am_fence->context = 0; } else { - /* take use of job-embedded fence */ - fence = &job->hw_fence; + am_fence = af; } + fence = &am_fence->base; + am_fence->ring = ring; seq = ++ring->fence_drv.sync_seq; - if (job && job->job_run_counter) { - /* reinit seq for resubmitted jobs */ - fence->seqno = seq; - /* TO be inline with external fence creation and other drivers */ + am_fence->seq = seq; + if (af) { + dma_fence_init(fence, &amdgpu_job_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, seq); + /* Against remove in amdgpu_job_{free, free_cb} */ dma_fence_get(fence); } else { - if (job) { - dma_fence_init(fence, &amdgpu_job_fence_ops, - &ring->fence_drv.lock, - adev->fence_context + ring->idx, seq); - /* Against remove in amdgpu_job_{free, free_cb} */ - dma_fence_get(fence); - } else { - dma_fence_init(fence, &amdgpu_fence_ops, - &ring->fence_drv.lock, - adev->fence_context + ring->idx, seq); - } + dma_fence_init(fence, &amdgpu_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, seq); } amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); + amdgpu_fence_save_wptr(fence); pm_runtime_get_noresume(adev_to_drm(adev)->dev); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; if (unlikely(rcu_dereference_protected(*ptr, 1))) { @@ -280,7 +244,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq); - if (del_timer(&ring->fence_drv.fallback_timer) && + if (timer_delete(&ring->fence_drv.fallback_timer) && seq != ring->fence_drv.sync_seq) amdgpu_fence_schedule_fallback(ring); @@ -292,6 +256,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) do { struct dma_fence *fence, **ptr; + struct amdgpu_fence *am_fence; ++last_seq; last_seq &= drv->num_fences_mask; @@ -304,6 +269,12 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) if (!fence) continue; + /* Save the wptr in the fence driver so we know what the last processed + * wptr was. This is required for re-emitting the ring state for + * queues that are reset but are not guilty and thus have no guilty fence. + */ + am_fence = container_of(fence, struct amdgpu_fence, base); + drv->signalled_wptr = am_fence->wptr; dma_fence_signal(fence); dma_fence_put(fence); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); @@ -322,11 +293,13 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) */ static void amdgpu_fence_fallback(struct timer_list *t) { - struct amdgpu_ring *ring = from_timer(ring, t, - fence_drv.fallback_timer); + struct amdgpu_ring *ring = timer_container_of(ring, t, + fence_drv.fallback_timer); if (amdgpu_fence_process(ring)) - DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name); + dev_warn(ring->adev->dev, + "Fence fallback timer expired on ring %s\n", + ring->name); } /** @@ -618,7 +591,7 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, ring->fence_drv.irq_src, ring->fence_drv.irq_type); - del_timer_sync(&ring->fence_drv.fallback_timer); + timer_delete_sync(&ring->fence_drv.fallback_timer); } } @@ -718,7 +691,7 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) * it right here or we won't be able to track them in fence_drv * and they will remain unsignaled during sa_bo free. */ - job = container_of(old, struct amdgpu_job, hw_fence); + job = container_of(old, struct amdgpu_job, hw_fence.base); if (!job->base.s_fence && !dma_fence_is_signaled(old)) dma_fence_signal(old); RCU_INIT_POINTER(*ptr, NULL); @@ -764,6 +737,86 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring) amdgpu_fence_process(ring); } + +/** + * Kernel queue reset handling + * + * The driver can reset individual queues for most engines, but those queues + * may contain work from multiple contexts. Resetting the queue will reset + * lose all of that state. In order to minimize the collateral damage, the + * driver will save the ring contents which are not associated with the guilty + * context prior to resetting the queue. After resetting the queue the queue + * contents from the other contexts is re-emitted to the rings so that it can + * be processed by the engine. To handle this, we save the queue's write + * pointer (wptr) in the fences associated with each context. If we get a + * queue timeout, we can then use the wptrs from the fences to determine + * which data needs to be saved out of the queue's ring buffer. + */ + +/** + * amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence + * + * @fence: fence of the ring to signal + * + */ +void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence) +{ + dma_fence_set_error(&fence->base, -ETIME); + amdgpu_fence_write(fence->ring, fence->seq); + amdgpu_fence_process(fence->ring); +} + +void amdgpu_fence_save_wptr(struct dma_fence *fence) +{ + struct amdgpu_fence *am_fence = container_of(fence, struct amdgpu_fence, base); + + am_fence->wptr = am_fence->ring->wptr; +} + +static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring, + u64 start_wptr, u32 end_wptr) +{ + unsigned int first_idx = start_wptr & ring->buf_mask; + unsigned int last_idx = end_wptr & ring->buf_mask; + unsigned int i; + + /* Backup the contents of the ring buffer. */ + for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask) + ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i]; +} + +void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence) +{ + struct dma_fence *unprocessed; + struct dma_fence __rcu **ptr; + struct amdgpu_fence *fence; + u64 wptr, i, seqno; + + seqno = amdgpu_fence_read(ring); + wptr = ring->fence_drv.signalled_wptr; + ring->ring_backup_entries_to_copy = 0; + + for (i = seqno + 1; i <= ring->fence_drv.sync_seq; ++i) { + ptr = &ring->fence_drv.fences[i & ring->fence_drv.num_fences_mask]; + rcu_read_lock(); + unprocessed = rcu_dereference(*ptr); + + if (unprocessed && !dma_fence_is_signaled(unprocessed)) { + fence = container_of(unprocessed, struct amdgpu_fence, base); + + /* save everything if the ring is not guilty, otherwise + * just save the content from other contexts. + */ + if (!guilty_fence || (fence->context != guilty_fence->context)) + amdgpu_ring_backup_unprocessed_command(ring, wptr, + fence->wptr); + wptr = fence->wptr; + } + rcu_read_unlock(); + } +} + /* * Common fence implementation */ @@ -780,7 +833,7 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); return (const char *)to_amdgpu_ring(job->base.sched)->name; } @@ -810,7 +863,7 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f) */ static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer)) amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched)); @@ -830,7 +883,7 @@ static void amdgpu_fence_free(struct rcu_head *rcu) struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); /* free fence_slab if it's separated fence*/ - kmem_cache_free(amdgpu_fence_slab, to_amdgpu_fence(f)); + kfree(to_amdgpu_fence(f)); } /** @@ -845,7 +898,7 @@ static void amdgpu_job_fence_free(struct rcu_head *rcu) struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); /* free job if fence has a parent job */ - kfree(container_of(f, struct amdgpu_job, hw_fence)); + kfree(container_of(f, struct amdgpu_job, hw_fence.base)); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index 1ae88c459da5..b0082aa7f3c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -144,7 +144,8 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) /* If algo exists, it means that the i2c_adapter's initialized */ if (!adev->pm.fru_eeprom_i2c_bus || !adev->pm.fru_eeprom_i2c_bus->algo) { - DRM_WARN("Cannot access FRU, EEPROM accessor not initialized"); + dev_warn(adev->dev, + "Cannot access FRU, EEPROM accessor not initialized"); return -ENODEV; } @@ -152,19 +153,22 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, fru_addr, buf, sizeof(buf)); if (len != 8) { - DRM_ERROR("Couldn't read the IPMI Common Header: %d", len); + dev_err(adev->dev, "Couldn't read the IPMI Common Header: %d", + len); return len < 0 ? len : -EIO; } if (buf[0] != 1) { - DRM_ERROR("Bad IPMI Common Header version: 0x%02x", buf[0]); + dev_err(adev->dev, "Bad IPMI Common Header version: 0x%02x", + buf[0]); return -EIO; } for (csum = 0; len > 0; len--) csum += buf[len - 1]; if (csum) { - DRM_ERROR("Bad IPMI Common Header checksum: 0x%02x", csum); + dev_err(adev->dev, "Bad IPMI Common Header checksum: 0x%02x", + csum); return -EIO; } @@ -179,12 +183,14 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) /* Read the header of the PIA. */ len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, buf, 3); if (len != 3) { - DRM_ERROR("Couldn't read the Product Info Area header: %d", len); + dev_err(adev->dev, + "Couldn't read the Product Info Area header: %d", len); return len < 0 ? len : -EIO; } if (buf[0] != 1) { - DRM_ERROR("Bad IPMI Product Info Area version: 0x%02x", buf[0]); + dev_err(adev->dev, "Bad IPMI Product Info Area version: 0x%02x", + buf[0]); return -EIO; } @@ -197,14 +203,16 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, pia, size); if (len != size) { kfree(pia); - DRM_ERROR("Couldn't read the Product Info Area: %d", len); + dev_err(adev->dev, "Couldn't read the Product Info Area: %d", + len); return len < 0 ? len : -EIO; } for (csum = 0; size > 0; size--) csum += pia[size - 1]; if (csum) { - DRM_ERROR("Bad Product Info Area checksum: 0x%02x", csum); + dev_err(adev->dev, "Bad Product Info Area checksum: 0x%02x", + csum); kfree(pia); return -EIO; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 69429df09477..6626a6e64ff5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -36,6 +36,7 @@ #include <drm/drm_exec.h> #include <drm/drm_gem_ttm_helper.h> #include <drm/ttm/ttm_tt.h> +#include <drm/drm_syncobj.h> #include "amdgpu.h" #include "amdgpu_display.h" @@ -44,6 +45,114 @@ #include "amdgpu_xgmi.h" #include "amdgpu_vm.h" +static int +amdgpu_gem_add_input_fence(struct drm_file *filp, + uint64_t syncobj_handles_array, + uint32_t num_syncobj_handles) +{ + struct dma_fence *fence; + uint32_t *syncobj_handles; + int ret, i; + + if (!num_syncobj_handles) + return 0; + + syncobj_handles = memdup_user(u64_to_user_ptr(syncobj_handles_array), + size_mul(sizeof(uint32_t), num_syncobj_handles)); + if (IS_ERR(syncobj_handles)) + return PTR_ERR(syncobj_handles); + + for (i = 0; i < num_syncobj_handles; i++) { + + if (!syncobj_handles[i]) { + ret = -EINVAL; + goto free_memdup; + } + + ret = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 0, &fence); + if (ret) + goto free_memdup; + + dma_fence_wait(fence, false); + + /* TODO: optimize async handling */ + dma_fence_put(fence); + } + +free_memdup: + kfree(syncobj_handles); + return ret; +} + +static int +amdgpu_gem_update_timeline_node(struct drm_file *filp, + uint32_t syncobj_handle, + uint64_t point, + struct drm_syncobj **syncobj, + struct dma_fence_chain **chain) +{ + if (!syncobj_handle) + return 0; + + /* Find the sync object */ + *syncobj = drm_syncobj_find(filp, syncobj_handle); + if (!*syncobj) + return -ENOENT; + + if (!point) + return 0; + + /* Allocate the chain node */ + *chain = dma_fence_chain_alloc(); + if (!*chain) { + drm_syncobj_put(*syncobj); + return -ENOMEM; + } + + return 0; +} + +static void +amdgpu_gem_update_bo_mapping(struct drm_file *filp, + struct amdgpu_bo_va *bo_va, + uint32_t operation, + uint64_t point, + struct dma_fence *fence, + struct drm_syncobj *syncobj, + struct dma_fence_chain *chain) +{ + struct amdgpu_bo *bo = bo_va ? bo_va->base.bo : NULL; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; + struct dma_fence *last_update; + + if (!syncobj) + return; + + /* Find the last update fence */ + switch (operation) { + case AMDGPU_VA_OP_MAP: + case AMDGPU_VA_OP_REPLACE: + if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)) + last_update = vm->last_update; + else + last_update = bo_va->last_pt_update; + break; + case AMDGPU_VA_OP_UNMAP: + case AMDGPU_VA_OP_CLEAR: + last_update = fence; + break; + default: + return; + } + + /* Add fence to timeline */ + if (!point) + drm_syncobj_replace_fence(syncobj, last_update); + else + drm_syncobj_add_point(syncobj, chain, last_update, point); +} + static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf) { struct ttm_buffer_object *bo = vmf->vma->vm_private_data; @@ -184,6 +293,15 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, bo_va = amdgpu_vm_bo_add(adev, vm, abo); else ++bo_va->ref_count; + + /* attach gfx eviction fence */ + r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo); + if (r) { + DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n"); + amdgpu_bo_unreserve(abo); + return r; + } + amdgpu_bo_unreserve(abo); /* Validate and add eviction fence to DMABuf imports with dynamic @@ -199,8 +317,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, */ if (!vm->is_compute_context || !vm->process_info) return 0; - if (!obj->import_attach || - !dma_buf_is_dynamic(obj->import_attach->dmabuf)) + if (!drm_gem_is_imported(obj) || !dma_buf_is_dynamic(obj->dma_buf)) return 0; mutex_lock_nested(&vm->process_info->lock, 1); if (!WARN_ON(!vm->process_info->eviction_fence)) { @@ -211,7 +328,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, dev_warn(adev->dev, "validate_and_fence failed: %d\n", r); if (ti) { - dev_warn(adev->dev, "pid %d\n", ti->pid); + dev_warn(adev->dev, "pid %d\n", ti->task.pid); amdgpu_vm_put_task_info(ti); } } @@ -247,6 +364,9 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, goto out_unlock; } + if (!amdgpu_vm_is_bo_always_valid(vm, bo)) + amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo); + bo_va = amdgpu_vm_bo_find(vm, bo); if (!bo_va || --bo_va->ref_count) goto out_unlock; @@ -321,10 +441,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, uint32_t handle, initial_domain; int r; - /* reject DOORBELLs until userspace code to use it is available */ - if (args->in.domains & AMDGPU_GEM_DOMAIN_DOORBELL) - return -EINVAL; - /* reject invalid gem flags */ if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | @@ -638,18 +754,23 @@ out: * * Update the bo_va directly after setting its address. Errors are not * vital here, so they are not reported back to userspace. + * + * Returns resulting fence if freed BO(s) got cleared from the PT. + * otherwise stub fence in case of error. */ -static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_bo_va *bo_va, - uint32_t operation) +static struct dma_fence * +amdgpu_gem_va_update_vm(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_bo_va *bo_va, + uint32_t operation) { + struct dma_fence *fence = dma_fence_get_stub(); int r; if (!amdgpu_vm_ready(vm)) - return; + return fence; - r = amdgpu_vm_clear_freed(adev, vm, NULL); + r = amdgpu_vm_clear_freed(adev, vm, &fence); if (r) goto error; @@ -665,6 +786,8 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, error: if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); + + return fence; } /** @@ -713,6 +836,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_bo *abo; struct amdgpu_bo_va *bo_va; + struct drm_syncobj *timeline_syncobj = NULL; + struct dma_fence_chain *timeline_chain = NULL; + struct dma_fence *fence; struct drm_exec exec; uint64_t va_flags; uint64_t vm_size; @@ -774,6 +900,12 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, abo = NULL; } + r = amdgpu_gem_add_input_fence(filp, + args->input_fence_syncobj_handles, + args->num_syncobj_handles); + if (r) + goto error_put_gobj; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { @@ -802,6 +934,14 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, bo_va = NULL; } + r = amdgpu_gem_update_timeline_node(filp, + args->vm_timeline_syncobj_out, + args->vm_timeline_point, + &timeline_syncobj, + &timeline_chain); + if (r) + goto error; + switch (args->operation) { case AMDGPU_VA_OP_MAP: va_flags = amdgpu_gem_va_map_flags(adev, args->flags); @@ -827,12 +967,24 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, default: break; } - if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) - amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, - args->operation); + if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) { + fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, + args->operation); + + if (timeline_syncobj) + amdgpu_gem_update_bo_mapping(filp, bo_va, + args->operation, + args->vm_timeline_point, + fence, timeline_syncobj, + timeline_chain); + else + dma_fence_put(fence); + + } error: drm_exec_fini(&exec); +error_put_gobj: drm_gem_object_put(gobj); return r; } @@ -871,7 +1023,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, break; } case AMDGPU_GEM_OP_SET_PLACEMENT: - if (robj->tbo.base.import_attach && + if (drm_gem_is_imported(&robj->tbo.base) && args->value & AMDGPU_GEM_DOMAIN_VRAM) { r = -EINVAL; amdgpu_bo_unreserve(robj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 72af5e5a894a..c80c8f543532 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -33,6 +33,7 @@ #include "amdgpu_reset.h" #include "amdgpu_xcp.h" #include "amdgpu_xgmi.h" +#include "nvd.h" /* delay 0.1 second to enable gfx off feature */ #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) @@ -74,14 +75,15 @@ bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, adev->gfx.mec_bitmap[xcc_id].queue_bitmap); } -int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, - int me, int pipe, int queue) +static int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, + int me, int pipe, int queue) { + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ int bit = 0; bit += me * adev->gfx.me.num_pipe_per_me - * adev->gfx.me.num_queue_per_pipe; - bit += pipe * adev->gfx.me.num_queue_per_pipe; + * num_queue_per_pipe; + bit += pipe * num_queue_per_pipe; bit += queue; return bit; @@ -147,7 +149,7 @@ static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev) static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev) { if (amdgpu_compute_multipipe != -1) { - DRM_INFO("amdgpu: forcing compute pipe policy %d\n", + dev_info(adev->dev, "amdgpu: forcing compute pipe policy %d\n", amdgpu_compute_multipipe); return amdgpu_compute_multipipe == 1; } @@ -238,8 +240,8 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) { int i, queue, pipe; bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev); - int max_queues_per_me = adev->gfx.me.num_pipe_per_me * - adev->gfx.me.num_queue_per_pipe; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ + int max_queues_per_me = adev->gfx.me.num_pipe_per_me * num_queue_per_pipe; if (multipipe_policy) { /* policy: amdgpu owns the first queue per pipe at this stage @@ -247,9 +249,9 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) for (i = 0; i < max_queues_per_me; i++) { pipe = i % adev->gfx.me.num_pipe_per_me; queue = (i / adev->gfx.me.num_pipe_per_me) % - adev->gfx.me.num_queue_per_pipe; + num_queue_per_pipe; - set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue, + set_bit(pipe * num_queue_per_pipe + queue, adev->gfx.me.queue_bitmap); } } else { @@ -258,8 +260,9 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) } /* update the number of active graphics rings */ - adev->gfx.num_gfx_rings = - bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); + if (adev->gfx.num_gfx_rings) + adev->gfx.num_gfx_rings = + bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); } static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, @@ -671,7 +674,7 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) * generation exposes more than 64 queues. If so, the * definition of queue_mask needs updating */ if (WARN_ON(i > (sizeof(queue_mask)*8))) { - DRM_ERROR("Invalid KCQ enabled: %d\n", i); + dev_err(adev->dev, "Invalid KCQ enabled: %d\n", i); break; } @@ -680,15 +683,15 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) amdgpu_device_flush_hdp(adev, NULL); - DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe, - kiq_ring->queue); + dev_info(adev->dev, "kiq ring mec %d pipe %d q %d\n", kiq_ring->me, + kiq_ring->pipe, kiq_ring->queue); spin_lock(&kiq->ring_lock); r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * adev->gfx.num_compute_rings + kiq->pmf->set_resources_size); if (r) { - DRM_ERROR("Failed to lock KIQ (%d).\n", r); + dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r); spin_unlock(&kiq->ring_lock); return r; } @@ -709,7 +712,7 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) r = amdgpu_ring_test_helper(kiq_ring); spin_unlock(&kiq->ring_lock); if (r) - DRM_ERROR("KCQ enable failed\n"); + dev_err(adev->dev, "KCQ enable failed\n"); return r; } @@ -731,7 +734,7 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id) r = amdgpu_mes_map_legacy_queue(adev, &adev->gfx.gfx_ring[j]); if (r) { - DRM_ERROR("failed to map gfx queue\n"); + dev_err(adev->dev, "failed to map gfx queue\n"); return r; } } @@ -745,7 +748,7 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id) r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * adev->gfx.num_gfx_rings); if (r) { - DRM_ERROR("Failed to lock KIQ (%d).\n", r); + dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r); spin_unlock(&kiq->ring_lock); return r; } @@ -766,7 +769,7 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id) r = amdgpu_ring_test_helper(kiq_ring); spin_unlock(&kiq->ring_lock); if (r) - DRM_ERROR("KGQ enable failed\n"); + dev_err(adev->dev, "KGQ enable failed\n"); return r; } @@ -1027,7 +1030,7 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, ih_data.head = *ras_if; - DRM_ERROR("CP ECC ERROR IRQ\n"); + dev_err(adev->dev, "CP ECC ERROR IRQ\n"); amdgpu_ras_interrupt_dispatch(adev, &ih_data); return 0; } @@ -1351,6 +1354,10 @@ static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev, struct amdgpu_device *adev = drm_to_adev(ddev); int mode; + /* Only minimal precaution taken to reject requests while in reset.*/ + if (amdgpu_in_reset(adev)) + return -EPERM; + mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr, AMDGPU_XCP_FL_NONE); @@ -1394,8 +1401,14 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, return -EINVAL; } + /* Don't allow a switch while under reset */ + if (!down_read_trylock(&adev->reset_domain->sem)) + return -EPERM; + ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode); + up_read(&adev->reset_domain->sem); + if (ret) return ret; @@ -1438,9 +1451,11 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; struct drm_gpu_scheduler *sched = &ring->sched; struct drm_sched_entity entity; + static atomic_t counter; struct dma_fence *f; struct amdgpu_job *job; struct amdgpu_ib *ib; + void *owner; int i, r; /* Initialize the scheduler entity */ @@ -1451,13 +1466,21 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) goto err; } - r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL, - 64, 0, - &job); + /* + * Use some unique dummy value as the owner to make sure we execute + * the cleaner shader on each submission. The value just need to change + * for each submission and is otherwise meaningless. + */ + owner = (void *)(unsigned long)atomic_inc_return(&counter); + + r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner, + 64, 0, &job); if (r) goto err; job->enforce_isolation = true; + /* always run the cleaner shader */ + job->run_cleaner_shader = true; ib = &job->ibs[0]; for (i = 0; i <= ring->funcs->align_mask; ++i) @@ -1544,6 +1567,9 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, if (adev->in_suspend && !adev->in_runpm) return -EPERM; + if (adev->gfx.disable_kq) + return -EPERM; + ret = kstrtol(buf, 0, &value); if (ret) @@ -1586,7 +1612,8 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, * Provides the sysfs read interface to get the current settings of the 'enforce_isolation' * feature for each GPU partition. Reading from the 'enforce_isolation' * sysfs file returns the isolation settings for all partitions, where '0' - * indicates disabled and '1' indicates enabled. + * indicates disabled, '1' indicates enabled, and '2' indicates enabled in legacy mode, + * and '3' indicates enabled without cleaner shader. * * Return: The number of bytes read from the sysfs file. */ @@ -1621,9 +1648,12 @@ static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, * @count: The size of the input data * * This function allows control over the 'enforce_isolation' feature, which - * serializes access to the graphics engine. Writing '1' or '0' to the - * 'enforce_isolation' sysfs file enables or disables process isolation for - * each partition. The input should specify the setting for all partitions. + * serializes access to the graphics engine. Writing '0' to disable, '1' to + * enable isolation with cleaner shader, '2' to enable legacy isolation without + * cleaner shader, or '3' to enable process isolation without submitting the + * cleaner shader to the 'enforce_isolation' sysfs file sets the isolation mode + * for each partition. The input should specify the setting for all + * partitions. * * Return: The number of bytes written to the sysfs file. */ @@ -1660,13 +1690,34 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, return -EINVAL; for (i = 0; i < num_partitions; i++) { - if (partition_values[i] != 0 && partition_values[i] != 1) + if (partition_values[i] != 0 && + partition_values[i] != 1 && + partition_values[i] != 2 && + partition_values[i] != 3) return -EINVAL; } mutex_lock(&adev->enforce_isolation_mutex); - for (i = 0; i < num_partitions; i++) - adev->enforce_isolation[i] = partition_values[i]; + for (i = 0; i < num_partitions; i++) { + switch (partition_values[i]) { + case 0: + default: + adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE; + break; + case 1: + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE; + break; + case 2: + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY; + break; + case 3: + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER; + break; + } + } mutex_unlock(&adev->enforce_isolation_mutex); amdgpu_mes_update_enforce_isolation(adev); @@ -1915,39 +1966,41 @@ void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx, bool enable) { - mutex_lock(&adev->gfx.kfd_sch_mutex); + mutex_lock(&adev->gfx.userq_sch_mutex); if (enable) { /* If the count is already 0, it means there's an imbalance bug somewhere. * Note that the bug may be in a different caller than the one which triggers the * WARN_ON_ONCE. */ - if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) { + if (WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx] == 0)) { dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n"); goto unlock; } - adev->gfx.kfd_sch_req_count[idx]--; + adev->gfx.userq_sch_req_count[idx]--; - if (adev->gfx.kfd_sch_req_count[idx] == 0 && - adev->gfx.kfd_sch_inactive[idx]) { + if (adev->gfx.userq_sch_req_count[idx] == 0 && + adev->gfx.userq_sch_inactive[idx]) { schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx])); } } else { - if (adev->gfx.kfd_sch_req_count[idx] == 0) { + if (adev->gfx.userq_sch_req_count[idx] == 0) { cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work); - if (!adev->gfx.kfd_sch_inactive[idx]) { - amdgpu_amdkfd_stop_sched(adev, idx); - adev->gfx.kfd_sch_inactive[idx] = true; + if (!adev->gfx.userq_sch_inactive[idx]) { + amdgpu_userq_stop_sched_for_enforce_isolation(adev, idx); + if (adev->kfd.init_complete) + amdgpu_amdkfd_stop_sched(adev, idx); + adev->gfx.userq_sch_inactive[idx] = true; } } - adev->gfx.kfd_sch_req_count[idx]++; + adev->gfx.userq_sch_req_count[idx]++; } unlock: - mutex_unlock(&adev->gfx.kfd_sch_mutex); + mutex_unlock(&adev->gfx.userq_sch_mutex); } /** @@ -1992,12 +2045,13 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) msecs_to_jiffies(1)); } else { /* Tell KFD to resume the runqueue */ - if (adev->kfd.init_complete) { - WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]); - WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]); + WARN_ON_ONCE(!adev->gfx.userq_sch_inactive[idx]); + WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx]); + + amdgpu_userq_start_sched_for_enforce_isolation(adev, idx); + if (adev->kfd.init_complete) amdgpu_amdkfd_start_sched(adev, idx); - adev->gfx.kfd_sch_inactive[idx] = false; - } + adev->gfx.userq_sch_inactive[idx] = false; } mutex_unlock(&adev->enforce_isolation_mutex); } @@ -2021,7 +2075,7 @@ amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev, bool wait = false; mutex_lock(&adev->enforce_isolation_mutex); - if (adev->enforce_isolation[idx]) { + if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) { /* set the initial values if nothing is set */ if (!adev->gfx.enforce_isolation_jiffies[idx]) { adev->gfx.enforce_isolation_jiffies[idx] = jiffies; @@ -2088,7 +2142,7 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx); mutex_lock(&adev->enforce_isolation_mutex); - if (adev->enforce_isolation[idx]) { + if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) { if (adev->kfd.init_complete) sched_work = true; } @@ -2125,7 +2179,7 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) return; mutex_lock(&adev->enforce_isolation_mutex); - if (adev->enforce_isolation[idx]) { + if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) { if (adev->kfd.init_complete) sched_work = true; } @@ -2174,6 +2228,9 @@ void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring) enum PP_SMC_POWER_PROFILE profile; int r; + if (amdgpu_dpm_is_overdrive_enabled(adev)) + return; + if (adev->gfx.num_gfx_rings) profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D; else @@ -2204,11 +2261,84 @@ void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring) void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + + if (amdgpu_dpm_is_overdrive_enabled(adev)) + return; + atomic_dec(&ring->adev->gfx.total_submission_cnt); schedule_delayed_work(&ring->adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT); } +/** + * amdgpu_gfx_csb_preamble_start - Set CSB preamble start + * + * @buffer: This is an output variable that gets the PACKET3 preamble setup. + * + * Return: + * return the latest index. + */ +u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer) +{ + u32 count = 0; + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + buffer[count++] = cpu_to_le32(0x80000000); + buffer[count++] = cpu_to_le32(0x80000000); + + return count; +} + +/** + * amdgpu_gfx_csb_data_parser - Parser CS data + * + * @adev: amdgpu_device pointer used to get the CS data and other gfx info. + * @buffer: This is an output variable that gets the PACKET3 preamble end. + * @count: Index to start set the preemble end. + * + * Return: + * return the latest index. + */ +u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer, u32 count) +{ + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + u32 i; + + for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) { + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); + buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START); + + for (i = 0; i < ext->reg_count; i++) + buffer[count++] = cpu_to_le32(ext->extent[i]); + } + } + } + + return count; +} + +/** + * amdgpu_gfx_csb_preamble_end - Set CSB preamble end + * + * @buffer: This is an output variable that gets the PACKET3 preamble end. + * @count: Index to start set the preemble end. + */ +void amdgpu_gfx_csb_preamble_end(volatile u32 *buffer, u32 count) +{ + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); + buffer[count++] = cpu_to_le32(0); +} + /* * debugfs for to enable/disable gfx job submission to specific core. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 87e862188766..08f268dab8f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -170,10 +170,46 @@ struct amdgpu_kiq { #define AMDGPU_GFX_MAX_SE 4 #define AMDGPU_GFX_MAX_SH_PER_SE 2 +/** + * amdgpu_rb_config - Configure a single Render Backend (RB) + * + * Bad RBs are fused off and there is a harvest register the driver reads to + * determine which RB(s) are fused off so that the driver can configure the + * hardware state so that nothing gets sent to them. There are also user + * harvest registers that the driver can program to disable additional RBs, + * etc., for testing purposes. + */ struct amdgpu_rb_config { + /** + * @rb_backend_disable: + * + * The value captured from register RB_BACKEND_DISABLE indicates if the + * RB backend is disabled or not. + */ uint32_t rb_backend_disable; + + /** + * @user_rb_backend_disable: + * + * The value captured from register USER_RB_BACKEND_DISABLE indicates + * if the User RB backend is disabled or not. + */ uint32_t user_rb_backend_disable; + + /** + * @raster_config: + * + * To set up all of the states, it is necessary to have two registers + * to keep all of the states. This field holds the first register. + */ uint32_t raster_config; + + /** + * @raster_config_1: + * + * To set up all of the states, it is necessary to have two registers + * to keep all of the states. This field holds the second register. + */ uint32_t raster_config_1; }; @@ -221,6 +257,13 @@ struct amdgpu_gfx_config { uint32_t macrotile_mode_array[16]; struct gb_addr_config gb_addr_config_fields; + + /** + * @rb_config: + * + * Matrix that keeps all the Render Backend (color and depth buffer + * handling) configuration on the 3D engine. + */ struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE]; /* gfx configure feature */ @@ -305,7 +348,8 @@ struct amdgpu_gfx_funcs { void (*init_spm_golden)(struct amdgpu_device *adev); void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable); int (*get_gfx_shadow_info)(struct amdgpu_device *adev, - struct amdgpu_gfx_shadow_info *shadow_info); + struct amdgpu_gfx_shadow_info *shadow_info, + bool skip_check); enum amdgpu_gfx_partition (*query_partition_mode)(struct amdgpu_device *adev); int (*switch_partition_mode)(struct amdgpu_device *adev, @@ -474,9 +518,9 @@ struct amdgpu_gfx { bool enable_cleaner_shader; struct amdgpu_isolation_work enforce_isolation[MAX_XCP]; /* Mutex for synchronizing KFD scheduler operations */ - struct mutex kfd_sch_mutex; - u64 kfd_sch_req_count[MAX_XCP]; - bool kfd_sch_inactive[MAX_XCP]; + struct mutex userq_sch_mutex; + u64 userq_sch_req_count[MAX_XCP]; + bool userq_sch_inactive[MAX_XCP]; unsigned long enforce_isolation_jiffies[MAX_XCP]; unsigned long enforce_isolation_time[MAX_XCP]; @@ -484,6 +528,9 @@ struct amdgpu_gfx { struct delayed_work idle_work; bool workload_profile_active; struct mutex workload_profile_mutex; + + bool disable_kq; + bool disable_uq; }; struct amdgpu_gfx_ras_reg_entry { @@ -503,7 +550,7 @@ struct amdgpu_gfx_ras_mem_id_entry { #define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id))) #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid, xcc_id) ((adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid), (xcc_id))) #define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev)) -#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si))) +#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si), false)) /** * amdgpu_gfx_create_bitmask - create a bitmask @@ -550,8 +597,6 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); -int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me, - int pipe, int queue); bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); @@ -597,6 +642,9 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring); void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work); void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring); void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring); +u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer); +u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer, u32 count); +void amdgpu_gfx_csb_preamble_end(volatile u32 *buffer, u32 count); void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev); void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 464625282872..97b562a79ea8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -38,6 +38,13 @@ #include <drm/drm_drv.h> #include <drm/ttm/ttm_tt.h> +static const u64 four_gb = 0x100000000ULL; + +bool amdgpu_gmc_is_pdb0_enabled(struct amdgpu_device *adev) +{ + return adev->gmc.xgmi.connected_to_cpu || amdgpu_virt_xgmi_migrate_enabled(adev); +} + /** * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0 * @@ -251,10 +258,20 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1; mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id; mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1; - mc->gart_start = hive_vram_end + 1; + /* node_segment_size may not 4GB aligned on SRIOV, align up is needed. */ + mc->gart_start = ALIGN(hive_vram_end + 1, four_gb); mc->gart_end = mc->gart_start + mc->gart_size - 1; - mc->fb_start = hive_vram_start; - mc->fb_end = hive_vram_end; + if (amdgpu_virt_xgmi_migrate_enabled(adev)) { + /* set mc->vram_start to 0 to switch the returned GPU address of + * amdgpu_bo_create_reserved() from FB aperture to GART aperture. + */ + mc->vram_start = 0; + mc->vram_end = mc->vram_start + mc->mc_vram_size - 1; + mc->visible_vram_size = min(mc->visible_vram_size, mc->real_vram_size); + } else { + mc->fb_start = hive_vram_start; + mc->fb_end = hive_vram_end; + } dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n", mc->mc_vram_size >> 20, mc->vram_start, mc->vram_end, mc->real_vram_size >> 20); @@ -276,7 +293,6 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc, enum amdgpu_gart_placement gart_placement) { - const uint64_t four_gb = 0x100000000ULL; u64 size_af, size_bf; /*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/ u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1); @@ -699,12 +715,10 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, uint32_t flush_type, bool all_hub, uint32_t inst) { - u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : - adev->usec_timeout; struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst]; unsigned int ndw; - int r; + int r, cnt = 0; uint32_t seq; /* @@ -761,10 +775,21 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq[inst].ring_lock); - if (amdgpu_fence_wait_polling(ring, seq, usec_timeout) < 1) { + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY && + !amdgpu_reset_pending(adev->reset_domain)) { + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) { dev_err(adev->dev, "timeout waiting for kiq fence\n"); r = -ETIME; - } + } else + r = 0; } error_unlock_reset: @@ -1032,9 +1057,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev) */ u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes; u64 pde0_page_size = (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21; - u64 vram_addr = adev->vm_manager.vram_base_offset - - adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; - u64 vram_end = vram_addr + vram_size; + u64 vram_addr, vram_end; u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo); int idx; @@ -1047,6 +1070,11 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev) flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1)); flags |= AMDGPU_PDE_PTE_FLAG(adev); + vram_addr = adev->vm_manager.vram_base_offset; + if (!amdgpu_virt_xgmi_migrate_enabled(adev)) + vram_addr -= adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; + vram_end = vram_addr + vram_size; + /* The first n PDE0 entries are used as PTE, * pointing to vram */ @@ -1221,6 +1249,10 @@ static ssize_t current_memory_partition_show( struct amdgpu_device *adev = drm_to_adev(ddev); enum amdgpu_memory_partition mode; + /* Only minimal precaution taken to reject requests while in reset */ + if (amdgpu_in_reset(adev)) + return -EPERM; + mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); if ((mode >= ARRAY_SIZE(nps_desc)) || (BIT(mode) & AMDGPU_ALL_NPS_MASK) != BIT(mode)) @@ -1416,3 +1448,232 @@ bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev) return false; } + +enum amdgpu_memory_partition +amdgpu_gmc_get_vf_memory_partition(struct amdgpu_device *adev) +{ + switch (adev->gmc.num_mem_partitions) { + case 0: + return UNKNOWN_MEMORY_PARTITION_MODE; + case 1: + return AMDGPU_NPS1_PARTITION_MODE; + case 2: + return AMDGPU_NPS2_PARTITION_MODE; + case 4: + return AMDGPU_NPS4_PARTITION_MODE; + case 8: + return AMDGPU_NPS8_PARTITION_MODE; + default: + return AMDGPU_NPS1_PARTITION_MODE; + } +} + +enum amdgpu_memory_partition +amdgpu_gmc_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes) +{ + enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE; + + if (adev->nbio.funcs && + adev->nbio.funcs->get_memory_partition_mode) + mode = adev->nbio.funcs->get_memory_partition_mode(adev, + supp_modes); + else + dev_warn(adev->dev, "memory partition mode query is not supported\n"); + + return mode; +} + +enum amdgpu_memory_partition +amdgpu_gmc_query_memory_partition(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) + return amdgpu_gmc_get_vf_memory_partition(adev); + else + return amdgpu_gmc_get_memory_partition(adev, NULL); +} + +static bool amdgpu_gmc_validate_partition_info(struct amdgpu_device *adev) +{ + enum amdgpu_memory_partition mode; + u32 supp_modes; + bool valid; + + mode = amdgpu_gmc_get_memory_partition(adev, &supp_modes); + + /* Mode detected by hardware not present in supported modes */ + if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) && + !(BIT(mode - 1) & supp_modes)) + return false; + + switch (mode) { + case UNKNOWN_MEMORY_PARTITION_MODE: + case AMDGPU_NPS1_PARTITION_MODE: + valid = (adev->gmc.num_mem_partitions == 1); + break; + case AMDGPU_NPS2_PARTITION_MODE: + valid = (adev->gmc.num_mem_partitions == 2); + break; + case AMDGPU_NPS4_PARTITION_MODE: + valid = (adev->gmc.num_mem_partitions == 3 || + adev->gmc.num_mem_partitions == 4); + break; + case AMDGPU_NPS8_PARTITION_MODE: + valid = (adev->gmc.num_mem_partitions == 8); + break; + default: + valid = false; + } + + return valid; +} + +static bool amdgpu_gmc_is_node_present(int *node_ids, int num_ids, int nid) +{ + int i; + + /* Check if node with id 'nid' is present in 'node_ids' array */ + for (i = 0; i < num_ids; ++i) + if (node_ids[i] == nid) + return true; + + return false; +} + +static void +amdgpu_gmc_init_acpi_mem_ranges(struct amdgpu_device *adev, + struct amdgpu_mem_partition_info *mem_ranges) +{ + struct amdgpu_numa_info numa_info; + int node_ids[AMDGPU_MAX_MEM_RANGES]; + int num_ranges = 0, ret; + int num_xcc, xcc_id; + uint32_t xcc_mask; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + xcc_mask = (1U << num_xcc) - 1; + + for_each_inst(xcc_id, xcc_mask) { + ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info); + if (ret) + continue; + + if (numa_info.nid == NUMA_NO_NODE) { + mem_ranges[0].size = numa_info.size; + mem_ranges[0].numa.node = numa_info.nid; + num_ranges = 1; + break; + } + + if (amdgpu_gmc_is_node_present(node_ids, num_ranges, + numa_info.nid)) + continue; + + node_ids[num_ranges] = numa_info.nid; + mem_ranges[num_ranges].numa.node = numa_info.nid; + mem_ranges[num_ranges].size = numa_info.size; + ++num_ranges; + } + + adev->gmc.num_mem_partitions = num_ranges; +} + +void amdgpu_gmc_init_sw_mem_ranges(struct amdgpu_device *adev, + struct amdgpu_mem_partition_info *mem_ranges) +{ + enum amdgpu_memory_partition mode; + u32 start_addr = 0, size; + int i, r, l; + + mode = amdgpu_gmc_query_memory_partition(adev); + + switch (mode) { + case UNKNOWN_MEMORY_PARTITION_MODE: + adev->gmc.num_mem_partitions = 0; + break; + case AMDGPU_NPS1_PARTITION_MODE: + adev->gmc.num_mem_partitions = 1; + break; + case AMDGPU_NPS2_PARTITION_MODE: + adev->gmc.num_mem_partitions = 2; + break; + case AMDGPU_NPS4_PARTITION_MODE: + if (adev->flags & AMD_IS_APU) + adev->gmc.num_mem_partitions = 3; + else + adev->gmc.num_mem_partitions = 4; + break; + case AMDGPU_NPS8_PARTITION_MODE: + adev->gmc.num_mem_partitions = 8; + break; + default: + adev->gmc.num_mem_partitions = 1; + break; + } + + /* Use NPS range info, if populated */ + r = amdgpu_gmc_get_nps_memranges(adev, mem_ranges, + &adev->gmc.num_mem_partitions); + if (!r) { + l = 0; + for (i = 1; i < adev->gmc.num_mem_partitions; ++i) { + if (mem_ranges[i].range.lpfn > + mem_ranges[i - 1].range.lpfn) + l = i; + } + + } else { + if (!adev->gmc.num_mem_partitions) { + dev_warn(adev->dev, + "Not able to detect NPS mode, fall back to NPS1\n"); + adev->gmc.num_mem_partitions = 1; + } + /* Fallback to sw based calculation */ + size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT; + size /= adev->gmc.num_mem_partitions; + + for (i = 0; i < adev->gmc.num_mem_partitions; ++i) { + mem_ranges[i].range.fpfn = start_addr; + mem_ranges[i].size = + ((u64)size << AMDGPU_GPU_PAGE_SHIFT); + mem_ranges[i].range.lpfn = start_addr + size - 1; + start_addr += size; + } + + l = adev->gmc.num_mem_partitions - 1; + } + + /* Adjust the last one */ + mem_ranges[l].range.lpfn = + (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1; + mem_ranges[l].size = + adev->gmc.real_vram_size - + ((u64)mem_ranges[l].range.fpfn << AMDGPU_GPU_PAGE_SHIFT); +} + +int amdgpu_gmc_init_mem_ranges(struct amdgpu_device *adev) +{ + bool valid; + + adev->gmc.mem_partitions = kcalloc(AMDGPU_MAX_MEM_RANGES, + sizeof(struct amdgpu_mem_partition_info), + GFP_KERNEL); + if (!adev->gmc.mem_partitions) + return -ENOMEM; + + if (adev->gmc.is_app_apu) + amdgpu_gmc_init_acpi_mem_ranges(adev, adev->gmc.mem_partitions); + else + amdgpu_gmc_init_sw_mem_ranges(adev, adev->gmc.mem_partitions); + + if (amdgpu_sriov_vf(adev)) + valid = true; + else + valid = amdgpu_gmc_validate_partition_info(adev); + if (!valid) { + /* TODO: handle invalid case */ + dev_warn(adev->dev, + "Mem ranges not matching with hardware config\n"); + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index bd7fc123b8f9..397c6ccdb903 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -62,6 +62,9 @@ */ #define AMDGPU_GMC_FAULT_TIMEOUT 5000ULL +/* XNACK flags */ +#define AMDGPU_GMC_XNACK_FLAG_CHAIN BIT(0) + struct firmware; enum amdgpu_memory_partition { @@ -81,6 +84,8 @@ enum amdgpu_memory_partition { #define AMDGPU_GMC_INIT_RESET_NPS BIT(0) +#define AMDGPU_MAX_MEM_RANGES 8 + /* * GMC page fault information */ @@ -301,6 +306,7 @@ struct amdgpu_gmc { struct amdgpu_xgmi xgmi; struct amdgpu_irq_src ecc_irq; int noretry; + uint32_t xnack_flags; uint32_t vmid0_page_table_block_size; uint32_t vmid0_page_table_depth; @@ -390,6 +396,7 @@ static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr) return addr; } +bool amdgpu_gmc_is_pdb0_enabled(struct amdgpu_device *adev); int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev); void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level, uint64_t *addr, uint64_t *flags); @@ -451,5 +458,13 @@ int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev, int nps_mode); void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev); bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev); - +enum amdgpu_memory_partition +amdgpu_gmc_get_vf_memory_partition(struct amdgpu_device *adev); +enum amdgpu_memory_partition +amdgpu_gmc_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes); +enum amdgpu_memory_partition +amdgpu_gmc_query_memory_partition(struct amdgpu_device *adev); +int amdgpu_gmc_init_mem_ranges(struct amdgpu_device *adev); +void amdgpu_gmc_init_sw_mem_ranges(struct amdgpu_device *adev, + struct amdgpu_mem_partition_info *mem_ranges); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c index b6cf801939aa..6e02fb9ac2f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c @@ -22,6 +22,7 @@ */ #include "amdgpu.h" #include "amdgpu_ras.h" +#include <uapi/linux/kfd_ioctl.h> int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev) { @@ -46,3 +47,22 @@ int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev) /* hdp ras follows amdgpu_ras_block_late_init_default for late init */ return 0; } + +void amdgpu_hdp_generic_flush(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) { + WREG32((adev->rmmio_remap.reg_offset + + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> + 2, + 0); + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); + } else { + amdgpu_ring_emit_wreg(ring, + (adev->rmmio_remap.reg_offset + + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> + 2, + 0); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h index 7b8a6152dc8d..4cfd932b7e91 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h @@ -44,4 +44,6 @@ struct amdgpu_hdp { }; int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev); +void amdgpu_hdp_generic_flush(struct amdgpu_device *adev, + struct amdgpu_ring *ring); #endif /* __AMDGPU_HDP_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c index 8179d0814db9..57101d24422f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c @@ -24,7 +24,6 @@ * Alex Deucher */ -#include <linux/export.h> #include <linux/pci.h> #include <drm/drm_edid.h> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 2ea98ec60220..7d9bcb72e8dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -128,6 +128,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; struct dma_fence *tmp = NULL; + struct amdgpu_fence *af; bool need_ctx_switch; struct amdgpu_vm *vm; uint64_t fence_ctx; @@ -138,7 +139,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, int vmid = AMDGPU_JOB_GET_VMID(job); bool need_pipe_sync = false; unsigned int cond_exec; - unsigned int i; int r = 0; @@ -154,6 +154,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, csa_va = job->csa_va; gds_va = job->gds_va; init_shadow = job->init_shadow; + af = &job->hw_fence; + /* Save the context of the job for reset handling. + * The driver needs this so it can skip the ring + * contents for guilty contexts. + */ + af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0; } else { vm = NULL; fence_ctx = 0; @@ -161,14 +167,15 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, csa_va = 0; gds_va = 0; init_shadow = false; + af = NULL; } - if (!ring->sched.ready && !ring->is_mes_queue) { + if (!ring->sched.ready) { dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name); return -EINVAL; } - if (vm && !job->vmid && !ring->is_mes_queue) { + if (vm && !job->vmid) { dev_err(adev->dev, "VM IB without ID\n"); return -EINVAL; } @@ -282,7 +289,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr); } - r = amdgpu_fence_emit(ring, f, job, fence_flags); + r = amdgpu_fence_emit(ring, f, af, fence_flags); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vmid) @@ -304,8 +311,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH) ring->funcs->emit_wave_limit(ring, false); + /* Save the wptr associated with this fence. + * This must be last for resets to work properly + * as we need to save the wptr associated with this + * fence so we know what rings contents to backup + * after we reset the queue. + */ + amdgpu_fence_save_wptr(*f); + amdgpu_ring_ib_end(ring); amdgpu_ring_commit(ring); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 4c4e087230ac..5dd78a9cb12d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -576,8 +576,16 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) INIT_LIST_HEAD(&id_mgr->ids_lru); id_mgr->reserved_use_count = 0; - /* manage only VMIDs not used by KFD */ - id_mgr->num_ids = adev->vm_manager.first_kfd_vmid; + /* for GC <10, SDMA uses MMHUB so use first_kfd_vmid for both GC and MM */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) + /* manage only VMIDs not used by KFD */ + id_mgr->num_ids = adev->vm_manager.first_kfd_vmid; + else if (AMDGPU_IS_MMHUB0(i) || + AMDGPU_IS_MMHUB1(i)) + id_mgr->num_ids = 16; + else + /* manage only VMIDs not used by KFD */ + id_mgr->num_ids = adev->vm_manager.first_kfd_vmid; /* skip over VMID 0, since it is the system VM */ for (j = 1; j < id_mgr->num_ids; ++j) { @@ -588,7 +596,7 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) } /* alloc a default reserved vmid to enforce isolation */ for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { - if (adev->enforce_isolation[i]) + if (adev->enforce_isolation[i] != AMDGPU_ENFORCE_ISOLATION_DISABLE) amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 901f8b12c672..a6419246e9c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_ih.h" +#include "amdgpu_reset.h" /** * amdgpu_ih_ring_init - initialize the IH state @@ -217,7 +218,7 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) restart_ih: count = AMDGPU_IH_MAX_NUM_IVS; - DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr); + dev_dbg(adev->dev, "%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr); /* Order reading of wptr vs. reading of IH ring data */ rmb(); @@ -227,13 +228,23 @@ restart_ih: ih->rptr &= ih->ptr_mask; } - amdgpu_ih_set_rptr(adev, ih); + if (!ih->overflow) + amdgpu_ih_set_rptr(adev, ih); + wake_up_all(&ih->wait_process); /* make sure wptr hasn't changed while processing */ wptr = amdgpu_ih_get_wptr(adev, ih); if (wptr != ih->rptr) - goto restart_ih; + if (!ih->overflow) + goto restart_ih; + + if (ih->overflow) + if (amdgpu_sriov_runtime(adev)) + WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->virt.flr_work), + "Failed to queue work! at %s", + __func__); return IRQ_HANDLED; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index b0a88f92cd82..7f7ea046e209 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -72,6 +72,7 @@ struct amdgpu_ih_ring { /* For waiting on IH processing at checkpoint. */ wait_queue_head_t wait_process; uint64_t processed_timestamp; + bool overflow; }; /* return true if time stamp t2 is after t1 with 48bit wrap around */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c new file mode 100644 index 000000000000..99e1cf4fc955 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c @@ -0,0 +1,96 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_ip.h" + +static int8_t amdgpu_logical_to_dev_inst(struct amdgpu_device *adev, + enum amd_hw_ip_block_type block, + int8_t inst) +{ + int8_t dev_inst; + + switch (block) { + case GC_HWIP: + case SDMA0_HWIP: + /* Both JPEG and VCN as JPEG is only alias of VCN */ + case VCN_HWIP: + dev_inst = adev->ip_map.dev_inst[block][inst]; + break; + default: + /* For rest of the IPs, no look up required. + * Assume 'logical instance == physical instance' for all configs. */ + dev_inst = inst; + break; + } + + return dev_inst; +} + +static uint32_t amdgpu_logical_to_dev_mask(struct amdgpu_device *adev, + enum amd_hw_ip_block_type block, + uint32_t mask) +{ + uint32_t dev_mask = 0; + int8_t log_inst, dev_inst; + + while (mask) { + log_inst = ffs(mask) - 1; + dev_inst = amdgpu_logical_to_dev_inst(adev, block, log_inst); + dev_mask |= (1 << dev_inst); + mask &= ~(1 << log_inst); + } + + return dev_mask; +} + +static void amdgpu_populate_ip_map(struct amdgpu_device *adev, + enum amd_hw_ip_block_type ip_block, + uint32_t inst_mask) +{ + int l = 0, i; + + while (inst_mask) { + i = ffs(inst_mask) - 1; + adev->ip_map.dev_inst[ip_block][l++] = i; + inst_mask &= ~(1 << i); + } + for (; l < HWIP_MAX_INSTANCE; l++) + adev->ip_map.dev_inst[ip_block][l] = -1; +} + +void amdgpu_ip_map_init(struct amdgpu_device *adev) +{ + u32 ip_map[][2] = { + { GC_HWIP, adev->gfx.xcc_mask }, + { SDMA0_HWIP, adev->sdma.sdma_mask }, + { VCN_HWIP, adev->vcn.inst_mask }, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(ip_map); ++i) + amdgpu_populate_ip_map(adev, ip_map[i][0], ip_map[i][1]); + + adev->ip_map.logical_to_dev_inst = amdgpu_logical_to_dev_inst; + adev->ip_map.logical_to_dev_mask = amdgpu_logical_to_dev_mask; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h new file mode 100644 index 000000000000..2490fd322aec --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h @@ -0,0 +1,29 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_IP_H__ +#define __AMDGPU_IP_H__ + +void amdgpu_ip_map_init(struct amdgpu_device *adev); + +#endif /* __AMDGPU_IP_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 19ce4da285e8..8112ffc85995 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -142,8 +142,9 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev) r = src->funcs->set(adev, src, k, AMDGPU_IRQ_STATE_DISABLE); if (r) - DRM_ERROR("error disabling interrupt (%d)\n", - r); + dev_err(adev->dev, + "error disabling interrupt (%d)\n", + r); } } } @@ -242,7 +243,7 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev) return true; } -static void amdgpu_restore_msix(struct amdgpu_device *adev) +void amdgpu_restore_msix(struct amdgpu_device *adev) { u16 ctrl; @@ -315,7 +316,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) adev->irq.irq = irq; adev_to_drm(adev)->max_vblank_count = 0x00ffffff; - DRM_DEBUG("amdgpu: irq initialized.\n"); + dev_dbg(adev->dev, "amdgpu: irq initialized.\n"); return 0; free_vectors: @@ -461,10 +462,10 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, src_id = entry.src_id; if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) { - DRM_DEBUG("Invalid client_id in IV: %d\n", client_id); + dev_dbg(adev->dev, "Invalid client_id in IV: %d\n", client_id); } else if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) { - DRM_DEBUG("Invalid src_id in IV: %d\n", src_id); + dev_dbg(adev->dev, "Invalid src_id in IV: %d\n", src_id); } else if (((client_id == AMDGPU_IRQ_CLIENTID_LEGACY) || (client_id == SOC15_IH_CLIENTID_ISP)) && @@ -472,18 +473,21 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, generic_handle_domain_irq(adev->irq.domain, src_id); } else if (!adev->irq.client[client_id].sources) { - DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n", - client_id, src_id); + dev_dbg(adev->dev, + "Unregistered interrupt client_id: %d src_id: %d\n", + client_id, src_id); } else if ((src = adev->irq.client[client_id].sources[src_id])) { r = src->funcs->process(adev, src, &entry); if (r < 0) - DRM_ERROR("error processing interrupt (%d)\n", r); + dev_err(adev->dev, "error processing interrupt (%d)\n", + r); else if (r) handled = true; } else { - DRM_DEBUG("Unregistered interrupt src_id: %d of client_id:%d\n", + dev_dbg(adev->dev, + "Unregistered interrupt src_id: %d of client_id:%d\n", src_id, client_id); } @@ -619,6 +623,10 @@ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src, int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned int type) { + /* When the threshold is reached,the interrupt source may not be enabled.return -EINVAL */ + if (amdgpu_ras_is_rma(adev) && !amdgpu_irq_enabled(adev, src, type)) + return -EINVAL; + if (!adev->irq.installed) return -ENOENT; @@ -725,10 +733,10 @@ static const struct irq_domain_ops amdgpu_hw_irqdomain_ops = { */ int amdgpu_irq_add_domain(struct amdgpu_device *adev) { - adev->irq.domain = irq_domain_add_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID, - &amdgpu_hw_irqdomain_ops, adev); + adev->irq.domain = irq_domain_create_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID, + &amdgpu_hw_irqdomain_ops, adev); if (!adev->irq.domain) { - DRM_ERROR("GPU irq add domain failed\n"); + dev_err(adev->dev, "GPU irq add domain failed\n"); return -ENODEV; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index 04c0b4fa17a4..9f0417456abd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -146,5 +146,6 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev); int amdgpu_irq_add_domain(struct amdgpu_device *adev); void amdgpu_irq_remove_domain(struct amdgpu_device *adev); unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id); +void amdgpu_restore_msix(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c index 43fc941dfa57..9cddbf50442a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c @@ -33,6 +33,8 @@ #include "isp_v4_1_0.h" #include "isp_v4_1_1.h" +#define ISP_MC_ADDR_ALIGN (1024 * 32) + /** * isp_hw_init - start and test isp block * @@ -141,6 +143,179 @@ static int isp_set_powergating_state(struct amdgpu_ip_block *ip_block, return 0; } +static int is_valid_isp_device(struct device *isp_parent, struct device *amdgpu_dev) +{ + if (isp_parent != amdgpu_dev) + return -EINVAL; + + return 0; +} + +/** + * isp_user_buffer_alloc - create user buffer object (BO) for isp + * + * @dev: isp device handle + * @dmabuf: DMABUF handle for isp buffer allocated in system memory + * @buf_obj: GPU buffer object handle to initialize + * @buf_addr: GPU addr of the pinned BO to initialize + * + * Imports isp DMABUF to allocate and pin a user BO for isp internal use. It does + * GART alloc to generate GPU addr for BO to make it accessible through the + * GART aperture for ISP HW. + * + * This function is exported to allow the V4L2 isp device external to drm device + * to create and access the isp user BO. + * + * Returns: + * 0 on success, negative error code otherwise. + */ +int isp_user_buffer_alloc(struct device *dev, void *dmabuf, + void **buf_obj, u64 *buf_addr) +{ + struct platform_device *ispdev = to_platform_device(dev); + const struct isp_platform_data *isp_pdata; + struct amdgpu_device *adev; + struct mfd_cell *mfd_cell; + struct amdgpu_bo *bo; + u64 gpu_addr; + int ret; + + if (WARN_ON(!ispdev)) + return -ENODEV; + + if (WARN_ON(!buf_obj)) + return -EINVAL; + + if (WARN_ON(!buf_addr)) + return -EINVAL; + + mfd_cell = &ispdev->mfd_cell[0]; + if (!mfd_cell) + return -ENODEV; + + isp_pdata = mfd_cell->platform_data; + adev = isp_pdata->adev; + + ret = is_valid_isp_device(ispdev->dev.parent, adev->dev); + if (ret) + return ret; + + ret = amdgpu_bo_create_isp_user(adev, dmabuf, + AMDGPU_GEM_DOMAIN_GTT, &bo, &gpu_addr); + if (ret) { + drm_err(&adev->ddev, "failed to alloc gart user buffer (%d)", ret); + return ret; + } + + *buf_obj = (void *)bo; + *buf_addr = gpu_addr; + + return 0; +} +EXPORT_SYMBOL(isp_user_buffer_alloc); + +/** + * isp_user_buffer_free - free isp user buffer object (BO) + * + * @buf_obj: amdgpu isp user BO to free + * + * unpin and unref BO for isp internal use. + * + * This function is exported to allow the V4L2 isp device + * external to drm device to free the isp user BO. + */ +void isp_user_buffer_free(void *buf_obj) +{ + amdgpu_bo_free_isp_user(buf_obj); +} +EXPORT_SYMBOL(isp_user_buffer_free); + +/** + * isp_kernel_buffer_alloc - create kernel buffer object (BO) for isp + * + * @dev: isp device handle + * @size: size for the new BO + * @buf_obj: GPU BO handle to initialize + * @gpu_addr: GPU addr of the pinned BO + * @cpu_addr: CPU address mapping of BO + * + * Allocates and pins a kernel BO for internal isp firmware use. + * + * This function is exported to allow the V4L2 isp device + * external to drm device to create and access the kernel BO. + * + * Returns: + * 0 on success, negative error code otherwise. + */ +int isp_kernel_buffer_alloc(struct device *dev, u64 size, + void **buf_obj, u64 *gpu_addr, void **cpu_addr) +{ + struct platform_device *ispdev = to_platform_device(dev); + struct amdgpu_bo **bo = (struct amdgpu_bo **)buf_obj; + const struct isp_platform_data *isp_pdata; + struct amdgpu_device *adev; + struct mfd_cell *mfd_cell; + int ret; + + if (WARN_ON(!ispdev)) + return -ENODEV; + + if (WARN_ON(!buf_obj)) + return -EINVAL; + + if (WARN_ON(!gpu_addr)) + return -EINVAL; + + if (WARN_ON(!cpu_addr)) + return -EINVAL; + + mfd_cell = &ispdev->mfd_cell[0]; + if (!mfd_cell) + return -ENODEV; + + isp_pdata = mfd_cell->platform_data; + adev = isp_pdata->adev; + + ret = is_valid_isp_device(ispdev->dev.parent, adev->dev); + if (ret) + return ret; + + ret = amdgpu_bo_create_kernel(adev, + size, + ISP_MC_ADDR_ALIGN, + AMDGPU_GEM_DOMAIN_GTT, + bo, + gpu_addr, + cpu_addr); + if (!cpu_addr || ret) { + drm_err(&adev->ddev, "failed to alloc gart kernel buffer (%d)", ret); + return ret; + } + + return 0; +} +EXPORT_SYMBOL(isp_kernel_buffer_alloc); + +/** + * isp_kernel_buffer_free - free isp kernel buffer object (BO) + * + * @buf_obj: amdgpu isp user BO to free + * @gpu_addr: GPU addr of isp kernel BO + * @cpu_addr: CPU addr of isp kernel BO + * + * unmaps and unpin a isp kernel BO. + * + * This function is exported to allow the V4L2 isp device + * external to drm device to free the kernel BO. + */ +void isp_kernel_buffer_free(void **buf_obj, u64 *gpu_addr, void **cpu_addr) +{ + struct amdgpu_bo **bo = (struct amdgpu_bo **)buf_obj; + + amdgpu_bo_free_kernel(bo, gpu_addr, cpu_addr); +} +EXPORT_SYMBOL(isp_kernel_buffer_free); + static const struct amd_ip_funcs isp_ip_funcs = { .name = "isp_ip", .early_init = isp_early_init, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h index 4f3b7b5d9c1f..d6f4ffa4c97c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h @@ -28,16 +28,13 @@ #ifndef __AMDGPU_ISP_H__ #define __AMDGPU_ISP_H__ +#include <drm/amd/isp.h> +#include <linux/pm_domain.h> + #define ISP_REGS_OFFSET_END 0x629A4 struct amdgpu_isp; -struct isp_platform_data { - void *adev; - u32 asic_type; - resource_size_t base_rmmio_size; -}; - struct isp_funcs { int (*hw_init)(struct amdgpu_isp *isp); int (*hw_fini)(struct amdgpu_isp *isp); @@ -54,6 +51,7 @@ struct amdgpu_isp { struct isp_platform_data *isp_pdata; unsigned int harvest_config; const struct firmware *fw; + struct generic_pm_domain ispgpd; }; extern const struct amdgpu_ip_block_version isp_v4_1_0_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index acb21fc8b3ce..e6061d45f142 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -89,10 +89,10 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) { struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); struct amdgpu_job *job = to_amdgpu_job(s_job); - struct amdgpu_task_info *ti; + struct drm_wedge_task_info *info = NULL; + struct amdgpu_task_info *ti = NULL; struct amdgpu_device *adev = ring->adev; - int idx; - int r; + int idx, r; if (!drm_dev_enter(adev_to_drm(adev), &idx)) { dev_info(adev->dev, "%s - device unplugged skipping recovery on scheduler:%s", @@ -112,6 +112,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) amdgpu_job_core_dump(adev, job); if (amdgpu_gpu_recovery && + amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_SOFT_RESET) && amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { dev_err(adev->dev, "ring %s timeout, but soft recovered\n", s_job->sched->name); @@ -124,53 +125,30 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid); if (ti) { - dev_err(adev->dev, - "Process information: process %s pid %d thread %s pid %d\n", - ti->process_name, ti->tgid, ti->task_name, ti->pid); - amdgpu_vm_put_task_info(ti); + amdgpu_vm_print_task_info(adev, ti); + info = &ti->task; } /* attempt a per ring reset */ if (unlikely(adev->debug_disable_gpu_ring_reset)) { dev_err(adev->dev, "Ring reset disabled by debug mask\n"); - } else if (amdgpu_gpu_recovery && ring->funcs->reset) { - bool is_guilty; - - dev_err(adev->dev, "Starting %s ring reset\n", s_job->sched->name); - /* stop the scheduler, but don't mess with the - * bad job yet because if ring reset fails - * we'll fall back to full GPU reset. - */ - drm_sched_wqueue_stop(&ring->sched); - - /* for engine resets, we need to reset the engine, - * but individual queues may be unaffected. - * check here to make sure the accounting is correct. - */ - if (ring->funcs->is_guilty) - is_guilty = ring->funcs->is_guilty(ring); - else - is_guilty = true; - - if (is_guilty) - dma_fence_set_error(&s_job->s_fence->finished, -ETIME); - - r = amdgpu_ring_reset(ring, job->vmid); + } else if (amdgpu_gpu_recovery && + amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) && + ring->funcs->reset) { + dev_err(adev->dev, "Starting %s ring reset\n", + s_job->sched->name); + r = amdgpu_ring_reset(ring, job->vmid, &job->hw_fence); if (!r) { - if (amdgpu_ring_sched_ready(ring)) - drm_sched_stop(&ring->sched, s_job); - if (is_guilty) { - atomic_inc(&ring->adev->gpu_reset_counter); - amdgpu_fence_driver_force_completion(ring); - } - if (amdgpu_ring_sched_ready(ring)) - drm_sched_start(&ring->sched, 0); - dev_err(adev->dev, "Ring %s reset succeeded\n", ring->sched.name); - drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE); + atomic_inc(&ring->adev->gpu_reset_counter); + dev_err(adev->dev, "Ring %s reset succeeded\n", + ring->sched.name); + drm_dev_wedged_event(adev_to_drm(adev), + DRM_WEDGE_RECOVERY_NONE, info); goto exit; } - dev_err(adev->dev, "Ring %s reset failure\n", ring->sched.name); + dev_err(adev->dev, "Ring %s reset failed\n", ring->sched.name); } + dma_fence_set_error(&s_job->s_fence->finished, -ETIME); if (amdgpu_device_should_recover_gpu(ring->adev)) { @@ -198,13 +176,15 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) } exit: + amdgpu_vm_put_task_info(ti); drm_dev_exit(idx); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_sched_entity *entity, void *owner, - unsigned int num_ibs, struct amdgpu_job **job) + unsigned int num_ibs, struct amdgpu_job **job, + u64 drm_client_id) { if (num_ibs == 0) return -EINVAL; @@ -222,7 +202,8 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (!entity) return 0; - return drm_sched_job_init(&(*job)->base, entity, 1, owner); + return drm_sched_job_init(&(*job)->base, entity, 1, owner, + drm_client_id); } int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, @@ -232,7 +213,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, { int r; - r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job); + r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job, 0); if (r) return r; @@ -272,8 +253,8 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) /* Check if any fences where initialized */ if (job->base.s_fence && job->base.s_fence->finished.ops) f = &job->base.s_fence->finished; - else if (job->hw_fence.ops) - f = &job->hw_fence; + else if (job->hw_fence.base.ops) + f = &job->hw_fence.base; else f = NULL; @@ -290,10 +271,10 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->explicit_sync); /* only put the hw fence if has embedded fence */ - if (!job->hw_fence.ops) + if (!job->hw_fence.base.ops) kfree(job); else - dma_fence_put(&job->hw_fence); + dma_fence_put(&job->hw_fence.base); } void amdgpu_job_set_gang_leader(struct amdgpu_job *job, @@ -322,10 +303,10 @@ void amdgpu_job_free(struct amdgpu_job *job) if (job->gang_submit != &job->base.s_fence->scheduled) dma_fence_put(job->gang_submit); - if (!job->hw_fence.ops) + if (!job->hw_fence.base.ops) kfree(job); else - dma_fence_put(&job->hw_fence); + dma_fence_put(&job->hw_fence.base); } struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index ce6b9ba967ff..2f302266662b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -48,7 +48,7 @@ struct amdgpu_job { struct drm_sched_job base; struct amdgpu_vm *vm; struct amdgpu_sync explicit_sync; - struct dma_fence hw_fence; + struct amdgpu_fence hw_fence; struct dma_fence *gang_submit; uint32_t preamble_status; uint32_t preemption_status; @@ -78,6 +78,7 @@ struct amdgpu_job { /* enforce isolation */ bool enforce_isolation; + bool run_cleaner_shader; uint32_t num_ibs; struct amdgpu_ib ibs[]; @@ -90,7 +91,8 @@ static inline struct amdgpu_ring *amdgpu_job_ring(struct amdgpu_job *job) int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_sched_entity *entity, void *owner, - unsigned int num_ibs, struct amdgpu_job **job); + unsigned int num_ibs, struct amdgpu_job **job, + u64 drm_client_id); int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, struct drm_sched_entity *entity, void *owner, size_t size, enum amdgpu_ib_pool_type pool_type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index dda29132dfb2..82d58ac7afb0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -463,7 +463,8 @@ int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev, adev->jpeg.ip_dump = kcalloc(adev->jpeg.num_jpeg_inst * count, sizeof(uint32_t), GFP_KERNEL); if (!adev->jpeg.ip_dump) { - DRM_ERROR("Failed to allocate memory for JPEG IP Dump\n"); + dev_err(adev->dev, + "Failed to allocate memory for JPEG IP Dump\n"); return -ENOMEM; } adev->jpeg.reg_list = reg; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 27bfe9c8af06..8a76960803c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -45,6 +45,7 @@ #include "amdgpu_ras.h" #include "amdgpu_reset.h" #include "amd_pcie.h" +#include "amdgpu_userq.h" void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) { @@ -90,7 +91,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev) if (adev->rmmio == NULL) return; - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_UNLOAD)) + if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_UNLOAD)) DRM_WARN("smart shift update failed\n"); amdgpu_acpi_fini(adev); @@ -160,7 +161,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) if (acpi_status) dev_dbg(dev->dev, "Error during ACPI methods call\n"); - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_LOAD)) + if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_LOAD)) DRM_WARN("smart shift update failed\n"); out: @@ -370,6 +371,26 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, return 0; } +static int amdgpu_userq_metadata_info_gfx(struct amdgpu_device *adev, + struct drm_amdgpu_info *info, + struct drm_amdgpu_info_uq_metadata_gfx *meta) +{ + int ret = -EOPNOTSUPP; + + if (adev->gfx.funcs->get_gfx_shadow_info) { + struct amdgpu_gfx_shadow_info shadow = {}; + + adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow, true); + meta->shadow_size = shadow.shadow_size; + meta->shadow_alignment = shadow.shadow_alignment; + meta->csa_size = shadow.csa_size; + meta->csa_alignment = shadow.csa_alignment; + ret = 0; + } + + return ret; +} + static int amdgpu_hw_ip_info(struct amdgpu_device *adev, struct drm_amdgpu_info *info, struct drm_amdgpu_info_hw_ip *result) @@ -378,6 +399,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, uint32_t ib_size_alignment = 0; enum amd_ip_block_type type; unsigned int num_rings = 0; + uint32_t num_slots = 0; unsigned int i, j; if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT) @@ -387,24 +409,45 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_GFX: type = AMD_IP_BLOCK_TYPE_GFX; for (i = 0; i < adev->gfx.num_gfx_rings; i++) - if (adev->gfx.gfx_ring[i].sched.ready) + if (adev->gfx.gfx_ring[i].sched.ready && + !adev->gfx.gfx_ring[i].no_user_submission) ++num_rings; + + if (!adev->gfx.disable_uq) { + for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) + num_slots += hweight32(adev->mes.gfx_hqd_mask[i]); + } + ib_start_alignment = 32; ib_size_alignment = 32; break; case AMDGPU_HW_IP_COMPUTE: type = AMD_IP_BLOCK_TYPE_GFX; for (i = 0; i < adev->gfx.num_compute_rings; i++) - if (adev->gfx.compute_ring[i].sched.ready) + if (adev->gfx.compute_ring[i].sched.ready && + !adev->gfx.compute_ring[i].no_user_submission) ++num_rings; + + if (!adev->sdma.disable_uq) { + for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) + num_slots += hweight32(adev->mes.compute_hqd_mask[i]); + } + ib_start_alignment = 32; ib_size_alignment = 32; break; case AMDGPU_HW_IP_DMA: type = AMD_IP_BLOCK_TYPE_SDMA; for (i = 0; i < adev->sdma.num_instances; i++) - if (adev->sdma.instance[i].ring.sched.ready) + if (adev->sdma.instance[i].ring.sched.ready && + !adev->sdma.instance[i].ring.no_user_submission) ++num_rings; + + if (!adev->gfx.disable_uq) { + for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) + num_slots += hweight32(adev->mes.sdma_hqd_mask[i]); + } + ib_start_alignment = 256; ib_size_alignment = 4; break; @@ -414,7 +457,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->uvd.harvest_config & (1 << i)) continue; - if (adev->uvd.inst[i].ring.sched.ready) + if (adev->uvd.inst[i].ring.sched.ready && + !adev->uvd.inst[i].ring.no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -423,7 +467,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_VCE: type = AMD_IP_BLOCK_TYPE_VCE; for (i = 0; i < adev->vce.num_rings; i++) - if (adev->vce.ring[i].sched.ready) + if (adev->vce.ring[i].sched.ready && + !adev->vce.ring[i].no_user_submission) ++num_rings; ib_start_alignment = 256; ib_size_alignment = 4; @@ -435,7 +480,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, continue; for (j = 0; j < adev->uvd.num_enc_rings; j++) - if (adev->uvd.inst[i].ring_enc[j].sched.ready) + if (adev->uvd.inst[i].ring_enc[j].sched.ready && + !adev->uvd.inst[i].ring_enc[j].no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -447,7 +493,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->vcn.harvest_config & (1 << i)) continue; - if (adev->vcn.inst[i].ring_dec.sched.ready) + if (adev->vcn.inst[i].ring_dec.sched.ready && + !adev->vcn.inst[i].ring_dec.no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -460,7 +507,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, continue; for (j = 0; j < adev->vcn.inst[i].num_enc_rings; j++) - if (adev->vcn.inst[i].ring_enc[j].sched.ready) + if (adev->vcn.inst[i].ring_enc[j].sched.ready && + !adev->vcn.inst[i].ring_enc[j].no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -475,7 +523,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, continue; for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) - if (adev->jpeg.inst[i].ring_dec[j].sched.ready) + if (adev->jpeg.inst[i].ring_dec[j].sched.ready && + !adev->jpeg.inst[i].ring_dec[j].no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -483,7 +532,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, break; case AMDGPU_HW_IP_VPE: type = AMD_IP_BLOCK_TYPE_VPE; - if (adev->vpe.ring.sched.ready) + if (adev->vpe.ring.sched.ready && + !adev->vpe.ring.no_user_submission) ++num_rings; ib_start_alignment = 256; ib_size_alignment = 4; @@ -539,6 +589,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, } result->capabilities_flags = 0; result->available_rings = (1 << num_rings) - 1; + result->userq_num_slots = num_slots; result->ib_start_alignment = ib_start_alignment; result->ib_size_alignment = ib_size_alignment; return 0; @@ -978,6 +1029,8 @@ out: } } + dev_info->userq_ip_mask = amdgpu_userq_get_supported_ip_mask(adev); + ret = copy_to_user(out, dev_info, min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0; kfree(dev_info); @@ -1293,6 +1346,22 @@ out: return copy_to_user(out, &gpuvm_fault, min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0; } + case AMDGPU_INFO_UQ_FW_AREAS: { + struct drm_amdgpu_info_uq_metadata meta_info = {}; + + switch (info->query_hw_ip.type) { + case AMDGPU_HW_IP_GFX: + ret = amdgpu_userq_metadata_info_gfx(adev, info, &meta_info.gfx); + if (ret) + return ret; + + ret = copy_to_user(out, &meta_info, + min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0; + return 0; + default: + return -EINVAL; + } + } default: DRM_DEBUG_KMS("Invalid request %d\n", info->query); return -EINVAL; @@ -1346,6 +1415,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) if (r) goto error_pasid; + amdgpu_debugfs_vm_init(file_priv); + r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id); if (r) goto error_pasid; @@ -1376,6 +1447,14 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) mutex_init(&fpriv->bo_list_lock); idr_init_base(&fpriv->bo_list_handles, 1); + r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, file_priv, adev); + if (r) + DRM_WARN("Can't setup usermode queues, use legacy workload submission only\n"); + + r = amdgpu_eviction_fence_init(&fpriv->evf_mgr); + if (r) + goto error_vm; + amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev); file_priv->driver_priv = fpriv; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 85f774063f9b..135598502c8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -39,42 +39,6 @@ int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) PAGE_SIZE); } -static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev, - int ip_type, uint64_t *doorbell_index) -{ - unsigned int offset, found; - struct amdgpu_mes *mes = &adev->mes; - - if (ip_type == AMDGPU_RING_TYPE_SDMA) - offset = adev->doorbell_index.sdma_engine[0]; - else - offset = 0; - - found = find_next_zero_bit(mes->doorbell_bitmap, mes->num_mes_dbs, offset); - if (found >= mes->num_mes_dbs) { - DRM_WARN("No doorbell available\n"); - return -ENOSPC; - } - - set_bit(found, mes->doorbell_bitmap); - - /* Get the absolute doorbell index on BAR */ - *doorbell_index = mes->db_start_dw_offset + found * 2; - return 0; -} - -static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev, - uint32_t doorbell_index) -{ - unsigned int old, rel_index; - struct amdgpu_mes *mes = &adev->mes; - - /* Find the relative index of the doorbell in this object */ - rel_index = (doorbell_index - mes->db_start_dw_offset) / 2; - old = test_and_clear_bit(rel_index, mes->doorbell_bitmap); - WARN_ON(!old); -} - static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) { int i; @@ -83,7 +47,7 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) /* Bitmap for dynamic allocation of kernel doorbells */ mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL); if (!mes->doorbell_bitmap) { - DRM_ERROR("Failed to allocate MES doorbell bitmap\n"); + dev_err(adev->dev, "Failed to allocate MES doorbell bitmap\n"); return -ENOMEM; } @@ -126,7 +90,7 @@ static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev) int amdgpu_mes_init(struct amdgpu_device *adev) { - int i, r; + int i, r, num_pipes; adev->mes.adev = adev; @@ -142,19 +106,52 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; adev->mes.vmid_mask_mmhub = 0xffffff00; - adev->mes.vmid_mask_gfxhub = 0xffffff00; + adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xfffffffe : 0xffffff00; + + num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me; + if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES) + dev_warn(adev->dev, "more gfx pipes than supported by MES! (%d vs %d)\n", + num_pipes, AMDGPU_MES_MAX_GFX_PIPES); + + for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) { + if (i >= num_pipes) + break; + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= + IP_VERSION(12, 0, 0)) + /* + * GFX V12 has only one GFX pipe, but 8 queues in it. + * GFX pipe 0 queue 0 is being used by Kernel queue. + * Set GFX pipe 0 queue 1-7 for MES scheduling + * mask = 1111 1110b + */ + adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0xFF : 0xFE; + else + /* + * GFX pipe 0 queue 0 is being used by Kernel queue. + * Set GFX pipe 0 queue 1 for MES scheduling + * mask = 10b + */ + adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0x3 : 0x2; + } + + num_pipes = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec; + if (num_pipes > AMDGPU_MES_MAX_COMPUTE_PIPES) + dev_warn(adev->dev, "more compute pipes than supported by MES! (%d vs %d)\n", + num_pipes, AMDGPU_MES_MAX_COMPUTE_PIPES); for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { - if (i >= (adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec)) + if (i >= num_pipes) break; - adev->mes.compute_hqd_mask[i] = 0xc; + adev->mes.compute_hqd_mask[i] = adev->gfx.disable_kq ? 0xF : 0xC; } - for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) - adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe; + num_pipes = adev->sdma.num_instances; + if (num_pipes > AMDGPU_MES_MAX_SDMA_PIPES) + dev_warn(adev->dev, "more SDMA pipes than supported by MES! (%d vs %d)\n", + num_pipes, AMDGPU_MES_MAX_SDMA_PIPES); for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { - if (i >= adev->sdma.num_instances) + if (i >= num_pipes) break; adev->mes.sdma_hqd_mask[i] = 0xfc; } @@ -240,244 +237,6 @@ void amdgpu_mes_fini(struct amdgpu_device *adev) mutex_destroy(&adev->mes.mutex_hidden); } -static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q) -{ - amdgpu_bo_free_kernel(&q->mqd_obj, - &q->mqd_gpu_addr, - &q->mqd_cpu_ptr); -} - -int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, - struct amdgpu_vm *vm) -{ - struct amdgpu_mes_process *process; - int r; - - /* allocate the mes process buffer */ - process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL); - if (!process) { - DRM_ERROR("no more memory to create mes process\n"); - return -ENOMEM; - } - - /* allocate the process context bo and map it */ - r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, - &process->proc_ctx_bo, - &process->proc_ctx_gpu_addr, - &process->proc_ctx_cpu_ptr); - if (r) { - DRM_ERROR("failed to allocate process context bo\n"); - goto clean_up_memory; - } - memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - /* add the mes process to idr list */ - r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1, - GFP_KERNEL); - if (r < 0) { - DRM_ERROR("failed to lock pasid=%d\n", pasid); - goto clean_up_ctx; - } - - INIT_LIST_HEAD(&process->gang_list); - process->vm = vm; - process->pasid = pasid; - process->process_quantum = adev->mes.default_process_quantum; - process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo); - - amdgpu_mes_unlock(&adev->mes); - return 0; - -clean_up_ctx: - amdgpu_mes_unlock(&adev->mes); - amdgpu_bo_free_kernel(&process->proc_ctx_bo, - &process->proc_ctx_gpu_addr, - &process->proc_ctx_cpu_ptr); -clean_up_memory: - kfree(process); - return r; -} - -void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid) -{ - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang, *tmp1; - struct amdgpu_mes_queue *queue, *tmp2; - struct mes_remove_queue_input queue_input; - unsigned long flags; - int r; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - process = idr_find(&adev->mes.pasid_idr, pasid); - if (!process) { - DRM_WARN("pasid %d doesn't exist\n", pasid); - amdgpu_mes_unlock(&adev->mes); - return; - } - - /* Remove all queues from hardware */ - list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) { - list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) { - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - idr_remove(&adev->mes.queue_id_idr, queue->queue_id); - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - - queue_input.doorbell_offset = queue->doorbell_off; - queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; - - r = adev->mes.funcs->remove_hw_queue(&adev->mes, - &queue_input); - if (r) - DRM_WARN("failed to remove hardware queue\n"); - } - - idr_remove(&adev->mes.gang_id_idr, gang->gang_id); - } - - idr_remove(&adev->mes.pasid_idr, pasid); - amdgpu_mes_unlock(&adev->mes); - - /* free all memory allocated by the process */ - list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) { - /* free all queues in the gang */ - list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) { - amdgpu_mes_queue_free_mqd(queue); - list_del(&queue->list); - kfree(queue); - } - amdgpu_bo_free_kernel(&gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); - list_del(&gang->list); - kfree(gang); - - } - amdgpu_bo_free_kernel(&process->proc_ctx_bo, - &process->proc_ctx_gpu_addr, - &process->proc_ctx_cpu_ptr); - kfree(process); -} - -int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid, - struct amdgpu_mes_gang_properties *gprops, - int *gang_id) -{ - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang; - int r; - - /* allocate the mes gang buffer */ - gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL); - if (!gang) { - return -ENOMEM; - } - - /* allocate the gang context bo and map it to cpu space */ - r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, - &gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); - if (r) { - DRM_ERROR("failed to allocate process context bo\n"); - goto clean_up_mem; - } - memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - process = idr_find(&adev->mes.pasid_idr, pasid); - if (!process) { - DRM_ERROR("pasid %d doesn't exist\n", pasid); - r = -EINVAL; - goto clean_up_ctx; - } - - /* add the mes gang to idr list */ - r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0, - GFP_KERNEL); - if (r < 0) { - DRM_ERROR("failed to allocate idr for gang\n"); - goto clean_up_ctx; - } - - gang->gang_id = r; - *gang_id = r; - - INIT_LIST_HEAD(&gang->queue_list); - gang->process = process; - gang->priority = gprops->priority; - gang->gang_quantum = gprops->gang_quantum ? - gprops->gang_quantum : adev->mes.default_gang_quantum; - gang->global_priority_level = gprops->global_priority_level; - gang->inprocess_gang_priority = gprops->inprocess_gang_priority; - list_add_tail(&gang->list, &process->gang_list); - - amdgpu_mes_unlock(&adev->mes); - return 0; - -clean_up_ctx: - amdgpu_mes_unlock(&adev->mes); - amdgpu_bo_free_kernel(&gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); -clean_up_mem: - kfree(gang); - return r; -} - -int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id) -{ - struct amdgpu_mes_gang *gang; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - gang = idr_find(&adev->mes.gang_id_idr, gang_id); - if (!gang) { - DRM_ERROR("gang id %d doesn't exist\n", gang_id); - amdgpu_mes_unlock(&adev->mes); - return -EINVAL; - } - - if (!list_empty(&gang->queue_list)) { - DRM_ERROR("queue list is not empty\n"); - amdgpu_mes_unlock(&adev->mes); - return -EBUSY; - } - - idr_remove(&adev->mes.gang_id_idr, gang->gang_id); - list_del(&gang->list); - amdgpu_mes_unlock(&adev->mes); - - amdgpu_bo_free_kernel(&gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); - - kfree(gang); - - return 0; -} - int amdgpu_mes_suspend(struct amdgpu_device *adev) { struct mes_suspend_gang_input input; @@ -497,7 +256,7 @@ int amdgpu_mes_suspend(struct amdgpu_device *adev) r = adev->mes.funcs->suspend_gang(&adev->mes, &input); amdgpu_mes_unlock(&adev->mes); if (r) - DRM_ERROR("failed to suspend all gangs"); + dev_err(adev->dev, "failed to suspend all gangs"); return r; } @@ -521,306 +280,8 @@ int amdgpu_mes_resume(struct amdgpu_device *adev) r = adev->mes.funcs->resume_gang(&adev->mes, &input); amdgpu_mes_unlock(&adev->mes); if (r) - DRM_ERROR("failed to resume all gangs"); - - return r; -} - -static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev, - struct amdgpu_mes_queue *q, - struct amdgpu_mes_queue_properties *p) -{ - struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type]; - u32 mqd_size = mqd_mgr->mqd_size; - int r; - - r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, - &q->mqd_obj, - &q->mqd_gpu_addr, &q->mqd_cpu_ptr); - if (r) { - dev_warn(adev->dev, "failed to create queue mqd bo (%d)", r); - return r; - } - memset(q->mqd_cpu_ptr, 0, mqd_size); - - r = amdgpu_bo_reserve(q->mqd_obj, false); - if (unlikely(r != 0)) - goto clean_up; - - return 0; - -clean_up: - amdgpu_bo_free_kernel(&q->mqd_obj, - &q->mqd_gpu_addr, - &q->mqd_cpu_ptr); - return r; -} - -static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, - struct amdgpu_mes_queue *q, - struct amdgpu_mes_queue_properties *p) -{ - struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type]; - struct amdgpu_mqd_prop mqd_prop = {0}; - - mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr; - mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr; - mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr; - mqd_prop.wptr_gpu_addr = p->wptr_gpu_addr; - mqd_prop.queue_size = p->queue_size; - mqd_prop.use_doorbell = true; - mqd_prop.doorbell_index = p->doorbell_off; - mqd_prop.eop_gpu_addr = p->eop_gpu_addr; - mqd_prop.hqd_pipe_priority = p->hqd_pipe_priority; - mqd_prop.hqd_queue_priority = p->hqd_queue_priority; - mqd_prop.hqd_active = false; - - if (p->queue_type == AMDGPU_RING_TYPE_GFX || - p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { - mutex_lock(&adev->srbm_mutex); - amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0); - } - - mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop); - - if (p->queue_type == AMDGPU_RING_TYPE_GFX || - p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { - amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - } - - amdgpu_bo_unreserve(q->mqd_obj); -} - -int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, - struct amdgpu_mes_queue_properties *qprops, - int *queue_id) -{ - struct amdgpu_mes_queue *queue; - struct amdgpu_mes_gang *gang; - struct mes_add_queue_input queue_input; - unsigned long flags; - int r; - - memset(&queue_input, 0, sizeof(struct mes_add_queue_input)); - - /* allocate the mes queue buffer */ - queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL); - if (!queue) { - DRM_ERROR("Failed to allocate memory for queue\n"); - return -ENOMEM; - } - - /* Allocate the queue mqd */ - r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops); - if (r) - goto clean_up_memory; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - gang = idr_find(&adev->mes.gang_id_idr, gang_id); - if (!gang) { - DRM_ERROR("gang id %d doesn't exist\n", gang_id); - r = -EINVAL; - goto clean_up_mqd; - } - - /* add the mes gang to idr list */ - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0, - GFP_ATOMIC); - if (r < 0) { - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - goto clean_up_mqd; - } - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - *queue_id = queue->queue_id = r; - - /* allocate a doorbell index for the queue */ - r = amdgpu_mes_kernel_doorbell_get(adev, - qprops->queue_type, - &qprops->doorbell_off); - if (r) - goto clean_up_queue_id; - - /* initialize the queue mqd */ - amdgpu_mes_queue_init_mqd(adev, queue, qprops); - - /* add hw queue to mes */ - queue_input.process_id = gang->process->pasid; - - queue_input.page_table_base_addr = - adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr - - adev->gmc.vram_start; - - queue_input.process_va_start = 0; - queue_input.process_va_end = - (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT; - queue_input.process_quantum = gang->process->process_quantum; - queue_input.process_context_addr = gang->process->proc_ctx_gpu_addr; - queue_input.gang_quantum = gang->gang_quantum; - queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; - queue_input.inprocess_gang_priority = gang->inprocess_gang_priority; - queue_input.gang_global_priority_level = gang->global_priority_level; - queue_input.doorbell_offset = qprops->doorbell_off; - queue_input.mqd_addr = queue->mqd_gpu_addr; - queue_input.wptr_addr = qprops->wptr_gpu_addr; - queue_input.wptr_mc_addr = qprops->wptr_mc_addr; - queue_input.queue_type = qprops->queue_type; - queue_input.paging = qprops->paging; - queue_input.is_kfd_process = 0; - - r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); - if (r) { - DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n", - qprops->doorbell_off); - goto clean_up_doorbell; - } - - DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, " - "queue type=%d, doorbell=0x%llx\n", - gang->process->pasid, gang_id, qprops->queue_type, - qprops->doorbell_off); - - queue->ring = qprops->ring; - queue->doorbell_off = qprops->doorbell_off; - queue->wptr_gpu_addr = qprops->wptr_gpu_addr; - queue->queue_type = qprops->queue_type; - queue->paging = qprops->paging; - queue->gang = gang; - queue->ring->mqd_ptr = queue->mqd_cpu_ptr; - list_add_tail(&queue->list, &gang->queue_list); - - amdgpu_mes_unlock(&adev->mes); - return 0; - -clean_up_doorbell: - amdgpu_mes_kernel_doorbell_free(adev, qprops->doorbell_off); -clean_up_queue_id: - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - idr_remove(&adev->mes.queue_id_idr, queue->queue_id); - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); -clean_up_mqd: - amdgpu_mes_unlock(&adev->mes); - amdgpu_mes_queue_free_mqd(queue); -clean_up_memory: - kfree(queue); - return r; -} - -int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) -{ - unsigned long flags; - struct amdgpu_mes_queue *queue; - struct amdgpu_mes_gang *gang; - struct mes_remove_queue_input queue_input; - int r; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - /* remove the mes gang from idr list */ - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - - queue = idr_find(&adev->mes.queue_id_idr, queue_id); - if (!queue) { - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - amdgpu_mes_unlock(&adev->mes); - DRM_ERROR("queue id %d doesn't exist\n", queue_id); - return -EINVAL; - } - - idr_remove(&adev->mes.queue_id_idr, queue_id); - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - - DRM_DEBUG("try to remove queue, doorbell off = 0x%llx\n", - queue->doorbell_off); - - gang = queue->gang; - queue_input.doorbell_offset = queue->doorbell_off; - queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; - - r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); - if (r) - DRM_ERROR("failed to remove hardware queue, queue id = %d\n", - queue_id); - - list_del(&queue->list); - amdgpu_mes_kernel_doorbell_free(adev, queue->doorbell_off); - amdgpu_mes_unlock(&adev->mes); - - amdgpu_mes_queue_free_mqd(queue); - kfree(queue); - return 0; -} - -int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id) -{ - unsigned long flags; - struct amdgpu_mes_queue *queue; - struct amdgpu_mes_gang *gang; - struct mes_reset_queue_input queue_input; - int r; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - /* remove the mes gang from idr list */ - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - - queue = idr_find(&adev->mes.queue_id_idr, queue_id); - if (!queue) { - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - amdgpu_mes_unlock(&adev->mes); - DRM_ERROR("queue id %d doesn't exist\n", queue_id); - return -EINVAL; - } - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - - DRM_DEBUG("try to reset queue, doorbell off = 0x%llx\n", - queue->doorbell_off); - - gang = queue->gang; - queue_input.doorbell_offset = queue->doorbell_off; - queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; - - r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); - if (r) - DRM_ERROR("failed to reset hardware queue, queue id = %d\n", - queue_id); + dev_err(adev->dev, "failed to resume all gangs"); - amdgpu_mes_unlock(&adev->mes); - - return 0; -} - -int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type, - int me_id, int pipe_id, int queue_id, int vmid) -{ - struct mes_reset_queue_input queue_input; - int r; - - queue_input.queue_type = queue_type; - queue_input.use_mmio = true; - queue_input.me_id = me_id; - queue_input.pipe_id = pipe_id; - queue_input.queue_id = queue_id; - queue_input.vmid = vmid; - r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); - if (r) - DRM_ERROR("failed to reset hardware queue by mmio, queue id = %d\n", - queue_id); return r; } @@ -839,9 +300,11 @@ int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); queue_input.wptr_addr = ring->wptr_gpu_addr; + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); if (r) - DRM_ERROR("failed to map legacy queue\n"); + dev_err(adev->dev, "failed to map legacy queue\n"); return r; } @@ -862,9 +325,11 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, queue_input.trail_fence_addr = gpu_addr; queue_input.trail_fence_data = seq; + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); if (r) - DRM_ERROR("failed to unmap legacy queue\n"); + dev_err(adev->dev, "failed to unmap legacy queue\n"); return r; } @@ -874,7 +339,7 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, unsigned int vmid, bool use_mmio) { - struct mes_reset_legacy_queue_input queue_input; + struct mes_reset_queue_input queue_input; int r; memset(&queue_input, 0, sizeof(queue_input)); @@ -888,10 +353,15 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, queue_input.wptr_addr = ring->wptr_gpu_addr; queue_input.vmid = vmid; queue_input.use_mmio = use_mmio; + queue_input.is_kq = true; + if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) + queue_input.legacy_gfx = true; - r = adev->mes.funcs->reset_legacy_queue(&adev->mes, &queue_input); + amdgpu_mes_lock(&adev->mes); + r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); if (r) - DRM_ERROR("failed to reset legacy queue\n"); + dev_err(adev->dev, "failed to reset legacy queue\n"); return r; } @@ -905,7 +375,7 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) uint32_t *read_val_ptr; if (amdgpu_device_wb_get(adev, &addr_offset)) { - DRM_ERROR("critical bug! too many mes readers\n"); + dev_err(adev->dev, "critical bug! too many mes readers\n"); goto error; } read_val_gpu_addr = adev->wb.gpu_addr + (addr_offset * 4); @@ -915,13 +385,15 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) op_input.read_reg.buffer_addr = read_val_gpu_addr; if (!adev->mes.funcs->misc_op) { - DRM_ERROR("mes rreg is not supported!\n"); + dev_err(adev->dev, "mes rreg is not supported!\n"); goto error; } + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + amdgpu_mes_unlock(&adev->mes); if (r) - DRM_ERROR("failed to read reg (0x%x)\n", reg); + dev_err(adev->dev, "failed to read reg (0x%x)\n", reg); else val = *(read_val_ptr); @@ -942,14 +414,16 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev, op_input.write_reg.reg_value = val; if (!adev->mes.funcs->misc_op) { - DRM_ERROR("mes wreg is not supported!\n"); + dev_err(adev->dev, "mes wreg is not supported!\n"); r = -EINVAL; goto error; } + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + amdgpu_mes_unlock(&adev->mes); if (r) - DRM_ERROR("failed to write reg (0x%x)\n", reg); + dev_err(adev->dev, "failed to write reg (0x%x)\n", reg); error: return r; @@ -969,39 +443,16 @@ int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, op_input.wrm_reg.mask = mask; if (!adev->mes.funcs->misc_op) { - DRM_ERROR("mes reg_write_reg_wait is not supported!\n"); - r = -EINVAL; - goto error; - } - - r = adev->mes.funcs->misc_op(&adev->mes, &op_input); - if (r) - DRM_ERROR("failed to reg_write_reg_wait\n"); - -error: - return r; -} - -int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg, - uint32_t val, uint32_t mask) -{ - struct mes_misc_op_input op_input; - int r; - - op_input.op = MES_MISC_OP_WRM_REG_WAIT; - op_input.wrm_reg.reg0 = reg; - op_input.wrm_reg.ref = val; - op_input.wrm_reg.mask = mask; - - if (!adev->mes.funcs->misc_op) { - DRM_ERROR("mes reg wait is not supported!\n"); + dev_err(adev->dev, "mes reg_write_reg_wait is not supported!\n"); r = -EINVAL; goto error; } + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + amdgpu_mes_unlock(&adev->mes); if (r) - DRM_ERROR("failed to reg_write_reg_wait\n"); + dev_err(adev->dev, "failed to reg_write_reg_wait\n"); error: return r; @@ -1018,7 +469,8 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, int r; if (!adev->mes.funcs->misc_op) { - DRM_ERROR("mes set shader debugger is not supported!\n"); + dev_err(adev->dev, + "mes set shader debugger is not supported!\n"); return -EINVAL; } @@ -1042,7 +494,7 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, r = adev->mes.funcs->misc_op(&adev->mes, &op_input); if (r) - DRM_ERROR("failed to set_shader_debugger\n"); + dev_err(adev->dev, "failed to set_shader_debugger\n"); amdgpu_mes_unlock(&adev->mes); @@ -1056,7 +508,8 @@ int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, int r; if (!adev->mes.funcs->misc_op) { - DRM_ERROR("mes flush shader debugger is not supported!\n"); + dev_err(adev->dev, + "mes flush shader debugger is not supported!\n"); return -EINVAL; } @@ -1068,515 +521,19 @@ int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, r = adev->mes.funcs->misc_op(&adev->mes, &op_input); if (r) - DRM_ERROR("failed to set_shader_debugger\n"); - - amdgpu_mes_unlock(&adev->mes); - - return r; -} - -static void -amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_mes_queue_properties *props) -{ - props->queue_type = ring->funcs->type; - props->hqd_base_gpu_addr = ring->gpu_addr; - props->rptr_gpu_addr = ring->rptr_gpu_addr; - props->wptr_gpu_addr = ring->wptr_gpu_addr; - props->wptr_mc_addr = - ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs; - props->queue_size = ring->ring_size; - props->eop_gpu_addr = ring->eop_gpu_addr; - props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; - props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; - props->paging = false; - props->ring = ring; -} - -#define DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(_eng) \ -do { \ - if (id_offs < AMDGPU_MES_CTX_MAX_OFFS) \ - return offsetof(struct amdgpu_mes_ctx_meta_data, \ - _eng[ring->idx].slots[id_offs]); \ - else if (id_offs == AMDGPU_MES_CTX_RING_OFFS) \ - return offsetof(struct amdgpu_mes_ctx_meta_data, \ - _eng[ring->idx].ring); \ - else if (id_offs == AMDGPU_MES_CTX_IB_OFFS) \ - return offsetof(struct amdgpu_mes_ctx_meta_data, \ - _eng[ring->idx].ib); \ - else if (id_offs == AMDGPU_MES_CTX_PADDING_OFFS) \ - return offsetof(struct amdgpu_mes_ctx_meta_data, \ - _eng[ring->idx].padding); \ -} while(0) - -int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs) -{ - switch (ring->funcs->type) { - case AMDGPU_RING_TYPE_GFX: - DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(gfx); - break; - case AMDGPU_RING_TYPE_COMPUTE: - DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(compute); - break; - case AMDGPU_RING_TYPE_SDMA: - DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(sdma); - break; - default: - break; - } - - WARN_ON(1); - return -EINVAL; -} - -int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, - int queue_type, int idx, - struct amdgpu_mes_ctx_data *ctx_data, - struct amdgpu_ring **out) -{ - struct amdgpu_ring *ring; - struct amdgpu_mes_gang *gang; - struct amdgpu_mes_queue_properties qprops = {0}; - int r, queue_id, pasid; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - gang = idr_find(&adev->mes.gang_id_idr, gang_id); - if (!gang) { - DRM_ERROR("gang id %d doesn't exist\n", gang_id); - amdgpu_mes_unlock(&adev->mes); - return -EINVAL; - } - pasid = gang->process->pasid; - - ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL); - if (!ring) { - amdgpu_mes_unlock(&adev->mes); - return -ENOMEM; - } - - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->is_mes_queue = true; - ring->mes_ctx = ctx_data; - ring->idx = idx; - ring->no_scheduler = true; - - if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { - int offset = offsetof(struct amdgpu_mes_ctx_meta_data, - compute[ring->idx].mec_hpd); - ring->eop_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } + dev_err(adev->dev, "failed to set_shader_debugger\n"); - switch (queue_type) { - case AMDGPU_RING_TYPE_GFX: - ring->funcs = adev->gfx.gfx_ring[0].funcs; - ring->me = adev->gfx.gfx_ring[0].me; - ring->pipe = adev->gfx.gfx_ring[0].pipe; - break; - case AMDGPU_RING_TYPE_COMPUTE: - ring->funcs = adev->gfx.compute_ring[0].funcs; - ring->me = adev->gfx.compute_ring[0].me; - ring->pipe = adev->gfx.compute_ring[0].pipe; - break; - case AMDGPU_RING_TYPE_SDMA: - ring->funcs = adev->sdma.instance[0].ring.funcs; - break; - default: - BUG(); - } - - r = amdgpu_ring_init(adev, ring, 1024, NULL, 0, - AMDGPU_RING_PRIO_DEFAULT, NULL); - if (r) { - amdgpu_mes_unlock(&adev->mes); - goto clean_up_memory; - } - - amdgpu_mes_ring_to_queue_props(adev, ring, &qprops); - - dma_fence_wait(gang->process->vm->last_update, false); - dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false); amdgpu_mes_unlock(&adev->mes); - r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id); - if (r) - goto clean_up_ring; - - ring->hw_queue_id = queue_id; - ring->doorbell_index = qprops.doorbell_off; - - if (queue_type == AMDGPU_RING_TYPE_GFX) - sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id); - else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) - sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id, - queue_id); - else if (queue_type == AMDGPU_RING_TYPE_SDMA) - sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id, - queue_id); - else - BUG(); - - *out = ring; - return 0; - -clean_up_ring: - amdgpu_ring_fini(ring); -clean_up_memory: - kfree(ring); return r; } -void amdgpu_mes_remove_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring) - return; - - amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id); - del_timer_sync(&ring->fence_drv.fallback_timer); - amdgpu_ring_fini(ring); - kfree(ring); -} - uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, enum amdgpu_mes_priority_level prio) { return adev->mes.aggregated_doorbells[prio]; } -int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data) -{ - int r; - - r = amdgpu_bo_create_kernel(adev, - sizeof(struct amdgpu_mes_ctx_meta_data), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &ctx_data->meta_data_obj, - &ctx_data->meta_data_mc_addr, - &ctx_data->meta_data_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create CTX bo failed\n", r); - return r; - } - - if (!ctx_data->meta_data_obj) - return -ENOMEM; - - memset(ctx_data->meta_data_ptr, 0, - sizeof(struct amdgpu_mes_ctx_meta_data)); - - return 0; -} - -void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data) -{ - if (ctx_data->meta_data_obj) - amdgpu_bo_free_kernel(&ctx_data->meta_data_obj, - &ctx_data->meta_data_mc_addr, - &ctx_data->meta_data_ptr); -} - -int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_mes_ctx_data *ctx_data) -{ - struct amdgpu_bo_va *bo_va; - struct amdgpu_sync sync; - struct drm_exec exec; - int r; - - amdgpu_sync_create(&sync); - - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - r = drm_exec_lock_obj(&exec, - &ctx_data->meta_data_obj->tbo.base); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto error_fini_exec; - - r = amdgpu_vm_lock_pd(vm, &exec, 0); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto error_fini_exec; - } - - bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj); - if (!bo_va) { - DRM_ERROR("failed to create bo_va for meta data BO\n"); - r = -ENOMEM; - goto error_fini_exec; - } - - r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0, - sizeof(struct amdgpu_mes_ctx_meta_data), - AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | - AMDGPU_PTE_EXECUTABLE); - - if (r) { - DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r); - goto error_del_bo_va; - } - - r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) { - DRM_ERROR("failed to do vm_bo_update on meta data\n"); - goto error_del_bo_va; - } - amdgpu_sync_fence(&sync, bo_va->last_pt_update, GFP_KERNEL); - - r = amdgpu_vm_update_pdes(adev, vm, false); - if (r) { - DRM_ERROR("failed to update pdes on meta data\n"); - goto error_del_bo_va; - } - amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL); - - amdgpu_sync_wait(&sync, false); - drm_exec_fini(&exec); - - amdgpu_sync_free(&sync); - ctx_data->meta_data_va = bo_va; - return 0; - -error_del_bo_va: - amdgpu_vm_bo_del(adev, bo_va); - -error_fini_exec: - drm_exec_fini(&exec); - amdgpu_sync_free(&sync); - return r; -} - -int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data) -{ - struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va; - struct amdgpu_bo *bo = ctx_data->meta_data_obj; - struct amdgpu_vm *vm = bo_va->base.vm; - struct dma_fence *fence; - struct drm_exec exec; - long r; - - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - r = drm_exec_lock_obj(&exec, - &ctx_data->meta_data_obj->tbo.base); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto out_unlock; - - r = amdgpu_vm_lock_pd(vm, &exec, 0); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto out_unlock; - } - - amdgpu_vm_bo_del(adev, bo_va); - if (!amdgpu_vm_ready(vm)) - goto out_unlock; - - r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, - &fence); - if (r) - goto out_unlock; - if (fence) { - amdgpu_bo_fence(bo, fence, true); - fence = NULL; - } - - r = amdgpu_vm_clear_freed(adev, vm, &fence); - if (r || !fence) - goto out_unlock; - - dma_fence_wait(fence, false); - amdgpu_bo_fence(bo, fence, true); - dma_fence_put(fence); - -out_unlock: - if (unlikely(r < 0)) - dev_err(adev->dev, "failed to clear page tables (%ld)\n", r); - drm_exec_fini(&exec); - - return r; -} - -static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev, - int pasid, int *gang_id, - int queue_type, int num_queue, - struct amdgpu_ring **added_rings, - struct amdgpu_mes_ctx_data *ctx_data) -{ - struct amdgpu_ring *ring; - struct amdgpu_mes_gang_properties gprops = {0}; - int r, j; - - /* create a gang for the process */ - gprops.priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - gprops.gang_quantum = adev->mes.default_gang_quantum; - gprops.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - gprops.priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - gprops.global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - - r = amdgpu_mes_add_gang(adev, pasid, &gprops, gang_id); - if (r) { - DRM_ERROR("failed to add gang\n"); - return r; - } - - /* create queues for the gang */ - for (j = 0; j < num_queue; j++) { - r = amdgpu_mes_add_ring(adev, *gang_id, queue_type, j, - ctx_data, &ring); - if (r) { - DRM_ERROR("failed to add ring\n"); - break; - } - - DRM_INFO("ring %s was added\n", ring->name); - added_rings[j] = ring; - } - - return 0; -} - -static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings) -{ - struct amdgpu_ring *ring; - int i, r; - - for (i = 0; i < AMDGPU_MES_CTX_MAX_RINGS; i++) { - ring = added_rings[i]; - if (!ring) - continue; - - r = amdgpu_ring_test_helper(ring); - if (r) - return r; - - r = amdgpu_ring_test_ib(ring, 1000 * 10); - if (r) { - DRM_DEV_ERROR(ring->adev->dev, - "ring %s ib test failed (%d)\n", - ring->name, r); - return r; - } else - DRM_INFO("ring %s ib test pass\n", ring->name); - } - - return 0; -} - -int amdgpu_mes_self_test(struct amdgpu_device *adev) -{ - struct amdgpu_vm *vm = NULL; - struct amdgpu_mes_ctx_data ctx_data = {0}; - struct amdgpu_ring *added_rings[AMDGPU_MES_CTX_MAX_RINGS] = { NULL }; - int gang_ids[3] = {0}; - int queue_types[][2] = { { AMDGPU_RING_TYPE_GFX, 1 }, - { AMDGPU_RING_TYPE_COMPUTE, 1 }, - { AMDGPU_RING_TYPE_SDMA, 1} }; - int i, r, pasid, k = 0; - - pasid = amdgpu_pasid_alloc(16); - if (pasid < 0) { - dev_warn(adev->dev, "No more PASIDs available!"); - pasid = 0; - } - - vm = kzalloc(sizeof(*vm), GFP_KERNEL); - if (!vm) { - r = -ENOMEM; - goto error_pasid; - } - - r = amdgpu_vm_init(adev, vm, -1); - if (r) { - DRM_ERROR("failed to initialize vm\n"); - goto error_pasid; - } - - r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data); - if (r) { - DRM_ERROR("failed to alloc ctx meta data\n"); - goto error_fini; - } - - ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_BOTTOM; - r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data); - if (r) { - DRM_ERROR("failed to map ctx meta data\n"); - goto error_vm; - } - - r = amdgpu_mes_create_process(adev, pasid, vm); - if (r) { - DRM_ERROR("failed to create MES process\n"); - goto error_vm; - } - - for (i = 0; i < ARRAY_SIZE(queue_types); i++) { - /* On GFX v10.3, fw hasn't supported to map sdma queue. */ - if (amdgpu_ip_version(adev, GC_HWIP, 0) >= - IP_VERSION(10, 3, 0) && - amdgpu_ip_version(adev, GC_HWIP, 0) < - IP_VERSION(11, 0, 0) && - queue_types[i][0] == AMDGPU_RING_TYPE_SDMA) - continue; - - r = amdgpu_mes_test_create_gang_and_queues(adev, pasid, - &gang_ids[i], - queue_types[i][0], - queue_types[i][1], - &added_rings[k], - &ctx_data); - if (r) - goto error_queues; - - k += queue_types[i][1]; - } - - /* start ring test and ib test for MES queues */ - amdgpu_mes_test_queues(added_rings); - -error_queues: - /* remove all queues */ - for (i = 0; i < ARRAY_SIZE(added_rings); i++) { - if (!added_rings[i]) - continue; - amdgpu_mes_remove_ring(adev, added_rings[i]); - } - - for (i = 0; i < ARRAY_SIZE(gang_ids); i++) { - if (!gang_ids[i]) - continue; - amdgpu_mes_remove_gang(adev, gang_ids[i]); - } - - amdgpu_mes_destroy_process(adev, pasid); - -error_vm: - amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data); - -error_fini: - amdgpu_vm_fini(adev, vm); - -error_pasid: - if (pasid) - amdgpu_pasid_free(pasid); - - amdgpu_mes_ctx_free_meta_data(&ctx_data); - kfree(vm); - return 0; -} - int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) { const struct mes_firmware_header_v1_0 *mes_hdr; @@ -1690,7 +647,9 @@ static int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, goto error; } + amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + amdgpu_mes_unlock(&adev->mes); if (r) dev_err(adev->dev, "failed to change_config.\n"); @@ -1705,7 +664,7 @@ int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev) if (adev->enable_mes && adev->gfx.enable_cleaner_shader) { mutex_lock(&adev->enforce_isolation_mutex); for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { - if (adev->enforce_isolation[i]) + if (adev->enforce_isolation[i] == AMDGPU_ENFORCE_ISOLATION_ENABLE) r |= amdgpu_mes_set_enforce_isolation(adev, i, true); else r |= amdgpu_mes_set_enforce_isolation(adev, i, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index da2c9a8cb3e0..c0d2c195fe2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -111,8 +111,8 @@ struct amdgpu_mes { uint32_t vmid_mask_gfxhub; uint32_t vmid_mask_mmhub; - uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES]; uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES]; + uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES]; uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES]; uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES]; @@ -149,19 +149,6 @@ struct amdgpu_mes { }; -struct amdgpu_mes_process { - int pasid; - struct amdgpu_vm *vm; - uint64_t pd_gpu_addr; - struct amdgpu_bo *proc_ctx_bo; - uint64_t proc_ctx_gpu_addr; - void *proc_ctx_cpu_ptr; - uint64_t process_quantum; - struct list_head gang_list; - uint32_t doorbell_index; - struct mutex doorbell_lock; -}; - struct amdgpu_mes_gang { int gang_id; int priority; @@ -248,18 +235,6 @@ struct mes_remove_queue_input { uint64_t gang_context_addr; }; -struct mes_reset_queue_input { - uint32_t doorbell_offset; - uint64_t gang_context_addr; - bool use_mmio; - uint32_t queue_type; - uint32_t me_id; - uint32_t pipe_id; - uint32_t queue_id; - uint32_t xcc_id; - uint32_t vmid; -}; - struct mes_map_legacy_queue_input { uint32_t queue_type; uint32_t doorbell_offset; @@ -291,7 +266,7 @@ struct mes_resume_gang_input { uint64_t gang_context_addr; }; -struct mes_reset_legacy_queue_input { +struct mes_reset_queue_input { uint32_t queue_type; uint32_t doorbell_offset; bool use_mmio; @@ -301,6 +276,8 @@ struct mes_reset_legacy_queue_input { uint64_t mqd_addr; uint64_t wptr_addr; uint32_t vmid; + bool legacy_gfx; + bool is_kq; }; enum mes_misc_opcode { @@ -388,9 +365,6 @@ struct amdgpu_mes_funcs { int (*misc_op)(struct amdgpu_mes *mes, struct mes_misc_op_input *input); - int (*reset_legacy_queue)(struct amdgpu_mes *mes, - struct mes_reset_legacy_queue_input *input); - int (*reset_hw_queue)(struct amdgpu_mes *mes, struct mes_reset_queue_input *input); }; @@ -398,32 +372,13 @@ struct amdgpu_mes_funcs { #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) #define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev)) -int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs); - int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe); int amdgpu_mes_init(struct amdgpu_device *adev); void amdgpu_mes_fini(struct amdgpu_device *adev); -int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, - struct amdgpu_vm *vm); -void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid); - -int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid, - struct amdgpu_mes_gang_properties *gprops, - int *gang_id); -int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id); - int amdgpu_mes_suspend(struct amdgpu_device *adev); int amdgpu_mes_resume(struct amdgpu_device *adev); -int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, - struct amdgpu_mes_queue_properties *qprops, - int *queue_id); -int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id); -int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id); -int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type, - int me_id, int pipe_id, int queue_id, int vmid); - int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, @@ -438,8 +393,6 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg); int amdgpu_mes_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t val); -int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg, - uint32_t val, uint32_t mask); int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t reg1, uint32_t ref, uint32_t mask); @@ -451,27 +404,10 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, bool trap_en); int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, uint64_t process_context_addr); -int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, - int queue_type, int idx, - struct amdgpu_mes_ctx_data *ctx_data, - struct amdgpu_ring **out); -void amdgpu_mes_remove_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring); uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, enum amdgpu_mes_priority_level prio); -int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data); -void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data); -int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_mes_ctx_data *ctx_data); -int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data); - -int amdgpu_mes_self_test(struct amdgpu_device *adev); - int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev); /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c index d085687a47ea..e56ba93a8df6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -53,6 +53,15 @@ u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev) return 0; } +bool amdgpu_nbio_is_replay_cnt_supported(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev) || !adev->asic_funcs->get_pcie_replay_count || + (!adev->nbio.funcs || !adev->nbio.funcs->get_pcie_replay_count)) + return false; + + return true; +} + int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 79c2f807b9fe..b528de6a01f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -119,4 +119,6 @@ int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev); int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev); +bool amdgpu_nbio_is_replay_cnt_supported(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 80cd6f5273db..122a88294883 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -32,6 +32,7 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/dma-buf.h> +#include <linux/export.h> #include <drm/drm_drv.h> #include <drm/amdgpu_drm.h> @@ -62,7 +63,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo) amdgpu_bo_kunmap(bo); - if (bo->tbo.base.import_attach) + if (drm_gem_is_imported(&bo->tbo.base)) drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg); drm_gem_object_release(&bo->tbo.base); amdgpu_bo_unref(&bo->parent); @@ -163,8 +164,8 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) * When GTT is just an alternative to VRAM make sure that we * only use it as fallback and still try to fill up VRAM first. */ - if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM && - !(adev->flags & AMD_IS_APU)) + if (abo->tbo.resource && !(adev->flags & AMD_IS_APU) && + domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) places[c].flags |= TTM_PL_FLAG_FALLBACK; c++; } @@ -351,7 +352,6 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, return 0; } -EXPORT_SYMBOL(amdgpu_bo_create_kernel); /** * amdgpu_bo_create_isp_user - create user BO for isp @@ -420,7 +420,6 @@ error_unreserve: return r; } -EXPORT_SYMBOL(amdgpu_bo_create_isp_user); /** * amdgpu_bo_create_kernel_at - create BO for kernel use at specific location @@ -524,7 +523,6 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, if (cpu_addr) *cpu_addr = NULL; } -EXPORT_SYMBOL(amdgpu_bo_free_kernel); /** * amdgpu_bo_free_isp_user - free BO for isp use @@ -547,7 +545,6 @@ void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo) } amdgpu_bo_unref(&bo); } -EXPORT_SYMBOL(amdgpu_bo_free_isp_user); /* Validate bo size is bit bigger than the request domain */ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, @@ -939,7 +936,7 @@ int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain) domain = bo->preferred_domains & domain; /* A shared bo cannot be migrated to VRAM */ - if (bo->tbo.base.import_attach) { + if (drm_gem_is_imported(&bo->tbo.base)) { if (domain & AMDGPU_GEM_DOMAIN_GTT) domain = AMDGPU_GEM_DOMAIN_GTT; else @@ -967,7 +964,7 @@ int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain) */ domain = amdgpu_bo_get_preferred_domain(adev, domain); - if (bo->tbo.base.import_attach) + if (drm_gem_is_imported(&bo->tbo.base)) dma_buf_pin(bo->tbo.base.import_attach); /* force to pin into visible video ram */ @@ -1018,7 +1015,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo) if (bo->tbo.pin_count) return; - if (bo->tbo.base.import_attach) + if (drm_gem_is_imported(&bo->tbo.base)) dma_buf_unpin(bo->tbo.base.import_attach); if (bo->tbo.resource->mem_type == TTM_PL_VRAM) { @@ -1044,7 +1041,8 @@ static const char * const amdgpu_vram_names[] = { "GDDR6", "DDR5", "LPDDR4", - "LPDDR5" + "LPDDR5", + "HBM3E" }; /** @@ -1262,7 +1260,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, amdgpu_bo_kunmap(abo); - if (abo->tbo.base.dma_buf && !abo->tbo.base.import_attach && + if (abo->tbo.base.dma_buf && !drm_gem_is_imported(&abo->tbo.base) && old_mem && old_mem->mem_type != TTM_PL_SYSTEM) dma_buf_move_notify(abo->tbo.base.dma_buf); @@ -1472,6 +1470,26 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) } /** + * amdgpu_bo_fb_aper_addr - return FB aperture GPU offset of the VRAM bo + * @bo: amdgpu VRAM buffer object for which we query the offset + * + * Returns: + * current FB aperture GPU offset of the object. + */ +u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + uint64_t offset, fb_base; + + WARN_ON_ONCE(bo->tbo.resource->mem_type != TTM_PL_VRAM); + + fb_base = adev->gmc.fb_start; + fb_base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; + offset = (bo->tbo.resource->start << PAGE_SHIFT) + fb_base; + return amdgpu_gmc_sign_extend(offset); +} + +/** * amdgpu_bo_gpu_offset_no_check - return GPU offset of bo * @bo: amdgpu object for which we query the offset * @@ -1644,7 +1662,11 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS); amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID); amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC); - + /* Add the gem obj resv fence dump*/ + if (dma_resv_trylock(bo->tbo.base.resv)) { + dma_resv_describe(bo->tbo.base.resv, m); + dma_resv_unlock(bo->tbo.base.resv); + } seq_puts(m, "\n"); return size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 375448627f7b..c316920f3450 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -304,6 +304,7 @@ int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv, bool intr); int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr); u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); +u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo); u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo); uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo); uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index df5d5dbd7f0f..0bd51a04be79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -252,6 +252,7 @@ static int psp_early_init(struct amdgpu_ip_block *ip_block) break; case IP_VERSION(14, 0, 2): case IP_VERSION(14, 0, 3): + adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev); psp_v14_0_set_psp_funcs(psp); break; case IP_VERSION(14, 0, 5): @@ -574,9 +575,11 @@ static int psp_sw_fini(struct amdgpu_ip_block *ip_block) return 0; } -int psp_wait_for(struct psp_context *psp, uint32_t reg_index, - uint32_t reg_val, uint32_t mask, bool check_changed) +int psp_wait_for(struct psp_context *psp, uint32_t reg_index, uint32_t reg_val, + uint32_t mask, uint32_t flags) { + bool check_changed = flags & PSP_WAITREG_CHANGED; + bool verbose = !(flags & PSP_WAITREG_NOVERBOSE); uint32_t val; int i; struct amdgpu_device *adev = psp->adev; @@ -596,6 +599,11 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index, udelay(1); } + if (verbose) + dev_err(adev->dev, + "psp reg (0x%x) wait timed out, mask: %x, read: %x exp: %x", + reg_index, mask, val, reg_val); + return -ETIME; } @@ -654,6 +662,10 @@ static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id) return "BOOT_CFG"; case GFX_CMD_ID_CONFIG_SQ_PERFMON: return "CONFIG_SQ_PERFMON"; + case GFX_CMD_ID_FB_FW_RESERV_ADDR: + return "FB_FW_RESERV_ADDR"; + case GFX_CMD_ID_FB_FW_RESERV_EXT_ADDR: + return "FB_FW_RESERV_EXT_ADDR"; default: return "UNKNOWN CMD"; } @@ -871,6 +883,8 @@ static int psp_tmr_init(struct psp_context *psp) &psp->tmr_bo, &psp->tmr_mc_addr, pptr); } + if (amdgpu_virt_xgmi_migrate_enabled(psp->adev) && psp->tmr_bo) + psp->tmr_mc_addr = amdgpu_bo_fb_aper_addr(psp->tmr_bo); return ret; } @@ -984,6 +998,93 @@ int psp_get_fw_attestation_records_addr(struct psp_context *psp, return ret; } +static int psp_get_fw_reservation_info(struct psp_context *psp, + uint32_t cmd_id, + uint64_t *addr, + uint32_t *size) +{ + int ret; + uint32_t status; + struct psp_gfx_cmd_resp *cmd; + + cmd = acquire_psp_cmd_buf(psp); + + cmd->cmd_id = cmd_id; + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + if (ret) { + release_psp_cmd_buf(psp); + return ret; + } + + status = cmd->resp.status; + if (status == PSP_ERR_UNKNOWN_COMMAND) { + release_psp_cmd_buf(psp); + *addr = 0; + *size = 0; + return 0; + } + + *addr = (uint64_t)cmd->resp.uresp.fw_reserve_info.reserve_base_address_hi << 32 | + cmd->resp.uresp.fw_reserve_info.reserve_base_address_lo; + *size = cmd->resp.uresp.fw_reserve_info.reserve_size; + + release_psp_cmd_buf(psp); + + return 0; +} + +int psp_update_fw_reservation(struct psp_context *psp) +{ + int ret; + uint64_t reserv_addr, reserv_addr_ext; + uint32_t reserv_size, reserv_size_ext; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(14, 0, 2)) && + (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(14, 0, 3))) + return 0; + + ret = psp_get_fw_reservation_info(psp, GFX_CMD_ID_FB_FW_RESERV_ADDR, &reserv_addr, &reserv_size); + if (ret) + return ret; + ret = psp_get_fw_reservation_info(psp, GFX_CMD_ID_FB_FW_RESERV_EXT_ADDR, &reserv_addr_ext, &reserv_size_ext); + if (ret) + return ret; + + if (reserv_addr != adev->gmc.real_vram_size - reserv_size) { + dev_warn(adev->dev, "reserve fw region is not valid!\n"); + return 0; + } + + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL); + + reserv_size = roundup(reserv_size, SZ_1M); + + ret = amdgpu_bo_create_kernel_at(adev, reserv_addr, reserv_size, &adev->mman.fw_reserved_memory, NULL); + if (ret) { + dev_err(adev->dev, "reserve fw region failed(%d)!\n", ret); + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL); + return ret; + } + + reserv_size_ext = roundup(reserv_size_ext, SZ_1M); + + ret = amdgpu_bo_create_kernel_at(adev, reserv_addr_ext, reserv_size_ext, + &adev->mman.fw_reserved_memory_extend, NULL); + if (ret) { + dev_err(adev->dev, "reserve extend fw region failed(%d)!\n", ret); + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory_extend, NULL, NULL); + return ret; + } + + return 0; +} + static int psp_boot_config_get(struct amdgpu_device *adev, uint32_t *boot_cfg) { struct psp_context *psp = &adev->psp; @@ -1270,6 +1371,11 @@ int psp_ta_load(struct psp_context *psp, struct ta_context *context) psp_copy_fw(psp, context->bin_desc.start_addr, context->bin_desc.size_bytes); + if (amdgpu_virt_xgmi_migrate_enabled(psp->adev) && + context->mem_context.shared_bo) + context->mem_context.shared_mc_addr = + amdgpu_bo_fb_aper_addr(context->mem_context.shared_bo); + psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, context); ret = psp_cmd_submit_buf(psp, NULL, cmd, @@ -2214,7 +2320,8 @@ static int psp_securedisplay_initialize(struct psp_context *psp) if (!psp->securedisplay_context.context.bin_desc.size_bytes || !psp->securedisplay_context.context.bin_desc.start_addr) { - dev_info(psp->adev->dev, "SECUREDISPLAY: securedisplay ta ucode is not available\n"); + dev_info(psp->adev->dev, + "SECUREDISPLAY: optional securedisplay ta ucode is not available\n"); return 0; } @@ -2336,11 +2443,27 @@ bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev) return false; } +static void psp_update_gpu_addresses(struct amdgpu_device *adev) +{ + struct psp_context *psp = &adev->psp; + + if (psp->cmd_buf_bo && psp->cmd_buf_mem) { + psp->fw_pri_mc_addr = amdgpu_bo_fb_aper_addr(psp->fw_pri_bo); + psp->fence_buf_mc_addr = amdgpu_bo_fb_aper_addr(psp->fence_buf_bo); + psp->cmd_buf_mc_addr = amdgpu_bo_fb_aper_addr(psp->cmd_buf_bo); + } + if (adev->firmware.rbuf && psp->km_ring.ring_mem) + psp->km_ring.ring_mem_mc_addr = amdgpu_bo_fb_aper_addr(adev->firmware.rbuf); +} + static int psp_hw_start(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; int ret; + if (amdgpu_virt_xgmi_migrate_enabled(adev)) + psp_update_gpu_addresses(adev); + if (!amdgpu_sriov_vf(adev)) { if ((is_psp_fw_valid(psp->kdb)) && (psp->funcs->bootloader_load_kdb != NULL)) { @@ -2439,6 +2562,14 @@ static int psp_hw_start(struct psp_context *psp) return ret; } + if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + ret = psp_update_fw_reservation(psp); + if (ret) { + dev_err(adev->dev, "update fw reservation failed!\n"); + return ret; + } + } + if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) goto skip_pin_bo; @@ -3521,8 +3652,12 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) uint8_t *ucode_array_start_addr; int err = 0; - err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_sos.bin", chip_name); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_sos_kicker.bin", chip_name); + else + err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_sos.bin", chip_name); if (err) goto out; @@ -3798,8 +3933,12 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name) struct amdgpu_device *adev = psp->adev; int err; - err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_ta.bin", chip_name); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_ta_kicker.bin", chip_name); + else + err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_ta.bin", chip_name); if (err) return err; @@ -4116,8 +4255,8 @@ rel_buf: static const struct bin_attribute psp_vbflash_bin_attr = { .attr = {.name = "psp_vbflash", .mode = 0660}, .size = 0, - .write_new = amdgpu_psp_vbflash_write, - .read_new = amdgpu_psp_vbflash_read, + .write = amdgpu_psp_vbflash_write, + .read = amdgpu_psp_vbflash_read, }; /** @@ -4180,11 +4319,115 @@ static umode_t amdgpu_bin_flash_attr_is_visible(struct kobject *kobj, const struct attribute_group amdgpu_flash_attr_group = { .attrs = flash_attrs, - .bin_attrs_new = bin_flash_attrs, + .bin_attrs = bin_flash_attrs, .is_bin_visible = amdgpu_bin_flash_attr_is_visible, .is_visible = amdgpu_flash_attr_is_visible, }; +#if defined(CONFIG_DEBUG_FS) +static int psp_read_spirom_debugfs_open(struct inode *inode, struct file *filp) +{ + struct amdgpu_device *adev = filp->f_inode->i_private; + struct spirom_bo *bo_triplet; + int ret; + + /* serialize the open() file calling */ + if (!mutex_trylock(&adev->psp.mutex)) + return -EBUSY; + + /* + * make sure only one userpace process is alive for dumping so that + * only one memory buffer of AMD_VBIOS_FILE_MAX_SIZE * 2 is consumed. + * let's say the case where one process try opening the file while + * another one has proceeded to read or release. In this way, eliminate + * the use of mutex for read() or release() callback as well. + */ + if (adev->psp.spirom_dump_trip) { + mutex_unlock(&adev->psp.mutex); + return -EBUSY; + } + + bo_triplet = kzalloc(sizeof(struct spirom_bo), GFP_KERNEL); + if (!bo_triplet) { + mutex_unlock(&adev->psp.mutex); + return -ENOMEM; + } + + ret = amdgpu_bo_create_kernel(adev, AMD_VBIOS_FILE_MAX_SIZE_B * 2, + AMDGPU_GPU_PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &bo_triplet->bo, + &bo_triplet->mc_addr, + &bo_triplet->cpu_addr); + if (ret) + goto rel_trip; + + ret = psp_dump_spirom(&adev->psp, bo_triplet->mc_addr); + if (ret) + goto rel_bo; + + adev->psp.spirom_dump_trip = bo_triplet; + mutex_unlock(&adev->psp.mutex); + return 0; +rel_bo: + amdgpu_bo_free_kernel(&bo_triplet->bo, &bo_triplet->mc_addr, + &bo_triplet->cpu_addr); +rel_trip: + kfree(bo_triplet); + mutex_unlock(&adev->psp.mutex); + dev_err(adev->dev, "Trying IFWI dump fails, err = %d\n", ret); + return ret; +} + +static ssize_t psp_read_spirom_debugfs_read(struct file *filp, char __user *buf, size_t size, + loff_t *pos) +{ + struct amdgpu_device *adev = filp->f_inode->i_private; + struct spirom_bo *bo_triplet = adev->psp.spirom_dump_trip; + + if (!bo_triplet) + return -EINVAL; + + return simple_read_from_buffer(buf, + size, + pos, bo_triplet->cpu_addr, + AMD_VBIOS_FILE_MAX_SIZE_B * 2); +} + +static int psp_read_spirom_debugfs_release(struct inode *inode, struct file *filp) +{ + struct amdgpu_device *adev = filp->f_inode->i_private; + struct spirom_bo *bo_triplet = adev->psp.spirom_dump_trip; + + if (bo_triplet) { + amdgpu_bo_free_kernel(&bo_triplet->bo, &bo_triplet->mc_addr, + &bo_triplet->cpu_addr); + kfree(bo_triplet); + } + + adev->psp.spirom_dump_trip = NULL; + return 0; +} + +static const struct file_operations psp_dump_spirom_debugfs_ops = { + .owner = THIS_MODULE, + .open = psp_read_spirom_debugfs_open, + .read = psp_read_spirom_debugfs_read, + .release = psp_read_spirom_debugfs_release, + .llseek = default_llseek, +}; +#endif + +void amdgpu_psp_debugfs_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + + debugfs_create_file_size("psp_spirom_dump", 0444, minor->debugfs_root, + adev, &psp_dump_spirom_debugfs_ops, AMD_VBIOS_FILE_MAX_SIZE_B * 2); +#endif +} + const struct amd_ip_funcs psp_ip_funcs = { .name = "psp", .early_init = psp_early_init, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 8d5acc415d38..237b624aa51c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -39,6 +39,29 @@ #define PSP_TMR_ALIGNMENT 0x100000 #define PSP_FW_NAME_LEN 0x24 +/* VBIOS gfl defines */ +#define MBOX_READY_MASK 0x80000000 +#define MBOX_STATUS_MASK 0x0000FFFF +#define MBOX_COMMAND_MASK 0x00FF0000 +#define MBOX_READY_FLAG 0x80000000 +#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2 +#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3 +#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4 +#define C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_LO 0xf +#define C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_HI 0x10 +#define C2PMSG_CMD_SPI_GET_FLASH_IMAGE 0x11 + +/* Command register bit 31 set to indicate readiness */ +#define MBOX_TOS_READY_FLAG (GFX_FLAG_RESPONSE) +#define MBOX_TOS_READY_MASK (GFX_CMD_RESPONSE_MASK | GFX_CMD_STATUS_MASK) + +/* Values to check for a successful GFX_CMD response wait. Check against + * both status bits and response state - helps to detect a command failure + * or other unexpected cases like a device drop reading all 0xFFs + */ +#define MBOX_TOS_RESP_FLAG (GFX_FLAG_RESPONSE) +#define MBOX_TOS_RESP_MASK (GFX_CMD_RESPONSE_MASK | GFX_CMD_STATUS_MASK) + extern const struct attribute_group amdgpu_flash_attr_group; enum psp_shared_mem_size { @@ -107,9 +130,13 @@ enum psp_reg_prog_id { PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */ PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */ PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */ + PSP_REG_MMHUB_L1_TLB_CNTL = 25, PSP_REG_LAST }; +#define PSP_WAITREG_CHANGED BIT(0) /* check if the value has changed */ +#define PSP_WAITREG_NOVERBOSE BIT(1) /* No error verbose */ + struct psp_funcs { int (*init_microcode)(struct psp_context *psp); int (*wait_for_bootloader)(struct psp_context *psp); @@ -137,11 +164,14 @@ struct psp_funcs { int (*load_usbc_pd_fw)(struct psp_context *psp, uint64_t fw_pri_mc_addr); int (*read_usbc_pd_fw)(struct psp_context *psp, uint32_t *fw_ver); int (*update_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr); + int (*dump_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr); int (*vbflash_stat)(struct psp_context *psp); int (*fatal_error_recovery_quirk)(struct psp_context *psp); bool (*get_ras_capability)(struct psp_context *psp); bool (*is_aux_sos_load_required)(struct psp_context *psp); bool (*is_reload_needed)(struct psp_context *psp); + int (*reg_program_no_ring)(struct psp_context *psp, uint32_t val, + enum psp_reg_prog_id id); }; struct ta_funcs { @@ -319,6 +349,14 @@ struct psp_runtime_scpm_entry { enum psp_runtime_scpm_authentication scpm_status; }; +#if defined(CONFIG_DEBUG_FS) +struct spirom_bo { + struct amdgpu_bo *bo; + uint64_t mc_addr; + void *cpu_addr; +}; +#endif + struct psp_context { struct amdgpu_device *adev; struct psp_ring km_ring; @@ -406,6 +444,9 @@ struct psp_context { char *vbflash_tmp_buf; size_t vbflash_image_size; bool vbflash_done; +#if defined(CONFIG_DEBUG_FS) + struct spirom_bo *spirom_dump_trip; +#endif }; struct amdgpu_psp_funcs { @@ -464,6 +505,10 @@ struct amdgpu_psp_funcs { ((psp)->funcs->update_spirom ? \ (psp)->funcs->update_spirom((psp), fw_pri_mc_addr) : -EINVAL) +#define psp_dump_spirom(psp, fw_pri_mc_addr) \ + ((psp)->funcs->dump_spirom ? \ + (psp)->funcs->dump_spirom((psp), fw_pri_mc_addr) : -EINVAL) + #define psp_vbflash_status(psp) \ ((psp)->funcs->vbflash_stat ? \ (psp)->funcs->vbflash_stat((psp)) : -EINVAL) @@ -475,6 +520,10 @@ struct amdgpu_psp_funcs { #define psp_is_aux_sos_load_required(psp) \ ((psp)->funcs->is_aux_sos_load_required ? (psp)->funcs->is_aux_sos_load_required((psp)) : 0) +#define psp_reg_program_no_ring(psp, val, id) \ + ((psp)->funcs->reg_program_no_ring ? \ + (psp)->funcs->reg_program_no_ring((psp), val, id) : -EINVAL) + extern const struct amd_ip_funcs psp_ip_funcs; extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; @@ -486,8 +535,8 @@ extern const struct amdgpu_ip_block_version psp_v13_0_ip_block; extern const struct amdgpu_ip_block_version psp_v13_0_4_ip_block; extern const struct amdgpu_ip_block_version psp_v14_0_ip_block; -extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, - uint32_t field_val, uint32_t mask, bool check_changed); +int psp_wait_for(struct psp_context *psp, uint32_t reg_index, + uint32_t field_val, uint32_t mask, uint32_t flags); extern int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_index, uint32_t field_val, uint32_t mask, uint32_t msec_timeout); @@ -553,7 +602,7 @@ int psp_init_cap_microcode(struct psp_context *psp, const char *chip_name); int psp_get_fw_attestation_records_addr(struct psp_context *psp, uint64_t *output_ptr); - +int psp_update_fw_reservation(struct psp_context *psp); int psp_load_fw_list(struct psp_context *psp, struct amdgpu_firmware_info **ucode_list, int ucode_count); void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size); @@ -569,5 +618,9 @@ bool amdgpu_psp_get_ras_capability(struct psp_context *psp); int psp_config_sq_perfmon(struct psp_context *psp, uint32_t xcp_id, bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable); bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev); +int amdgpu_psp_reg_program_no_ring(struct psp_context *psp, uint32_t val, + enum psp_reg_prog_id id); +void amdgpu_psp_debugfs_init(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 68685aca2835..540817e296da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -77,6 +77,7 @@ const char *ras_block_string[] = { "jpeg", "ih", "mpio", + "mmsch", }; const char *ras_mca_block_string[] = { @@ -1106,6 +1107,9 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, err_info->de_count, blk_name); } } else { + if (adev->debug_disable_ce_logs) + return; + for_each_ras_error(err_node, err_data) { err_info = &err_node->err_info; mcm_info = &err_info->mcm_info; @@ -1497,6 +1501,9 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, !amdgpu_ras_get_aca_debug_mode(adev)) return -EOPNOTSUPP; + if (amdgpu_sriov_vf(adev)) + return -EOPNOTSUPP; + /* skip ras error reset in gpu reset */ if ((amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) && ((smu_funcs && smu_funcs->set_debug_mode) || @@ -2120,7 +2127,7 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) con->badpages_attr = bin_attr_gpu_vram_bad_pages; sysfs_bin_attr_init(&con->badpages_attr); bin_attrs[0] = &con->badpages_attr; - group.bin_attrs_new = bin_attrs; + group.bin_attrs = bin_attrs; } r = sysfs_create_group(&adev->dev->kobj, &group); @@ -2160,7 +2167,7 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev) /* Fatal error events are handled on host side */ if (amdgpu_sriov_vf(adev)) return; - /** + /* * If the current interrupt is caused by a non-fatal RAS error, skip * check for fatal error. For fatal errors, FED status of all devices * in XGMI hive gets set when the first device gets fatal error @@ -2850,11 +2857,27 @@ static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev, if (amdgpu_umc_pages_in_a_row(adev, err_data, bps[0].retired_page << AMDGPU_GPU_PAGE_SHIFT)) return -EINVAL; + for (i = 0; i < adev->umc.retire_unit; i++) { + err_data->err_addr[i].address = bps[0].address; + err_data->err_addr[i].mem_channel = bps[0].mem_channel; + err_data->err_addr[i].bank = bps[0].bank; + err_data->err_addr[i].err_type = bps[0].err_type; + err_data->err_addr[i].mcumc_id = bps[0].mcumc_id; + } } else { if (amdgpu_ras_mca2pa_by_idx(adev, &bps[0], err_data)) return -EINVAL; } } else { + if (bps[0].address == 0) { + /* for specific old eeprom data, mca address is not stored, + * calc it from pa + */ + if (amdgpu_umc_pa2mca(adev, bps[0].retired_page << AMDGPU_GPU_PAGE_SHIFT, + &(bps[0].address), AMDGPU_NPS1_PARTITION_MODE)) + return -EINVAL; + } + if (amdgpu_ras_mca2pa(adev, &bps[0], err_data)) { if (nps == AMDGPU_NPS1_PARTITION_MODE) memcpy(err_data->err_addr, bps, @@ -2872,6 +2895,7 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev, struct eeprom_table_record *bps, struct ras_err_data *err_data, enum amdgpu_memory_partition nps) { + int i = 0; enum amdgpu_memory_partition save_nps; save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK; @@ -2881,10 +2905,30 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev, if (amdgpu_umc_pages_in_a_row(adev, err_data, bps->retired_page << AMDGPU_GPU_PAGE_SHIFT)) return -EINVAL; + for (i = 0; i < adev->umc.retire_unit; i++) { + err_data->err_addr[i].address = bps->address; + err_data->err_addr[i].mem_channel = bps->mem_channel; + err_data->err_addr[i].bank = bps->bank; + err_data->err_addr[i].err_type = bps->err_type; + err_data->err_addr[i].mcumc_id = bps->mcumc_id; + } } else { - if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data)) - return -EINVAL; + if (bps->address) { + if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data)) + return -EINVAL; + } else { + /* for specific old eeprom data, mca address is not stored, + * calc it from pa + */ + if (amdgpu_umc_pa2mca(adev, bps->retired_page << AMDGPU_GPU_PAGE_SHIFT, + &(bps->address), AMDGPU_NPS1_PARTITION_MODE)) + return -EINVAL; + + if (amdgpu_ras_mca2pa(adev, bps, err_data)) + return -EOPNOTSUPP; + } } + return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr, adev->umc.retire_unit); } @@ -2899,7 +2943,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, &adev->psp.ras_context.ras->eeprom_control; enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; int ret = 0; - uint32_t i; + uint32_t i = 0; if (!con || !con->eh_data || !bps || pages <= 0) return 0; @@ -2920,34 +2964,36 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, mutex_lock(&con->recovery_lock); if (from_rom) { - for (i = 0; i < pages; i++) { - if (control->ras_num_recs - i >= adev->umc.retire_unit) { - if ((bps[i].address == bps[i + 1].address) && - (bps[i].mem_channel == bps[i + 1].mem_channel)) { - //deal with retire_unit records a time - ret = __amdgpu_ras_convert_rec_array_from_rom(adev, - &bps[i], &err_data, nps); - if (ret) - goto free; - i += (adev->umc.retire_unit - 1); + /* there is no pa recs in V3, so skip pa recs processing */ + if (control->tbl_hdr.version < RAS_TABLE_VER_V3) { + for (i = 0; i < pages; i++) { + if (control->ras_num_recs - i >= adev->umc.retire_unit) { + if ((bps[i].address == bps[i + 1].address) && + (bps[i].mem_channel == bps[i + 1].mem_channel)) { + /* deal with retire_unit records a time */ + ret = __amdgpu_ras_convert_rec_array_from_rom(adev, + &bps[i], &err_data, nps); + if (ret) + control->ras_num_bad_pages -= adev->umc.retire_unit; + i += (adev->umc.retire_unit - 1); + } else { + break; + } } else { break; } - } else { - break; } } for (; i < pages; i++) { ret = __amdgpu_ras_convert_rec_from_rom(adev, &bps[i], &err_data, nps); if (ret) - goto free; + control->ras_num_bad_pages -= adev->umc.retire_unit; } } else { ret = __amdgpu_ras_restore_bad_pages(adev, bps, pages); } -free: if (from_rom) kfree(err_data.err_addr); mutex_unlock(&con->recovery_lock); @@ -2975,6 +3021,15 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, return 0; } + if (!con->eeprom_control.is_eeprom_valid) { + dev_warn(adev->dev, + "Failed to save EEPROM table data because of EEPROM data corruption!"); + if (new_cnt) + *new_cnt = 0; + + return 0; + } + mutex_lock(&con->recovery_lock); control = &con->eeprom_control; data = con->eh_data; @@ -3036,21 +3091,28 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) dev_err(adev->dev, "Failed to load EEPROM table records!"); } else { if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) { - for (i = 0; i < control->ras_num_recs; i++) { - if ((control->ras_num_recs - i) >= adev->umc.retire_unit) { - if ((bps[i].address == bps[i + 1].address) && - (bps[i].mem_channel == bps[i + 1].mem_channel)) { - control->ras_num_pa_recs += adev->umc.retire_unit; - i += (adev->umc.retire_unit - 1); + /*In V3, there is no pa recs, and some cases(when address==0) may be parsed + as pa recs, so add verion check to avoid it. + */ + if (control->tbl_hdr.version < RAS_TABLE_VER_V3) { + for (i = 0; i < control->ras_num_recs; i++) { + if ((control->ras_num_recs - i) >= adev->umc.retire_unit) { + if ((bps[i].address == bps[i + 1].address) && + (bps[i].mem_channel == bps[i + 1].mem_channel)) { + control->ras_num_pa_recs += adev->umc.retire_unit; + i += (adev->umc.retire_unit - 1); + } else { + control->ras_num_mca_recs += + (control->ras_num_recs - i); + break; + } } else { - control->ras_num_mca_recs += - (control->ras_num_recs - i); + control->ras_num_mca_recs += (control->ras_num_recs - i); break; } - } else { - control->ras_num_mca_recs += (control->ras_num_recs - i); - break; } + } else { + control->ras_num_mca_recs = control->ras_num_recs; } } @@ -3259,7 +3321,6 @@ static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev, uint64_t de_queried_count; uint32_t new_detect_count, total_detect_count; uint32_t need_query_count = poison_creation_count; - bool query_data_timeout = false; enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION; memset(&info, 0, sizeof(info)); @@ -3288,21 +3349,13 @@ static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev, timeout = MAX_UMC_POISON_POLLING_TIME_ASYNC; if (timeout) { - if (!--timeout) { - query_data_timeout = true; + if (!--timeout) break; - } msleep(1); } } } while (total_detect_count < need_query_count); - if (query_data_timeout) { - dev_warn(adev->dev, "Can't find deferred error! count: %u\n", - (need_query_count - total_detect_count)); - return -ENOENT; - } - if (total_detect_count) schedule_delayed_work(&ras->page_retirement_dwork, 0); @@ -3453,16 +3506,21 @@ int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev) control = &con->eeprom_control; ret = amdgpu_ras_eeprom_init(control); - if (ret) - return ret; + control->is_eeprom_valid = !ret; if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr) control->ras_num_pa_recs = control->ras_num_recs; - if (control->ras_num_recs) { + if (adev->umc.ras && + adev->umc.ras->get_retire_flip_bits) + adev->umc.ras->get_retire_flip_bits(adev); + + if (control->ras_num_recs && control->is_eeprom_valid) { ret = amdgpu_ras_load_bad_pages(adev); - if (ret) - return ret; + if (ret) { + control->is_eeprom_valid = false; + return 0; + } amdgpu_dpm_send_hbm_bad_pages_num( adev, control->ras_num_bad_pages); @@ -3481,7 +3539,7 @@ int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev) dev_warn(adev->dev, "Failed to format RAS EEPROM data in V3 version!\n"); } - return ret; + return 0; } int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info) @@ -3690,7 +3748,8 @@ static void amdgpu_ras_query_ras_capablity_from_vbios(struct amdgpu_device *adev */ if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(2, 6, 0) || amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 0) || - amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 3)) + amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 3) || + amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(5, 0, 1)) adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | 1 << AMDGPU_RAS_BLOCK__JPEG); else @@ -3792,10 +3851,12 @@ init_ras_enabled_flag: adev->ras_hw_enabled & amdgpu_ras_mask; /* aca is disabled by default except for psp v13_0_6/v13_0_12/v13_0_14 */ - adev->aca.is_enabled = - (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || - amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || - amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)); + if (!amdgpu_sriov_vf(adev)) { + adev->aca.is_enabled = + (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)); + } /* bad page feature is not applicable to specific app platform */ if (adev->gmc.is_app_apu && @@ -4372,8 +4433,10 @@ void amdgpu_ras_clear_err_state(struct amdgpu_device *adev) struct amdgpu_ras *ras; ras = amdgpu_ras_get_context(adev); - if (ras) + if (ras) { ras->ras_err_state = 0; + ras->gpu_reset_flags = 0; + } } void amdgpu_ras_set_err_poison(struct amdgpu_device *adev, @@ -4478,8 +4541,11 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) enum ras_event_type type = RAS_EVENT_TYPE_FATAL; u64 event_id; - if (amdgpu_ras_mark_ras_event(adev, type)) + if (amdgpu_ras_mark_ras_event(adev, type)) { + dev_err(adev->dev, + "uncorrectable hardware error (ERREVENT_ATHUB_INTERRUPT) detected!\n"); return; + } event_id = amdgpu_ras_acquire_event_id(adev, type); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 764e9fa0a914..927d6bff734a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -98,6 +98,7 @@ enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__JPEG, AMDGPU_RAS_BLOCK__IH, AMDGPU_RAS_BLOCK__MPIO, + AMDGPU_RAS_BLOCK__MMSCH, AMDGPU_RAS_BLOCK__LAST, AMDGPU_RAS_BLOCK__ANY = -1 @@ -795,6 +796,12 @@ amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) { return TA_RAS_BLOCK__VCN; case AMDGPU_RAS_BLOCK__JPEG: return TA_RAS_BLOCK__JPEG; + case AMDGPU_RAS_BLOCK__IH: + return TA_RAS_BLOCK__IH; + case AMDGPU_RAS_BLOCK__MPIO: + return TA_RAS_BLOCK__MPIO; + case AMDGPU_RAS_BLOCK__MMSCH: + return TA_RAS_BLOCK__MMSCH; default: WARN_ONCE(1, "RAS ERROR: unexpected block id %d\n", block); return TA_RAS_BLOCK__UMC; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 0ea7cfaf3587..9bda9ad13f88 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -277,10 +277,11 @@ static int __write_table_header(struct amdgpu_ras_eeprom_control *control) up_read(&adev->reset_domain->sem); if (res < 0) { - DRM_ERROR("Failed to write EEPROM table header:%d", res); + dev_err(adev->dev, "Failed to write EEPROM table header:%d", + res); } else if (res < RAS_TABLE_HEADER_SIZE) { - DRM_ERROR("Short write:%d out of %d\n", - res, RAS_TABLE_HEADER_SIZE); + dev_err(adev->dev, "Short write:%d out of %d\n", res, + RAS_TABLE_HEADER_SIZE); res = -EIO; } else { res = 0; @@ -323,7 +324,8 @@ static int __write_table_ras_info(struct amdgpu_ras_eeprom_control *control) buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL); if (!buf) { - DRM_ERROR("Failed to alloc buf to write table ras info\n"); + dev_err(adev->dev, + "Failed to alloc buf to write table ras info\n"); return -ENOMEM; } @@ -338,10 +340,11 @@ static int __write_table_ras_info(struct amdgpu_ras_eeprom_control *control) up_read(&adev->reset_domain->sem); if (res < 0) { - DRM_ERROR("Failed to write EEPROM table ras info:%d", res); + dev_err(adev->dev, "Failed to write EEPROM table ras info:%d", + res); } else if (res < RAS_TABLE_V2_1_INFO_SIZE) { - DRM_ERROR("Short write:%d out of %d\n", - res, RAS_TABLE_V2_1_INFO_SIZE); + dev_err(adev->dev, "Short write:%d out of %d\n", res, + RAS_TABLE_V2_1_INFO_SIZE); res = -EIO; } else { res = 0; @@ -418,6 +421,7 @@ static void amdgpu_ras_set_eeprom_table_version(struct amdgpu_ras_eeprom_control hdr->version = RAS_TABLE_VER_V2_1; return; case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 5, 0): hdr->version = RAS_TABLE_VER_V3; return; default: @@ -475,6 +479,8 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) control->ras_num_recs = 0; control->ras_num_bad_pages = 0; + control->ras_num_mca_recs = 0; + control->ras_num_pa_recs = 0; control->ras_fri = 0; amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_bad_pages); @@ -606,13 +612,13 @@ static int __amdgpu_ras_eeprom_write(struct amdgpu_ras_eeprom_control *control, buf, buf_size); up_read(&adev->reset_domain->sem); if (res < 0) { - DRM_ERROR("Writing %d EEPROM table records error:%d", - num, res); + dev_err(adev->dev, "Writing %d EEPROM table records error:%d", + num, res); } else if (res < buf_size) { /* Short write, return error. */ - DRM_ERROR("Wrote %d records out of %d", - res / RAS_TABLE_RECORD_SIZE, num); + dev_err(adev->dev, "Wrote %d records out of %d", + res / RAS_TABLE_RECORD_SIZE, num); res = -EIO; } else { res = 0; @@ -760,18 +766,17 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) dev_warn(adev->dev, "Saved bad pages %d reaches threshold value %d\n", control->ras_num_bad_pages, ras->bad_page_cnt_threshold); - control->tbl_hdr.header = RAS_TABLE_HDR_BAD; - if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) { - control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD; - control->tbl_rai.health_percent = 0; - } - if ((amdgpu_bad_page_threshold != -1) && - (amdgpu_bad_page_threshold != -2)) + (amdgpu_bad_page_threshold != -2)) { + control->tbl_hdr.header = RAS_TABLE_HDR_BAD; + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) { + control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD; + control->tbl_rai.health_percent = 0; + } ras->is_rma = true; - - /* ignore the -ENOTSUPP return value */ - amdgpu_dpm_send_rma_reason(adev); + /* ignore the -ENOTSUPP return value */ + amdgpu_dpm_send_rma_reason(adev); + } } if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) @@ -786,8 +791,9 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) buf_size = control->ras_num_recs * RAS_TABLE_RECORD_SIZE; buf = kcalloc(control->ras_num_recs, RAS_TABLE_RECORD_SIZE, GFP_KERNEL); if (!buf) { - DRM_ERROR("allocating memory for table of size %d bytes failed\n", - control->tbl_hdr.tbl_size); + dev_err(adev->dev, + "allocating memory for table of size %d bytes failed\n", + control->tbl_hdr.tbl_size); res = -ENOMEM; goto Out; } @@ -799,12 +805,11 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) buf, buf_size); up_read(&adev->reset_domain->sem); if (res < 0) { - DRM_ERROR("EEPROM failed reading records:%d\n", - res); + dev_err(adev->dev, "EEPROM failed reading records:%d\n", res); goto Out; } else if (res < buf_size) { - DRM_ERROR("EEPROM read %d out of %d bytes\n", - res, buf_size); + dev_err(adev->dev, "EEPROM read %d out of %d bytes\n", res, + buf_size); res = -EIO; goto Out; } @@ -865,11 +870,12 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, return 0; if (num == 0) { - DRM_ERROR("will not append 0 records\n"); + dev_err(adev->dev, "will not append 0 records\n"); return -EINVAL; } else if (num > control->ras_max_record_count) { - DRM_ERROR("cannot append %d records than the size of table %d\n", - num, control->ras_max_record_count); + dev_err(adev->dev, + "cannot append %d records than the size of table %d\n", + num, control->ras_max_record_count); return -EINVAL; } @@ -923,13 +929,13 @@ static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, buf, buf_size); up_read(&adev->reset_domain->sem); if (res < 0) { - DRM_ERROR("Reading %d EEPROM table records error:%d", - num, res); + dev_err(adev->dev, "Reading %d EEPROM table records error:%d", + num, res); } else if (res < buf_size) { /* Short read, return error. */ - DRM_ERROR("Read %d records out of %d", - res / RAS_TABLE_RECORD_SIZE, num); + dev_err(adev->dev, "Read %d records out of %d", + res / RAS_TABLE_RECORD_SIZE, num); res = -EIO; } else { res = 0; @@ -963,11 +969,11 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, return 0; if (num == 0) { - DRM_ERROR("will not read 0 records\n"); + dev_err(adev->dev, "will not read 0 records\n"); return -EINVAL; } else if (num > control->ras_num_recs) { - DRM_ERROR("too many records to read:%d available:%d\n", - num, control->ras_num_recs); + dev_err(adev->dev, "too many records to read:%d available:%d\n", + num, control->ras_num_recs); return -EINVAL; } @@ -1299,7 +1305,8 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control buf = kzalloc(buf_size, GFP_KERNEL); if (!buf) { - DRM_ERROR("Out of memory checking RAS table checksum.\n"); + dev_err(adev->dev, + "Out of memory checking RAS table checksum.\n"); return -ENOMEM; } @@ -1308,7 +1315,7 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control control->ras_header_offset, buf, buf_size); if (res < buf_size) { - DRM_ERROR("Partial read for checksum, res:%d\n", res); + dev_err(adev->dev, "Partial read for checksum, res:%d\n", res); /* On partial reads, return -EIO. */ if (res >= 0) @@ -1333,7 +1340,8 @@ static int __read_table_ras_info(struct amdgpu_ras_eeprom_control *control) buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL); if (!buf) { - DRM_ERROR("Failed to alloc buf to read EEPROM table ras info\n"); + dev_err(adev->dev, + "Failed to alloc buf to read EEPROM table ras info\n"); return -ENOMEM; } @@ -1345,7 +1353,8 @@ static int __read_table_ras_info(struct amdgpu_ras_eeprom_control *control) control->i2c_address + control->ras_info_offset, buf, RAS_TABLE_V2_1_INFO_SIZE); if (res < RAS_TABLE_V2_1_INFO_SIZE) { - DRM_ERROR("Failed to read EEPROM table ras info, res:%d", res); + dev_err(adev->dev, + "Failed to read EEPROM table ras info, res:%d", res); res = res >= 0 ? -EIO : res; goto Out; } @@ -1386,23 +1395,46 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) control->i2c_address + control->ras_header_offset, buf, RAS_TABLE_HEADER_SIZE); if (res < RAS_TABLE_HEADER_SIZE) { - DRM_ERROR("Failed to read EEPROM table header, res:%d", res); + dev_err(adev->dev, "Failed to read EEPROM table header, res:%d", + res); return res >= 0 ? -EIO : res; } __decode_table_header_from_buf(hdr, buf); - if (hdr->version >= RAS_TABLE_VER_V2_1) { + if (hdr->header != RAS_TABLE_HDR_VAL && + hdr->header != RAS_TABLE_HDR_BAD) { + dev_info(adev->dev, "Creating a new EEPROM table"); + return amdgpu_ras_eeprom_reset_table(control); + } + + switch (hdr->version) { + case RAS_TABLE_VER_V2_1: + case RAS_TABLE_VER_V3: control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr); control->ras_record_offset = RAS_RECORD_START_V2_1; control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1; - } else { + break; + case RAS_TABLE_VER_V1: control->ras_num_recs = RAS_NUM_RECS(hdr); control->ras_record_offset = RAS_RECORD_START; control->ras_max_record_count = RAS_MAX_RECORD_COUNT; + break; + default: + dev_err(adev->dev, + "RAS header invalid, unsupported version: %u", + hdr->version); + return -EINVAL; + } + + if (control->ras_num_recs > control->ras_max_record_count) { + dev_err(adev->dev, + "RAS header invalid, records in header: %u max allowed :%u", + control->ras_num_recs, control->ras_max_record_count); + return -EINVAL; } - control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset); + control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset); control->ras_num_mca_recs = 0; control->ras_num_pa_recs = 0; return 0; @@ -1413,7 +1445,7 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) struct amdgpu_device *adev = to_amdgpu_device(control); struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - int res; + int res = 0; if (!__is_ras_eeprom_supported(adev)) return 0; @@ -1429,8 +1461,9 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) control->ras_num_mca_recs * adev->umc.retire_unit; if (hdr->header == RAS_TABLE_HDR_VAL) { - DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", - control->ras_num_bad_pages); + dev_dbg(adev->dev, + "Found existing EEPROM table with %d records", + control->ras_num_bad_pages); if (hdr->version >= RAS_TABLE_VER_V2_1) { res = __read_table_ras_info(control); @@ -1494,11 +1527,35 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) "User defined threshold is set, runtime service will be halt when threshold is reached\n"); } } - } else { - DRM_INFO("Creating a new EEPROM table"); - - res = amdgpu_ras_eeprom_reset_table(control); } return res < 0 ? res : 0; } + +void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + struct amdgpu_ras_eeprom_control *control; + int res; + + if (!__is_ras_eeprom_supported(adev) || !ras) + return; + control = &ras->eeprom_control; + if (!control->is_eeprom_valid) + return; + res = __verify_ras_table_checksum(control); + if (res) { + dev_warn(adev->dev, + "RAS table incorrect checksum or error:%d, try to recover\n", + res); + if (!amdgpu_ras_eeprom_reset_table(control)) + if (!amdgpu_ras_save_bad_pages(adev, NULL)) + if (!__verify_ras_table_checksum(control)) { + dev_info(adev->dev, "RAS table recovery succeed\n"); + return; + } + dev_err(adev->dev, "RAS table recovery failed\n"); + control->is_eeprom_valid = false; + } + return; +}
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index ec6d7ea37ad0..ebfca4cb5688 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -114,6 +114,8 @@ struct amdgpu_ras_eeprom_control { /* Record channel info which occurred bad pages */ u32 bad_channel_bitmap; + + bool is_eeprom_valid; }; /* @@ -159,6 +161,8 @@ void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control); int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control); +void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev); + extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops; extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index d55c8b7fdb59..6379bb25bf5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -99,6 +99,29 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw) return 0; } +/** + * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit + * + * @ring: amdgpu_ring structure holding ring information + * @ndw: number of dwords to allocate in the ring buffer + * + * Allocate @ndw dwords in the ring buffer (all asics). + * doesn't check the max_dw limit as we may be reemitting + * several submissions. + */ +static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw) +{ + /* Align requested size with padding so unlock_commit can + * pad safely */ + ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask; + + ring->count_dw = ndw; + ring->wptr_old = ring->wptr; + + if (ring->funcs->begin_use) + ring->funcs->begin_use(ring); +} + /** amdgpu_ring_insert_nop - insert NOP packets * * @ring: amdgpu_ring structure holding ring information @@ -187,14 +210,10 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) } #define amdgpu_ring_get_gpu_addr(ring, offset) \ - (ring->is_mes_queue ? \ - (ring->mes_ctx->meta_data_gpu_addr + offset) : \ - (ring->adev->wb.gpu_addr + offset * 4)) + (ring->adev->wb.gpu_addr + offset * 4) #define amdgpu_ring_get_cpu_addr(ring, offset) \ - (ring->is_mes_queue ? \ - (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \ - (&ring->adev->wb.wb[offset])) + (&ring->adev->wb.wb[offset]) /** * amdgpu_ring_init - init driver ring struct. @@ -243,57 +262,42 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ring->sched_score = sched_score; ring->vmid_wait = dma_fence_get_stub(); - if (!ring->is_mes_queue) { - ring->idx = adev->num_rings++; - adev->rings[ring->idx] = ring; - } + ring->idx = adev->num_rings++; + adev->rings[ring->idx] = ring; r = amdgpu_fence_driver_init_ring(ring); if (r) return r; } - if (ring->is_mes_queue) { - ring->rptr_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_RPTR_OFFS); - ring->wptr_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_WPTR_OFFS); - ring->fence_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_FENCE_OFFS); - ring->trail_fence_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_TRAIL_FENCE_OFFS); - ring->cond_exe_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_COND_EXE_OFFS); - } else { - r = amdgpu_device_wb_get(adev, &ring->rptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->rptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->wptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->wptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->fence_offs); - if (r) { - dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->fence_offs); + if (r) { + dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs); - if (r) { - dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs); + if (r) { + dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs); - if (r) { - dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs); + if (r) { + dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); + return r; } ring->fence_gpu_addr = @@ -352,19 +356,14 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, /* Initialize cached_rptr to 0 */ ring->cached_rptr = 0; - /* Allocate ring buffer */ - if (ring->is_mes_queue) { - int offset = 0; - - BUG_ON(ring->ring_size > PAGE_SIZE*4); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_RING_OFFS); - ring->gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ring->ring = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - amdgpu_ring_clear_ring(ring); + if (!ring->ring_backup) { + ring->ring_backup = kvzalloc(ring->ring_size, GFP_KERNEL); + if (!ring->ring_backup) + return -ENOMEM; + } - } else if (ring->ring_obj == NULL) { + /* Allocate ring buffer */ + if (ring->ring_obj == NULL) { r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &ring->ring_obj, @@ -372,6 +371,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, (void **)&ring->ring); if (r) { dev_err(adev->dev, "(%d) ring create failed\n", r); + kvfree(ring->ring_backup); return r; } amdgpu_ring_clear_ring(ring); @@ -401,32 +401,28 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) { /* Not to finish a ring which is not initialized */ - if (!(ring->adev) || - (!ring->is_mes_queue && !(ring->adev->rings[ring->idx]))) + if (!(ring->adev) || !(ring->adev->rings[ring->idx])) return; ring->sched.ready = false; - if (!ring->is_mes_queue) { - amdgpu_device_wb_free(ring->adev, ring->rptr_offs); - amdgpu_device_wb_free(ring->adev, ring->wptr_offs); + amdgpu_device_wb_free(ring->adev, ring->rptr_offs); + amdgpu_device_wb_free(ring->adev, ring->wptr_offs); - amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs); - amdgpu_device_wb_free(ring->adev, ring->fence_offs); + amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs); + amdgpu_device_wb_free(ring->adev, ring->fence_offs); - amdgpu_bo_free_kernel(&ring->ring_obj, - &ring->gpu_addr, - (void **)&ring->ring); - } else { - kfree(ring->fence_drv.fences); - } + amdgpu_bo_free_kernel(&ring->ring_obj, + &ring->gpu_addr, + (void **)&ring->ring); + kvfree(ring->ring_backup); + ring->ring_backup = NULL; dma_fence_put(ring->vmid_wait); ring->vmid_wait = NULL; ring->me = 0; - if (!ring->is_mes_queue) - ring->adev->rings[ring->idx] = NULL; + ring->adev->rings[ring->idx] = NULL; } /** @@ -463,6 +459,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, { unsigned long flags; ktime_t deadline; + bool ret; if (unlikely(ring->adev->debug_disable_soft_recovery)) return false; @@ -477,12 +474,16 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, dma_fence_set_error(fence, -ENODATA); spin_unlock_irqrestore(fence->lock, flags); - atomic_inc(&ring->adev->gpu_reset_counter); while (!dma_fence_is_signaled(fence) && ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0) ring->funcs->soft_recovery(ring, vmid); - return dma_fence_is_signaled(fence); + ret = dma_fence_is_signaled(fence); + /* increment the counter only if soft reset worked */ + if (ret) + atomic_inc(&ring->adev->gpu_reset_counter); + + return ret; } /* @@ -608,59 +609,17 @@ static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { struct amdgpu_ring *ring = file_inode(f)->i_private; - volatile u32 *mqd; - u32 *kbuf; - int r, i; - uint32_t value, result; - - if (*pos & 3 || size & 3) - return -EINVAL; - - kbuf = kmalloc(ring->mqd_size, GFP_KERNEL); - if (!kbuf) - return -ENOMEM; + ssize_t bytes = min_t(ssize_t, ring->mqd_size - *pos, size); + void *from = ((u8 *)ring->mqd_ptr) + *pos; - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto err_free; + if (*pos > ring->mqd_size) + return 0; - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); - if (r) - goto err_unreserve; - - /* - * Copy to local buffer to avoid put_user(), which might fault - * and acquire mmap_sem, under reservation_ww_class_mutex. - */ - for (i = 0; i < ring->mqd_size/sizeof(u32); i++) - kbuf[i] = mqd[i]; - - amdgpu_bo_kunmap(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); - - result = 0; - while (size) { - if (*pos >= ring->mqd_size) - break; - - value = kbuf[*pos/4]; - r = put_user(value, (uint32_t *)buf); - if (r) - goto err_free; - buf += 4; - result += 4; - size -= 4; - *pos += 4; - } - - kfree(kbuf); - return result; + if (copy_to_user(buf, from, bytes)) + return -EFAULT; -err_unreserve: - amdgpu_bo_unreserve(ring->mqd_obj); -err_free: - kfree(kbuf); - return r; + *pos += bytes; + return bytes; } static const struct file_operations amdgpu_debugfs_mqd_fops = { @@ -760,6 +719,7 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring, prop->eop_gpu_addr = ring->eop_gpu_addr; prop->use_doorbell = ring->use_doorbell; prop->doorbell_index = ring->doorbell_index; + prop->kernel_queue = true; /* map_queues packet doesn't need activate the queue, * so only kiq need set this field. @@ -831,3 +791,69 @@ bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring) return true; } + +void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence) +{ + /* Stop the scheduler to prevent anybody else from touching the ring buffer. */ + drm_sched_wqueue_stop(&ring->sched); + /* back up the non-guilty commands */ + amdgpu_ring_backup_unprocessed_commands(ring, guilty_fence); +} + +int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence) +{ + unsigned int i; + int r; + + /* verify that the ring is functional */ + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + + /* signal the fence of the bad job */ + if (guilty_fence) + amdgpu_fence_driver_guilty_force_completion(guilty_fence); + /* Re-emit the non-guilty commands */ + if (ring->ring_backup_entries_to_copy) { + amdgpu_ring_alloc_reemit(ring, ring->ring_backup_entries_to_copy); + for (i = 0; i < ring->ring_backup_entries_to_copy; i++) + amdgpu_ring_write(ring, ring->ring_backup[i]); + amdgpu_ring_commit(ring); + } + /* Start the scheduler again */ + drm_sched_wqueue_start(&ring->sched); + return 0; +} + +bool amdgpu_ring_is_reset_type_supported(struct amdgpu_ring *ring, + u32 reset_type) +{ + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_GFX: + if (ring->adev->gfx.gfx_supported_reset & reset_type) + return true; + break; + case AMDGPU_RING_TYPE_COMPUTE: + if (ring->adev->gfx.compute_supported_reset & reset_type) + return true; + break; + case AMDGPU_RING_TYPE_SDMA: + if (ring->adev->sdma.supported_reset & reset_type) + return true; + break; + case AMDGPU_RING_TYPE_VCN_DEC: + case AMDGPU_RING_TYPE_VCN_ENC: + if (ring->adev->vcn.supported_reset & reset_type) + return true; + break; + case AMDGPU_RING_TYPE_VCN_JPEG: + if (ring->adev->jpeg.supported_reset & reset_type) + return true; + break; + default: + break; + } + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index bb2b66385223..7670f5d82b9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -118,6 +118,7 @@ struct amdgpu_fence_driver { /* sync_seq is protected by ring emission lock */ uint32_t sync_seq; atomic_t last_seq; + u64 signalled_wptr; bool initialized; struct amdgpu_irq_src *irq_src; unsigned irq_type; @@ -127,11 +128,35 @@ struct amdgpu_fence_driver { struct dma_fence **fences; }; +/* + * Fences mark an event in the GPUs pipeline and are used + * for GPU/CPU synchronization. When the fence is written, + * it is expected that all buffers associated with that fence + * are no longer in use by the associated ring on the GPU and + * that the relevant GPU caches have been flushed. + */ + +struct amdgpu_fence { + struct dma_fence base; + + /* RB, DMA, etc. */ + struct amdgpu_ring *ring; + ktime_t start_timestamp; + + /* wptr for the fence for resets */ + u64 wptr; + /* fence context for resets */ + u64 context; + uint32_t seq; +}; + extern const struct drm_sched_backend_ops amdgpu_sched_ops; void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); +void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence); +void amdgpu_fence_save_wptr(struct dma_fence *fence); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, @@ -141,8 +166,8 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev); void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev); int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev); void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev); -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, struct amdgpu_job *job, - unsigned flags); +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, + struct amdgpu_fence *af, unsigned int flags); int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, uint32_t timeout); bool amdgpu_fence_process(struct amdgpu_ring *ring); @@ -164,8 +189,24 @@ void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, /* provided by hw blocks that expose a ring buffer for commands */ struct amdgpu_ring_funcs { + /** + * @type: + * + * GFX, Compute, SDMA, UVD, VCE, VCN, VPE, KIQ, MES, UMSCH, and CPER + * use ring buffers. The type field just identifies which component the + * ring buffer is associated with. + */ enum amdgpu_ring_type type; uint32_t align_mask; + + /** + * @nop: + * + * Every block in the amdgpu has no-op instructions (e.g., GFX 10 + * uses PACKET3(PACKET3_NOP, 0x3FFF), VCN 5 uses VCN_ENC_CMD_NO_OP, + * etc). This field receives the specific no-op for the component + * that initializes the ring. + */ u32 nop; bool support_64bit_ptrs; bool no_user_fence; @@ -236,11 +277,14 @@ struct amdgpu_ring_funcs { void (*patch_cntl)(struct amdgpu_ring *ring, unsigned offset); void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset); void (*patch_de)(struct amdgpu_ring *ring, unsigned offset); - int (*reset)(struct amdgpu_ring *ring, unsigned int vmid); + int (*reset)(struct amdgpu_ring *ring, unsigned int vmid, + struct amdgpu_fence *timedout_fence); void (*emit_cleaner_shader)(struct amdgpu_ring *ring); - bool (*is_guilty)(struct amdgpu_ring *ring); }; +/** + * amdgpu_ring - Holds ring information + */ struct amdgpu_ring { struct amdgpu_device *adev; const struct amdgpu_ring_funcs *funcs; @@ -249,16 +293,67 @@ struct amdgpu_ring { struct amdgpu_bo *ring_obj; uint32_t *ring; + /* backups for resets */ + uint32_t *ring_backup; + unsigned int ring_backup_entries_to_copy; unsigned rptr_offs; u64 rptr_gpu_addr; volatile u32 *rptr_cpu_addr; + + /** + * @wptr: + * + * This is part of the Ring buffer implementation and represents the + * write pointer. The wptr determines where the host has written. + */ u64 wptr; + + /** + * @wptr_old: + * + * Before update wptr with the new value, usually the old value is + * stored in the wptr_old. + */ u64 wptr_old; unsigned ring_size; + + /** + * @max_dw: + * + * Maximum number of DWords for ring allocation. This information is + * provided at the ring initialization time, and each IP block can + * specify a specific value. Check places that invoke + * amdgpu_ring_init() to see the maximum size per block. + */ unsigned max_dw; + + /** + * @count_dw: + * + * This value starts with the maximum amount of DWords supported by the + * ring. This value is updated based on the ring manipulation. + */ int count_dw; uint64_t gpu_addr; + + /** + * @ptr_mask: + * + * Some IPs provide support for 64-bit pointers and others for 32-bit + * only; this behavior is component-specific and defined by the field + * support_64bit_ptr. If the IP block supports 64-bits, the mask + * 0xffffffffffffffff is set; otherwise, this value assumes buf_mask. + * Notice that this field is used to keep wptr under a valid range. + */ uint64_t ptr_mask; + + /** + * @buf_mask: + * + * Buffer mask is a value used to keep wptr count under its + * thresholding. Buffer mask initialized during the ring buffer + * initialization time, and it is defined as (ring_size / 4) -1. + */ uint32_t buf_mask; u32 idx; u32 xcc_id; @@ -276,6 +371,13 @@ struct amdgpu_ring { bool use_pollmem; unsigned wptr_offs; u64 wptr_gpu_addr; + + /** + * @wptr_cpu_addr: + * + * This is the CPU address pointer in the writeback slot. This is used + * to commit changes to the GPU. + */ volatile u32 *wptr_cpu_addr; unsigned fence_offs; u64 fence_gpu_addr; @@ -297,20 +399,15 @@ struct amdgpu_ring { struct dma_fence *vmid_wait; bool has_compute_vm_bug; bool no_scheduler; + bool no_user_submission; int hw_prio; unsigned num_hw_submission; atomic_t *sched_score; - /* used for mes */ - bool is_mes_queue; - uint32_t hw_queue_id; - struct amdgpu_mes_ctx_data *mes_ctx; - bool is_sw_ring; unsigned int entry_index; /* store the cached rptr to restore after reset */ uint64_t cached_rptr; - }; #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib))) @@ -340,7 +437,7 @@ struct amdgpu_ring { #define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o))) #define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o))) #define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o))) -#define amdgpu_ring_reset(r, v) (r)->funcs->reset((r), (v)) +#define amdgpu_ring_reset(r, v, f) (r)->funcs->reset((r), (v), (f)) unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type); int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); @@ -435,15 +532,6 @@ static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring, ring->ring[offset] = cur - offset; } -#define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset) \ - (ring->is_mes_queue && ring->mes_ctx ? \ - (ring->mes_ctx->meta_data_gpu_addr + offset) : 0) - -#define amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset) \ - (ring->is_mes_queue && ring->mes_ctx ? \ - (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \ - NULL) - int amdgpu_ring_test_helper(struct amdgpu_ring *ring); void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, @@ -474,4 +562,12 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev); void amdgpu_ib_pool_fini(struct amdgpu_device *adev); int amdgpu_ib_ring_tests(struct amdgpu_device *adev); bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring); +void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence); +void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence); +int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence); +bool amdgpu_ring_is_reset_type_supported(struct amdgpu_ring *ring, + u32 reset_type); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c index 1c66da1c3fb4..7e7d6c3865bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c @@ -124,7 +124,7 @@ static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux) } } - del_timer(&mux->resubmit_timer); + timer_delete(&mux->resubmit_timer); mux->s_resubmit = false; } @@ -135,7 +135,8 @@ static void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux) static void amdgpu_mux_resubmit_fallback(struct timer_list *t) { - struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer); + struct amdgpu_ring_mux *mux = timer_container_of(mux, t, + resubmit_timer); if (!spin_trylock(&mux->lock)) { amdgpu_ring_mux_schedule_resubmit(mux); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index fce22d3f816b..c210625be220 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -237,6 +237,20 @@ struct amdgpu_rlc_funcs { void (*unset_safe_mode)(struct amdgpu_device *adev, int xcc_id); int (*init)(struct amdgpu_device *adev); u32 (*get_csb_size)(struct amdgpu_device *adev); + + /** + * @get_csb_buffer: Get the clear state to be put into the hardware. + * + * The parameter adev is used to get the CS data and other gfx info, + * and buffer is the RLC CS pointer + * + * Sometimes, the user space puts a request to clear the state in the + * command buffer; this function provides the clear state that gets put + * into the hardware. Note that the driver programs Clear State + * Indirect Buffer (CSB) explicitly when it sets up the kernel rings, + * and it also provides a pointer to it which is used by the firmware + * to load the clear state in some cases. + */ void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer); int (*get_cp_table_num)(struct amdgpu_device *adev); int (*resume)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 529c9696c2f3..8b8a04138711 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -26,6 +26,8 @@ #include "amdgpu_sdma.h" #include "amdgpu_ras.h" #include "amdgpu_reset.h" +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_3_0_sh_mask.h" #define AMDGPU_CSA_SDMA_SIZE 64 /* SDMA CSA reside in the 3rd page of CSA */ @@ -76,22 +78,14 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp) return 0; - if (ring->is_mes_queue) { - uint32_t offset = 0; + r = amdgpu_sdma_get_index_from_ring(ring, &index); - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - sdma[ring->idx].sdma_meta_data); - csa_mc_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - r = amdgpu_sdma_get_index_from_ring(ring, &index); - - if (r || index > 31) - csa_mc_addr = 0; - else - csa_mc_addr = amdgpu_csa_vaddr(adev) + - AMDGPU_CSA_SDMA_OFFSET + - index * AMDGPU_CSA_SDMA_SIZE; - } + if (r || index > 31) + csa_mc_addr = 0; + else + csa_mc_addr = amdgpu_csa_vaddr(adev) + + AMDGPU_CSA_SDMA_OFFSET + + index * AMDGPU_CSA_SDMA_SIZE; return csa_mc_addr; } @@ -537,110 +531,78 @@ bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_rin return false; } -/** - * amdgpu_sdma_register_on_reset_callbacks - Register SDMA reset callbacks - * @funcs: Pointer to the callback structure containing pre_reset and post_reset functions - * - * This function allows KFD and AMDGPU to register their own callbacks for handling - * pre-reset and post-reset operations for engine reset. These are needed because engine - * reset will stop all queues on that engine. - */ -void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs) +static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id) { - if (!funcs) - return; + struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id]; - /* Ensure the reset_callback_list is initialized */ - if (!adev->sdma.reset_callback_list.next) { - INIT_LIST_HEAD(&adev->sdma.reset_callback_list); - } - /* Initialize the list node in the callback structure */ - INIT_LIST_HEAD(&funcs->list); + if (sdma_instance->funcs->soft_reset_kernel_queue) + return sdma_instance->funcs->soft_reset_kernel_queue(adev, instance_id); - /* Add the callback structure to the global list */ - list_add_tail(&funcs->list, &adev->sdma.reset_callback_list); + return -EOPNOTSUPP; } /** * amdgpu_sdma_reset_engine - Reset a specific SDMA engine * @adev: Pointer to the AMDGPU device - * @instance_id: ID of the SDMA engine instance to reset - * - * This function performs the following steps: - * 1. Calls all registered pre_reset callbacks to allow KFD and AMDGPU to save their state. - * 2. Resets the specified SDMA engine instance. - * 3. Calls all registered post_reset callbacks to allow KFD and AMDGPU to restore their state. + * @instance_id: Logical ID of the SDMA engine instance to reset + * @caller_handles_kernel_queues: Skip kernel queue processing. Caller + * will handle it. * * Returns: 0 on success, or a negative error code on failure. */ -int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) +int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, + bool caller_handles_kernel_queues) { - struct sdma_on_reset_funcs *funcs; int ret = 0; struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id]; struct amdgpu_ring *gfx_ring = &sdma_instance->ring; struct amdgpu_ring *page_ring = &sdma_instance->page; - bool gfx_sched_stopped = false, page_sched_stopped = false; mutex_lock(&sdma_instance->engine_reset_mutex); - /* Stop the scheduler's work queue for the GFX and page rings if they are running. - * This ensures that no new tasks are submitted to the queues while - * the reset is in progress. - */ - if (!amdgpu_ring_sched_ready(gfx_ring)) { + + if (!caller_handles_kernel_queues) { + /* Stop the scheduler's work queue for the GFX and page rings if they are running. + * This ensures that no new tasks are submitted to the queues while + * the reset is in progress. + */ drm_sched_wqueue_stop(&gfx_ring->sched); - gfx_sched_stopped = true; - } - if (adev->sdma.has_page_queue && !amdgpu_ring_sched_ready(page_ring)) { - drm_sched_wqueue_stop(&page_ring->sched); - page_sched_stopped = true; + if (adev->sdma.has_page_queue) + drm_sched_wqueue_stop(&page_ring->sched); } - /* Invoke all registered pre_reset callbacks */ - list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) { - if (funcs->pre_reset) { - ret = funcs->pre_reset(adev, instance_id); - if (ret) { - dev_err(adev->dev, - "beforeReset callback failed for instance %u: %d\n", - instance_id, ret); - goto exit; - } - } + if (sdma_instance->funcs->stop_kernel_queue) { + sdma_instance->funcs->stop_kernel_queue(gfx_ring); + if (adev->sdma.has_page_queue) + sdma_instance->funcs->stop_kernel_queue(page_ring); } /* Perform the SDMA reset for the specified instance */ - ret = amdgpu_dpm_reset_sdma(adev, 1 << instance_id); + ret = amdgpu_sdma_soft_reset(adev, instance_id); if (ret) { - dev_err(adev->dev, "Failed to reset SDMA instance %u\n", instance_id); + dev_err(adev->dev, "Failed to reset SDMA logical instance %u\n", instance_id); goto exit; } - /* Invoke all registered post_reset callbacks */ - list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) { - if (funcs->post_reset) { - ret = funcs->post_reset(adev, instance_id); - if (ret) { - dev_err(adev->dev, - "afterReset callback failed for instance %u: %d\n", - instance_id, ret); - goto exit; - } - } + if (sdma_instance->funcs->start_kernel_queue) { + sdma_instance->funcs->start_kernel_queue(gfx_ring); + if (adev->sdma.has_page_queue) + sdma_instance->funcs->start_kernel_queue(page_ring); } exit: - /* Restart the scheduler's work queue for the GFX and page rings - * if they were stopped by this function. This allows new tasks - * to be submitted to the queues after the reset is complete. - */ - if (!ret) { - if (gfx_sched_stopped && amdgpu_ring_sched_ready(gfx_ring)) { + if (!caller_handles_kernel_queues) { + /* Restart the scheduler's work queue for the GFX and page rings + * if they were stopped by this function. This allows new tasks + * to be submitted to the queues after the reset is complete. + */ + if (!ret) { + amdgpu_fence_driver_force_completion(gfx_ring); drm_sched_wqueue_start(&gfx_ring->sched); - } - if (page_sched_stopped && amdgpu_ring_sched_ready(page_ring)) { - drm_sched_wqueue_start(&page_ring->sched); + if (adev->sdma.has_page_queue) { + amdgpu_fence_driver_force_completion(page_ring); + drm_sched_wqueue_start(&page_ring->sched); + } } } mutex_unlock(&sdma_instance->engine_reset_mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 47d56fd0589f..34311f32be4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -50,6 +50,12 @@ enum amdgpu_sdma_irq { #define NUM_SDMA(x) hweight32(x) +struct amdgpu_sdma_funcs { + int (*stop_kernel_queue)(struct amdgpu_ring *ring); + int (*start_kernel_queue)(struct amdgpu_ring *ring); + int (*soft_reset_kernel_queue)(struct amdgpu_device *adev, u32 instance_id); +}; + struct amdgpu_sdma_instance { /* SDMA firmware */ const struct firmware *fw; @@ -68,7 +74,7 @@ struct amdgpu_sdma_instance { /* track guilty state of GFX and PAGE queues */ bool gfx_guilty; bool page_guilty; - + const struct amdgpu_sdma_funcs *funcs; }; enum amdgpu_sdma_ras_memory_id { @@ -103,17 +109,11 @@ struct amdgpu_sdma_ras { struct amdgpu_ras_block_object ras_block; }; -struct sdma_on_reset_funcs { - int (*pre_reset)(struct amdgpu_device *adev, uint32_t instance_id); - int (*post_reset)(struct amdgpu_device *adev, uint32_t instance_id); - /* Linked list node to store this structure in a list; */ - struct list_head list; -}; - struct amdgpu_sdma { struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES]; struct amdgpu_irq_src trap_irq; struct amdgpu_irq_src illegal_inst_irq; + struct amdgpu_irq_src fence_irq; struct amdgpu_irq_src ecc_irq; struct amdgpu_irq_src vm_hole_irq; struct amdgpu_irq_src doorbell_invalid_irq; @@ -131,6 +131,8 @@ struct amdgpu_sdma { uint32_t *ip_dump; uint32_t supported_reset; struct list_head reset_callback_list; + bool no_user_submission; + bool disable_uq; }; /* @@ -170,8 +172,8 @@ struct amdgpu_buffer_funcs { uint32_t byte_count); }; -void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs); -int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id); +int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, + bool caller_handles_kernel_queues); #define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t)) #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index e22cb2b5cd92..d45ebfb642ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -45,7 +45,11 @@ */ static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev) { - return AMDGPU_VA_RESERVED_SEQ64_START(adev); + u64 addr = AMDGPU_VA_RESERVED_SEQ64_START(adev); + + addr = amdgpu_gmc_sign_extend(addr); + + return addr; } /** @@ -63,9 +67,9 @@ static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev) int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va **bo_va) { + u64 seq64_addr, va_flags; struct amdgpu_bo *bo; struct drm_exec exec; - u64 seq64_addr; int r; bo = adev->seq64.sbo; @@ -88,9 +92,11 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, goto error; } - seq64_addr = amdgpu_seq64_get_va_base(adev); + seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK; + + va_flags = amdgpu_gem_va_map_flags(adev, AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC); r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE, - AMDGPU_PTE_READABLE); + va_flags); if (r) { DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r); amdgpu_vm_bo_del(adev, *bo_va); @@ -133,7 +139,7 @@ void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv) vm = &fpriv->vm; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = amdgpu_vm_lock_pd(vm, &exec, 0); if (likely(!r)) @@ -156,6 +162,7 @@ error: * * @adev: amdgpu_device pointer * @va: VA to access the seq in process address space + * @gpu_addr: GPU address to access the seq * @cpu_addr: CPU address to access the seq * * Alloc a 64 bit memory from seq64 pool. @@ -163,7 +170,8 @@ error: * Returns: * 0 on success or a negative error code on failure */ -int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 **cpu_addr) +int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, + u64 *gpu_addr, u64 **cpu_addr) { unsigned long bit_pos; @@ -172,7 +180,12 @@ int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 **cpu_addr) return -ENOSPC; __set_bit(bit_pos, adev->seq64.used); + *va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev); + + if (gpu_addr) + *gpu_addr = bit_pos * sizeof(u64) + adev->seq64.gpu_addr; + *cpu_addr = bit_pos + adev->seq64.cpu_base_addr; return 0; @@ -233,7 +246,7 @@ int amdgpu_seq64_init(struct amdgpu_device *adev) */ r = amdgpu_bo_create_kernel(adev, AMDGPU_VA_RESERVED_SEQ64_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &adev->seq64.sbo, NULL, + &adev->seq64.sbo, &adev->seq64.gpu_addr, (void **)&adev->seq64.cpu_base_addr); if (r) { dev_warn(adev->dev, "(%d) create seq64 failed\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h index 4203b2ab318d..26a249aaaee1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h @@ -32,13 +32,14 @@ struct amdgpu_seq64 { struct amdgpu_bo *sbo; u32 num_sem; + u64 gpu_addr; u64 *cpu_base_addr; DECLARE_BITMAP(used, AMDGPU_MAX_SEQ64_SLOTS); }; void amdgpu_seq64_fini(struct amdgpu_device *adev); int amdgpu_seq64_init(struct amdgpu_device *adev); -int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, u64 **cpu_addr); +int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 *gpu_addr, u64 **cpu_addr); void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr); int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va **bo_va); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 5576ed0b508f..d6ae9974c952 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -249,9 +249,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, if (resv == NULL) return -EINVAL; - - /* TODO: Use DMA_RESV_USAGE_READ here */ - dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) { + /* Implicitly sync only to KERNEL, WRITE and READ */ + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) { dma_fence_chain_for_each(f, f) { struct dma_fence *tmp = dma_fence_chain_contained(f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 11dd2e0f7979..d13e64a69e25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -167,25 +167,23 @@ TRACE_EVENT(amdgpu_cs_ioctl, TP_PROTO(struct amdgpu_job *job), TP_ARGS(job), TP_STRUCT__entry( - __field(uint64_t, sched_job_id) __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) - __field(unsigned int, context) - __field(unsigned int, seqno) + __field(u64, context) + __field(u64, seqno) __field(struct dma_fence *, fence) __string(ring, to_amdgpu_ring(job->base.sched)->name) __field(u32, num_ibs) ), TP_fast_assign( - __entry->sched_job_id = job->base.id; __assign_str(timeline); __entry->context = job->base.s_fence->finished.context; __entry->seqno = job->base.s_fence->finished.seqno; __assign_str(ring); __entry->num_ibs = job->num_ibs; ), - TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u", - __entry->sched_job_id, __get_str(timeline), __entry->context, + TP_printk("timeline=%s, fence=%llu:%llu, ring_name=%s, num_ibs=%u", + __get_str(timeline), __entry->context, __entry->seqno, __get_str(ring), __entry->num_ibs) ); @@ -193,24 +191,22 @@ TRACE_EVENT(amdgpu_sched_run_job, TP_PROTO(struct amdgpu_job *job), TP_ARGS(job), TP_STRUCT__entry( - __field(uint64_t, sched_job_id) __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) - __field(unsigned int, context) - __field(unsigned int, seqno) + __field(u64, context) + __field(u64, seqno) __string(ring, to_amdgpu_ring(job->base.sched)->name) __field(u32, num_ibs) ), TP_fast_assign( - __entry->sched_job_id = job->base.id; __assign_str(timeline); __entry->context = job->base.s_fence->finished.context; __entry->seqno = job->base.s_fence->finished.seqno; __assign_str(ring); __entry->num_ibs = job->num_ibs; ), - TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u", - __entry->sched_job_id, __get_str(timeline), __entry->context, + TP_printk("timeline=%s, fence=%llu:%llu, ring_name=%s, num_ibs=%u", + __get_str(timeline), __entry->context, __entry->seqno, __get_str(ring), __entry->num_ibs) ); @@ -551,23 +547,19 @@ TRACE_EVENT(amdgpu_ib_pipe_sync, TP_ARGS(sched_job, fence), TP_STRUCT__entry( __string(ring, sched_job->base.sched->name) - __field(uint64_t, id) __field(struct dma_fence *, fence) - __field(uint64_t, ctx) - __field(unsigned, seqno) + __field(u64, ctx) + __field(u64, seqno) ), TP_fast_assign( __assign_str(ring); - __entry->id = sched_job->base.id; __entry->fence = fence; __entry->ctx = fence->context; __entry->seqno = fence->seqno; ), - TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u", - __get_str(ring), __entry->id, - __entry->fence, __entry->ctx, - __entry->seqno) + TP_printk("job ring=%s need pipe sync to fence=%llu:%llu", + __get_str(ring), __entry->ctx, __entry->seqno) ); TRACE_EVENT(amdgpu_reset_reg_dumps, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 53b71e9d8076..27ab4e754b2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -299,7 +299,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, struct amdgpu_bo *abo_src, *abo_dst; if (!adev->mman.buffer_funcs_enabled) { - DRM_ERROR("Trying to move memory with ring turned off.\n"); + dev_err(adev->dev, + "Trying to move memory with ring turned off.\n"); return -EINVAL; } @@ -934,7 +935,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, if (gtt->userptr) { r = amdgpu_ttm_tt_pin_userptr(bdev, ttm); if (r) { - DRM_ERROR("failed to pin userptr\n"); + dev_err(adev->dev, "failed to pin userptr\n"); return r; } } else if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) { @@ -1060,7 +1061,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev, /* if the pages have userptr pinning then clear that first */ if (gtt->userptr) { amdgpu_ttm_tt_unpin_userptr(bdev, ttm); - } else if (ttm->sg && gtt->gobj->import_attach) { + } else if (ttm->sg && drm_gem_is_imported(gtt->gobj)) { struct dma_buf_attachment *attach; attach = gtt->gobj->import_attach; @@ -1781,7 +1782,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) &ctx->c2p_bo, NULL); if (ret) { - DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret); + dev_err(adev->dev, "alloc c2p_bo failed(%d)!\n", ret); amdgpu_ttm_training_reserve_vram_fini(adev); return ret; } @@ -1793,7 +1794,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) adev, adev->gmc.real_vram_size - reserve_size, reserve_size, &adev->mman.fw_reserved_memory, NULL); if (ret) { - DRM_ERROR("alloc tmr failed(%d)!\n", ret); + dev_err(adev->dev, "alloc tmr failed(%d)!\n", ret); amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL); return ret; @@ -1864,13 +1865,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) adev->need_swiotlb, dma_addressing_limited(adev->dev)); if (r) { - DRM_ERROR("failed initializing buffer object driver(%d).\n", r); + dev_err(adev->dev, + "failed initializing buffer object driver(%d).\n", r); return r; } r = amdgpu_ttm_pools_init(adev); if (r) { - DRM_ERROR("failed to init ttm pools(%d).\n", r); + dev_err(adev->dev, "failed to init ttm pools(%d).\n", r); return r; } adev->mman.initialized = true; @@ -1878,7 +1880,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Initialize VRAM pool with all of VRAM divided into pages */ r = amdgpu_vram_mgr_init(adev); if (r) { - DRM_ERROR("Failed initializing VRAM heap.\n"); + dev_err(adev->dev, "Failed initializing VRAM heap.\n"); return r; } @@ -1958,7 +1960,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n"); } - DRM_INFO("amdgpu: %uM of VRAM memory ready\n", + dev_info(adev->dev, "amdgpu: %uM of VRAM memory ready\n", (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024))); /* Compute GTT size, either based on TTM limit @@ -1981,10 +1983,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Initialize GTT memory pool */ r = amdgpu_gtt_mgr_init(adev, gtt_size); if (r) { - DRM_ERROR("Failed initializing GTT heap.\n"); + dev_err(adev->dev, "Failed initializing GTT heap.\n"); return r; } - DRM_INFO("amdgpu: %uM of GTT memory ready.\n", + dev_info(adev->dev, "amdgpu: %uM of GTT memory ready.\n", (unsigned int)(gtt_size / (1024 * 1024))); if (adev->flags & AMD_IS_APU) { @@ -1995,40 +1997,40 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Initialize doorbell pool on PCI BAR */ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE); if (r) { - DRM_ERROR("Failed initializing doorbell heap.\n"); + dev_err(adev->dev, "Failed initializing doorbell heap.\n"); return r; } /* Create a boorbell page for kernel usages */ r = amdgpu_doorbell_create_kernel_doorbells(adev); if (r) { - DRM_ERROR("Failed to initialize kernel doorbells.\n"); + dev_err(adev->dev, "Failed to initialize kernel doorbells.\n"); return r; } /* Initialize preemptible memory pool */ r = amdgpu_preempt_mgr_init(adev); if (r) { - DRM_ERROR("Failed initializing PREEMPT heap.\n"); + dev_err(adev->dev, "Failed initializing PREEMPT heap.\n"); return r; } /* Initialize various on-chip memory pools */ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size); if (r) { - DRM_ERROR("Failed initializing GDS heap.\n"); + dev_err(adev->dev, "Failed initializing GDS heap.\n"); return r; } r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size); if (r) { - DRM_ERROR("Failed initializing gws heap.\n"); + dev_err(adev->dev, "Failed initializing gws heap.\n"); return r; } r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size); if (r) { - DRM_ERROR("Failed initializing oa heap.\n"); + dev_err(adev->dev, "Failed initializing oa heap.\n"); return r; } if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, @@ -2060,6 +2062,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) /* return the FW reserved memory back to VRAM */ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory_extend, NULL, + NULL); if (adev->mman.stolen_reserved_size) amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, NULL, NULL); @@ -2081,13 +2085,15 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_vram_mgr_fini(adev); amdgpu_gtt_mgr_fini(adev); amdgpu_preempt_mgr_fini(adev); + amdgpu_doorbell_fini(adev); + ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL); ttm_device_fini(&adev->mman.bdev); adev->mman.initialized = false; - DRM_INFO("amdgpu: ttm finalized\n"); + dev_info(adev->dev, "amdgpu: ttm finalized\n"); } /** @@ -2119,8 +2125,9 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) DRM_SCHED_PRIORITY_KERNEL, &sched, 1, NULL); if (r) { - DRM_ERROR("Failed setting up TTM BO move entity (%d)\n", - r); + dev_err(adev->dev, + "Failed setting up TTM BO move entity (%d)\n", + r); return; } @@ -2128,8 +2135,9 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) DRM_SCHED_PRIORITY_NORMAL, &sched, 1, NULL); if (r) { - DRM_ERROR("Failed setting up TTM BO move entity (%d)\n", - r); + dev_err(adev->dev, + "Failed setting up TTM BO move entity (%d)\n", + r); goto error_free_entity; } } else { @@ -2200,7 +2208,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, int r; if (!direct_submit && !ring->sched.ready) { - DRM_ERROR("Trying to move memory with ring turned off.\n"); + dev_err(adev->dev, + "Trying to move memory with ring turned off.\n"); return -EINVAL; } @@ -2235,7 +2244,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, error_free: amdgpu_job_free(job); - DRM_ERROR("Error scheduling IBs (%d)\n", r); + dev_err(adev->dev, "Error scheduling IBs (%d)\n", r); return r; } @@ -2354,7 +2363,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, int r; if (!adev->mman.buffer_funcs_enabled) { - DRM_ERROR("Trying to clear memory with ring turned off.\n"); + dev_err(adev->dev, + "Trying to clear memory with ring turned off.\n"); return -EINVAL; } @@ -2414,7 +2424,7 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type) man = ttm_manager_type(&adev->mman.bdev, mem_type); break; default: - DRM_ERROR("Trying to evict invalid memory type\n"); + dev_err(adev->dev, "Trying to evict invalid memory type\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 208b7d1d8a27..2309df3f68a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -86,6 +86,7 @@ struct amdgpu_mman { uint32_t discovery_tmr_size; /* fw reserved memory */ struct amdgpu_bo *fw_reserved_memory; + struct amdgpu_bo *fw_reserved_memory_extend; /* firmware VRAM reservation */ u64 fw_vram_usage_start_offset; @@ -154,6 +155,7 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, uint64_t start, uint64_t size); int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, uint64_t start); +void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev); bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, struct ttm_resource *res); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 3d9e9fdc10b4..e96f24e9ad57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -30,6 +30,11 @@ #define AMDGPU_UCODE_NAME_MAX (128) +static const struct kicker_device kicker_device_list[] = { + {0x744B, 0x00}, + {0x7551, 0xC8} +}; + static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header *hdr) { DRM_DEBUG("size_bytes: %u\n", le32_to_cpu(hdr->size_bytes)); @@ -765,8 +770,10 @@ FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version); FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version); FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version); FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version); +FW_VERSION_ATTR(dmcub_fw_version, 0444, dm.dmcub_fw_version); FW_VERSION_ATTR(mes_fw_version, 0444, mes.sched_version & AMDGPU_MES_VERSION_MASK); FW_VERSION_ATTR(mes_kiq_fw_version, 0444, mes.kiq_version & AMDGPU_MES_VERSION_MASK); +FW_VERSION_ATTR(pldm_fw_version, 0444, firmware.pldm_version); static struct attribute *fw_attrs[] = { &dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr, @@ -779,8 +786,9 @@ static struct attribute *fw_attrs[] = { &dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr, &dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr, &dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr, - &dev_attr_dmcu_fw_version.attr, &dev_attr_imu_fw_version.attr, - &dev_attr_mes_fw_version.attr, &dev_attr_mes_kiq_fw_version.attr, + &dev_attr_dmcu_fw_version.attr, &dev_attr_dmcub_fw_version.attr, + &dev_attr_imu_fw_version.attr, &dev_attr_mes_fw_version.attr, + &dev_attr_mes_kiq_fw_version.attr, &dev_attr_pldm_fw_version.attr, NULL }; @@ -1152,6 +1160,9 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM; } + if (amdgpu_virt_xgmi_migrate_enabled(adev) && adev->firmware.fw_buf) + adev->firmware.fw_buf_mc = amdgpu_bo_fb_aper_addr(adev->firmware.fw_buf); + for (i = 0; i < adev->firmware.max_ucodes; i++) { ucode = &adev->firmware.ucode[i]; if (ucode->fw) { @@ -1384,6 +1395,19 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl return NULL; } +bool amdgpu_is_kicker_fw(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(kicker_device_list); i++) { + if (adev->pdev->device == kicker_device_list[i].device && + adev->pdev->revision == kicker_device_list[i].revision) + return true; + } + + return false; +} + void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len) { int maj, min, rev; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 4eedd92f000b..6349aad6da35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -25,6 +25,8 @@ #include "amdgpu_socbb.h" +#define RS64_FW_UC_START_ADDR_LO 0x3000 + struct common_firmware_header { uint32_t size_bytes; /* size of the entire header+image(s) in bytes */ uint32_t header_size_bytes; /* size of just the header in bytes */ @@ -600,6 +602,12 @@ struct amdgpu_firmware { void *fw_buf_ptr; uint64_t fw_buf_mc; + uint32_t pldm_version; +}; + +struct kicker_device{ + unsigned short device; + u8 revision; }; void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr); @@ -629,5 +637,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type); const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id); void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len); +bool amdgpu_is_kicker_fw(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 0a1ef95b2866..c92b8794aa73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -529,6 +529,7 @@ int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, pfns[i] = err_data.err_addr[i].retired_page; } ret = i; + adev->umc.err_addr_cnt = err_data.err_addr_cnt; out: kfree(err_data.err_addr); @@ -561,3 +562,26 @@ int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev, return 0; } + +int amdgpu_umc_pa2mca(struct amdgpu_device *adev, + uint64_t pa, uint64_t *mca, enum amdgpu_memory_partition nps) +{ + struct ta_ras_query_address_input addr_in; + struct ta_ras_query_address_output addr_out; + int ret; + + /* nps: the pa belongs to */ + addr_in.pa.pa = pa | ((uint64_t)nps << 58); + addr_in.addr_type = TA_RAS_PA_TO_MCA; + ret = psp_ras_query_address(&adev->psp, &addr_in, &addr_out); + if (ret) { + dev_warn(adev->dev, "Failed to query RAS MCA address for 0x%llx", + pa); + + return ret; + } + + *mca = addr_out.ma.err_addr; + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 857693bcd8d4..ec203f9e5ffa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -78,6 +78,18 @@ #define UMC_NPS_SHIFT 40 #define UMC_NPS_MASK 0xffULL +/* three column bits and one row bit in MCA address flip + * in bad page retirement + */ +#define RETIRE_FLIP_BITS_NUM 4 + +struct amdgpu_umc_flip_bits { + uint32_t flip_bits_in_pa[RETIRE_FLIP_BITS_NUM]; + uint32_t flip_row_bit; + uint32_t r13_in_pa; + uint32_t bit_num; +}; + typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, void *data); @@ -100,6 +112,7 @@ struct amdgpu_umc_ras { bool dump_addr); uint32_t (*get_die_id_from_pa)(struct amdgpu_device *adev, uint64_t mca_addr, uint64_t retired_page); + void (*get_retire_flip_bits)(struct amdgpu_device *adev); }; struct amdgpu_umc_funcs { @@ -130,6 +143,10 @@ struct amdgpu_umc { /* active mask for umc node instance */ unsigned long active_mask; + + struct amdgpu_umc_flip_bits flip_bits; + + unsigned long err_addr_cnt; }; int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev); @@ -172,4 +189,6 @@ int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev, uint64_t err_addr, uint32_t ch, uint32_t umc, uint32_t node, uint32_t socket, struct ta_ras_query_address_output *addr_out, bool dump_addr); +int amdgpu_umc_pa2mca(struct amdgpu_device *adev, + uint64_t pa, uint64_t *mca, enum amdgpu_memory_partition nps); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c new file mode 100644 index 000000000000..c3ace8030530 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -0,0 +1,981 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <drm/drm_auth.h> +#include <drm/drm_exec.h> +#include <linux/pm_runtime.h> + +#include "amdgpu.h" +#include "amdgpu_vm.h" +#include "amdgpu_userq.h" +#include "amdgpu_userq_fence.h" + +u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev) +{ + int i; + u32 userq_ip_mask = 0; + + for (i = 0; i < AMDGPU_HW_IP_NUM; i++) { + if (adev->userq_funcs[i]) + userq_ip_mask |= (1 << i); + } + + return userq_ip_mask; +} + +static int +amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *userq_funcs = + adev->userq_funcs[queue->queue_type]; + int r = 0; + + if (queue->state == AMDGPU_USERQ_STATE_MAPPED) { + r = userq_funcs->unmap(uq_mgr, queue); + if (r) + queue->state = AMDGPU_USERQ_STATE_HUNG; + else + queue->state = AMDGPU_USERQ_STATE_UNMAPPED; + } + return r; +} + +static int +amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *userq_funcs = + adev->userq_funcs[queue->queue_type]; + int r = 0; + + if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) { + r = userq_funcs->map(uq_mgr, queue); + if (r) { + queue->state = AMDGPU_USERQ_STATE_HUNG; + } else { + queue->state = AMDGPU_USERQ_STATE_MAPPED; + } + } + return r; +} + +static void +amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct dma_fence *f = queue->last_fence; + int ret; + + if (f && !dma_fence_is_signaled(f)) { + ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); + if (ret <= 0) + drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n", + f->context, f->seqno); + } +} + +static void +amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + int queue_id) +{ + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type]; + + uq_funcs->mqd_destroy(uq_mgr, queue); + amdgpu_userq_fence_driver_free(queue); + idr_remove(&uq_mgr->userq_idr, queue_id); + kfree(queue); +} + +int +amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_usermode_queue *queue; + int queue_id; + int ret = 0; + + mutex_lock(&uq_mgr->userq_mutex); + /* Resume all the queues for this process */ + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) + ret += queue->state == AMDGPU_USERQ_STATE_MAPPED; + + mutex_unlock(&uq_mgr->userq_mutex); + return ret; +} + +static struct amdgpu_usermode_queue * +amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid) +{ + return idr_find(&uq_mgr->userq_idr, qid); +} + +void +amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + +retry: + /* Flush any pending resume work to create ev_fence */ + flush_delayed_work(&uq_mgr->resume_work); + + mutex_lock(&uq_mgr->userq_mutex); + spin_lock(&evf_mgr->ev_fence_lock); + ev_fence = evf_mgr->ev_fence; + spin_unlock(&evf_mgr->ev_fence_lock); + if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) { + mutex_unlock(&uq_mgr->userq_mutex); + /* + * Looks like there was no pending resume work, + * add one now to create a valid eviction fence + */ + schedule_delayed_work(&uq_mgr->resume_work, 0); + goto retry; + } +} + +int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_userq_obj *userq_obj, + int size) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_bo_param bp; + int r; + + memset(&bp, 0, sizeof(bp)); + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + bp.type = ttm_bo_type_kernel; + bp.size = size; + bp.resv = NULL; + bp.bo_ptr_size = sizeof(struct amdgpu_bo); + + r = amdgpu_bo_create(adev, &bp, &userq_obj->obj); + if (r) { + drm_file_err(uq_mgr->file, "Failed to allocate BO for userqueue (%d)", r); + return r; + } + + r = amdgpu_bo_reserve(userq_obj->obj, true); + if (r) { + drm_file_err(uq_mgr->file, "Failed to reserve BO to map (%d)", r); + goto free_obj; + } + + r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo); + if (r) { + drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", r); + goto unresv; + } + + r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr); + if (r) { + drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", r); + goto unresv; + } + + userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj); + amdgpu_bo_unreserve(userq_obj->obj); + memset(userq_obj->cpu_ptr, 0, size); + return 0; + +unresv: + amdgpu_bo_unreserve(userq_obj->obj); + +free_obj: + amdgpu_bo_unref(&userq_obj->obj); + return r; +} + +void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_userq_obj *userq_obj) +{ + amdgpu_bo_kunmap(userq_obj->obj); + amdgpu_bo_unref(&userq_obj->obj); +} + +uint64_t +amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_db_info *db_info, + struct drm_file *filp) +{ + uint64_t index; + struct drm_gem_object *gobj; + struct amdgpu_userq_obj *db_obj = db_info->db_obj; + int r, db_size; + + gobj = drm_gem_object_lookup(filp, db_info->doorbell_handle); + if (gobj == NULL) { + drm_file_err(uq_mgr->file, "Can't find GEM object for doorbell\n"); + return -EINVAL; + } + + db_obj->obj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); + drm_gem_object_put(gobj); + + r = amdgpu_bo_reserve(db_obj->obj, true); + if (r) { + drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n"); + goto unref_bo; + } + + /* Pin the BO before generating the index, unpin in queue destroy */ + r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL); + if (r) { + drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n"); + goto unresv_bo; + } + + switch (db_info->queue_type) { + case AMDGPU_HW_IP_GFX: + case AMDGPU_HW_IP_COMPUTE: + case AMDGPU_HW_IP_DMA: + db_size = sizeof(u64); + break; + + case AMDGPU_HW_IP_VCN_ENC: + db_size = sizeof(u32); + db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1; + break; + + case AMDGPU_HW_IP_VPE: + db_size = sizeof(u32); + db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VPE << 1; + break; + + default: + drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not support\n", + db_info->queue_type); + r = -EINVAL; + goto unpin_bo; + } + + index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj, + db_info->doorbell_offset, db_size); + drm_dbg_driver(adev_to_drm(uq_mgr->adev), + "[Usermode queues] doorbell index=%lld\n", index); + amdgpu_bo_unreserve(db_obj->obj); + return index; + +unpin_bo: + amdgpu_bo_unpin(db_obj->obj); +unresv_bo: + amdgpu_bo_unreserve(db_obj->obj); +unref_bo: + amdgpu_bo_unref(&db_obj->obj); + return r; +} + +static int +amdgpu_userq_destroy(struct drm_file *filp, int queue_id) +{ + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_usermode_queue *queue; + int r = 0; + + cancel_delayed_work_sync(&uq_mgr->resume_work); + mutex_lock(&uq_mgr->userq_mutex); + + queue = amdgpu_userq_find(uq_mgr, queue_id); + if (!queue) { + drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n"); + mutex_unlock(&uq_mgr->userq_mutex); + return -EINVAL; + } + amdgpu_userq_wait_for_last_fence(uq_mgr, queue); + r = amdgpu_bo_reserve(queue->db_obj.obj, true); + if (!r) { + amdgpu_bo_unpin(queue->db_obj.obj); + amdgpu_bo_unreserve(queue->db_obj.obj); + } + amdgpu_bo_unref(&queue->db_obj.obj); + +#if defined(CONFIG_DEBUG_FS) + debugfs_remove_recursive(queue->debugfs_queue); +#endif + r = amdgpu_userq_unmap_helper(uq_mgr, queue); + amdgpu_userq_cleanup(uq_mgr, queue, queue_id); + mutex_unlock(&uq_mgr->userq_mutex); + + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + +static int amdgpu_userq_priority_permit(struct drm_file *filp, + int priority) +{ + if (priority < AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH) + return 0; + + if (capable(CAP_SYS_NICE)) + return 0; + + if (drm_is_current_master(filp)) + return 0; + + return -EACCES; +} + +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_mqd_info_read(struct seq_file *m, void *unused) +{ + struct amdgpu_usermode_queue *queue = m->private; + struct amdgpu_bo *bo; + int r; + + if (!queue || !queue->mqd.obj) + return -EINVAL; + + bo = amdgpu_bo_ref(queue->mqd.obj); + r = amdgpu_bo_reserve(bo, true); + if (r) { + amdgpu_bo_unref(&bo); + return -EINVAL; + } + + seq_printf(m, "queue_type %d\n", queue->queue_type); + seq_printf(m, "mqd_gpu_address: 0x%llx\n", amdgpu_bo_gpu_offset(queue->mqd.obj)); + + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + + return 0; +} + +static int amdgpu_mqd_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, amdgpu_mqd_info_read, inode->i_private); +} + +static const struct file_operations amdgpu_mqd_info_fops = { + .owner = THIS_MODULE, + .open = amdgpu_mqd_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + +static int +amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) +{ + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *uq_funcs; + struct amdgpu_usermode_queue *queue; + struct amdgpu_db_info db_info; + char *queue_name; + bool skip_map_queue; + uint64_t index; + int qid, r = 0; + int priority = + (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >> + AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT; + + /* Usermode queues are only supported for GFX IP as of now */ + if (args->in.ip_type != AMDGPU_HW_IP_GFX && + args->in.ip_type != AMDGPU_HW_IP_DMA && + args->in.ip_type != AMDGPU_HW_IP_COMPUTE) { + drm_file_err(uq_mgr->file, "Usermode queue doesn't support IP type %u\n", + args->in.ip_type); + return -EINVAL; + } + + r = amdgpu_userq_priority_permit(filp, priority); + if (r) + return r; + + if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) && + (args->in.ip_type != AMDGPU_HW_IP_GFX) && + (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) && + !amdgpu_is_tmz(adev)) { + drm_file_err(uq_mgr->file, "Secure only supported on GFX/Compute queues\n"); + return -EINVAL; + } + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for userqueue create\n"); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + /* + * There could be a situation that we are creating a new queue while + * the other queues under this UQ_mgr are suspended. So if there is any + * resume work pending, wait for it to get done. + * + * This will also make sure we have a valid eviction fence ready to be used. + */ + mutex_lock(&adev->userq_mutex); + amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); + + uq_funcs = adev->userq_funcs[args->in.ip_type]; + if (!uq_funcs) { + drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n", + args->in.ip_type); + r = -EINVAL; + goto unlock; + } + + queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL); + if (!queue) { + drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n"); + r = -ENOMEM; + goto unlock; + } + queue->doorbell_handle = args->in.doorbell_handle; + queue->queue_type = args->in.ip_type; + queue->vm = &fpriv->vm; + queue->priority = priority; + + db_info.queue_type = queue->queue_type; + db_info.doorbell_handle = queue->doorbell_handle; + db_info.db_obj = &queue->db_obj; + db_info.doorbell_offset = args->in.doorbell_offset; + + /* Convert relative doorbell offset into absolute doorbell index */ + index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp); + if (index == (uint64_t)-EINVAL) { + drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n"); + kfree(queue); + goto unlock; + } + + queue->doorbell_index = index; + xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); + r = amdgpu_userq_fence_driver_alloc(adev, queue); + if (r) { + drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n"); + goto unlock; + } + + r = uq_funcs->mqd_create(uq_mgr, &args->in, queue); + if (r) { + drm_file_err(uq_mgr->file, "Failed to create Queue\n"); + amdgpu_userq_fence_driver_free(queue); + kfree(queue); + goto unlock; + } + + + qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL); + if (qid < 0) { + drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n"); + amdgpu_userq_fence_driver_free(queue); + uq_funcs->mqd_destroy(uq_mgr, queue); + kfree(queue); + r = -ENOMEM; + goto unlock; + } + + /* don't map the queue if scheduling is halted */ + if (adev->userq_halt_for_enforce_isolation && + ((queue->queue_type == AMDGPU_HW_IP_GFX) || + (queue->queue_type == AMDGPU_HW_IP_COMPUTE))) + skip_map_queue = true; + else + skip_map_queue = false; + if (!skip_map_queue) { + r = amdgpu_userq_map_helper(uq_mgr, queue); + if (r) { + drm_file_err(uq_mgr->file, "Failed to map Queue\n"); + idr_remove(&uq_mgr->userq_idr, qid); + amdgpu_userq_fence_driver_free(queue); + uq_funcs->mqd_destroy(uq_mgr, queue); + kfree(queue); + goto unlock; + } + } + + queue_name = kasprintf(GFP_KERNEL, "queue-%d", qid); + if (!queue_name) { + r = -ENOMEM; + goto unlock; + } + +#if defined(CONFIG_DEBUG_FS) + /* Queue dentry per client to hold MQD information */ + queue->debugfs_queue = debugfs_create_dir(queue_name, filp->debugfs_client); + debugfs_create_file("mqd_info", 0444, queue->debugfs_queue, queue, &amdgpu_mqd_info_fops); +#endif + kfree(queue_name); + + args->out.queue_id = qid; + +unlock: + mutex_unlock(&uq_mgr->userq_mutex); + mutex_unlock(&adev->userq_mutex); + + return r; +} + +int amdgpu_userq_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + union drm_amdgpu_userq *args = data; + int r; + + switch (args->in.op) { + case AMDGPU_USERQ_OP_CREATE: + if (args->in.flags & ~(AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK | + AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE)) + return -EINVAL; + r = amdgpu_userq_create(filp, args); + if (r) + drm_file_err(filp, "Failed to create usermode queue\n"); + break; + + case AMDGPU_USERQ_OP_FREE: + if (args->in.ip_type || + args->in.doorbell_handle || + args->in.doorbell_offset || + args->in.flags || + args->in.queue_va || + args->in.queue_size || + args->in.rptr_va || + args->in.wptr_va || + args->in.wptr_va || + args->in.mqd || + args->in.mqd_size) + return -EINVAL; + r = amdgpu_userq_destroy(filp, args->in.queue_id); + if (r) + drm_file_err(filp, "Failed to destroy usermode queue\n"); + break; + + default: + drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op); + return -EINVAL; + } + + return r; +} + +static int +amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_usermode_queue *queue; + int queue_id; + int ret = 0, r; + + /* Resume all the queues for this process */ + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + r = amdgpu_userq_map_helper(uq_mgr, queue); + if (r) + ret = r; + } + + if (ret) + drm_file_err(uq_mgr->file, "Failed to map all the queues\n"); + return ret; +} + +static int +amdgpu_userq_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) +{ + struct ttm_operation_ctx ctx = { false, false }; + int ret; + + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + DRM_ERROR("Fail to validate\n"); + + return ret; +} + +static int +amdgpu_userq_validate_bos(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); + struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_bo_va *bo_va; + struct ww_acquire_ctx *ticket; + struct drm_exec exec; + struct amdgpu_bo *bo; + struct dma_resv *resv; + bool clear, unlock; + int ret = 0; + + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); + drm_exec_until_all_locked(&exec) { + ret = amdgpu_vm_lock_pd(vm, &exec, 2); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) { + drm_file_err(uq_mgr->file, "Failed to lock PD\n"); + goto unlock_all; + } + + /* Lock the done list */ + list_for_each_entry(bo_va, &vm->done, base.vm_status) { + bo = bo_va->base.bo; + if (!bo) + continue; + + ret = drm_exec_lock_obj(&exec, &bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto unlock_all; + } + } + + spin_lock(&vm->status_lock); + while (!list_empty(&vm->moved)) { + bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, + base.vm_status); + spin_unlock(&vm->status_lock); + + /* Per VM BOs never need to bo cleared in the page tables */ + ret = amdgpu_vm_bo_update(adev, bo_va, false); + if (ret) + goto unlock_all; + spin_lock(&vm->status_lock); + } + + ticket = &exec.ticket; + while (!list_empty(&vm->invalidated)) { + bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, + base.vm_status); + resv = bo_va->base.bo->tbo.base.resv; + spin_unlock(&vm->status_lock); + + bo = bo_va->base.bo; + ret = amdgpu_userq_validate_vm_bo(NULL, bo); + if (ret) { + drm_file_err(uq_mgr->file, "Failed to validate BO\n"); + goto unlock_all; + } + + /* Try to reserve the BO to avoid clearing its ptes */ + if (!adev->debug_vm && dma_resv_trylock(resv)) { + clear = false; + unlock = true; + /* The caller is already holding the reservation lock */ + } else if (dma_resv_locking_ctx(resv) == ticket) { + clear = false; + unlock = false; + /* Somebody else is using the BO right now */ + } else { + clear = true; + unlock = false; + } + + ret = amdgpu_vm_bo_update(adev, bo_va, clear); + + if (unlock) + dma_resv_unlock(resv); + if (ret) + goto unlock_all; + + spin_lock(&vm->status_lock); + } + spin_unlock(&vm->status_lock); + + ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec); + if (ret) + drm_file_err(uq_mgr->file, "Failed to replace eviction fence\n"); + +unlock_all: + drm_exec_fini(&exec); + return ret; +} + +static void amdgpu_userq_restore_worker(struct work_struct *work) +{ + struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work); + struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); + int ret; + + flush_delayed_work(&fpriv->evf_mgr.suspend_work); + + mutex_lock(&uq_mgr->userq_mutex); + + ret = amdgpu_userq_validate_bos(uq_mgr); + if (ret) { + drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n"); + goto unlock; + } + + ret = amdgpu_userq_restore_all(uq_mgr); + if (ret) { + drm_file_err(uq_mgr->file, "Failed to restore all queues\n"); + goto unlock; + } + +unlock: + mutex_unlock(&uq_mgr->userq_mutex); +} + +static int +amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_usermode_queue *queue; + int queue_id; + int ret = 0, r; + + /* Try to unmap all the queues in this process ctx */ + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + r = amdgpu_userq_unmap_helper(uq_mgr, queue); + if (r) + ret = r; + } + + if (ret) + drm_file_err(uq_mgr->file, "Couldn't unmap all the queues\n"); + return ret; +} + +static int +amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_usermode_queue *queue; + int queue_id, ret; + + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + struct dma_fence *f = queue->last_fence; + + if (!f || dma_fence_is_signaled(f)) + continue; + ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); + if (ret <= 0) { + drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n", + f->context, f->seqno); + return -ETIMEDOUT; + } + } + + return 0; +} + +void +amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_eviction_fence *ev_fence) +{ + int ret; + struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); + struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr; + + /* Wait for any pending userqueue fence work to finish */ + ret = amdgpu_userq_wait_for_signal(uq_mgr); + if (ret) { + drm_file_err(uq_mgr->file, "Not evicting userqueue, timeout waiting for work\n"); + return; + } + + ret = amdgpu_userq_evict_all(uq_mgr); + if (ret) { + drm_file_err(uq_mgr->file, "Failed to evict userqueue\n"); + return; + } + + /* Signal current eviction fence */ + amdgpu_eviction_fence_signal(evf_mgr, ev_fence); + + if (evf_mgr->fd_closing) { + cancel_delayed_work_sync(&uq_mgr->resume_work); + return; + } + + /* Schedule a resume work */ + schedule_delayed_work(&uq_mgr->resume_work, 0); +} + +int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, + struct amdgpu_device *adev) +{ + mutex_init(&userq_mgr->userq_mutex); + idr_init_base(&userq_mgr->userq_idr, 1); + userq_mgr->adev = adev; + userq_mgr->file = file_priv; + + mutex_lock(&adev->userq_mutex); + list_add(&userq_mgr->list, &adev->userq_mgr_list); + mutex_unlock(&adev->userq_mutex); + + INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker); + return 0; +} + +void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) +{ + struct amdgpu_device *adev = userq_mgr->adev; + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + uint32_t queue_id; + + cancel_delayed_work_sync(&userq_mgr->resume_work); + + mutex_lock(&adev->userq_mutex); + mutex_lock(&userq_mgr->userq_mutex); + idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) { + amdgpu_userq_wait_for_last_fence(userq_mgr, queue); + amdgpu_userq_unmap_helper(userq_mgr, queue); + amdgpu_userq_cleanup(userq_mgr, queue, queue_id); + } + + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + if (uqm == userq_mgr) { + list_del(&uqm->list); + break; + } + } + idr_destroy(&userq_mgr->userq_idr); + mutex_unlock(&userq_mgr->userq_mutex); + mutex_unlock(&adev->userq_mutex); + mutex_destroy(&userq_mgr->userq_mutex); +} + +int amdgpu_userq_suspend(struct amdgpu_device *adev) +{ + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0, r; + + if (!ip_mask) + return 0; + + mutex_lock(&adev->userq_mutex); + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + cancel_delayed_work_sync(&uqm->resume_work); + mutex_lock(&uqm->userq_mutex); + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + r = amdgpu_userq_unmap_helper(uqm, queue); + if (r) + ret = r; + } + mutex_unlock(&uqm->userq_mutex); + } + mutex_unlock(&adev->userq_mutex); + return ret; +} + +int amdgpu_userq_resume(struct amdgpu_device *adev) +{ + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0, r; + + if (!ip_mask) + return 0; + + mutex_lock(&adev->userq_mutex); + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + mutex_lock(&uqm->userq_mutex); + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + r = amdgpu_userq_map_helper(uqm, queue); + if (r) + ret = r; + } + mutex_unlock(&uqm->userq_mutex); + } + mutex_unlock(&adev->userq_mutex); + return ret; +} + +int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, + u32 idx) +{ + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0, r; + + /* only need to stop gfx/compute */ + if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE)))) + return 0; + + mutex_lock(&adev->userq_mutex); + if (adev->userq_halt_for_enforce_isolation) + dev_warn(adev->dev, "userq scheduling already stopped!\n"); + adev->userq_halt_for_enforce_isolation = true; + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + cancel_delayed_work_sync(&uqm->resume_work); + mutex_lock(&uqm->userq_mutex); + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + if (((queue->queue_type == AMDGPU_HW_IP_GFX) || + (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && + (queue->xcp_id == idx)) { + r = amdgpu_userq_unmap_helper(uqm, queue); + if (r) + ret = r; + } + } + mutex_unlock(&uqm->userq_mutex); + } + mutex_unlock(&adev->userq_mutex); + return ret; +} + +int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, + u32 idx) +{ + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0, r; + + /* only need to stop gfx/compute */ + if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE)))) + return 0; + + mutex_lock(&adev->userq_mutex); + if (!adev->userq_halt_for_enforce_isolation) + dev_warn(adev->dev, "userq scheduling already started!\n"); + adev->userq_halt_for_enforce_isolation = false; + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + mutex_lock(&uqm->userq_mutex); + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + if (((queue->queue_type == AMDGPU_HW_IP_GFX) || + (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && + (queue->xcp_id == idx)) { + r = amdgpu_userq_map_helper(uqm, queue); + if (r) + ret = r; + } + } + mutex_unlock(&uqm->userq_mutex); + } + mutex_unlock(&adev->userq_mutex); + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h new file mode 100644 index 000000000000..b1ca91b7cda4 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef AMDGPU_USERQ_H_ +#define AMDGPU_USERQ_H_ +#include "amdgpu_eviction_fence.h" + +#define AMDGPU_MAX_USERQ_COUNT 512 + +#define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base) +#define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr) +#define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr, name) + +enum amdgpu_userq_state { + AMDGPU_USERQ_STATE_UNMAPPED = 0, + AMDGPU_USERQ_STATE_MAPPED, + AMDGPU_USERQ_STATE_PREEMPTED, + AMDGPU_USERQ_STATE_HUNG, +}; + +struct amdgpu_mqd_prop; + +struct amdgpu_userq_obj { + void *cpu_ptr; + uint64_t gpu_addr; + struct amdgpu_bo *obj; +}; + +struct amdgpu_usermode_queue { + int queue_type; + enum amdgpu_userq_state state; + uint64_t doorbell_handle; + uint64_t doorbell_index; + uint64_t flags; + struct amdgpu_mqd_prop *userq_prop; + struct amdgpu_userq_mgr *userq_mgr; + struct amdgpu_vm *vm; + struct amdgpu_userq_obj mqd; + struct amdgpu_userq_obj db_obj; + struct amdgpu_userq_obj fw_obj; + struct amdgpu_userq_obj wptr_obj; + struct xarray fence_drv_xa; + struct amdgpu_userq_fence_driver *fence_drv; + struct dma_fence *last_fence; + u32 xcp_id; + int priority; + struct dentry *debugfs_queue; +}; + +struct amdgpu_userq_funcs { + int (*mqd_create)(struct amdgpu_userq_mgr *uq_mgr, + struct drm_amdgpu_userq_in *args, + struct amdgpu_usermode_queue *queue); + void (*mqd_destroy)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *uq); + int (*unmap)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue); + int (*map)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue); +}; + +/* Usermode queues for gfx */ +struct amdgpu_userq_mgr { + struct idr userq_idr; + struct mutex userq_mutex; + struct amdgpu_device *adev; + struct delayed_work resume_work; + struct list_head list; + struct drm_file *file; +}; + +struct amdgpu_db_info { + uint64_t doorbell_handle; + uint32_t queue_type; + uint32_t doorbell_offset; + struct amdgpu_userq_obj *db_obj; +}; + +int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); + +int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, + struct amdgpu_device *adev); + +void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr); + +int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_userq_obj *userq_obj, + int size); + +void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_userq_obj *userq_obj); + +void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_eviction_fence *ev_fence); + +int amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr); + +void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr, + struct amdgpu_eviction_fence_mgr *evf_mgr); + +uint64_t amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_db_info *db_info, + struct drm_file *filp); + +u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev); + +int amdgpu_userq_suspend(struct amdgpu_device *adev); +int amdgpu_userq_resume(struct amdgpu_device *adev); + +int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, + u32 idx); +int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, + u32 idx); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c new file mode 100644 index 000000000000..c2a983ff23c9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -0,0 +1,967 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/kref.h> +#include <linux/slab.h> +#include <linux/dma-fence-unwrap.h> + +#include <drm/drm_exec.h> +#include <drm/drm_syncobj.h> + +#include "amdgpu.h" +#include "amdgpu_userq_fence.h" + +static const struct dma_fence_ops amdgpu_userq_fence_ops; +static struct kmem_cache *amdgpu_userq_fence_slab; + +int amdgpu_userq_fence_slab_init(void) +{ + amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence", + sizeof(struct amdgpu_userq_fence), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!amdgpu_userq_fence_slab) + return -ENOMEM; + + return 0; +} + +void amdgpu_userq_fence_slab_fini(void) +{ + rcu_barrier(); + kmem_cache_destroy(amdgpu_userq_fence_slab); +} + +static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f) +{ + if (!f || f->ops != &amdgpu_userq_fence_ops) + return NULL; + + return container_of(f, struct amdgpu_userq_fence, base); +} + +static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv) +{ + return le64_to_cpu(*fence_drv->cpu_addr); +} + +int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, + struct amdgpu_usermode_queue *userq) +{ + struct amdgpu_userq_fence_driver *fence_drv; + unsigned long flags; + int r; + + fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL); + if (!fence_drv) + return -ENOMEM; + + /* Acquire seq64 memory */ + r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr, + &fence_drv->cpu_addr); + if (r) + goto free_fence_drv; + + memset(fence_drv->cpu_addr, 0, sizeof(u64)); + + kref_init(&fence_drv->refcount); + INIT_LIST_HEAD(&fence_drv->fences); + spin_lock_init(&fence_drv->fence_list_lock); + + fence_drv->adev = adev; + fence_drv->context = dma_fence_context_alloc(1); + get_task_comm(fence_drv->timeline_name, current); + + xa_lock_irqsave(&adev->userq_xa, flags); + r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index, + fence_drv, GFP_KERNEL)); + xa_unlock_irqrestore(&adev->userq_xa, flags); + if (r) + goto free_seq64; + + userq->fence_drv = fence_drv; + + return 0; + +free_seq64: + amdgpu_seq64_free(adev, fence_drv->va); +free_fence_drv: + kfree(fence_drv); + + return r; +} + +static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa) +{ + struct amdgpu_userq_fence_driver *fence_drv; + unsigned long index; + + if (xa_empty(xa)) + return; + + xa_lock(xa); + xa_for_each(xa, index, fence_drv) { + __xa_erase(xa, index); + amdgpu_userq_fence_driver_put(fence_drv); + } + + xa_unlock(xa); +} + +void +amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq) +{ + amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa); + xa_destroy(&userq->fence_drv_xa); + /* Drop the fence_drv reference held by user queue */ + amdgpu_userq_fence_driver_put(userq->fence_drv); +} + +void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv) +{ + struct amdgpu_userq_fence *userq_fence, *tmp; + struct dma_fence *fence; + u64 rptr; + int i; + + if (!fence_drv) + return; + + rptr = amdgpu_userq_fence_read(fence_drv); + + spin_lock(&fence_drv->fence_list_lock); + list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) { + fence = &userq_fence->base; + + if (rptr < fence->seqno) + break; + + dma_fence_signal(fence); + + for (i = 0; i < userq_fence->fence_drv_array_count; i++) + amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]); + + list_del(&userq_fence->link); + dma_fence_put(fence); + } + spin_unlock(&fence_drv->fence_list_lock); +} + +void amdgpu_userq_fence_driver_destroy(struct kref *ref) +{ + struct amdgpu_userq_fence_driver *fence_drv = container_of(ref, + struct amdgpu_userq_fence_driver, + refcount); + struct amdgpu_userq_fence_driver *xa_fence_drv; + struct amdgpu_device *adev = fence_drv->adev; + struct amdgpu_userq_fence *fence, *tmp; + struct xarray *xa = &adev->userq_xa; + unsigned long index, flags; + struct dma_fence *f; + + spin_lock_irqsave(&fence_drv->fence_list_lock, flags); + list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) { + f = &fence->base; + + if (!dma_fence_is_signaled(f)) { + dma_fence_set_error(f, -ECANCELED); + dma_fence_signal(f); + } + + list_del(&fence->link); + dma_fence_put(f); + } + spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); + + xa_lock_irqsave(xa, flags); + xa_for_each(xa, index, xa_fence_drv) + if (xa_fence_drv == fence_drv) + __xa_erase(xa, index); + xa_unlock_irqrestore(xa, flags); + + /* Free seq64 memory */ + amdgpu_seq64_free(adev, fence_drv->va); + kfree(fence_drv); +} + +void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv) +{ + kref_get(&fence_drv->refcount); +} + +void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv) +{ + kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy); +} + +static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence) +{ + *userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC); + return *userq_fence ? 0 : -ENOMEM; +} + +static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, + struct amdgpu_userq_fence *userq_fence, + u64 seq, struct dma_fence **f) +{ + struct amdgpu_userq_fence_driver *fence_drv; + struct dma_fence *fence; + unsigned long flags; + + fence_drv = userq->fence_drv; + if (!fence_drv) + return -EINVAL; + + spin_lock_init(&userq_fence->lock); + INIT_LIST_HEAD(&userq_fence->link); + fence = &userq_fence->base; + userq_fence->fence_drv = fence_drv; + + dma_fence_init64(fence, &amdgpu_userq_fence_ops, &userq_fence->lock, + fence_drv->context, seq); + + amdgpu_userq_fence_driver_get(fence_drv); + dma_fence_get(fence); + + if (!xa_empty(&userq->fence_drv_xa)) { + struct amdgpu_userq_fence_driver *stored_fence_drv; + unsigned long index, count = 0; + int i = 0; + + xa_lock(&userq->fence_drv_xa); + xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) + count++; + + userq_fence->fence_drv_array = + kvmalloc_array(count, + sizeof(struct amdgpu_userq_fence_driver *), + GFP_ATOMIC); + + if (userq_fence->fence_drv_array) { + xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) { + userq_fence->fence_drv_array[i] = stored_fence_drv; + __xa_erase(&userq->fence_drv_xa, index); + i++; + } + } + + userq_fence->fence_drv_array_count = i; + xa_unlock(&userq->fence_drv_xa); + } else { + userq_fence->fence_drv_array = NULL; + userq_fence->fence_drv_array_count = 0; + } + + /* Check if hardware has already processed the job */ + spin_lock_irqsave(&fence_drv->fence_list_lock, flags); + if (!dma_fence_is_signaled_locked(fence)) + list_add_tail(&userq_fence->link, &fence_drv->fences); + else + dma_fence_put(fence); + + spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); + + *f = fence; + + return 0; +} + +static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f) +{ + return "amdgpu_userq_fence"; +} + +static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f); + + return fence->fence_drv->timeline_name; +} + +static bool amdgpu_userq_fence_signaled(struct dma_fence *f) +{ + struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f); + struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv; + u64 rptr, wptr; + + rptr = amdgpu_userq_fence_read(fence_drv); + wptr = fence->base.seqno; + + if (rptr >= wptr) + return true; + + return false; +} + +static void amdgpu_userq_fence_free(struct rcu_head *rcu) +{ + struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu); + struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence); + struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv; + + /* Release the fence driver reference */ + amdgpu_userq_fence_driver_put(fence_drv); + + kvfree(userq_fence->fence_drv_array); + kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); +} + +static void amdgpu_userq_fence_release(struct dma_fence *f) +{ + call_rcu(&f->rcu, amdgpu_userq_fence_free); +} + +static const struct dma_fence_ops amdgpu_userq_fence_ops = { + .get_driver_name = amdgpu_userq_fence_get_driver_name, + .get_timeline_name = amdgpu_userq_fence_get_timeline_name, + .signaled = amdgpu_userq_fence_signaled, + .release = amdgpu_userq_fence_release, +}; + +/** + * amdgpu_userq_fence_read_wptr - Read the userq wptr value + * + * @queue: user mode queue structure pointer + * @wptr: write pointer value + * + * Read the wptr value from userq's MQD. The userq signal IOCTL + * creates a dma_fence for the shared buffers that expects the + * RPTR value written to seq64 memory >= WPTR. + * + * Returns wptr value on success, error on failure. + */ +static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue, + u64 *wptr) +{ + struct amdgpu_bo_va_mapping *mapping; + struct amdgpu_bo *bo; + u64 addr, *ptr; + int r; + + r = amdgpu_bo_reserve(queue->vm->root.bo, false); + if (r) + return r; + + addr = queue->userq_prop->wptr_gpu_addr; + addr &= AMDGPU_GMC_HOLE_MASK; + + mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT); + if (!mapping) { + amdgpu_bo_unreserve(queue->vm->root.bo); + DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n"); + return -EINVAL; + } + + bo = amdgpu_bo_ref(mapping->bo_va->base.bo); + amdgpu_bo_unreserve(queue->vm->root.bo); + r = amdgpu_bo_reserve(bo, true); + if (r) { + DRM_ERROR("Failed to reserve userqueue wptr bo"); + return r; + } + + r = amdgpu_bo_kmap(bo, (void **)&ptr); + if (r) { + DRM_ERROR("Failed mapping the userqueue wptr bo"); + goto map_error; + } + + *wptr = le64_to_cpu(*ptr); + + amdgpu_bo_kunmap(bo); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + + return 0; + +map_error: + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + + return r; +} + +static void amdgpu_userq_fence_cleanup(struct dma_fence *fence) +{ + dma_fence_put(fence); +} + +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; + struct drm_amdgpu_userq_signal *args = data; + struct drm_gem_object **gobj_write = NULL; + struct drm_gem_object **gobj_read = NULL; + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_fence *userq_fence; + struct drm_syncobj **syncobj = NULL; + u32 *bo_handles_write, num_write_bo_handles; + u32 *syncobj_handles, num_syncobj_handles; + u32 *bo_handles_read, num_read_bo_handles; + int r, i, entry, rentry, wentry; + struct dma_fence *fence; + struct drm_exec exec; + u64 wptr; + + num_syncobj_handles = args->num_syncobj_handles; + syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles), + size_mul(sizeof(u32), num_syncobj_handles)); + if (IS_ERR(syncobj_handles)) + return PTR_ERR(syncobj_handles); + + /* Array of pointers to the looked up syncobjs */ + syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL); + if (!syncobj) { + r = -ENOMEM; + goto free_syncobj_handles; + } + + for (entry = 0; entry < num_syncobj_handles; entry++) { + syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]); + if (!syncobj[entry]) { + r = -ENOENT; + goto free_syncobj; + } + } + + num_read_bo_handles = args->num_bo_read_handles; + bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles), + sizeof(u32) * num_read_bo_handles); + if (IS_ERR(bo_handles_read)) { + r = PTR_ERR(bo_handles_read); + goto free_syncobj; + } + + /* Array of pointers to the GEM read objects */ + gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL); + if (!gobj_read) { + r = -ENOMEM; + goto free_bo_handles_read; + } + + for (rentry = 0; rentry < num_read_bo_handles; rentry++) { + gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]); + if (!gobj_read[rentry]) { + r = -ENOENT; + goto put_gobj_read; + } + } + + num_write_bo_handles = args->num_bo_write_handles; + bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles), + sizeof(u32) * num_write_bo_handles); + if (IS_ERR(bo_handles_write)) { + r = PTR_ERR(bo_handles_write); + goto put_gobj_read; + } + + /* Array of pointers to the GEM write objects */ + gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL); + if (!gobj_write) { + r = -ENOMEM; + goto free_bo_handles_write; + } + + for (wentry = 0; wentry < num_write_bo_handles; wentry++) { + gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]); + if (!gobj_write[wentry]) { + r = -ENOENT; + goto put_gobj_write; + } + } + + /* Retrieve the user queue */ + queue = idr_find(&userq_mgr->userq_idr, args->queue_id); + if (!queue) { + r = -ENOENT; + goto put_gobj_write; + } + + r = amdgpu_userq_fence_read_wptr(queue, &wptr); + if (r) + goto put_gobj_write; + + r = amdgpu_userq_fence_alloc(&userq_fence); + if (r) + goto put_gobj_write; + + /* We are here means UQ is active, make sure the eviction fence is valid */ + amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); + + /* Create a new fence */ + r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence); + if (r) { + mutex_unlock(&userq_mgr->userq_mutex); + kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); + goto put_gobj_write; + } + + dma_fence_put(queue->last_fence); + queue->last_fence = dma_fence_get(fence); + mutex_unlock(&userq_mgr->userq_mutex); + + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, + (num_read_bo_handles + num_write_bo_handles)); + + /* Lock all BOs with retry handling */ + drm_exec_until_all_locked(&exec) { + r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); + drm_exec_retry_on_contention(&exec); + if (r) { + amdgpu_userq_fence_cleanup(fence); + goto exec_fini; + } + + r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); + drm_exec_retry_on_contention(&exec); + if (r) { + amdgpu_userq_fence_cleanup(fence); + goto exec_fini; + } + } + + for (i = 0; i < num_read_bo_handles; i++) { + if (!gobj_read || !gobj_read[i]->resv) + continue; + + dma_resv_add_fence(gobj_read[i]->resv, fence, + DMA_RESV_USAGE_READ); + } + + for (i = 0; i < num_write_bo_handles; i++) { + if (!gobj_write || !gobj_write[i]->resv) + continue; + + dma_resv_add_fence(gobj_write[i]->resv, fence, + DMA_RESV_USAGE_WRITE); + } + + /* Add the created fence to syncobj/BO's */ + for (i = 0; i < num_syncobj_handles; i++) + drm_syncobj_replace_fence(syncobj[i], fence); + + /* drop the reference acquired in fence creation function */ + dma_fence_put(fence); + +exec_fini: + drm_exec_fini(&exec); +put_gobj_write: + while (wentry-- > 0) + drm_gem_object_put(gobj_write[wentry]); + kfree(gobj_write); +free_bo_handles_write: + kfree(bo_handles_write); +put_gobj_read: + while (rentry-- > 0) + drm_gem_object_put(gobj_read[rentry]); + kfree(gobj_read); +free_bo_handles_read: + kfree(bo_handles_read); +free_syncobj: + while (entry-- > 0) + if (syncobj[entry]) + drm_syncobj_put(syncobj[entry]); + kfree(syncobj); +free_syncobj_handles: + kfree(syncobj_handles); + + return r; +} + +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write; + u32 num_syncobj, num_read_bo_handles, num_write_bo_handles; + struct drm_amdgpu_userq_fence_info *fence_info = NULL; + struct drm_amdgpu_userq_wait *wait_info = data; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; + struct amdgpu_usermode_queue *waitq; + struct drm_gem_object **gobj_write; + struct drm_gem_object **gobj_read; + struct dma_fence **fences = NULL; + u16 num_points, num_fences = 0; + int r, i, rentry, wentry, cnt; + struct drm_exec exec; + + num_read_bo_handles = wait_info->num_bo_read_handles; + bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles), + size_mul(sizeof(u32), num_read_bo_handles)); + if (IS_ERR(bo_handles_read)) + return PTR_ERR(bo_handles_read); + + num_write_bo_handles = wait_info->num_bo_write_handles; + bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles), + size_mul(sizeof(u32), num_write_bo_handles)); + if (IS_ERR(bo_handles_write)) { + r = PTR_ERR(bo_handles_write); + goto free_bo_handles_read; + } + + num_syncobj = wait_info->num_syncobj_handles; + syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles), + size_mul(sizeof(u32), num_syncobj)); + if (IS_ERR(syncobj_handles)) { + r = PTR_ERR(syncobj_handles); + goto free_bo_handles_write; + } + + num_points = wait_info->num_syncobj_timeline_handles; + timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles), + sizeof(u32) * num_points); + if (IS_ERR(timeline_handles)) { + r = PTR_ERR(timeline_handles); + goto free_syncobj_handles; + } + + timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points), + sizeof(u32) * num_points); + if (IS_ERR(timeline_points)) { + r = PTR_ERR(timeline_points); + goto free_timeline_handles; + } + + gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL); + if (!gobj_read) { + r = -ENOMEM; + goto free_timeline_points; + } + + for (rentry = 0; rentry < num_read_bo_handles; rentry++) { + gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]); + if (!gobj_read[rentry]) { + r = -ENOENT; + goto put_gobj_read; + } + } + + gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL); + if (!gobj_write) { + r = -ENOMEM; + goto put_gobj_read; + } + + for (wentry = 0; wentry < num_write_bo_handles; wentry++) { + gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]); + if (!gobj_write[wentry]) { + r = -ENOENT; + goto put_gobj_write; + } + } + + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, + (num_read_bo_handles + num_write_bo_handles)); + + /* Lock all BOs with retry handling */ + drm_exec_until_all_locked(&exec) { + r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); + drm_exec_retry_on_contention(&exec); + if (r) { + drm_exec_fini(&exec); + goto put_gobj_write; + } + + r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); + drm_exec_retry_on_contention(&exec); + if (r) { + drm_exec_fini(&exec); + goto put_gobj_write; + } + } + + if (!wait_info->num_fences) { + if (num_points) { + struct dma_fence_unwrap iter; + struct dma_fence *fence; + struct dma_fence *f; + + for (i = 0; i < num_points; i++) { + r = drm_syncobj_find_fence(filp, timeline_handles[i], + timeline_points[i], + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); + if (r) + goto exec_fini; + + dma_fence_unwrap_for_each(f, &iter, fence) + num_fences++; + + dma_fence_put(fence); + } + } + + /* Count syncobj's fence */ + for (i = 0; i < num_syncobj; i++) { + struct dma_fence *fence; + + r = drm_syncobj_find_fence(filp, syncobj_handles[i], + 0, + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); + if (r) + goto exec_fini; + + num_fences++; + dma_fence_put(fence); + } + + /* Count GEM objects fence */ + for (i = 0; i < num_read_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence *fence; + + dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, + DMA_RESV_USAGE_READ, fence) + num_fences++; + } + + for (i = 0; i < num_write_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence *fence; + + dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, + DMA_RESV_USAGE_WRITE, fence) + num_fences++; + } + + /* + * Passing num_fences = 0 means that userspace doesn't want to + * retrieve userq_fence_info. If num_fences = 0 we skip filling + * userq_fence_info and return the actual number of fences on + * args->num_fences. + */ + wait_info->num_fences = num_fences; + } else { + /* Array of fence info */ + fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL); + if (!fence_info) { + r = -ENOMEM; + goto exec_fini; + } + + /* Array of fences */ + fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL); + if (!fences) { + r = -ENOMEM; + goto free_fence_info; + } + + /* Retrieve GEM read objects fence */ + for (i = 0; i < num_read_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence *fence; + + dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, + DMA_RESV_USAGE_READ, fence) { + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + r = -EINVAL; + goto free_fences; + } + + fences[num_fences++] = fence; + dma_fence_get(fence); + } + } + + /* Retrieve GEM write objects fence */ + for (i = 0; i < num_write_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence *fence; + + dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, + DMA_RESV_USAGE_WRITE, fence) { + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + r = -EINVAL; + goto free_fences; + } + + fences[num_fences++] = fence; + dma_fence_get(fence); + } + } + + if (num_points) { + struct dma_fence_unwrap iter; + struct dma_fence *fence; + struct dma_fence *f; + + for (i = 0; i < num_points; i++) { + r = drm_syncobj_find_fence(filp, timeline_handles[i], + timeline_points[i], + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); + if (r) + goto free_fences; + + dma_fence_unwrap_for_each(f, &iter, fence) { + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + r = -EINVAL; + goto free_fences; + } + + dma_fence_get(f); + fences[num_fences++] = f; + } + + dma_fence_put(fence); + } + } + + /* Retrieve syncobj's fence */ + for (i = 0; i < num_syncobj; i++) { + struct dma_fence *fence; + + r = drm_syncobj_find_fence(filp, syncobj_handles[i], + 0, + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); + if (r) + goto free_fences; + + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + r = -EINVAL; + goto free_fences; + } + + fences[num_fences++] = fence; + } + + /* + * Keep only the latest fences to reduce the number of values + * given back to userspace. + */ + num_fences = dma_fence_dedup_array(fences, num_fences); + + waitq = idr_find(&userq_mgr->userq_idr, wait_info->waitq_id); + if (!waitq) { + r = -EINVAL; + goto free_fences; + } + + for (i = 0, cnt = 0; i < num_fences; i++) { + struct amdgpu_userq_fence_driver *fence_drv; + struct amdgpu_userq_fence *userq_fence; + u32 index; + + userq_fence = to_amdgpu_userq_fence(fences[i]); + if (!userq_fence) { + /* + * Just waiting on other driver fences should + * be good for now + */ + r = dma_fence_wait(fences[i], true); + if (r) { + dma_fence_put(fences[i]); + goto free_fences; + } + + dma_fence_put(fences[i]); + continue; + } + + fence_drv = userq_fence->fence_drv; + /* + * We need to make sure the user queue release their reference + * to the fence drivers at some point before queue destruction. + * Otherwise, we would gather those references until we don't + * have any more space left and crash. + */ + r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv, + xa_limit_32b, GFP_KERNEL); + if (r) + goto free_fences; + + amdgpu_userq_fence_driver_get(fence_drv); + + /* Store drm syncobj's gpu va address and value */ + fence_info[cnt].va = fence_drv->va; + fence_info[cnt].value = fences[i]->seqno; + + dma_fence_put(fences[i]); + /* Increment the actual userq fence count */ + cnt++; + } + + wait_info->num_fences = cnt; + /* Copy userq fence info to user space */ + if (copy_to_user(u64_to_user_ptr(wait_info->out_fences), + fence_info, wait_info->num_fences * sizeof(*fence_info))) { + r = -EFAULT; + goto free_fences; + } + + kfree(fences); + kfree(fence_info); + } + + drm_exec_fini(&exec); + for (i = 0; i < num_read_bo_handles; i++) + drm_gem_object_put(gobj_read[i]); + kfree(gobj_read); + + for (i = 0; i < num_write_bo_handles; i++) + drm_gem_object_put(gobj_write[i]); + kfree(gobj_write); + + kfree(timeline_points); + kfree(timeline_handles); + kfree(syncobj_handles); + kfree(bo_handles_write); + kfree(bo_handles_read); + + return 0; + +free_fences: + while (num_fences-- > 0) + dma_fence_put(fences[num_fences]); + kfree(fences); +free_fence_info: + kfree(fence_info); +exec_fini: + drm_exec_fini(&exec); +put_gobj_write: + while (wentry-- > 0) + drm_gem_object_put(gobj_write[wentry]); + kfree(gobj_write); +put_gobj_read: + while (rentry-- > 0) + drm_gem_object_put(gobj_read[rentry]); + kfree(gobj_read); +free_timeline_points: + kfree(timeline_points); +free_timeline_handles: + kfree(timeline_handles); +free_syncobj_handles: + kfree(syncobj_handles); +free_bo_handles_write: + kfree(bo_handles_write); +free_bo_handles_read: + kfree(bo_handles_read); + + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h new file mode 100644 index 000000000000..97a125ab8a78 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_USERQ_FENCE_H__ +#define __AMDGPU_USERQ_FENCE_H__ + +#include <linux/types.h> + +#include "amdgpu_userq.h" + +struct amdgpu_userq_fence { + struct dma_fence base; + /* + * This lock is necessary to synchronize the + * userqueue dma fence operations. + */ + spinlock_t lock; + struct list_head link; + unsigned long fence_drv_array_count; + struct amdgpu_userq_fence_driver *fence_drv; + struct amdgpu_userq_fence_driver **fence_drv_array; +}; + +struct amdgpu_userq_fence_driver { + struct kref refcount; + u64 va; + u64 gpu_addr; + u64 *cpu_addr; + u64 context; + /* + * This lock is necesaary to synchronize the access + * to the fences list by the fence driver. + */ + spinlock_t fence_list_lock; + struct list_head fences; + struct amdgpu_device *adev; + char timeline_name[TASK_COMM_LEN]; +}; + +int amdgpu_userq_fence_slab_init(void); +void amdgpu_userq_fence_slab_fini(void); + +void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv); +void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv); +int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, + struct amdgpu_usermode_queue *userq); +void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq); +void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv); +void amdgpu_userq_fence_driver_destroy(struct kref *ref); +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 1991dd3d1056..f1f67521c29c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -134,6 +134,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i) mutex_init(&adev->vcn.inst[i].vcn1_jpeg1_workaround); mutex_init(&adev->vcn.inst[i].vcn_pg_lock); + mutex_init(&adev->vcn.inst[i].engine_reset_mutex); atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0); INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler); atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0); @@ -353,9 +354,9 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i) cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work); - /* err_event_athub will corrupt VCPU buffer, so we need to + /* err_event_athub and dpc recovery will corrupt VCPU buffer, so we need to * restore fw data and clear buffer in amdgpu_vcn_resume() */ - if (in_ras_intr) + if (in_ras_intr || adev->pcie_reset_ctx.in_link_reset) return 0; return amdgpu_vcn_save_vcpu_bo_inst(adev, i); @@ -1451,3 +1452,78 @@ int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block, return ret; } + +/** + * amdgpu_vcn_reset_engine - Reset a specific VCN engine + * @adev: Pointer to the AMDGPU device + * @instance_id: VCN engine instance to reset + * + * Returns: 0 on success, or a negative error code on failure. + */ +static int amdgpu_vcn_reset_engine(struct amdgpu_device *adev, + uint32_t instance_id) +{ + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[instance_id]; + int r, i; + + mutex_lock(&vinst->engine_reset_mutex); + /* Stop the scheduler's work queue for the dec and enc rings if they are running. + * This ensures that no new tasks are submitted to the queues while + * the reset is in progress. + */ + drm_sched_wqueue_stop(&vinst->ring_dec.sched); + for (i = 0; i < vinst->num_enc_rings; i++) + drm_sched_wqueue_stop(&vinst->ring_enc[i].sched); + + /* Perform the VCN reset for the specified instance */ + r = vinst->reset(vinst); + if (r) + goto unlock; + r = amdgpu_ring_test_ring(&vinst->ring_dec); + if (r) + goto unlock; + for (i = 0; i < vinst->num_enc_rings; i++) { + r = amdgpu_ring_test_ring(&vinst->ring_enc[i]); + if (r) + goto unlock; + } + amdgpu_fence_driver_force_completion(&vinst->ring_dec); + for (i = 0; i < vinst->num_enc_rings; i++) + amdgpu_fence_driver_force_completion(&vinst->ring_enc[i]); + + /* Restart the scheduler's work queue for the dec and enc rings + * if they were stopped by this function. This allows new tasks + * to be submitted to the queues after the reset is complete. + */ + drm_sched_wqueue_start(&vinst->ring_dec.sched); + for (i = 0; i < vinst->num_enc_rings; i++) + drm_sched_wqueue_start(&vinst->ring_enc[i].sched); + +unlock: + mutex_unlock(&vinst->engine_reset_mutex); + + return r; +} + +/** + * amdgpu_vcn_ring_reset - Reset a VCN ring + * @ring: ring to reset + * @vmid: vmid of guilty job + * @timedout_fence: fence of timed out job + * + * This helper is for VCN blocks without unified queues because + * resetting the engine resets all queues in that case. With + * unified queues we have one queue per engine. + * Returns: 0 on success, or a negative error code on failure. + */ +int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + struct amdgpu_device *adev = ring->adev; + + if (adev->vcn.inst[ring->me].using_unified_queue) + return -EINVAL; + + return amdgpu_vcn_reset_engine(adev, ring->me); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index cdcdae7f71ce..0bc0a94d7cf0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -66,7 +66,6 @@ #define VCN_ENC_CMD_REG_WAIT 0x0000000c #define VCN_AON_SOC_ADDRESS_2_0 0x1f800 -#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define VCN_VID_IP_ADDRESS_2_0 0x0 #define VCN_AON_IP_ADDRESS_2_0 0x30000 @@ -331,7 +330,9 @@ struct amdgpu_vcn_inst { struct dpg_pause_state *new_state); int (*set_pg_state)(struct amdgpu_vcn_inst *vinst, enum amd_powergating_state state); + int (*reset)(struct amdgpu_vcn_inst *vinst); bool using_unified_queue; + struct mutex engine_reset_mutex; }; struct amdgpu_vcn_ras { @@ -553,5 +554,7 @@ void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev); int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); - +int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *guilty_fence); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 0bb8cbe0dcc0..13f0cdeb59c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -1323,6 +1323,9 @@ static int amdgpu_virt_req_ras_err_count_internal(struct amdgpu_device *adev, bo { struct amdgpu_virt *virt = &adev->virt; + if (!virt->ops || !virt->ops->req_ras_err_count) + return -EOPNOTSUPP; + /* Host allows 15 ras telemetry requests per 60 seconds. Afterwhich, the Host * will ignore incoming guest messages. Ratelimit the guest messages to * prevent guest self DOS. @@ -1378,14 +1381,16 @@ amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev, used_size = host_telemetry->header.used_size; if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10)) - return 0; + return -EINVAL; cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL); if (!cper_dump) return -ENOMEM; - if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0)) + if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0)) { + ret = -EINVAL; goto out; + } *more = cper_dump->more; @@ -1425,7 +1430,7 @@ static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev) int ret = 0; uint32_t more = 0; - if (!amdgpu_sriov_ras_cper_en(adev)) + if (!virt->ops || !virt->ops->req_ras_cper_dump) return -EOPNOTSUPP; do { @@ -1434,7 +1439,7 @@ static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev) adev, virt->fw_reserve.ras_telemetry, &more); else ret = 0; - } while (more); + } while (more && !ret); return ret; } @@ -1444,6 +1449,9 @@ int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update) struct amdgpu_virt *virt = &adev->virt; int ret = 0; + if (!amdgpu_sriov_ras_cper_en(adev)) + return -EOPNOTSUPP; + if ((__ratelimit(&virt->ras.ras_cper_dump_rs) || force_update) && down_read_trylock(&adev->reset_domain->sem)) { mutex_lock(&virt->ras.ras_telemetry_mutex); @@ -1480,3 +1488,16 @@ bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev, return true; } + +/* + * amdgpu_virt_request_bad_pages() - request bad pages + * @adev: amdgpu device. + * Send command to GPU hypervisor to write new bad pages into the shared PF2VF region + */ +void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev) +{ + struct amdgpu_virt *virt = &adev->virt; + + if (virt->ops && virt->ops->req_bad_pages) + virt->ops->req_bad_pages(adev); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index df03dba67ab8..3da3ebb1d9a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -97,6 +97,7 @@ struct amdgpu_virt_ops { bool (*rcvd_ras_intr)(struct amdgpu_device *adev); int (*req_ras_err_count)(struct amdgpu_device *adev); int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr); + int (*req_bad_pages)(struct amdgpu_device *adev); }; /* @@ -146,11 +147,15 @@ enum AMDGIM_FEATURE_FLAG { enum AMDGIM_REG_ACCESS_FLAG { /* Use PSP to program IH_RB_CNTL */ - AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0), + AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0), /* Use RLC to program MMHUB regs */ - AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1), + AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1), /* Use RLC to program GC regs */ - AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2), + AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2), + /* Use PSP to program L1_TLB_CNTL */ + AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN = (1 << 3), + /* Use RLCG to program SQ_CONFIG1 */ + AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG = (1 << 4), }; struct amdgim_pf2vf_info_v1 { @@ -260,7 +265,10 @@ struct amdgpu_virt { uint32_t reg_val_offs; struct amdgpu_irq_src ack_irq; struct amdgpu_irq_src rcv_irq; + struct work_struct flr_work; + struct work_struct bad_pages_work; + struct amdgpu_mm_table mm_table; const struct amdgpu_virt_ops *ops; struct amdgpu_vf_error_buffer vf_errors; @@ -295,6 +303,9 @@ struct amdgpu_virt { union amd_sriov_ras_caps ras_telemetry_en_caps; struct amdgpu_virt_ras ras; struct amd_sriov_ras_telemetry_error_count count_cache; + + /* hibernate and resume with different VF feature for xgmi enabled system */ + bool is_xgmi_node_migrate_enabled; }; struct amdgpu_video_codec_info; @@ -330,9 +341,17 @@ struct amdgpu_video_codec_info; (amdgpu_sriov_vf((adev)) && \ ((adev)->virt.reg_access & (AMDGIM_FEATURE_GC_REG_RLC_EN))) +#define amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev) \ +(amdgpu_sriov_vf((adev)) && \ + ((adev)->virt.reg_access & (AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN))) + #define amdgpu_sriov_rlcg_error_report_enabled(adev) \ (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev)) +#define amdgpu_sriov_reg_access_sq_config(adev) \ +(amdgpu_sriov_vf((adev)) && \ + ((adev)->virt.reg_access & (AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG))) + #define amdgpu_passthrough(adev) \ ((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE) @@ -376,6 +395,10 @@ static inline bool is_virtual_machine(void) ((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE) #define amdgpu_sriov_is_mes_info_enable(adev) \ ((adev)->virt.gim_feature & AMDGIM_FEATURE_MES_INFO_ENABLE) + +#define amdgpu_virt_xgmi_migrate_enabled(adev) \ + ((adev)->virt.is_xgmi_node_migrate_enabled && (adev)->gmc.xgmi.node_segment_size != 0) + bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); @@ -423,4 +446,5 @@ int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update) int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev); bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev, enum amdgpu_ras_block block); +void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ce52b4d75e94..5cacf5717016 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -622,7 +622,7 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, pr_warn_ratelimited("Evicted user BO is not reserved\n"); if (ti) { - pr_warn_ratelimited("pid %d\n", ti->pid); + pr_warn_ratelimited("pid %d\n", ti->task.pid); amdgpu_vm_put_task_info(ti); } @@ -765,6 +765,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool cleaner_shader_needed = false; bool pasid_mapping_needed = false; struct dma_fence *fence = NULL; + struct amdgpu_fence *af; unsigned int patch; int r; @@ -787,7 +788,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; - cleaner_shader_needed = adev->gfx.enable_cleaner_shader && + cleaner_shader_needed = job->run_cleaner_shader && + adev->gfx.enable_cleaner_shader && ring->funcs->emit_cleaner_shader && job->base.s_fence && &job->base.s_fence->scheduled == isolation->spearhead; @@ -817,7 +819,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid) adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid); - if (!ring->is_mes_queue && ring->funcs->emit_gds_switch && + if (ring->funcs->emit_gds_switch && gds_switch_needed) { amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, job->gds_size, job->gws_base, @@ -829,6 +831,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, r = amdgpu_fence_emit(ring, &fence, NULL, 0); if (r) return r; + /* this is part of the job's context */ + af = container_of(fence, struct amdgpu_fence, base); + af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0; } if (vm_flush_needed) { @@ -1270,8 +1275,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, } else { struct drm_gem_object *obj = &bo->tbo.base; - if (obj->import_attach && bo_va->is_xgmi) { - struct dma_buf *dma_buf = obj->import_attach->dmabuf; + if (drm_gem_is_imported(obj) && bo_va->is_xgmi) { + struct dma_buf *dma_buf = obj->dma_buf; struct drm_gem_object *gobj = dma_buf->priv; struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); @@ -1630,7 +1635,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, * validation */ if (vm->is_compute_context && - bo_va->base.bo->tbo.base.import_attach && + drm_gem_is_imported(&bo_va->base.bo->tbo.base) && (!bo_va->base.bo->tbo.resource || bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM)) amdgpu_vm_bo_evicted_user(&bo_va->base); @@ -2394,10 +2399,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, else adev->vm_manager.fragment_size = amdgpu_vm_fragment_size; - DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n", - vm_size, adev->vm_manager.num_level + 1, - adev->vm_manager.block_size, - adev->vm_manager.fragment_size); + dev_info( + adev->dev, + "vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n", + vm_size, adev->vm_manager.num_level + 1, + adev->vm_manager.block_size, adev->vm_manager.fragment_size); } /** @@ -2408,13 +2414,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, */ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, - DMA_RESV_USAGE_BOOKKEEP, - true, timeout); + timeout = drm_sched_entity_flush(&vm->immediate, timeout); if (timeout <= 0) return timeout; - return dma_fence_wait_timeout(vm->last_unlocked, true, timeout); + return drm_sched_entity_flush(&vm->delayed, timeout); } static void amdgpu_vm_destroy_task_info(struct kref *kref) @@ -2446,7 +2450,8 @@ amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid) */ void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info) { - kref_put(&task_info->refcount, amdgpu_vm_destroy_task_info); + if (task_info) + kref_put(&task_info->refcount, amdgpu_vm_destroy_task_info); } /** @@ -2506,11 +2511,11 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) if (!vm->task_info) return; - if (vm->task_info->pid == current->pid) + if (vm->task_info->task.pid == current->pid) return; - vm->task_info->pid = current->pid; - get_task_comm(vm->task_info->task_name, current); + vm->task_info->task.pid = current->pid; + get_task_comm(vm->task_info->task.comm, current); if (current->group_leader->mm != current->mm) return; @@ -2563,8 +2568,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); - DRM_DEBUG_DRIVER("VM update mode is %s\n", - vm->use_cpu_for_update ? "CPU" : "SDMA"); + dev_dbg(adev->dev, "VM update mode is %s\n", + vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); @@ -2606,7 +2611,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, r = amdgpu_vm_create_task_info(vm); if (r) - DRM_DEBUG("Failed to create task info for VM\n"); + dev_dbg(adev->dev, "Failed to create task info for VM\n"); amdgpu_bo_unreserve(vm->root.bo); amdgpu_bo_unref(&root_bo); @@ -2657,8 +2662,8 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) /* Update VM state */ vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_COMPUTE); - DRM_DEBUG_DRIVER("VM update mode is %s\n", - vm->use_cpu_for_update ? "CPU" : "SDMA"); + dev_dbg(adev->dev, "VM update mode is %s\n", + vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); @@ -2773,7 +2778,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) dev_warn(adev->dev, "VM memory stats for proc %s(%d) task %s(%d) is non-zero when fini\n", - ti->process_name, ti->pid, ti->task_name, ti->tgid); + ti->process_name, ti->task.pid, ti->task.comm, ti->tgid); } amdgpu_vm_put_task_info(vm->task_info); @@ -2981,7 +2986,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, error_unlock: amdgpu_bo_unreserve(root); if (r < 0) - DRM_ERROR("Can't handle page fault (%d)\n", r); + dev_err(adev->dev, "Can't handle page fault (%d)\n", r); error_unref: amdgpu_bo_unref(&root); @@ -3155,3 +3160,12 @@ bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo) { return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv; } + +void amdgpu_vm_print_task_info(struct amdgpu_device *adev, + struct amdgpu_task_info *task_info) +{ + dev_err(adev->dev, + " Process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task.comm, task_info->task.pid); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index f3ad687125ad..fd086efd8457 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -236,9 +236,8 @@ struct amdgpu_vm_pte_funcs { }; struct amdgpu_task_info { + struct drm_wedge_task_info task; char process_name[TASK_COMM_LEN]; - char task_name[TASK_COMM_LEN]; - pid_t pid; pid_t tgid; struct kref refcount; }; @@ -668,4 +667,7 @@ void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence); +void amdgpu_vm_print_task_info(struct amdgpu_device *adev, + struct amdgpu_task_info *task_info); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c index 51cddfa3f1e8..5d26797356a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c @@ -71,7 +71,6 @@ static void amdgpu_tlb_fence_work(struct work_struct *work) } static const struct dma_fence_ops amdgpu_tlb_fence_ops = { - .use_64bit_seqno = true, .get_driver_name = amdgpu_tlb_fence_get_driver_name, .get_timeline_name = amdgpu_tlb_fence_get_timeline_name }; @@ -101,8 +100,8 @@ void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, struct amdgpu_vm *vm INIT_WORK(&f->work, amdgpu_tlb_fence_work); spin_lock_init(&f->lock); - dma_fence_init(&f->base, &amdgpu_tlb_fence_ops, &f->lock, - vm->tlb_fence_context, atomic64_read(&vm->tlb_seq)); + dma_fence_init64(&f->base, &amdgpu_tlb_fence_ops, &f->lock, + vm->tlb_fence_context, atomic64_read(&vm->tlb_seq)); /* TODO: We probably need a separate wq here */ dma_fence_get(&f->base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 6da8994e0469..07c936e90d8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -24,6 +24,7 @@ #include <linux/dma-mapping.h> #include <drm/ttm/ttm_range_manager.h> +#include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_vm.h" @@ -462,7 +463,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, int r; lpfn = (u64)place->lpfn << PAGE_SHIFT; - if (!lpfn) + if (!lpfn || lpfn > man->size) lpfn = man->size; fpfn = (u64)place->fpfn << PAGE_SHIFT; @@ -782,6 +783,23 @@ uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr) } /** + * amdgpu_vram_mgr_clear_reset_blocks - reset clear blocks + * + * @adev: amdgpu device pointer + * + * Reset the cleared drm buddy blocks. + */ +void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev) +{ + struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; + struct drm_buddy *mm = &mgr->mm; + + mutex_lock(&mgr->lock); + drm_buddy_reset_clear(mm, false); + mutex_unlock(&mgr->lock); +} + +/** * amdgpu_vram_mgr_intersects - test each drm buddy block for intersection * * @man: TTM memory type manager @@ -907,6 +925,9 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) struct ttm_resource_manager *man = &mgr->manager; int err; + man->cg = drmm_cgroup_register_region(adev_to_drm(adev), "vram", adev->gmc.real_vram_size); + if (IS_ERR(man->cg)) + return PTR_ERR(man->cg); ttm_resource_manager_init(man, &adev->mman.bdev, adev->gmc.real_vram_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h index b256cbc2bc27..2c88d5fd87da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h @@ -66,7 +66,10 @@ to_amdgpu_vram_mgr_resource(struct ttm_resource *res) static inline void amdgpu_vram_mgr_set_cleared(struct ttm_resource *res) { - to_amdgpu_vram_mgr_resource(res)->flags |= DRM_BUDDY_CLEARED; + struct amdgpu_vram_mgr_resource *ares = to_amdgpu_vram_mgr_resource(res); + + WARN_ON(ares->flags & DRM_BUDDY_CLEARED); + ares->flags |= DRM_BUDDY_CLEARED; } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index 23b6f7a4aa4a..c417f8689220 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -27,6 +27,9 @@ #include <drm/drm_drv.h> #include "../amdxcp/amdgpu_xcp_drv.h" +static void amdgpu_xcp_sysfs_entries_init(struct amdgpu_xcp_mgr *xcp_mgr); +static void amdgpu_xcp_sysfs_entries_update(struct amdgpu_xcp_mgr *xcp_mgr); + static int __amdgpu_xcp_run(struct amdgpu_xcp_mgr *xcp_mgr, struct amdgpu_xcp_ip *xcp_ip, int xcp_state) { @@ -189,7 +192,7 @@ static int __amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, goto out; } - + amdgpu_xcp_sysfs_entries_update(xcp_mgr); out: mutex_unlock(&xcp_mgr->xcp_lock); @@ -215,15 +218,27 @@ int amdgpu_xcp_restore_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr) return __amdgpu_xcp_switch_partition_mode(xcp_mgr, xcp_mgr->mode); } -int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) +static bool __amdgpu_xcp_is_cached_mode_valid(struct amdgpu_xcp_mgr *xcp_mgr) { - int mode; + if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode) + return true; if (!amdgpu_sriov_vf(xcp_mgr->adev) && xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) - return xcp_mgr->mode; + return true; - if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode) + if (xcp_mgr->mode != AMDGPU_XCP_MODE_NONE && + xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS) + return true; + + return false; +} + +int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) +{ + int mode; + + if (__amdgpu_xcp_is_cached_mode_valid(xcp_mgr)) return xcp_mgr->mode; if (!(flags & AMDGPU_XCP_FL_LOCKED)) @@ -263,9 +278,10 @@ static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev) if (ret == -ENOSPC) { dev_warn(adev->dev, "Skip xcp node #%d when out of drm node resource.", i); - return 0; + ret = 0; + goto out; } else if (ret) { - return ret; + goto out; } /* Redirect all IOCTLs to the primary device */ @@ -278,9 +294,14 @@ static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev) p_ddev->vma_offset_manager = ddev->vma_offset_manager; p_ddev->driver = &amdgpu_partition_driver; adev->xcp_mgr->xcp[i].ddev = p_ddev; + + dev_set_drvdata(p_ddev->dev, &adev->xcp_mgr->xcp[i]); } + ret = 0; +out: + amdgpu_xcp_sysfs_entries_init(adev->xcp_mgr); - return 0; + return ret; } int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode, @@ -288,6 +309,7 @@ int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode, struct amdgpu_xcp_mgr_funcs *xcp_funcs) { struct amdgpu_xcp_mgr *xcp_mgr; + int i; if (!xcp_funcs || !xcp_funcs->get_ip_details) return -EINVAL; @@ -306,6 +328,8 @@ int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode, amdgpu_xcp_init(xcp_mgr, init_num_xcps, init_mode); adev->xcp_mgr = xcp_mgr; + for (i = 0; i < MAX_XCP; ++i) + xcp_mgr->xcp[i].xcp_mgr = xcp_mgr; return amdgpu_xcp_dev_alloc(adev); } @@ -433,6 +457,223 @@ void amdgpu_xcp_release_sched(struct amdgpu_device *adev, } } +int amdgpu_xcp_select_scheds(struct amdgpu_device *adev, + u32 hw_ip, u32 hw_prio, + struct amdgpu_fpriv *fpriv, + unsigned int *num_scheds, + struct drm_gpu_scheduler ***scheds) +{ + u32 sel_xcp_id; + int i; + struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; + + if (fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION) { + u32 least_ref_cnt = ~0; + + fpriv->xcp_id = 0; + for (i = 0; i < xcp_mgr->num_xcps; i++) { + u32 total_ref_cnt; + + total_ref_cnt = atomic_read(&xcp_mgr->xcp[i].ref_cnt); + if (total_ref_cnt < least_ref_cnt) { + fpriv->xcp_id = i; + least_ref_cnt = total_ref_cnt; + } + } + } + sel_xcp_id = fpriv->xcp_id; + + if (xcp_mgr->xcp[sel_xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds) { + *num_scheds = + xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds; + *scheds = + xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].sched; + atomic_inc(&adev->xcp_mgr->xcp[sel_xcp_id].ref_cnt); + dev_dbg(adev->dev, "Selected partition #%d", sel_xcp_id); + } else { + dev_err(adev->dev, "Failed to schedule partition #%d.", sel_xcp_id); + return -ENOENT; + } + + return 0; +} + +static void amdgpu_set_xcp_id(struct amdgpu_device *adev, + uint32_t inst_idx, + struct amdgpu_ring *ring) +{ + int xcp_id; + enum AMDGPU_XCP_IP_BLOCK ip_blk; + uint32_t inst_mask; + + ring->xcp_id = AMDGPU_XCP_NO_PARTITION; + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) + adev->gfx.enforce_isolation[0].xcp_id = ring->xcp_id; + if ((adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) || + (ring->funcs->type == AMDGPU_RING_TYPE_CPER)) + return; + + inst_mask = 1 << inst_idx; + + switch (ring->funcs->type) { + case AMDGPU_HW_IP_GFX: + case AMDGPU_RING_TYPE_COMPUTE: + case AMDGPU_RING_TYPE_KIQ: + ip_blk = AMDGPU_XCP_GFX; + break; + case AMDGPU_RING_TYPE_SDMA: + ip_blk = AMDGPU_XCP_SDMA; + break; + case AMDGPU_RING_TYPE_VCN_ENC: + case AMDGPU_RING_TYPE_VCN_JPEG: + ip_blk = AMDGPU_XCP_VCN; + break; + default: + dev_err(adev->dev, "Not support ring type %d!", ring->funcs->type); + return; + } + + for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) { + if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) { + ring->xcp_id = xcp_id; + dev_dbg(adev->dev, "ring:%s xcp_id :%u", ring->name, + ring->xcp_id); + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) + adev->gfx.enforce_isolation[xcp_id].xcp_id = xcp_id; + break; + } + } +} + +static void amdgpu_xcp_gpu_sched_update(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + unsigned int sel_xcp_id) +{ + unsigned int *num_gpu_sched; + + num_gpu_sched = &adev->xcp_mgr->xcp[sel_xcp_id] + .gpu_sched[ring->funcs->type][ring->hw_prio].num_scheds; + adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[ring->funcs->type][ring->hw_prio] + .sched[(*num_gpu_sched)++] = &ring->sched; + dev_dbg(adev->dev, "%s :[%d] gpu_sched[%d][%d] = %d", + ring->name, sel_xcp_id, ring->funcs->type, + ring->hw_prio, *num_gpu_sched); +} + +static int amdgpu_xcp_sched_list_update(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int i; + + for (i = 0; i < MAX_XCP; i++) { + atomic_set(&adev->xcp_mgr->xcp[i].ref_cnt, 0); + memset(adev->xcp_mgr->xcp[i].gpu_sched, 0, sizeof(adev->xcp_mgr->xcp->gpu_sched)); + } + + if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) + return 0; + + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + ring = adev->rings[i]; + if (!ring || !ring->sched.ready || ring->no_scheduler) + continue; + + amdgpu_xcp_gpu_sched_update(adev, ring, ring->xcp_id); + + /* VCN may be shared by two partitions under CPX MODE in certain + * configs. + */ + if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC || + ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) && + (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst)) + amdgpu_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1); + } + + return 0; +} + +int amdgpu_xcp_update_partition_sched_list(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->num_rings; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE || + ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + amdgpu_set_xcp_id(adev, ring->xcc_id, ring); + else + amdgpu_set_xcp_id(adev, ring->me, ring); + } + + return amdgpu_xcp_sched_list_update(adev); +} + +void amdgpu_xcp_update_supported_modes(struct amdgpu_xcp_mgr *xcp_mgr) +{ + struct amdgpu_device *adev = xcp_mgr->adev; + + xcp_mgr->supp_xcp_modes = 0; + + switch (NUM_XCC(adev->gfx.xcc_mask)) { + case 8: + xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | + BIT(AMDGPU_DPX_PARTITION_MODE) | + BIT(AMDGPU_QPX_PARTITION_MODE) | + BIT(AMDGPU_CPX_PARTITION_MODE); + break; + case 6: + xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | + BIT(AMDGPU_TPX_PARTITION_MODE) | + BIT(AMDGPU_CPX_PARTITION_MODE); + break; + case 4: + xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | + BIT(AMDGPU_DPX_PARTITION_MODE) | + BIT(AMDGPU_CPX_PARTITION_MODE); + break; + case 2: + xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | + BIT(AMDGPU_CPX_PARTITION_MODE); + break; + case 1: + xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | + BIT(AMDGPU_CPX_PARTITION_MODE); + break; + + default: + break; + } +} + +int amdgpu_xcp_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) +{ + /* TODO: + * Stop user queues and threads, and make sure GPU is empty of work. + */ + + if (flags & AMDGPU_XCP_OPS_KFD) + amdgpu_amdkfd_device_fini_sw(xcp_mgr->adev); + + return 0; +} + +int amdgpu_xcp_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) +{ + int ret = 0; + + if (flags & AMDGPU_XCP_OPS_KFD) { + amdgpu_amdkfd_device_probe(xcp_mgr->adev); + amdgpu_amdkfd_device_init(xcp_mgr->adev); + /* If KFD init failed, return failure */ + if (!xcp_mgr->adev->kfd.init_complete) + ret = -EIO; + } + + return ret; +} + +/*====================== xcp sysfs - configuration ======================*/ #define XCP_CFG_SYSFS_RES_ATTR_SHOW(_name) \ static ssize_t amdgpu_xcp_res_sysfs_##_name##_show( \ struct amdgpu_xcp_res_details *xcp_res, char *buf) \ @@ -635,7 +876,7 @@ static const struct attribute *xcp_attrs[] = { NULL, }; -void amdgpu_xcp_cfg_sysfs_init(struct amdgpu_device *adev) +static void amdgpu_xcp_cfg_sysfs_init(struct amdgpu_device *adev) { struct amdgpu_xcp_res_details *xcp_res; struct amdgpu_xcp_cfg *xcp_cfg; @@ -703,16 +944,16 @@ err1: kobject_put(&xcp_cfg->kobj); } -void amdgpu_xcp_cfg_sysfs_fini(struct amdgpu_device *adev) +static void amdgpu_xcp_cfg_sysfs_fini(struct amdgpu_device *adev) { struct amdgpu_xcp_res_details *xcp_res; struct amdgpu_xcp_cfg *xcp_cfg; int i; - if (!adev->xcp_mgr) + if (!adev->xcp_mgr || !adev->xcp_mgr->xcp_cfg) return; - xcp_cfg = adev->xcp_mgr->xcp_cfg; + xcp_cfg = adev->xcp_mgr->xcp_cfg; for (i = 0; i < xcp_cfg->num_res; i++) { xcp_res = &xcp_cfg->xcp_res[i]; kobject_put(&xcp_res->kobj); @@ -722,3 +963,124 @@ void amdgpu_xcp_cfg_sysfs_fini(struct amdgpu_device *adev) sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs); kobject_put(&xcp_cfg->kobj); } + +/*====================== xcp sysfs - data entries ======================*/ + +#define to_xcp(x) container_of(x, struct amdgpu_xcp, kobj) + +static ssize_t xcp_metrics_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct amdgpu_xcp *xcp = to_xcp(kobj); + struct amdgpu_xcp_mgr *xcp_mgr; + ssize_t size; + + xcp_mgr = xcp->xcp_mgr; + size = amdgpu_dpm_get_xcp_metrics(xcp_mgr->adev, xcp->id, NULL); + if (size <= 0) + return size; + + if (size > PAGE_SIZE) + return -ENOSPC; + + return amdgpu_dpm_get_xcp_metrics(xcp_mgr->adev, xcp->id, buf); +} + +static umode_t amdgpu_xcp_attrs_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct amdgpu_xcp *xcp = to_xcp(kobj); + + if (!xcp || !xcp->valid) + return 0; + + return attr->mode; +} + +static struct kobj_attribute xcp_sysfs_metrics = __ATTR_RO(xcp_metrics); + +static struct attribute *amdgpu_xcp_attrs[] = { + &xcp_sysfs_metrics.attr, + NULL, +}; + +static const struct attribute_group amdgpu_xcp_attrs_group = { + .attrs = amdgpu_xcp_attrs, + .is_visible = amdgpu_xcp_attrs_is_visible +}; + +static const struct kobj_type xcp_sysfs_ktype = { + .sysfs_ops = &kobj_sysfs_ops, +}; + +static void amdgpu_xcp_sysfs_entries_fini(struct amdgpu_xcp_mgr *xcp_mgr, int n) +{ + struct amdgpu_xcp *xcp; + + for (n--; n >= 0; n--) { + xcp = &xcp_mgr->xcp[n]; + if (!xcp->ddev || !xcp->valid) + continue; + sysfs_remove_group(&xcp->kobj, &amdgpu_xcp_attrs_group); + kobject_put(&xcp->kobj); + } +} + +static void amdgpu_xcp_sysfs_entries_init(struct amdgpu_xcp_mgr *xcp_mgr) +{ + struct amdgpu_xcp *xcp; + int i, r; + + for (i = 0; i < MAX_XCP; i++) { + /* Redirect all IOCTLs to the primary device */ + xcp = &xcp_mgr->xcp[i]; + if (!xcp->ddev) + break; + r = kobject_init_and_add(&xcp->kobj, &xcp_sysfs_ktype, + &xcp->ddev->dev->kobj, "xcp"); + if (r) + goto out; + + r = sysfs_create_group(&xcp->kobj, &amdgpu_xcp_attrs_group); + if (r) + goto out; + } + + return; +out: + kobject_put(&xcp->kobj); +} + +static void amdgpu_xcp_sysfs_entries_update(struct amdgpu_xcp_mgr *xcp_mgr) +{ + struct amdgpu_xcp *xcp; + int i; + + for (i = 0; i < MAX_XCP; i++) { + /* Redirect all IOCTLs to the primary device */ + xcp = &xcp_mgr->xcp[i]; + if (!xcp->ddev) + continue; + sysfs_update_group(&xcp->kobj, &amdgpu_xcp_attrs_group); + } + + return; +} + +void amdgpu_xcp_sysfs_init(struct amdgpu_device *adev) +{ + if (!adev->xcp_mgr) + return; + + amdgpu_xcp_cfg_sysfs_init(adev); + + return; +} + +void amdgpu_xcp_sysfs_fini(struct amdgpu_device *adev) +{ + if (!adev->xcp_mgr) + return; + amdgpu_xcp_sysfs_entries_fini(adev->xcp_mgr, MAX_XCP); + amdgpu_xcp_cfg_sysfs_fini(adev); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h index b63f53242c57..70a0f8400b57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h @@ -39,6 +39,8 @@ #define AMDGPU_XCP_NO_PARTITION (~0) +#define AMDGPU_XCP_OPS_KFD (1 << 0) + struct amdgpu_fpriv; enum AMDGPU_XCP_IP_BLOCK { @@ -108,6 +110,8 @@ struct amdgpu_xcp { struct drm_driver *driver; struct drm_vma_offset_manager *vma_offset_manager; struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX]; + struct amdgpu_xcp_mgr *xcp_mgr; + struct kobject kobj; }; struct amdgpu_xcp_mgr { @@ -142,10 +146,6 @@ struct amdgpu_xcp_mgr_funcs { int (*suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); int (*prepare_resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); int (*resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); - int (*select_scheds)(struct amdgpu_device *adev, - u32 hw_ip, u32 hw_prio, struct amdgpu_fpriv *fpriv, - unsigned int *num_scheds, struct drm_gpu_scheduler ***scheds); - int (*update_partition_sched_list)(struct amdgpu_device *adev); }; int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); @@ -174,18 +174,17 @@ int amdgpu_xcp_open_device(struct amdgpu_device *adev, struct drm_file *file_priv); void amdgpu_xcp_release_sched(struct amdgpu_device *adev, struct amdgpu_ctx_entity *entity); - -void amdgpu_xcp_cfg_sysfs_init(struct amdgpu_device *adev); -void amdgpu_xcp_cfg_sysfs_fini(struct amdgpu_device *adev); - -#define amdgpu_xcp_select_scheds(adev, e, c, d, x, y) \ - ((adev)->xcp_mgr && (adev)->xcp_mgr->funcs && \ - (adev)->xcp_mgr->funcs->select_scheds ? \ - (adev)->xcp_mgr->funcs->select_scheds((adev), (e), (c), (d), (x), (y)) : -ENOENT) -#define amdgpu_xcp_update_partition_sched_list(adev) \ - ((adev)->xcp_mgr && (adev)->xcp_mgr->funcs && \ - (adev)->xcp_mgr->funcs->update_partition_sched_list ? \ - (adev)->xcp_mgr->funcs->update_partition_sched_list(adev) : 0) +int amdgpu_xcp_select_scheds(struct amdgpu_device *adev, + u32 hw_ip, u32 hw_prio, + struct amdgpu_fpriv *fpriv, + unsigned int *num_scheds, + struct drm_gpu_scheduler ***scheds); +void amdgpu_xcp_update_supported_modes(struct amdgpu_xcp_mgr *xcp_mgr); +int amdgpu_xcp_update_partition_sched_list(struct amdgpu_device *adev); +int amdgpu_xcp_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags); +int amdgpu_xcp_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags); +void amdgpu_xcp_sysfs_init(struct amdgpu_device *adev); +void amdgpu_xcp_sysfs_fini(struct amdgpu_device *adev); static inline int amdgpu_xcp_get_num_xcp(struct amdgpu_xcp_mgr *xcp_mgr) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 6029a799074d..1ede308a7c67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -294,17 +294,46 @@ static const struct amdgpu_pcs_ras_field xgmi3x16_pcs_ras_fields[] = { SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxCMDPktErr)}, }; +int amdgpu_xgmi_get_ext_link(struct amdgpu_device *adev, int link_num) +{ + int link_map_6_4_x[8] = { 0, 3, 1, 2, 7, 6, 4, 5 }; + + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + case IP_VERSION(6, 4, 1): + if (link_num < ARRAY_SIZE(link_map_6_4_x)) + return link_map_6_4_x[link_num]; + break; + default: + return -EINVAL; + } + + return -EINVAL; +} + static u32 xgmi_v6_4_get_link_status(struct amdgpu_device *adev, int global_link_num) { - const u32 smnpcs_xgmi3x16_pcs_state_hist1 = 0x11a00070; - const int xgmi_inst = 2; - u32 link_inst; + const u32 smn_xgmi_6_4_pcs_state_hist1[2] = { 0x11a00070, 0x11b00070 }; + const u32 smn_xgmi_6_4_1_pcs_state_hist1[2] = { 0x12100070, + 0x11b00070 }; + u32 i, n; u64 addr; - link_inst = global_link_num % xgmi_inst; + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + n = ARRAY_SIZE(smn_xgmi_6_4_pcs_state_hist1); + addr = smn_xgmi_6_4_pcs_state_hist1[global_link_num % n]; + break; + case IP_VERSION(6, 4, 1): + n = ARRAY_SIZE(smn_xgmi_6_4_1_pcs_state_hist1); + addr = smn_xgmi_6_4_1_pcs_state_hist1[global_link_num % n]; + break; + default: + return U32_MAX; + } - addr = (smnpcs_xgmi3x16_pcs_state_hist1 | (link_inst << 20)) + - adev->asic_funcs->encode_ext_smn_addressing(global_link_num / xgmi_inst); + i = global_link_num / n; + addr += adev->asic_funcs->encode_ext_smn_addressing(i); return RREG32_PCIE_EXT(addr); } @@ -1172,7 +1201,7 @@ static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_ban break; case ACA_SMU_TYPE_CE: count = ext_error_code == 6 ? count : 0ULL; - bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, count); break; default: @@ -1742,16 +1771,25 @@ void amdgpu_xgmi_early_init(struct amdgpu_device *adev) case IP_VERSION(9, 4, 0): case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): - adev->gmc.xgmi.max_speed = XGMI_SPEED_25GT; + /* 25 GT/s */ + adev->gmc.xgmi.max_speed = 25; adev->gmc.xgmi.max_width = 16; break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): case IP_VERSION(9, 5, 0): - adev->gmc.xgmi.max_speed = XGMI_SPEED_32GT; + /* 32 GT/s */ + adev->gmc.xgmi.max_speed = 32; adev->gmc.xgmi.max_width = 16; break; default: break; } } + +void amgpu_xgmi_set_max_speed_width(struct amdgpu_device *adev, + uint16_t max_speed, uint8_t max_width) +{ + adev->gmc.xgmi.max_speed = max_speed; + adev->gmc.xgmi.max_width = max_width; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 32dabba4062f..bba0b26fee8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -25,12 +25,6 @@ #include <drm/task_barrier.h> #include "amdgpu_ras.h" -enum amdgpu_xgmi_link_speed { - XGMI_SPEED_16GT = 16, - XGMI_SPEED_25GT = 25, - XGMI_SPEED_32GT = 32 -}; - struct amdgpu_hive_info { struct kobject kobj; uint64_t hive_id; @@ -97,7 +91,7 @@ struct amdgpu_xgmi { struct ras_common_if *ras_if; bool connected_to_cpu; struct amdgpu_xgmi_ras *ras; - enum amdgpu_xgmi_link_speed max_speed; + uint16_t max_speed; uint8_t max_width; }; @@ -125,8 +119,11 @@ int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev, int req_nps_mode); int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, int global_link_num); +int amdgpu_xgmi_get_ext_link(struct amdgpu_device *adev, int link_num); void amdgpu_xgmi_early_init(struct amdgpu_device *adev); uint32_t amdgpu_xgmi_get_max_bandwidth(struct amdgpu_device *adev); +void amgpu_xgmi_set_max_speed_width(struct amdgpu_device *adev, + uint16_t max_speed, uint8_t max_width); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index d6ac2652f0ac..33edad1f9dcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -109,10 +109,12 @@ union amd_sriov_msg_feature_flags { union amd_sriov_reg_access_flags { struct { - uint32_t vf_reg_access_ih : 1; - uint32_t vf_reg_access_mmhub : 1; - uint32_t vf_reg_access_gc : 1; - uint32_t reserved : 29; + uint32_t vf_reg_access_ih : 1; + uint32_t vf_reg_access_mmhub : 1; + uint32_t vf_reg_access_gc : 1; + uint32_t vf_reg_access_l1_tlb_cntl : 1; + uint32_t vf_reg_access_sq_config : 1; + uint32_t reserved : 27; } flags; uint32_t all; }; @@ -330,6 +332,7 @@ enum amd_sriov_mailbox_request_message { MB_REQ_MSG_RAS_POISON = 202, MB_REQ_RAS_ERROR_COUNT = 203, MB_REQ_RAS_CPER_DUMP = 204, + MB_REQ_RAS_BAD_PAGES = 205, }; /* mailbox message send from host to guest */ @@ -347,6 +350,9 @@ enum amd_sriov_mailbox_response_message { MB_RES_MSG_GPU_RMA = 10, MB_RES_MSG_RAS_ERROR_COUNT_READY = 11, MB_REQ_RAS_CPER_DUMP_READY = 14, + MB_RES_MSG_RAS_BAD_PAGES_READY = 15, + MB_RES_MSG_RAS_BAD_PAGES_NOTIFICATION = 16, + MB_RES_MSG_UNRECOV_ERR_NOTIFICATION = 17, MB_RES_MSG_TEXT_MESSAGE = 255 }; diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 3c07517be09a..914cf4bfb033 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -29,12 +29,11 @@ #include "gfx_v9_4_3.h" #include "gfxhub_v1_2.h" #include "sdma_v4_4_2.h" +#include "amdgpu_ip.h" #define XCP_INST_MASK(num_inst, xcp_id) \ (num_inst ? GENMASK(num_inst - 1, 0) << (xcp_id * num_inst) : 0) -#define AMDGPU_XCP_OPS_KFD (1 << 0) - void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev) { int i; @@ -62,234 +61,6 @@ void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1; } -static bool aqua_vanjaram_xcp_vcn_shared(struct amdgpu_device *adev) -{ - return (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst); -} - -static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, - uint32_t inst_idx, struct amdgpu_ring *ring) -{ - int xcp_id; - enum AMDGPU_XCP_IP_BLOCK ip_blk; - uint32_t inst_mask; - - ring->xcp_id = AMDGPU_XCP_NO_PARTITION; - if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) - adev->gfx.enforce_isolation[0].xcp_id = ring->xcp_id; - if ((adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) || - (ring->funcs->type == AMDGPU_RING_TYPE_CPER)) - return; - - inst_mask = 1 << inst_idx; - - switch (ring->funcs->type) { - case AMDGPU_HW_IP_GFX: - case AMDGPU_RING_TYPE_COMPUTE: - case AMDGPU_RING_TYPE_KIQ: - ip_blk = AMDGPU_XCP_GFX; - break; - case AMDGPU_RING_TYPE_SDMA: - ip_blk = AMDGPU_XCP_SDMA; - break; - case AMDGPU_RING_TYPE_VCN_ENC: - case AMDGPU_RING_TYPE_VCN_JPEG: - ip_blk = AMDGPU_XCP_VCN; - break; - default: - DRM_ERROR("Not support ring type %d!", ring->funcs->type); - return; - } - - for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) { - if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) { - ring->xcp_id = xcp_id; - dev_dbg(adev->dev, "ring:%s xcp_id :%u", ring->name, - ring->xcp_id); - if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) - adev->gfx.enforce_isolation[xcp_id].xcp_id = xcp_id; - break; - } - } -} - -static void aqua_vanjaram_xcp_gpu_sched_update( - struct amdgpu_device *adev, - struct amdgpu_ring *ring, - unsigned int sel_xcp_id) -{ - unsigned int *num_gpu_sched; - - num_gpu_sched = &adev->xcp_mgr->xcp[sel_xcp_id] - .gpu_sched[ring->funcs->type][ring->hw_prio].num_scheds; - adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[ring->funcs->type][ring->hw_prio] - .sched[(*num_gpu_sched)++] = &ring->sched; - DRM_DEBUG("%s :[%d] gpu_sched[%d][%d] = %d", ring->name, - sel_xcp_id, ring->funcs->type, - ring->hw_prio, *num_gpu_sched); -} - -static int aqua_vanjaram_xcp_sched_list_update( - struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring; - int i; - - for (i = 0; i < MAX_XCP; i++) { - atomic_set(&adev->xcp_mgr->xcp[i].ref_cnt, 0); - memset(adev->xcp_mgr->xcp[i].gpu_sched, 0, sizeof(adev->xcp_mgr->xcp->gpu_sched)); - } - - if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) - return 0; - - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { - ring = adev->rings[i]; - if (!ring || !ring->sched.ready || ring->no_scheduler) - continue; - - aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id); - - /* VCN may be shared by two partitions under CPX MODE in certain - * configs. - */ - if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC || - ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) && - aqua_vanjaram_xcp_vcn_shared(adev)) - aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1); - } - - return 0; -} - -static int aqua_vanjaram_update_partition_sched_list(struct amdgpu_device *adev) -{ - int i; - - for (i = 0; i < adev->num_rings; i++) { - struct amdgpu_ring *ring = adev->rings[i]; - - if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE || - ring->funcs->type == AMDGPU_RING_TYPE_KIQ) - aqua_vanjaram_set_xcp_id(adev, ring->xcc_id, ring); - else - aqua_vanjaram_set_xcp_id(adev, ring->me, ring); - } - - return aqua_vanjaram_xcp_sched_list_update(adev); -} - -static int aqua_vanjaram_select_scheds( - struct amdgpu_device *adev, - u32 hw_ip, - u32 hw_prio, - struct amdgpu_fpriv *fpriv, - unsigned int *num_scheds, - struct drm_gpu_scheduler ***scheds) -{ - u32 sel_xcp_id; - int i; - - if (fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION) { - u32 least_ref_cnt = ~0; - - fpriv->xcp_id = 0; - for (i = 0; i < adev->xcp_mgr->num_xcps; i++) { - u32 total_ref_cnt; - - total_ref_cnt = atomic_read(&adev->xcp_mgr->xcp[i].ref_cnt); - if (total_ref_cnt < least_ref_cnt) { - fpriv->xcp_id = i; - least_ref_cnt = total_ref_cnt; - } - } - } - sel_xcp_id = fpriv->xcp_id; - - if (adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds) { - *num_scheds = adev->xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds; - *scheds = adev->xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].sched; - atomic_inc(&adev->xcp_mgr->xcp[sel_xcp_id].ref_cnt); - DRM_DEBUG("Selected partition #%d", sel_xcp_id); - } else { - DRM_ERROR("Failed to schedule partition #%d.", sel_xcp_id); - return -ENOENT; - } - - return 0; -} - -static int8_t aqua_vanjaram_logical_to_dev_inst(struct amdgpu_device *adev, - enum amd_hw_ip_block_type block, - int8_t inst) -{ - int8_t dev_inst; - - switch (block) { - case GC_HWIP: - case SDMA0_HWIP: - /* Both JPEG and VCN as JPEG is only alias of VCN */ - case VCN_HWIP: - dev_inst = adev->ip_map.dev_inst[block][inst]; - break; - default: - /* For rest of the IPs, no look up required. - * Assume 'logical instance == physical instance' for all configs. */ - dev_inst = inst; - break; - } - - return dev_inst; -} - -static uint32_t aqua_vanjaram_logical_to_dev_mask(struct amdgpu_device *adev, - enum amd_hw_ip_block_type block, - uint32_t mask) -{ - uint32_t dev_mask = 0; - int8_t log_inst, dev_inst; - - while (mask) { - log_inst = ffs(mask) - 1; - dev_inst = aqua_vanjaram_logical_to_dev_inst(adev, block, log_inst); - dev_mask |= (1 << dev_inst); - mask &= ~(1 << log_inst); - } - - return dev_mask; -} - -static void aqua_vanjaram_populate_ip_map(struct amdgpu_device *adev, - enum amd_hw_ip_block_type ip_block, - uint32_t inst_mask) -{ - int l = 0, i; - - while (inst_mask) { - i = ffs(inst_mask) - 1; - adev->ip_map.dev_inst[ip_block][l++] = i; - inst_mask &= ~(1 << i); - } - for (; l < HWIP_MAX_INSTANCE; l++) - adev->ip_map.dev_inst[ip_block][l] = -1; -} - -void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev) -{ - u32 ip_map[][2] = { - { GC_HWIP, adev->gfx.xcc_mask }, - { SDMA0_HWIP, adev->sdma.sdma_mask }, - { VCN_HWIP, adev->vcn.inst_mask }, - }; - int i; - - for (i = 0; i < ARRAY_SIZE(ip_map); ++i) - aqua_vanjaram_populate_ip_map(adev, ip_map[i][0], ip_map[i][1]); - - adev->ip_map.logical_to_dev_inst = aqua_vanjaram_logical_to_dev_inst; - adev->ip_map.logical_to_dev_mask = aqua_vanjaram_logical_to_dev_mask; -} - /* Fixed pattern for smn addressing on different AIDs: * bit[34]: indicate cross AID access * bit[33:32]: indicate target AID id @@ -353,11 +124,14 @@ static int aqua_vanjaram_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr) if (adev->nbio.funcs->get_compute_partition_mode) { mode = adev->nbio.funcs->get_compute_partition_mode(adev); - if (mode != derv_mode) + if (mode != derv_mode) { dev_warn( adev->dev, "Mismatch in compute partition mode - reported : %d derived : %d", mode, derv_mode); + if (derv_mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) + amdgpu_device_bus_status_check(adev); + } } return mode; @@ -448,52 +222,71 @@ static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int x return 0; } -static int aqua_vanjaram_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr, - int mode, - struct amdgpu_xcp_cfg *xcp_cfg) +static int __aqua_vanjaram_get_px_mode_info(struct amdgpu_xcp_mgr *xcp_mgr, + int px_mode, int *num_xcp, + uint16_t *nps_modes) { struct amdgpu_device *adev = xcp_mgr->adev; - int max_res[AMDGPU_XCP_RES_MAX] = {}; - bool res_lt_xcp; - int num_xcp, i; - u16 nps_modes; - if (!(xcp_mgr->supp_xcp_modes & BIT(mode))) + if (!num_xcp || !nps_modes || !(xcp_mgr->supp_xcp_modes & BIT(px_mode))) return -EINVAL; - max_res[AMDGPU_XCP_RES_XCC] = NUM_XCC(adev->gfx.xcc_mask); - max_res[AMDGPU_XCP_RES_DMA] = adev->sdma.num_instances; - max_res[AMDGPU_XCP_RES_DEC] = adev->vcn.num_vcn_inst; - max_res[AMDGPU_XCP_RES_JPEG] = adev->jpeg.num_jpeg_inst; - - switch (mode) { + switch (px_mode) { case AMDGPU_SPX_PARTITION_MODE: - num_xcp = 1; - nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE); + *num_xcp = 1; + *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE); break; case AMDGPU_DPX_PARTITION_MODE: - num_xcp = 2; - nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE); + *num_xcp = 2; + *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS2_PARTITION_MODE); break; case AMDGPU_TPX_PARTITION_MODE: - num_xcp = 3; - nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | - BIT(AMDGPU_NPS4_PARTITION_MODE); + *num_xcp = 3; + *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); break; case AMDGPU_QPX_PARTITION_MODE: - num_xcp = 4; - nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | - BIT(AMDGPU_NPS4_PARTITION_MODE); + *num_xcp = 4; + *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); break; case AMDGPU_CPX_PARTITION_MODE: - num_xcp = NUM_XCC(adev->gfx.xcc_mask); - nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | - BIT(AMDGPU_NPS4_PARTITION_MODE); + *num_xcp = NUM_XCC(adev->gfx.xcc_mask); + *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); + if (amdgpu_sriov_vf(adev)) + *nps_modes |= BIT(AMDGPU_NPS2_PARTITION_MODE); break; default: return -EINVAL; } + return 0; +} + +static int aqua_vanjaram_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr, + int mode, + struct amdgpu_xcp_cfg *xcp_cfg) +{ + struct amdgpu_device *adev = xcp_mgr->adev; + int max_res[AMDGPU_XCP_RES_MAX] = {}; + bool res_lt_xcp; + int num_xcp, i, r; + u16 nps_modes; + + if (!(xcp_mgr->supp_xcp_modes & BIT(mode))) + return -EINVAL; + + max_res[AMDGPU_XCP_RES_XCC] = NUM_XCC(adev->gfx.xcc_mask); + max_res[AMDGPU_XCP_RES_DMA] = adev->sdma.num_instances; + max_res[AMDGPU_XCP_RES_DEC] = adev->vcn.num_vcn_inst; + max_res[AMDGPU_XCP_RES_JPEG] = adev->jpeg.num_jpeg_inst; + + r = __aqua_vanjaram_get_px_mode_info(xcp_mgr, mode, &num_xcp, &nps_modes); + if (r) + return r; + xcp_cfg->compatible_nps_modes = (adev->gmc.supported_nps_modes & nps_modes); xcp_cfg->num_res = ARRAY_SIZE(max_res); @@ -542,30 +335,31 @@ static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr, enum amdgpu_gfx_partition mode) { struct amdgpu_device *adev = xcp_mgr->adev; - int num_xcc, num_xccs_per_xcp; + int num_xcc, num_xccs_per_xcp, r; + int num_xcp, nps_mode; + u16 supp_nps_modes; + bool comp_mode; + + nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + r = __aqua_vanjaram_get_px_mode_info(xcp_mgr, mode, &num_xcp, + &supp_nps_modes); + if (r) + return false; + comp_mode = !!(BIT(nps_mode) & supp_nps_modes); num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (mode) { case AMDGPU_SPX_PARTITION_MODE: - return adev->gmc.num_mem_partitions == 1 && num_xcc > 0; + return comp_mode && num_xcc > 0; case AMDGPU_DPX_PARTITION_MODE: - return adev->gmc.num_mem_partitions <= 2 && (num_xcc % 4) == 0; + return comp_mode && (num_xcc % 4) == 0; case AMDGPU_TPX_PARTITION_MODE: - return (adev->gmc.num_mem_partitions == 1 || - adev->gmc.num_mem_partitions == 3) && - ((num_xcc % 3) == 0); + return comp_mode && ((num_xcc % 3) == 0); case AMDGPU_QPX_PARTITION_MODE: num_xccs_per_xcp = num_xcc / 4; - return (adev->gmc.num_mem_partitions == 1 || - adev->gmc.num_mem_partitions == 4) && - (num_xccs_per_xcp >= 2); + return comp_mode && (num_xccs_per_xcp >= 2); case AMDGPU_CPX_PARTITION_MODE: - /* (num_xcc > 1) because 1 XCC is considered SPX, not CPX. - * (num_xcc % adev->gmc.num_mem_partitions) == 0 because - * num_compute_partitions can't be less than num_mem_partitions - */ - return ((num_xcc > 1) && - (num_xcc % adev->gmc.num_mem_partitions) == 0); + return comp_mode && (num_xcc > 1); default: return false; } @@ -573,72 +367,6 @@ static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr, return false; } -static int __aqua_vanjaram_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) -{ - /* TODO: - * Stop user queues and threads, and make sure GPU is empty of work. - */ - - if (flags & AMDGPU_XCP_OPS_KFD) - amdgpu_amdkfd_device_fini_sw(xcp_mgr->adev); - - return 0; -} - -static int __aqua_vanjaram_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) -{ - int ret = 0; - - if (flags & AMDGPU_XCP_OPS_KFD) { - amdgpu_amdkfd_device_probe(xcp_mgr->adev); - amdgpu_amdkfd_device_init(xcp_mgr->adev); - /* If KFD init failed, return failure */ - if (!xcp_mgr->adev->kfd.init_complete) - ret = -EIO; - } - - return ret; -} - -static void -__aqua_vanjaram_update_supported_modes(struct amdgpu_xcp_mgr *xcp_mgr) -{ - struct amdgpu_device *adev = xcp_mgr->adev; - - xcp_mgr->supp_xcp_modes = 0; - - switch (NUM_XCC(adev->gfx.xcc_mask)) { - case 8: - xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | - BIT(AMDGPU_DPX_PARTITION_MODE) | - BIT(AMDGPU_QPX_PARTITION_MODE) | - BIT(AMDGPU_CPX_PARTITION_MODE); - break; - case 6: - xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | - BIT(AMDGPU_TPX_PARTITION_MODE) | - BIT(AMDGPU_CPX_PARTITION_MODE); - break; - case 4: - xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | - BIT(AMDGPU_DPX_PARTITION_MODE) | - BIT(AMDGPU_CPX_PARTITION_MODE); - break; - /* this seems only existing in emulation phase */ - case 2: - xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | - BIT(AMDGPU_CPX_PARTITION_MODE); - break; - case 1: - xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | - BIT(AMDGPU_CPX_PARTITION_MODE); - break; - - default: - break; - } -} - static void __aqua_vanjaram_update_available_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr) { int mode; @@ -685,7 +413,7 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, goto out; } - ret = __aqua_vanjaram_pre_partition_switch(xcp_mgr, flags); + ret = amdgpu_xcp_pre_partition_switch(xcp_mgr, flags); if (ret) goto unlock; @@ -698,7 +426,7 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, *num_xcps = num_xcc / num_xcc_per_xcp; amdgpu_xcp_init(xcp_mgr, *num_xcps, mode); - ret = __aqua_vanjaram_post_partition_switch(xcp_mgr, flags); + ret = amdgpu_xcp_post_partition_switch(xcp_mgr, flags); if (!ret) __aqua_vanjaram_update_available_partition_mode(xcp_mgr); unlock: @@ -781,9 +509,6 @@ struct amdgpu_xcp_mgr_funcs aqua_vanjaram_xcp_funcs = { .get_ip_details = &aqua_vanjaram_get_xcp_ip_details, .get_xcp_res_info = &aqua_vanjaram_get_xcp_res_info, .get_xcp_mem_id = &aqua_vanjaram_get_xcp_mem_id, - .select_scheds = &aqua_vanjaram_select_scheds, - .update_partition_sched_list = - &aqua_vanjaram_update_partition_sched_list }; static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev) @@ -798,7 +523,7 @@ static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev) if (ret) return ret; - __aqua_vanjaram_update_supported_modes(adev->xcp_mgr); + amdgpu_xcp_update_supported_modes(adev->xcp_mgr); /* TODO: Default memory node affinity init */ return ret; @@ -838,7 +563,7 @@ int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev) if (ret) return ret; - aqua_vanjaram_ip_map_init(adev); + amdgpu_ip_map_init(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 81d195d366ce..427b073de2fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -1444,6 +1444,7 @@ static void atom_get_vbios_pn(struct atom_context *ctx) if (vbios_str == NULL) vbios_str += sizeof(BIOS_ATOM_PREFIX) - 1; } + OPTIMIZER_HIDE_VAR(vbios_str); if (vbios_str != NULL && *vbios_str == 0) vbios_str++; diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c index 521b9faab180..492813ab1b54 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c @@ -458,8 +458,8 @@ bool amdgpu_atombios_dp_needs_link_train(struct amdgpu_connector *amdgpu_connect u8 link_status[DP_LINK_STATUS_SIZE]; struct amdgpu_connector_atom_dig *dig = amdgpu_connector->con_priv; - if (drm_dp_dpcd_read_link_status(&amdgpu_connector->ddc_bus->aux, link_status) - <= 0) + if (drm_dp_dpcd_read_link_status(&amdgpu_connector->ddc_bus->aux, + link_status) < 0) return false; if (drm_dp_channel_eq_ok(link_status, dig->dp_lane_count)) return false; @@ -616,7 +616,7 @@ amdgpu_atombios_dp_link_train_cr(struct amdgpu_atombios_dp_link_train_info *dp_i drm_dp_link_train_clock_recovery_delay(dp_info->aux, dp_info->dpcd); if (drm_dp_dpcd_read_link_status(dp_info->aux, - dp_info->link_status) <= 0) { + dp_info->link_status) < 0) { DRM_ERROR("displayport link status failed\n"); break; } @@ -681,7 +681,7 @@ amdgpu_atombios_dp_link_train_ce(struct amdgpu_atombios_dp_link_train_info *dp_i drm_dp_link_train_channel_eq_delay(dp_info->aux, dp_info->dpcd); if (drm_dp_dpcd_read_link_status(dp_info->aux, - dp_info->link_status) <= 0) { + dp_info->link_status) < 0) { DRM_ERROR("displayport link status failed\n"); break; } diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 508cea965983..9e8715b4739d 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -56,6 +56,8 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev); static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev); static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block); +u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev); + MODULE_FIRMWARE("amdgpu/bonaire_sdma.bin"); MODULE_FIRMWARE("amdgpu/bonaire_sdma1.bin"); MODULE_FIRMWARE("amdgpu/hawaii_sdma.bin"); @@ -67,9 +69,6 @@ MODULE_FIRMWARE("amdgpu/kabini_sdma1.bin"); MODULE_FIRMWARE("amdgpu/mullins_sdma.bin"); MODULE_FIRMWARE("amdgpu/mullins_sdma1.bin"); -u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev); - - static void cik_sdma_free_microcode(struct amdgpu_device *adev) { int i; @@ -993,14 +992,9 @@ static int cik_sdma_sw_fini(struct amdgpu_ip_block *ip_block) static int cik_sdma_hw_init(struct amdgpu_ip_block *ip_block) { - int r; struct amdgpu_device *adev = ip_block->adev; - r = cik_sdma_start(adev); - if (r) - return r; - - return r; + return cik_sdma_start(adev); } static int cik_sdma_hw_fini(struct amdgpu_ip_block *ip_block) @@ -1040,14 +1034,10 @@ static bool cik_sdma_is_idle(struct amdgpu_ip_block *ip_block) static int cik_sdma_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; - u32 tmp; struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK | - SRBM_STATUS2__SDMA1_BUSY_MASK); - - if (!tmp) + if (cik_sdma_is_idle(ip_block)) return 0; udelay(1); } diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 279288365940..8aca4f2734f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -60,9 +60,6 @@ #define AUD5_REGISTER_OFFSET (0x179d - 0x1780) #define AUD6_REGISTER_OFFSET (0x17a4 - 0x1780) -#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001 -#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003 - #define PIPEID(x) ((x) << 0) #define MEID(x) ((x) << 2) #define VMID(x) ((x) << 4) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index df401aded662..bf7c22f81cda 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -3075,7 +3075,7 @@ static int dce_v10_0_set_hpd_irq_state(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return 0; } @@ -3227,7 +3227,7 @@ static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 80f01c3989cd..47e05783c4a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -3206,7 +3206,7 @@ static int dce_v11_0_set_hpd_irq_state(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return 0; } @@ -3358,7 +3358,7 @@ static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return; } @@ -3488,8 +3488,7 @@ static const struct amd_ip_funcs dce_v11_0_ip_funcs = { .set_powergating_state = dce_v11_0_set_powergating_state, }; -static void -dce_v11_0_encoder_mode_set(struct drm_encoder *encoder, +static void dce_v11_0_encoder_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 255c70959343..276c025c4c03 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -287,7 +287,7 @@ static void dce_v6_0_hpd_int_ack(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return; } @@ -412,7 +412,7 @@ static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev, { if (!render) WREG32(mmVGA_RENDER_CONTROL, - RREG32(mmVGA_RENDER_CONTROL) & VGA_VSTATUS_CNTL); + RREG32(mmVGA_RENDER_CONTROL) & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK); } static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev) @@ -1011,16 +1011,16 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, /* select wm A */ arb_control3 = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset); tmp = arb_control3; - tmp &= ~LATENCY_WATERMARK_MASK(3); - tmp |= LATENCY_WATERMARK_MASK(1); + tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT); + tmp |= (1 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT); WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp); WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, ((latency_watermark_a << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) | (line_time << DPG_PIPE_URGENCY_CONTROL__URGENCY_HIGH_WATERMARK__SHIFT))); /* select wm B */ tmp = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset); - tmp &= ~LATENCY_WATERMARK_MASK(3); - tmp |= LATENCY_WATERMARK_MASK(2); + tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT); + tmp |= (2 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT); WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp); WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, ((latency_watermark_b << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) | @@ -1089,7 +1089,7 @@ static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev, } WREG32(mmDC_LB_MEMORY_SPLIT + amdgpu_crtc->crtc_offset, - DC_LB_MEMORY_CONFIG(tmp)); + (tmp << DC_LB_MEMORY_SPLIT__DC_LB_MEMORY_CONFIG__SHIFT)); WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset, (buffer_alloc << PIPE0_DMIF_BUFFER_CONTROL__DMIF_BUFFERS_ALLOCATED__SHIFT)); @@ -1306,6 +1306,7 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 offset; struct drm_connector *connector; struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; @@ -1327,6 +1328,11 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, }; + if (!dig || !dig->afmt || !dig->afmt->pin) + return; + + offset = dig->afmt->pin->offset; + drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { @@ -1348,7 +1354,7 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) return; for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { - u32 tmp = 0; + u32 value = 0; u8 stereo_freqs = 0; int max_channels = -1; int j; @@ -1358,12 +1364,12 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) if (sad->format == eld_reg_to_type[i][1]) { if (sad->channels > max_channels) { - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - MAX_CHANNELS, sad->channels); - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - DESCRIPTOR_BYTE_2, sad->byte2); - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - SUPPORTED_FREQUENCIES, sad->freq); + value = (sad->channels << + AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__MAX_CHANNELS__SHIFT) | + (sad->byte2 << + AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__DESCRIPTOR_BYTE_2__SHIFT) | + (sad->freq << + AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES__SHIFT); max_channels = sad->channels; } @@ -1374,13 +1380,13 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) } } - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - SUPPORTED_FREQUENCIES_STEREO, stereo_freqs); - WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp); + value |= (stereo_freqs << + AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES_STEREO__SHIFT); + + WREG32_AUDIO_ENDPT(offset, eld_reg_to_type[i][0], value); } kfree(sads); - } static void dce_v6_0_audio_enable(struct amdgpu_device *adev, @@ -1886,7 +1892,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, struct amdgpu_bo *abo; uint64_t fb_location, tiling_flags; uint32_t fb_format, fb_pitch_pixels, pipe_config; - u32 fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_NONE); + u32 fb_swap = (GRPH_ENDIAN_NONE << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); u32 viewport_w, viewport_h; int r; bool bypass_lut = false; @@ -1926,76 +1932,76 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, switch (target_fb->format->format) { case DRM_FORMAT_C8: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_8BPP) | - GRPH_FORMAT(GRPH_FORMAT_INDEXED)); + fb_format = ((GRPH_DEPTH_8BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_INDEXED << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); break; case DRM_FORMAT_XRGB4444: case DRM_FORMAT_ARGB4444: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB4444)); + fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB4444 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16); + fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_XRGB1555: case DRM_FORMAT_ARGB1555: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB1555)); + fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB1555 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16); + fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_BGRX5551: case DRM_FORMAT_BGRA5551: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) | - GRPH_FORMAT(GRPH_FORMAT_BGRA5551)); + fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_BGRA5551 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16); + fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_RGB565: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB565)); + fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB565 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16); + fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_XRGB8888: case DRM_FORMAT_ARGB8888: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB8888)); + fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32); + fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_ARGB2101010: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB2101010)); + fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB2101010 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32); + fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ bypass_lut = true; break; case DRM_FORMAT_BGRX1010102: case DRM_FORMAT_BGRA1010102: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) | - GRPH_FORMAT(GRPH_FORMAT_BGRA1010102)); + fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_BGRA1010102 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32); + fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ bypass_lut = true; break; case DRM_FORMAT_XBGR8888: case DRM_FORMAT_ABGR8888: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB8888)); - fb_swap = (GRPH_RED_CROSSBAR(GRPH_RED_SEL_B) | - GRPH_BLUE_CROSSBAR(GRPH_BLUE_SEL_R)); + fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); + fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) | + (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap |= GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32); + fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; default: @@ -2013,18 +2019,18 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT); num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); - fb_format |= GRPH_NUM_BANKS(num_banks); - fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_2D_TILED_THIN1); - fb_format |= GRPH_TILE_SPLIT(tile_split); - fb_format |= GRPH_BANK_WIDTH(bankw); - fb_format |= GRPH_BANK_HEIGHT(bankh); - fb_format |= GRPH_MACRO_TILE_ASPECT(mtaspect); + fb_format |= (num_banks << GRPH_CONTROL__GRPH_NUM_BANKS__SHIFT); + fb_format |= (GRPH_ARRAY_2D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT); + fb_format |= (tile_split << GRPH_CONTROL__GRPH_TILE_SPLIT__SHIFT); + fb_format |= (bankw << GRPH_CONTROL__GRPH_BANK_WIDTH__SHIFT); + fb_format |= (bankh << GRPH_CONTROL__GRPH_BANK_HEIGHT__SHIFT); + fb_format |= (mtaspect << GRPH_CONTROL__GRPH_MACRO_TILE_ASPECT__SHIFT); } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) { - fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_1D_TILED_THIN1); + fb_format |= (GRPH_ARRAY_1D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT); } pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); - fb_format |= GRPH_PIPE_CONFIG(pipe_config); + fb_format |= (pipe_config << GRPH_CONTROL__GRPH_PIPE_CONFIG__SHIFT); dce_v6_0_vga_enable(crtc, false); @@ -2040,7 +2046,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, (u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK); WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, - (u32) fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK); + (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK); WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap); @@ -2108,14 +2114,13 @@ static void dce_v6_0_set_interleave(struct drm_crtc *crtc, if (mode->flags & DRM_MODE_FLAG_INTERLACE) WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset, - INTERLEAVE_EN); + DATA_FORMAT__INTERLEAVE_EN_MASK); else WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset, 0); } static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = drm_to_adev(dev); @@ -2125,15 +2130,15 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc) DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id); WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) | - (0 << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT))); + ((INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) | + (INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT))); WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset, PRESCALE_GRPH_CONTROL__GRPH_PRESCALE_BYPASS_MASK); WREG32(mmPRESCALE_OVL_CONTROL + amdgpu_crtc->crtc_offset, PRESCALE_OVL_CONTROL__OVL_PRESCALE_BYPASS_MASK); WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) | - (0 << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT))); + ((INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) | + (INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT))); WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0); @@ -2160,19 +2165,19 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc) } WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) | - (0 << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) | - ICON_DEGAMMA_MODE(0) | - (0 << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT))); + ((DEGAMMA_BYPASS << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) | + (DEGAMMA_BYPASS << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) | + (DEGAMMA_BYPASS << DEGAMMA_CONTROL__ICON_DEGAMMA_MODE__SHIFT) | + (DEGAMMA_BYPASS << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT))); WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) | - (0 << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT))); + ((GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) | + (GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT))); WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) | - (0 << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT))); + ((REGAMMA_BYPASS << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) | + (REGAMMA_BYPASS << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT))); WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) | - (0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT))); + ((OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) | + (OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT))); /* XXX match this to the depth of the crtc fmt block, move to modeset? */ WREG32(0x1a50 + amdgpu_crtc->crtc_offset, 0); @@ -2267,8 +2272,6 @@ static void dce_v6_0_hide_cursor(struct drm_crtc *crtc) WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) | (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT)); - - } static void dce_v6_0_show_cursor(struct drm_crtc *crtc) @@ -2285,7 +2288,6 @@ static void dce_v6_0_show_cursor(struct drm_crtc *crtc) CUR_CONTROL__CURSOR_EN_MASK | (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) | (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT)); - } static int dce_v6_0_cursor_move_locked(struct drm_crtc *crtc, @@ -2596,7 +2598,6 @@ static bool dce_v6_0_crtc_mode_fixup(struct drm_crtc *crtc, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct drm_encoder *encoder; @@ -2669,7 +2670,7 @@ static void dce_v6_0_panic_flush(struct drm_plane *plane) /* Disable DC tiling */ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset); - fb_format &= ~GRPH_ARRAY_MODE(0x7); + fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK; WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); } @@ -2745,7 +2746,6 @@ static int dce_v6_0_early_init(struct amdgpu_ip_block *ip_block) static int dce_v6_0_sw_init(struct amdgpu_ip_block *ip_block) { int r, i; - bool ret; struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->mode_info.num_crtc; i++) { @@ -2789,8 +2789,7 @@ static int dce_v6_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } - ret = amdgpu_atombios_get_connector_info_from_object_table(adev); - if (ret) + if (amdgpu_atombios_get_connector_info_from_object_table(adev)) amdgpu_display_print_display_setup(adev_to_drm(adev)); else return -EINVAL; @@ -2986,12 +2985,12 @@ static void dce_v6_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: interrupt_mask = RREG32(mmINT_MASK + reg_block); - interrupt_mask &= ~VBLANK_INT_MASK; + interrupt_mask &= ~INT_MASK__VBLANK_INT_MASK; WREG32(mmINT_MASK + reg_block, interrupt_mask); break; case AMDGPU_IRQ_STATE_ENABLE: interrupt_mask = RREG32(mmINT_MASK + reg_block); - interrupt_mask |= VBLANK_INT_MASK; + interrupt_mask |= INT_MASK__VBLANK_INT_MASK; WREG32(mmINT_MASK + reg_block, interrupt_mask); break; default: @@ -3006,28 +3005,28 @@ static void dce_v6_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev, } -static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev, +static int dce_v6_0_set_hpd_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, - unsigned type, + unsigned hpd, enum amdgpu_interrupt_state state) { u32 dc_hpd_int_cntl; - if (type >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", type); + if (hpd >= adev->mode_info.num_hpd) { + DRM_DEBUG("invalid hpd %d\n", hpd); return 0; } switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]); - dc_hpd_int_cntl &= ~DC_HPDx_INT_EN; - WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl); + dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); + dc_hpd_int_cntl &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK; + WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]); - dc_hpd_int_cntl |= DC_HPDx_INT_EN; - WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl); + dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); + dc_hpd_int_cntl |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK; + WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl); break; default: break; @@ -3036,7 +3035,7 @@ static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev, return 0; } -static int dce_v6_0_set_crtc_interrupt_state(struct amdgpu_device *adev, +static int dce_v6_0_set_crtc_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3096,7 +3095,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev, switch (entry->src_data[0]) { case 0: /* vblank */ if (disp_int & interrupt_status_offsets[crtc].vblank) - WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_ACK); + WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_STATUS__VBLANK_ACK_MASK); else DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); @@ -3107,7 +3106,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev, break; case 1: /* vline */ if (disp_int & interrupt_status_offsets[crtc].vline) - WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_ACK); + WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_STATUS__VLINE_ACK_MASK); else DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); @@ -3121,7 +3120,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev, return 0; } -static int dce_v6_0_set_pageflip_interrupt_state(struct amdgpu_device *adev, +static int dce_v6_0_set_pageflip_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3172,7 +3171,7 @@ static int dce_v6_0_pageflip_irq(struct amdgpu_device *adev, spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); works = amdgpu_crtc->pflip_works; - if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){ + if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != " "AMDGPU_FLIP_SUBMITTED(%d)\n", amdgpu_crtc->pflip_status, @@ -3249,12 +3248,10 @@ static const struct amd_ip_funcs dce_v6_0_ip_funcs = { .set_powergating_state = dce_v6_0_set_powergating_state, }; -static void -dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, +static void dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); @@ -3274,7 +3271,6 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder) { - struct amdgpu_device *adev = drm_to_adev(encoder->dev); struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder); @@ -3314,7 +3310,6 @@ static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder) static void dce_v6_0_encoder_commit(struct drm_encoder *encoder) { - struct drm_device *dev = encoder->dev; struct amdgpu_device *adev = drm_to_adev(dev); @@ -3325,7 +3320,6 @@ static void dce_v6_0_encoder_commit(struct drm_encoder *encoder) static void dce_v6_0_encoder_disable(struct drm_encoder *encoder) { - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig; int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); @@ -3541,17 +3535,17 @@ static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev) } static const struct amdgpu_irq_src_funcs dce_v6_0_crtc_irq_funcs = { - .set = dce_v6_0_set_crtc_interrupt_state, + .set = dce_v6_0_set_crtc_irq_state, .process = dce_v6_0_crtc_irq, }; static const struct amdgpu_irq_src_funcs dce_v6_0_pageflip_irq_funcs = { - .set = dce_v6_0_set_pageflip_interrupt_state, + .set = dce_v6_0_set_pageflip_irq_state, .process = dce_v6_0_pageflip_irq, }; static const struct amdgpu_irq_src_funcs dce_v6_0_hpd_irq_funcs = { - .set = dce_v6_0_set_hpd_interrupt_state, + .set = dce_v6_0_set_hpd_irq_state, .process = dce_v6_0_hpd_irq, }; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 07358546581f..e62ccf9eb73d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -271,7 +271,7 @@ static void dce_v8_0_hpd_int_ack(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return; } @@ -3021,7 +3021,7 @@ static void dce_v8_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev, } } -static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev, +static int dce_v8_0_set_hpd_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3029,7 +3029,7 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev, u32 dc_hpd_int_cntl; if (type >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", type); + DRM_DEBUG("invalid hpd %d\n", type); return 0; } @@ -3051,7 +3051,7 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev, return 0; } -static int dce_v8_0_set_crtc_interrupt_state(struct amdgpu_device *adev, +static int dce_v8_0_set_crtc_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3136,7 +3136,7 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev, return 0; } -static int dce_v8_0_set_pageflip_interrupt_state(struct amdgpu_device *adev, +static int dce_v8_0_set_pageflip_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3547,17 +3547,17 @@ static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev) } static const struct amdgpu_irq_src_funcs dce_v8_0_crtc_irq_funcs = { - .set = dce_v8_0_set_crtc_interrupt_state, + .set = dce_v8_0_set_crtc_irq_state, .process = dce_v8_0_crtc_irq, }; static const struct amdgpu_irq_src_funcs dce_v8_0_pageflip_irq_funcs = { - .set = dce_v8_0_set_pageflip_interrupt_state, + .set = dce_v8_0_set_pageflip_irq_state, .process = dce_v8_0_pageflip_irq, }; static const struct amdgpu_irq_src_funcs dce_v8_0_hpd_irq_funcs = { - .set = dce_v8_0_set_hpd_interrupt_state, + .set = dce_v8_0_set_hpd_irq_state, .process = dce_v8_0_hpd_irq, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 6d514efb0a6d..7bd506f06eb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -368,11 +368,6 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = { SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR), SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST), /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0), @@ -421,7 +416,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_10[] = { SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET), SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_WG_STATE_OFFSET), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS) + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), }; static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = { @@ -448,7 +452,32 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = { SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR), SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR_HI), SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI) + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI), + /* gfx header registers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), }; static const struct soc15_reg_golden golden_settings_gc_10_1[] = { @@ -4296,9 +4325,7 @@ static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev) static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; int ctx_reg_offset; if (adev->gfx.rlc.cs_data == NULL) @@ -4306,39 +4333,15 @@ static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); - ctx_reg_offset = - SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; + ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); buffer[count++] = cpu_to_le32(ctx_reg_offset); buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + amdgpu_gfx_csb_preamble_end(buffer, count); } static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev) @@ -4752,6 +4755,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) int i, j, k, r, ring_id = 0; int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); @@ -4763,7 +4767,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(10, 1, 4): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 8; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; @@ -4778,7 +4782,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 2; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 2; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; @@ -4800,7 +4804,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex); if (adev->gfx.me_fw_version >= 101 && adev->gfx.pfp_fw_version >= 158 && - adev->gfx.mec_fw_version >= 152) { + adev->gfx.mec_fw_version >= 151) { adev->gfx.enable_cleaner_shader = true; r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); if (r) { @@ -4810,7 +4814,9 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) } break; case IP_VERSION(10, 3, 0): + case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 2): + case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex; @@ -4826,6 +4832,34 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) } } break; + case IP_VERSION(10, 3, 6): + adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 14 && + adev->gfx.pfp_fw_version >= 17 && + adev->gfx.mec_fw_version >= 24) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + case IP_VERSION(10, 3, 7): + adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 4 && + adev->gfx.pfp_fw_version >= 9 && + adev->gfx.mec_fw_version >= 12) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; default: adev->gfx.enable_cleaner_shader = false; break; @@ -4886,7 +4920,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) /* set up the gfx ring */ for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { + for (j = 0; j < num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) continue; @@ -4918,11 +4952,15 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) } } } - /* TODO: Add queue reset mask when FW fully supports it */ + adev->gfx.gfx_supported_reset = amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); adev->gfx.compute_supported_reset = amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + if (!amdgpu_sriov_vf(adev)) { + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + } r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0); if (r) { @@ -6114,7 +6152,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); @@ -6192,7 +6230,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0); @@ -6269,7 +6307,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); @@ -6644,7 +6682,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); @@ -6851,22 +6889,9 @@ static int gfx_v10_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) { int r, i; - struct amdgpu_ring *ring; for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v10_0_kgq_init_queue(ring, false); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v10_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false); if (r) return r; } @@ -7173,55 +7198,24 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore) static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; - int r; - - ring = &adev->gfx.kiq[0].ring; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (unlikely(r != 0)) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } - - gfx_v10_0_kiq_init_queue(ring); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); + gfx_v10_0_kiq_init_queue(&adev->gfx.kiq[0].ring); return 0; } static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = NULL; - int r = 0, i; + int i, r; gfx_v10_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto done; - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v10_0_kcq_init_queue(ring, false); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v10_0_kcq_init_queue(&adev->gfx.compute_ring[i], + false); if (r) - goto done; + return r; } - r = amdgpu_gfx_enable_kcq(adev, 0); -done: - return r; + return amdgpu_gfx_enable_kcq(adev, 0); } static int gfx_v10_0_cp_resume(struct amdgpu_device *adev) @@ -9056,21 +9050,6 @@ static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask); } -static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring, - unsigned int vmid) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t value = 0; - - value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); - value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); - value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); - value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - WREG32_SOC15(GC, 0, mmSQ_CMD, value); - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); -} - static void gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, uint32_t me, uint32_t pipe, @@ -9532,7 +9511,9 @@ static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) amdgpu_ring_insert_nop(ring, num_nop - 1); } -static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; @@ -9542,15 +9523,14 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) u64 addr; int r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + spin_lock_irqsave(&kiq->ring_lock, flags); - if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7 + kiq->pmf->map_queues_size)) { + if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7)) { spin_unlock_irqrestore(&kiq->ring_lock, flags); return -ENOMEM; } @@ -9570,37 +9550,37 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) 0, 1, 0x20); gfx_v10_0_ring_emit_reg_wait(kiq_ring, SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffffffff); - kiq->pmf->kiq_map_queues(kiq_ring, ring); amdgpu_ring_commit(kiq_ring); - - spin_unlock_irqrestore(&kiq->ring_lock, flags); - r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) return r; - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - DRM_ERROR("fail to resv mqd_obj\n"); + r = gfx_v10_0_kgq_init_queue(ring, true); + if (r) { + DRM_ERROR("fail to init kgq\n"); return r; } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v10_0_kgq_init_queue(ring, true); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; } - amdgpu_bo_unreserve(ring->mqd_obj); - if (r) { - DRM_ERROR("fail to unresv mqd_obj\n"); + kiq->pmf->kiq_map_queues(kiq_ring, ring); + amdgpu_ring_commit(kiq_ring); + r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + if (r) return r; - } - return amdgpu_ring_test_ring(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, - unsigned int vmid) + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; @@ -9608,12 +9588,11 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int i, r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + spin_lock_irqsave(&kiq->ring_lock, flags); if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { @@ -9624,9 +9603,8 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 0, 0); amdgpu_ring_commit(kiq_ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) return r; @@ -9649,20 +9627,9 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, return r; } - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - dev_err(adev->dev, "fail to resv mqd_obj\n"); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v10_0_kcq_init_queue(ring, true); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v10_0_kcq_init_queue(ring, true); if (r) { - dev_err(adev->dev, "fail to unresv mqd_obj\n"); + dev_err(adev->dev, "fail to init kcq\n"); return r; } @@ -9673,13 +9640,12 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, } kiq->pmf->kiq_map_queues(kiq_ring, ring); amdgpu_ring_commit(kiq_ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) return r; - return amdgpu_ring_test_ring(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) @@ -9711,9 +9677,14 @@ static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printe for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); for (reg = 0; reg < reg_count; reg++) { - drm_printf(p, "%-50s \t 0x%08x\n", - gc_cp_reg_list_10[reg].reg_name, - adev->gfx.ip_dump_compute_queues[index + reg]); + if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, "%-50s \t 0x%08x\n", + "mmCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues[index + reg]); + else + drm_printf(p, "%-50s \t 0x%08x\n", + gc_cp_reg_list_10[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); } index += reg_count; } @@ -9774,9 +9745,13 @@ static void gfx_v10_ip_dump(struct amdgpu_ip_block *ip_block) nv_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); for (reg = 0; reg < reg_count; reg++) { - adev->gfx.ip_dump_compute_queues[index + reg] = - RREG32(SOC15_REG_ENTRY_OFFSET( - gc_cp_reg_list_10[reg])); + if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_10[reg])); } index += reg_count; } @@ -9905,7 +9880,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v10_0_ring_soft_recovery, .emit_mem_sync = gfx_v10_0_emit_mem_sync, .reset = gfx_v10_0_reset_kgq, .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader, @@ -9946,7 +9920,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v10_0_ring_soft_recovery, .emit_mem_sync = gfx_v10_0_emit_mem_sync, .reset = gfx_v10_0_reset_kcq, .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h index 5255378af53c..f67569ccf9f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h @@ -43,9 +43,9 @@ static const u32 gfx_10_1_10_cleaner_shader_hex[] = { 0xd70f6a01, 0x000202ff, 0x00000400, 0x80828102, 0xbf84fff7, 0xbefc03ff, - 0x00000068, 0xbe803080, - 0xbe813080, 0xbe823080, - 0xbe833080, 0x80fc847c, + 0x00000068, 0xbe803000, + 0xbe813000, 0xbe823000, + 0xbe833000, 0x80fc847c, 0xbf84fffa, 0xbeea0480, 0xbeec0480, 0xbeee0480, 0xbef00480, 0xbef20480, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm index 9ba3359253c9..54f7ed9e2801 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm @@ -40,7 +40,6 @@ shader main type(CS) wave_size(32) // Note: original source code from SQ team - // // Create 32 waves in a threadgroup (CS waves) // Each allocates 64 VGPRs @@ -71,8 +70,8 @@ label_0005: s_sub_u32 s2, s2, 8 s_cbranch_scc0 label_0005 // - s_mov_b32 s2, 0x80000000 // Bit31 is first_wave - s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set + s_mov_b32 s2, 0x80000000 // Bit31 is first_wave + s_and_b32 s2, s2, s1 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup // CLEAR LDS // @@ -99,10 +98,10 @@ label_001F: label_0023: s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance) label_sgpr_loop: - s_movreld_b32 s0, 0 - s_movreld_b32 s1, 0 - s_movreld_b32 s2, 0 - s_movreld_b32 s3, 0 + s_movreld_b32 s0, s0 + s_movreld_b32 s1, s0 + s_movreld_b32 s2, s0 + s_movreld_b32 s3, s0 s_sub_u32 m0, m0, 4 s_cbranch_scc0 label_sgpr_loop diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index d8772cd6db63..c01c241a1b06 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -48,6 +48,8 @@ #include "gfx_v11_0_3.h" #include "nbio_v4_3.h" #include "mes_v11_0.h" +#include "mes_userqueue.h" +#include "amdgpu_userq_fence.h" #define GFX11_NUM_GFX_RINGS 1 #define GFX11_MEC_HPD_SIZE 2048 @@ -83,6 +85,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin"); @@ -177,9 +180,13 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), @@ -230,7 +237,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET), - SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS) + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), }; static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = { @@ -259,7 +275,24 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), }; static const struct soc15_reg_golden golden_settings_gc_11_0[] = { @@ -580,33 +613,18 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t padding, offset; - - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - padding = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding); - *cpu_ptr = cpu_to_le32(0xCAFEDEAD); - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) - return r; + r = amdgpu_device_wb_get(adev, &index); + if (r) + return r; - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); - cpu_ptr = &adev->wb.wb[index]; + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err1; - } + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err1; } ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); @@ -633,12 +651,10 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err2: - if (!ring->is_mes_queue) - amdgpu_ib_free(&ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -744,6 +760,10 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, AMDGPU_UCODE_REQUIRED, "amdgpu/gc_11_0_0_rlc_1.bin"); + else if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_rlc_kicker.bin", ucode_prefix); else err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, AMDGPU_UCODE_REQUIRED, @@ -833,9 +853,7 @@ static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev) static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; int ctx_reg_offset; if (adev->gfx.rlc.cs_data == NULL) @@ -843,39 +861,15 @@ static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } - - ctx_reg_offset = - SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; + ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); buffer[count++] = cpu_to_le32(ctx_reg_offset); buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + amdgpu_gfx_csb_preamble_end(buffer, count); } static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev) @@ -1056,14 +1050,21 @@ static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev, #define MQD_FWWORKAREA_SIZE 484 #define MQD_FWWORKAREA_ALIGNMENT 256 -static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, +static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev, struct amdgpu_gfx_shadow_info *shadow_info) { - if (adev->gfx.cp_gfx_shadow) { - shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; - shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; - shadow_info->csa_size = MQD_FWWORKAREA_SIZE; - shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; + shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; + shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; + shadow_info->csa_size = MQD_FWWORKAREA_SIZE; + shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; +} + +static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, + struct amdgpu_gfx_shadow_info *shadow_info, + bool skip_check) +{ + if (adev->gfx.cp_gfx_shadow || skip_check) { + gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info); return 0; } else { memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info)); @@ -1136,6 +1137,10 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, ring->ring_obj = NULL; ring->use_doorbell = true; + if (adev->gfx.disable_kq) { + ring->no_scheduler = true; + ring->no_user_submission = true; + } if (!ring_id) ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; @@ -1568,24 +1573,18 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) { - int i, j, k, r, ring_id = 0; + int i, j, k, r, ring_id; int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): - adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; - adev->gfx.mec.num_mec = 2; - adev->gfx.mec.num_pipe_per_mec = 4; - adev->gfx.mec.num_queue_per_pipe = 4; - break; - case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): @@ -1593,7 +1592,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 5, 3): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 2; adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; @@ -1612,6 +1611,35 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + if (!adev->gfx.disable_uq && + adev->gfx.me_fw_version >= 2390 && + adev->gfx.pfp_fw_version >= 2530 && + adev->gfx.mec_fw_version >= 2600 && + adev->mes.fw_version[0] >= 120) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } + break; + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): + /* add firmware version checks here */ + if (0 && !adev->gfx.disable_uq) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } + break; + default: + break; + } + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); if (adev->gfx.me_fw_version >= 2280 && @@ -1640,6 +1668,34 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) } } break; + case IP_VERSION(11, 5, 2): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 12 && + adev->gfx.pfp_fw_version >= 15 && + adev->gfx.mec_fw_version >= 15) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + case IP_VERSION(11, 5, 3): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 7 && + adev->gfx.pfp_fw_version >= 8 && + adev->gfx.mec_fw_version >= 8) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; default: adev->gfx.enable_cleaner_shader = false; break; @@ -1701,37 +1757,42 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } - /* set up the gfx ring */ - for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { - if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) - continue; - - r = gfx_v11_0_gfx_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; - ring_id++; + if (adev->gfx.num_gfx_rings) { + ring_id = 0; + /* set up the gfx ring */ + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { + if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v11_0_gfx_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; + ring_id++; + } } } } - ring_id = 0; - /* set up the compute queues - allocate horizontally across pipes */ - for (i = 0; i < adev->gfx.mec.num_mec; ++i) { - for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, - k, j)) - continue; + if (adev->gfx.num_compute_rings) { + ring_id = 0; + /* set up the compute queues - allocate horizontally across pipes */ + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, + k, j)) + continue; - r = gfx_v11_0_compute_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; + r = gfx_v11_0_compute_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; - ring_id++; + ring_id++; + } } } } @@ -1745,12 +1806,17 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): if ((adev->gfx.me_fw_version >= 2280) && - (adev->gfx.mec_fw_version >= 2410)) { - adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; - adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + (adev->gfx.mec_fw_version >= 2410) && + !amdgpu_sriov_vf(adev)) { + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; } break; default: + if (!amdgpu_sriov_vf(adev)) { + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + } break; } @@ -2428,7 +2494,7 @@ static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); @@ -2472,7 +2538,7 @@ static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); @@ -2517,7 +2583,7 @@ static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); @@ -3153,7 +3219,7 @@ static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); @@ -3371,7 +3437,7 @@ static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); @@ -4061,6 +4127,8 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); #endif + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ @@ -4081,6 +4149,16 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, /* active the queue */ mqd->cp_gfx_hqd_active = 1; + /* set gfx UQ items */ + mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr); + mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); + mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr); + mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr); + mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); + mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); + return 0; } @@ -4115,22 +4193,9 @@ static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) { int r, i; - struct amdgpu_ring *ring; for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v11_0_kgq_init_queue(ring, false); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false); if (r) return r; } @@ -4218,6 +4283,8 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, prop->allow_tunneling); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); mqd->cp_hqd_pq_control = tmp; /* set the wb address whether it's enabled or not */ @@ -4269,6 +4336,10 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_active = prop->hqd_active; + /* set UQ fenceaddress */ + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); + return 0; } @@ -4452,57 +4523,24 @@ static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset) static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; - int r; - - ring = &adev->gfx.kiq[0].ring; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (unlikely(r != 0)) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } - - gfx_v11_0_kiq_init_queue(ring); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); - ring->sched.ready = true; + gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring); return 0; } static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = NULL; - int r = 0, i; + int i, r; if (!amdgpu_async_gfx_ring) gfx_v11_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto done; - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v11_0_kcq_init_queue(ring, false); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false); if (r) - goto done; + return r; } - r = amdgpu_gfx_enable_kcq(adev, 0); -done: - return r; + return amdgpu_gfx_enable_kcq(adev, 0); } static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) @@ -4555,11 +4593,23 @@ static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) return r; } - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - r = amdgpu_ring_test_helper(ring); - if (r) - return r; + if (adev->gfx.disable_kq) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + /* we don't want to set ring->ready */ + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + } + if (amdgpu_async_gfx_ring) + amdgpu_gfx_disable_kgq(adev, 0); + } else { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } } for (i = 0; i < adev->gfx.num_compute_rings; i++) { @@ -4587,7 +4637,7 @@ static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) if (r) return r; - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; @@ -4768,6 +4818,49 @@ static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block) return r; } +static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev, + bool enable) +{ + unsigned int irq_type; + int m, p, r; + + if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) { + for (m = 0; m < adev->gfx.me.num_me; m++) { + for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) { + irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + } + + if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) { + for (m = 0; m < adev->gfx.mec.num_mec; ++m) { + for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + (m * adev->gfx.mec.num_pipe_per_mec) + + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + } + + return 0; +} + static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -4777,9 +4870,11 @@ static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); + gfx_v11_0_set_userq_eop_interrupts(adev, false); if (!adev->no_hw_access) { - if (amdgpu_async_gfx_ring) { + if (amdgpu_async_gfx_ring && + !adev->gfx.disable_kq) { if (amdgpu_gfx_disable_kgq(adev, 0)) DRM_ERROR("KGQ disable failed\n"); } @@ -5105,11 +5200,36 @@ static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = true; + break; + case 1: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = false; + break; + case 2: + adev->gfx.disable_kq = true; + adev->gfx.disable_uq = false; + break; + } + adev->gfx.funcs = &gfx_v11_0_gfx_funcs; - adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), - AMDGPU_MAX_COMPUTE_RINGS); + if (adev->gfx.disable_kq) { + /* We need one GFX ring temporarily to set up + * the clear state. + */ + adev->gfx.num_gfx_rings = 1; + adev->gfx.num_compute_rings = 0; + } else { + adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); + } gfx_v11_0_set_kiq_pm4_funcs(adev); gfx_v11_0_set_ring_funcs(adev); @@ -5140,6 +5260,11 @@ static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block) r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); if (r) return r; + + r = gfx_v11_0_set_userq_eop_interrupts(adev, true); + if (r) + return r; + return 0; } @@ -5737,10 +5862,6 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); } - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x400000; - amdgpu_ring_write(ring, header); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -5760,10 +5881,6 @@ static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x40000000; - /* Currently, there is a high possibility to get wave ID mismatch * between ME and GDS, leading to a hw deadlock, because ME generates * different wave IDs than the GDS expects. This situation happens @@ -5821,8 +5938,7 @@ static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq)); - amdgpu_ring_write(ring, ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); + amdgpu_ring_write(ring, 0); } static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -5850,10 +5966,7 @@ static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - if (ring->is_mes_queue) - gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); - else - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* compute doesn't have PFP */ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { @@ -6082,28 +6195,13 @@ static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) void *de_payload_cpu_addr; int cnt; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v10_gfx_meta_data, de_payload); - de_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gds_backup) + - offsetof(struct v10_gfx_meta_data, de_payload); - gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - offset = offsetof(struct v10_gfx_meta_data, de_payload); - de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + offset = offsetof(struct v10_gfx_meta_data, de_payload); + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + - AMDGPU_CSA_SIZE - adev->gds.gds_size, - PAGE_SIZE); - } + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); @@ -6190,21 +6288,6 @@ static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask, 0x20); } -static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring, - unsigned vmid) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t value = 0; - - value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); - value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); - value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); - value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - WREG32_SOC15(GC, 0, regSQ_CMD, value); - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); -} - static void gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, uint32_t me, uint32_t pipe, @@ -6342,25 +6425,23 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - int i; + u32 doorbell_offset = entry->src_data[0]; u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring; - uint32_t mes_queue_id = entry->src_data[0]; + int i; DRM_DEBUG("IH: CP EOP\n"); - if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { - struct amdgpu_mes_queue *queue; - - mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; + if (adev->enable_mes && doorbell_offset) { + struct amdgpu_userq_fence_driver *fence_drv = NULL; + struct xarray *xa = &adev->userq_xa; + unsigned long flags; - spin_lock(&adev->mes.queue_id_lock); - queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); - if (queue) { - DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); - amdgpu_fence_process(queue->ring); - } - spin_unlock(&adev->mes.queue_id_lock); + xa_lock_irqsave(xa, flags); + fence_drv = xa_load(xa, doorbell_offset); + if (fence_drv) + amdgpu_userq_fence_driver_process(fence_drv); + xa_unlock_irqrestore(xa, flags); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; @@ -6527,27 +6608,29 @@ static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, pipe_id = (entry->ring_id & 0x03) >> 0; queue_id = (entry->ring_id & 0x70) >> 4; - switch (me_id) { - case 0: - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - if (ring->me == me_id && ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); - } - break; - case 1: - case 2: - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - if (ring->me == me_id && ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); + if (!adev->gfx.disable_kq) { + switch (me_id) { + case 0: + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + default: + BUG(); + break; } - break; - default: - BUG(); - break; } } @@ -6655,32 +6738,90 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } -static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev) +{ + /* Disable the pipe reset until the CPFW fully support it.*/ + dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n"); + return false; +} + + +static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; int r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + if (!gfx_v11_pipe_reset_support(adev)) + return -EOPNOTSUPP; - r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); - if (r) - return r; + gfx_v11_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - dev_err(adev->dev, "fail to resv mqd_obj\n"); - return r; + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 0); + break; + default: + break; } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v11_0_kgq_init_queue(ring, true); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; + + WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); + + r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) - + RS64_FW_UC_START_ADDR_LO; + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v11_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly, + * so the pipe reset status relies on the later gfx ring test result. + */ + return 0; +} + +static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + struct amdgpu_device *adev = ring->adev; + int r; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); + if (r) { + + dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); + r = gfx_v11_reset_gfx_pipe(ring); + if (r) + return r; } - amdgpu_bo_unreserve(ring->mqd_obj); + + r = gfx_v11_0_kgq_init_queue(ring, true); if (r) { - dev_err(adev->dev, "fail to unresv mqd_obj\n"); + dev_err(adev->dev, "failed to init kgq\n"); return r; } @@ -6690,37 +6831,159 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) return r; } - return amdgpu_ring_test_ring(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + +static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring) +{ + + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; + int r; + + if (!gfx_v11_pipe_reset_support(adev)) + return -EOPNOTSUPP; + + gfx_v11_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); + clean_pipe = reset_pipe; + + if (adev->gfx.rs64_enable) { + + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 0); + break; + default: + break; + } + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe); + r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) - + RS64_FW_UC_START_ADDR_LO; + } else { + if (ring->me == 1) { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE3_RESET, 0); + break; + default: + break; + } + /* mec1 fw pc: CP_MEC1_INSTR_PNTR */ + } else { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE3_RESET, 0); + break; + default: + break; + } + /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */ + } + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe); + r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR)); + } + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v11_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /*FIXME:Sometimes driver can't cache the MEC firmware start PC correctly, so the pipe + * reset status relies on the compute ring test result. + */ + return 0; } -static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) +static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; int r = 0; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); if (r) { - dev_err(adev->dev, "reset via MMIO failed %d\n", r); - return r; + dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r); + r = gfx_v11_0_reset_compute_pipe(ring); + if (r) + return r; } - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - dev_err(adev->dev, "fail to resv mqd_obj\n"); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v11_0_kcq_init_queue(ring, true); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v11_0_kcq_init_queue(ring, true); if (r) { - dev_err(adev->dev, "fail to unresv mqd_obj\n"); + dev_err(adev->dev, "fail to init kcq\n"); return r; } r = amdgpu_mes_map_legacy_queue(adev, ring); @@ -6729,7 +6992,7 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) return r; } - return amdgpu_ring_test_ring(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) @@ -6761,9 +7024,14 @@ static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printe for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); for (reg = 0; reg < reg_count; reg++) { - drm_printf(p, "%-50s \t 0x%08x\n", - gc_cp_reg_list_11[reg].reg_name, - adev->gfx.ip_dump_compute_queues[index + reg]); + if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, "%-50s \t 0x%08x\n", + "regCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues[index + reg]); + else + drm_printf(p, "%-50s \t 0x%08x\n", + gc_cp_reg_list_11[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); } index += reg_count; } @@ -6823,9 +7091,16 @@ static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block) /* ME0 is for GFX so start from 1 for CP */ soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); for (reg = 0; reg < reg_count; reg++) { - adev->gfx.ip_dump_compute_queues[index + reg] = - RREG32(SOC15_REG_ENTRY_OFFSET( - gc_cp_reg_list_11[reg])); + if (i && + gc_cp_reg_list_11[reg].reg_offset == + regCP_MEC_ME1_HEADER_DUMP) + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_OFFSET(GC, 0, + regCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_11[reg])); } index += reg_count; } @@ -6953,7 +7228,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .emit_wreg = gfx_v11_0_ring_emit_wreg, .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v11_0_ring_soft_recovery, .emit_mem_sync = gfx_v11_0_emit_mem_sync, .reset = gfx_v11_0_reset_kgq, .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, @@ -6995,7 +7269,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .emit_wreg = gfx_v11_0_ring_emit_wreg, .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v11_0_ring_soft_recovery, .emit_mem_sync = gfx_v11_0_emit_mem_sync, .reset = gfx_v11_0_reset_kcq, .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index dceb5ad38862..3e138527d534 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -36,7 +36,7 @@ #include "gc/gc_12_0_0_offset.h" #include "gc/gc_12_0_0_sh_mask.h" #include "soc24_enum.h" -#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h" +#include "ivsrcid/gfx/irqsrcs_gfx_12_0_0.h" #include "soc15.h" #include "clearstate_gfx12.h" @@ -44,6 +44,8 @@ #include "gfx_v12_0.h" #include "nbif_v6_3_1.h" #include "mes_v12_0.h" +#include "mes_userqueue.h" +#include "amdgpu_userq_fence.h" #define GFX12_NUM_GFX_RINGS 1 #define GFX12_MEC_HPD_SIZE 2048 @@ -77,6 +79,7 @@ MODULE_FIRMWARE("amdgpu/gc_12_0_1_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_me.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc_kicker.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_toc.bin"); static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = { @@ -133,11 +136,14 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR0), SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR1), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_RS64_INSTR_PNTR), - /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), @@ -186,7 +192,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET), - SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS) + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), }; static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { @@ -215,7 +230,24 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), }; static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = { @@ -475,33 +507,18 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t padding, offset; - - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - padding = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding); - *cpu_ptr = cpu_to_le32(0xCAFEDEAD); - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) - return r; + r = amdgpu_device_wb_get(adev, &index); + if (r) + return r; - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); - cpu_ptr = &adev->wb.wb[index]; + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r); - goto err1; - } + r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r); + goto err1; } ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); @@ -528,12 +545,10 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err2: - if (!ring->is_mes_queue) - amdgpu_ib_free(&ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -572,7 +587,7 @@ out: static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) { - char ucode_prefix[15]; + char ucode_prefix[30]; int err; const struct rlc_firmware_header_v2_0 *rlc_hdr; uint16_t version_major; @@ -599,9 +614,14 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK); if (!amdgpu_sriov_vf(adev)) { - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, - AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_rlc.bin", ucode_prefix); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_rlc_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_rlc.bin", ucode_prefix); if (err) goto out; rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; @@ -881,6 +901,34 @@ static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev, soc24_grbm_select(adev, me, pipe, q, vm); } +/* all sizes are in bytes */ +#define MQD_SHADOW_BASE_SIZE 73728 +#define MQD_SHADOW_BASE_ALIGNMENT 256 +#define MQD_FWWORKAREA_SIZE 484 +#define MQD_FWWORKAREA_ALIGNMENT 256 + +static void gfx_v12_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev, + struct amdgpu_gfx_shadow_info *shadow_info) +{ + shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; + shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; + shadow_info->csa_size = MQD_FWWORKAREA_SIZE; + shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; +} + +static int gfx_v12_0_get_gfx_shadow_info(struct amdgpu_device *adev, + struct amdgpu_gfx_shadow_info *shadow_info, + bool skip_check) +{ + if (adev->gfx.cp_gfx_shadow || skip_check) { + gfx_v12_0_get_gfx_shadow_info_nocheck(adev, shadow_info); + return 0; + } + + memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info)); + return -EINVAL; +} + static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = { .get_gpu_clock_counter = &gfx_v12_0_get_gpu_clock_counter, .select_se_sh = &gfx_v12_0_select_se_sh, @@ -889,6 +937,7 @@ static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = { .read_wave_vgprs = &gfx_v12_0_read_wave_vgprs, .select_me_pipe_q = &gfx_v12_0_select_me_pipe_q, .update_perfmon_mgcg = &gfx_v12_0_update_perf_clk, + .get_gfx_shadow_info = &gfx_v12_0_get_gfx_shadow_info, }; static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev) @@ -1346,6 +1395,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) unsigned num_compute_rings; int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); @@ -1354,8 +1404,8 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(12, 0, 1): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; - adev->gfx.mec.num_mec = 2; + adev->gfx.me.num_queue_per_pipe = 8; + adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 2; adev->gfx.mec.num_queue_per_pipe = 4; break; @@ -1372,6 +1422,22 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): + if (!adev->gfx.disable_uq && + adev->gfx.me_fw_version >= 2780 && + adev->gfx.pfp_fw_version >= 2840 && + adev->gfx.mec_fw_version >= 3050 && + adev->mes.fw_version[0] >= 123) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } + break; + default: + break; + } + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): if (adev->gfx.me_fw_version >= 2480 && adev->gfx.pfp_fw_version >= 2530 && adev->gfx.mec_fw_version >= 2680 && @@ -1383,36 +1449,38 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) break; } - /* recalculate compute rings to use based on hardware configuration */ - num_compute_rings = (adev->gfx.mec.num_pipe_per_mec * - adev->gfx.mec.num_queue_per_pipe) / 2; - adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings, - num_compute_rings); + if (adev->gfx.num_compute_rings) { + /* recalculate compute rings to use based on hardware configuration */ + num_compute_rings = (adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe) / 2; + adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings, + num_compute_rings); + } /* EOP Event */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, - GFX_11_0_0__SRCID__CP_EOP_INTERRUPT, + GFX_12_0_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); if (r) return r; /* Bad opcode Event */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, - GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, + GFX_12_0_0__SRCID__CP_BAD_OPCODE_ERROR, &adev->gfx.bad_op_irq); if (r) return r; /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, - GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, + GFX_12_0_0__SRCID__CP_PRIV_REG_FAULT, &adev->gfx.priv_reg_irq); if (r) return r; /* Privileged inst */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, - GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT, + GFX_12_0_0__SRCID__CP_PRIV_INSTR_FAULT, &adev->gfx.priv_inst_irq); if (r) return r; @@ -1433,37 +1501,41 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } - /* set up the gfx ring */ - for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { - if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) - continue; - - r = gfx_v12_0_gfx_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; - ring_id++; + if (adev->gfx.num_gfx_rings) { + /* set up the gfx ring */ + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { + if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v12_0_gfx_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; + ring_id++; + } } } } - ring_id = 0; - /* set up the compute queues - allocate horizontally across pipes */ - for (i = 0; i < adev->gfx.mec.num_mec; ++i) { - for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - if (!amdgpu_gfx_is_mec_queue_enabled(adev, - 0, i, k, j)) - continue; + if (adev->gfx.num_compute_rings) { + ring_id = 0; + /* set up the compute queues - allocate horizontally across pipes */ + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, + 0, i, k, j)) + continue; - r = gfx_v12_0_compute_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; + r = gfx_v12_0_compute_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; - ring_id++; + ring_id++; + } } } } @@ -1476,10 +1548,14 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): if ((adev->gfx.me_fw_version >= 2660) && - (adev->gfx.mec_fw_version >= 2920)) { - adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; - adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + (adev->gfx.mec_fw_version >= 2920) && + !amdgpu_sriov_vf(adev)) { + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; } + break; + default: + break; } if (!adev->enable_mes_kiq) { @@ -2324,7 +2400,7 @@ static int gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); @@ -2468,7 +2544,7 @@ static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); @@ -2948,6 +3024,8 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); #endif + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ @@ -2968,6 +3046,14 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, /* active the queue */ mqd->cp_gfx_hqd_active = 1; + /* set gfx UQ items */ + mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr); + mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); + mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); + mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); + return 0; } @@ -3001,37 +3087,19 @@ static int gfx_v12_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) { - int r, i; - struct amdgpu_ring *ring; + int i, r; for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto done; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v12_0_kgq_init_queue(ring, false); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v12_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false); if (r) - goto done; + return r; } r = amdgpu_gfx_enable_kgq(adev, 0); if (r) - goto done; - - r = gfx_v12_0_cp_gfx_start(adev); - if (r) - goto done; + return r; -done: - return r; + return gfx_v12_0_cp_gfx_start(adev); } static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, @@ -3109,6 +3177,8 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); mqd->cp_hqd_pq_control = tmp; /* set the wb address whether it's enabled or not */ @@ -3160,6 +3230,10 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_active = prop->hqd_active; + /* set UQ fenceaddress */ + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); + return 0; } @@ -3344,57 +3418,25 @@ static int gfx_v12_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset) static int gfx_v12_0_kiq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; - int r; - - ring = &adev->gfx.kiq[0].ring; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (unlikely(r != 0)) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } - - gfx_v12_0_kiq_init_queue(ring); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); - ring->sched.ready = true; + gfx_v12_0_kiq_init_queue(&adev->gfx.kiq[0].ring); + adev->gfx.kiq[0].ring.sched.ready = true; return 0; } static int gfx_v12_0_kcq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = NULL; - int r = 0, i; + int i, r; if (!amdgpu_async_gfx_ring) gfx_v12_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto done; - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v12_0_kcq_init_queue(ring, false); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v12_0_kcq_init_queue(&adev->gfx.compute_ring[i], false); if (r) - goto done; + return r; } - r = amdgpu_gfx_enable_kcq(adev, 0); -done: - return r; + return amdgpu_gfx_enable_kcq(adev, 0); } static int gfx_v12_0_cp_resume(struct amdgpu_device *adev) @@ -3476,7 +3518,7 @@ static int gfx_v12_0_gfxhub_enable(struct amdgpu_device *adev) if (r) return r; - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; @@ -3650,6 +3692,49 @@ static int gfx_v12_0_hw_init(struct amdgpu_ip_block *ip_block) return r; } +static int gfx_v12_0_set_userq_eop_interrupts(struct amdgpu_device *adev, + bool enable) +{ + unsigned int irq_type; + int m, p, r; + + if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) { + for (m = 0; m < adev->gfx.me.num_me; m++) { + for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) { + irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + } + + if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) { + for (m = 0; m < adev->gfx.mec.num_mec; ++m) { + for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + (m * adev->gfx.mec.num_pipe_per_mec) + + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + } + + return 0; +} + static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -3660,6 +3745,7 @@ static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); + gfx_v12_0_set_userq_eop_interrupts(adev, false); if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { @@ -3748,11 +3834,33 @@ static int gfx_v12_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = true; + break; + case 1: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = false; + break; + case 2: + adev->gfx.disable_kq = true; + adev->gfx.disable_uq = false; + break; + } + adev->gfx.funcs = &gfx_v12_0_gfx_funcs; - adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), - AMDGPU_MAX_COMPUTE_RINGS); + if (adev->gfx.disable_kq) { + adev->gfx.num_gfx_rings = 0; + adev->gfx.num_compute_rings = 0; + } else { + adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); + } gfx_v12_0_set_kiq_pm4_funcs(adev); gfx_v12_0_set_ring_funcs(adev); @@ -3783,6 +3891,10 @@ static int gfx_v12_0_late_init(struct amdgpu_ip_block *ip_block) if (r) return r; + r = gfx_v12_0_set_userq_eop_interrupts(adev, true); + if (r) + return r; + return 0; } @@ -4222,45 +4334,17 @@ static u64 gfx_v12_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v12_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always being used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - if (ring->use_doorbell) { - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - WREG32_SOC15(GC, 0, regCP_RB0_WPTR, - lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, - upper_32_bits(ring->wptr)); - } + WREG32_SOC15(GC, 0, regCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, + upper_32_bits(ring->wptr)); } } @@ -4285,42 +4369,14 @@ static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring) static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - /* XXX check if swapping is necessary on BE */ - if (ring->use_doorbell) { - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - BUG(); /* only DOORBELL method supported on gfx12 now */ - } + BUG(); /* only DOORBELL method supported on gfx12 now */ } } @@ -4367,10 +4423,6 @@ static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x400000; - amdgpu_ring_write(ring, header); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -4390,10 +4442,6 @@ static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x40000000; - amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -4433,8 +4481,7 @@ static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq)); - amdgpu_ring_write(ring, ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); + amdgpu_ring_write(ring, 0); } static void gfx_v12_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -4462,10 +4509,7 @@ static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v12_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - if (ring->is_mes_queue) - gfx_v12_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); - else - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* compute doesn't have PFP */ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { @@ -4656,21 +4700,6 @@ static void gfx_v12_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask, 0x20); } -static void gfx_v12_0_ring_soft_recovery(struct amdgpu_ring *ring, - unsigned vmid) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t value = 0; - - value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); - value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); - value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); - value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - WREG32_SOC15(GC, 0, regSQ_CMD, value); - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); -} - static void gfx_v12_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, uint32_t me, uint32_t pipe, @@ -4799,25 +4828,23 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - int i; + u32 doorbell_offset = entry->src_data[0]; u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring; - uint32_t mes_queue_id = entry->src_data[0]; + int i; DRM_DEBUG("IH: CP EOP\n"); - if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { - struct amdgpu_mes_queue *queue; - - mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; + if (adev->enable_mes && doorbell_offset) { + struct amdgpu_userq_fence_driver *fence_drv = NULL; + struct xarray *xa = &adev->userq_xa; + unsigned long flags; - spin_lock(&adev->mes.queue_id_lock); - queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); - if (queue) { - DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); - amdgpu_fence_process(queue->ring); - } - spin_unlock(&adev->mes.queue_id_lock); + xa_lock_irqsave(xa, flags); + fence_drv = xa_load(xa, doorbell_offset); + if (fence_drv) + amdgpu_userq_fence_driver_process(fence_drv); + xa_unlock_irqrestore(xa, flags); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; @@ -4984,27 +5011,29 @@ static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev, pipe_id = (entry->ring_id & 0x03) >> 0; queue_id = (entry->ring_id & 0x70) >> 4; - switch (me_id) { - case 0: - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - if (ring->me == me_id && ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); - } - break; - case 1: - case 2: - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - if (ring->me == me_id && ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); + if (!adev->gfx.disable_kq) { + switch (me_id) { + case 0: + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + default: + BUG(); + break; } - break; - default: - BUG(); - break; } } @@ -5210,34 +5239,89 @@ static void gfx_v12_ip_dump(struct amdgpu_ip_block *ip_block) amdgpu_gfx_off_ctrl(adev, true); } -static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +static bool gfx_v12_pipe_reset_support(struct amdgpu_device *adev) +{ + /* Disable the pipe reset until the CPFW fully support it.*/ + dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n"); + return false; +} + +static int gfx_v12_reset_gfx_pipe(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; int r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + if (!gfx_v12_pipe_reset_support(adev)) + return -EOPNOTSUPP; + + gfx_v12_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 0); + break; + default: + break; + } + + WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); + + r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) - + RS64_FW_UC_START_ADDR_LO; + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v12_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe reset: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /* Sometimes the ME start pc counter can't cache correctly, so the + * PC check only as a reference and pipe reset result rely on the + * later ring test. + */ + return 0; +} + +static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + struct amdgpu_device *adev = ring->adev; + int r; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); if (r) { - dev_err(adev->dev, "reset via MES failed %d\n", r); - return r; + dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); + r = gfx_v12_reset_gfx_pipe(ring); + if (r) + return r; } - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - dev_err(adev->dev, "fail to resv mqd_obj\n"); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v12_0_kgq_init_queue(ring, true); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v12_0_kgq_init_queue(ring, true); if (r) { - DRM_ERROR("fail to unresv mqd_obj\n"); + dev_err(adev->dev, "failed to init kgq\n"); return r; } @@ -5247,37 +5331,112 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) return r; } - return amdgpu_ring_test_ring(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } -static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) +static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; + int r = 0; + + if (!gfx_v12_pipe_reset_support(adev)) + return -EOPNOTSUPP; + + gfx_v12_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); + clean_pipe = reset_pipe; + + if (adev->gfx.rs64_enable) { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 0); + break; + default: + break; + } + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe); + r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) - + RS64_FW_UC_START_ADDR_LO; + } else { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 0); + break; + default: + break; + } + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe); + /* Doesn't find the F32 MEC instruction pointer register, and suppose + * the driver won't run into the F32 mode. + */ + } + + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v12_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe resets: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /* Need the ring test to verify the pipe reset result.*/ + return 0; +} + +static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; int r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); if (r) { - dev_err(adev->dev, "reset via MMIO failed %d\n", r); - return r; + dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r); + r = gfx_v12_0_reset_compute_pipe(ring); + if (r) + return r; } - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - DRM_ERROR("fail to resv mqd_obj\n"); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v12_0_kcq_init_queue(ring, true); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v12_0_kcq_init_queue(ring, true); if (r) { - DRM_ERROR("fail to unresv mqd_obj\n"); + dev_err(adev->dev, "failed to init kcq\n"); return r; } r = amdgpu_mes_map_legacy_queue(adev, ring); @@ -5286,7 +5445,7 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) return r; } - return amdgpu_ring_test_ring(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static void gfx_v12_0_ring_begin_use(struct amdgpu_ring *ring) @@ -5364,7 +5523,6 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = { .emit_wreg = gfx_v12_0_ring_emit_wreg, .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v12_0_ring_soft_recovery, .emit_mem_sync = gfx_v12_0_emit_mem_sync, .reset = gfx_v12_0_reset_kgq, .emit_cleaner_shader = gfx_v12_0_ring_emit_cleaner_shader, @@ -5403,7 +5561,6 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = { .emit_wreg = gfx_v12_0_ring_emit_wreg, .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v12_0_ring_soft_recovery, .emit_mem_sync = gfx_v12_0_emit_mem_sync, .reset = gfx_v12_0_reset_kcq, .emit_cleaner_shader = gfx_v12_0_ring_emit_cleaner_shader, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 13fbee46417a..70d7a1f434c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -53,6 +53,9 @@ #define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002 #define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001 +#define GFX6_NUM_GFX_RINGS 1 +#define GFX6_NUM_COMPUTE_RINGS 2 + static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev); @@ -1732,10 +1735,14 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev) gfx_v6_0_get_cu_info(adev); gfx_v6_0_config_init(adev); - WREG32(mmCP_QUEUE_THRESHOLDS, ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) | - (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT))); - WREG32(mmCP_MEQ_THRESHOLDS, (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) | - (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT)); + WREG32(mmCP_QUEUE_THRESHOLDS, + ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) | + (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT))); + + /* set HW defaults for 3D engine */ + WREG32(mmCP_MEQ_THRESHOLDS, + (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) | + (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT)); sx_debug_1 = RREG32(mmSX_DEBUG_1); WREG32(mmSX_DEBUG_1, sx_debug_1); @@ -2851,44 +2858,21 @@ static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev) static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; if (adev->gfx.rlc.cs_data == NULL) return; if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + amdgpu_gfx_csb_preamble_end(buffer, count); } static void gfx_v6_0_init_pg(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 8181bd0e4f18..2aa323dab34e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -55,6 +55,9 @@ #define GFX7_NUM_GFX_RINGS 1 #define GFX7_MEC_HPD_SIZE 2048 +#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001 +#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003 + static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev); @@ -3882,67 +3885,22 @@ static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev) static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; if (adev->gfx.rlc.cs_data == NULL) return; if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); - switch (adev->asic_type) { - case CHIP_BONAIRE: - buffer[count++] = cpu_to_le32(0x16000012); - buffer[count++] = cpu_to_le32(0x00000000); - break; - case CHIP_KAVERI: - buffer[count++] = cpu_to_le32(0x00000000); /* XXX */ - buffer[count++] = cpu_to_le32(0x00000000); - break; - case CHIP_KABINI: - case CHIP_MULLINS: - buffer[count++] = cpu_to_le32(0x00000000); /* XXX */ - buffer[count++] = cpu_to_le32(0x00000000); - break; - case CHIP_HAWAII: - buffer[count++] = cpu_to_le32(0x3a00161a); - buffer[count++] = cpu_to_le32(0x0000002e); - break; - default: - buffer[count++] = cpu_to_le32(0x00000000); - buffer[count++] = cpu_to_le32(0x00000000); - break; - } - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); + buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config); + buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + amdgpu_gfx_csb_preamble_end(buffer, count); } static void gfx_v7_0_init_pg(struct amdgpu_device *adev) @@ -4926,76 +4884,6 @@ static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ } -static void gfx_v7_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, - int mem_space, int opt, uint32_t addr0, - uint32_t addr1, uint32_t ref, uint32_t mask, - uint32_t inv) -{ - amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); - amdgpu_ring_write(ring, - /* memory (1) or register (0) */ - (WAIT_REG_MEM_MEM_SPACE(mem_space) | - WAIT_REG_MEM_OPERATION(opt) | /* wait */ - WAIT_REG_MEM_FUNCTION(3) | /* equal */ - WAIT_REG_MEM_ENGINE(eng_sel))); - - if (mem_space) - BUG_ON(addr0 & 0x3); /* Dword align */ - amdgpu_ring_write(ring, addr0); - amdgpu_ring_write(ring, addr1); - amdgpu_ring_write(ring, ref); - amdgpu_ring_write(ring, mask); - amdgpu_ring_write(ring, inv); /* poll interval */ -} - -static void gfx_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, - uint32_t val, uint32_t mask) -{ - gfx_v7_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); -} - -static int gfx_v7_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) -{ - struct amdgpu_device *adev = ring->adev; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - struct amdgpu_ring *kiq_ring = &kiq->ring; - unsigned long flags; - u32 tmp; - int r; - - if (amdgpu_sriov_vf(adev)) - return -EINVAL; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - spin_lock_irqsave(&kiq->ring_lock, flags); - - if (amdgpu_ring_alloc(kiq_ring, 5)) { - spin_unlock_irqrestore(&kiq->ring_lock, flags); - return -ENOMEM; - } - - tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); - gfx_v7_0_ring_emit_wreg(kiq_ring, mmCP_VMID_RESET, tmp); - amdgpu_ring_commit(kiq_ring); - - spin_unlock_irqrestore(&kiq->ring_lock, flags); - - r = amdgpu_ring_test_ring(kiq_ring); - if (r) - return r; - - if (amdgpu_ring_alloc(ring, 7 + 12 + 5)) - return -ENOMEM; - gfx_v7_0_ring_emit_fence_gfx(ring, ring->fence_drv.gpu_addr, - ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); - gfx_v7_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff); - gfx_v7_0_ring_emit_wreg(ring, mmCP_VMID_RESET, 0); - - return amdgpu_ring_test_ring(ring); -} - static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { .name = "gfx_v7_0", .early_init = gfx_v7_0_early_init, @@ -5045,7 +4933,6 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .emit_wreg = gfx_v7_0_ring_emit_wreg, .soft_recovery = gfx_v7_0_ring_soft_recovery, .emit_mem_sync = gfx_v7_0_emit_mem_sync, - .reset = gfx_v7_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index d116a2e2f469..367449d8061b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1223,48 +1223,22 @@ out: static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; if (adev->gfx.rlc.cs_data == NULL) return; if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); - buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - - PACKET3_SET_CONTEXT_REG_START); + buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config); buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + amdgpu_gfx_csb_preamble_end(buffer, count); } static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev) @@ -4666,6 +4640,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); /* reset ring buffer */ ring->wptr = 0; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); amdgpu_ring_clear_ring(ring); } return 0; @@ -4683,60 +4658,25 @@ static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; - int r; - - ring = &adev->gfx.kiq[0].ring; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); - if (unlikely(r != 0)) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } - - gfx_v8_0_kiq_init_queue(ring); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); + gfx_v8_0_kiq_init_queue(&adev->gfx.kiq[0].ring); return 0; } static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = NULL; - int r = 0, i; + int i, r; gfx_v8_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto done; - r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); - if (!r) { - r = gfx_v8_0_kcq_init_queue(ring); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v8_0_kcq_init_queue(&adev->gfx.compute_ring[i]); if (r) - goto done; + return r; } gfx_v8_0_set_mec_doorbell_range(adev); - r = gfx_v8_0_kiq_kcq_enable(adev); - if (r) - goto done; - -done: - return r; + return gfx_v8_0_kiq_kcq_enable(adev); } static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev) @@ -6400,34 +6340,6 @@ static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, amdgpu_ring_write(ring, val); } -static void gfx_v8_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, - int mem_space, int opt, uint32_t addr0, - uint32_t addr1, uint32_t ref, uint32_t mask, - uint32_t inv) -{ - amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); - amdgpu_ring_write(ring, - /* memory (1) or register (0) */ - (WAIT_REG_MEM_MEM_SPACE(mem_space) | - WAIT_REG_MEM_OPERATION(opt) | /* wait */ - WAIT_REG_MEM_FUNCTION(3) | /* equal */ - WAIT_REG_MEM_ENGINE(eng_sel))); - - if (mem_space) - BUG_ON(addr0 & 0x3); /* Dword align */ - amdgpu_ring_write(ring, addr0); - amdgpu_ring_write(ring, addr1); - amdgpu_ring_write(ring, ref); - amdgpu_ring_write(ring, mask); - amdgpu_ring_write(ring, inv); /* poll interval */ -} - -static void gfx_v8_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, - uint32_t val, uint32_t mask) -{ - gfx_v8_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); -} - static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) { struct amdgpu_device *adev = ring->adev; @@ -6904,48 +6816,6 @@ static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } -static int gfx_v8_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) -{ - struct amdgpu_device *adev = ring->adev; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - struct amdgpu_ring *kiq_ring = &kiq->ring; - unsigned long flags; - u32 tmp; - int r; - - if (amdgpu_sriov_vf(adev)) - return -EINVAL; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - spin_lock_irqsave(&kiq->ring_lock, flags); - - if (amdgpu_ring_alloc(kiq_ring, 5)) { - spin_unlock_irqrestore(&kiq->ring_lock, flags); - return -ENOMEM; - } - - tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); - gfx_v8_0_ring_emit_wreg(kiq_ring, mmCP_VMID_RESET, tmp); - amdgpu_ring_commit(kiq_ring); - - spin_unlock_irqrestore(&kiq->ring_lock, flags); - - r = amdgpu_ring_test_ring(kiq_ring); - if (r) - return r; - - if (amdgpu_ring_alloc(ring, 7 + 12 + 5)) - return -ENOMEM; - gfx_v8_0_ring_emit_fence_gfx(ring, ring->fence_drv.gpu_addr, - ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); - gfx_v8_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff); - gfx_v8_0_ring_emit_wreg(ring, mmCP_VMID_RESET, 0); - - return amdgpu_ring_test_ring(ring); -} - static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .name = "gfx_v8_0", .early_init = gfx_v8_0_early_init, @@ -7011,7 +6881,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .emit_wreg = gfx_v8_0_ring_emit_wreg, .soft_recovery = gfx_v8_0_ring_soft_recovery, .emit_mem_sync = gfx_v8_0_emit_mem_sync, - .reset = gfx_v8_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d345285ea885..20b30f4b3c7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -225,17 +225,36 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = { SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE), SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT), SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6), - /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0), SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1), SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2), - SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3) + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3), + /* packet headers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP) }; static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = { @@ -277,6 +296,14 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = { SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO), SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI), SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP) }; enum ta_ras_gfx_subblock { @@ -1269,6 +1296,7 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) adev->gfx.mec_fw_write_wait = false; if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) && + (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) && ((adev->gfx.mec_fw_version < 0x000001a5) || (adev->gfx.mec_feature_version < 46) || (adev->gfx.pfp_fw_version < 0x000000b7) || @@ -1623,42 +1651,16 @@ static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; if (adev->gfx.rlc.cs_data == NULL) return; if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); + amdgpu_gfx_csb_preamble_end(buffer, count); } static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) @@ -2233,6 +2235,25 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block) } switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(9, 0, 1): + case IP_VERSION(9, 2, 1): + case IP_VERSION(9, 4, 0): + case IP_VERSION(9, 2, 2): + case IP_VERSION(9, 1, 0): + case IP_VERSION(9, 3, 0): + adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 167 && + adev->gfx.pfp_fw_version >= 196 && + adev->gfx.mec_fw_version >= 474) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; case IP_VERSION(9, 4, 2): adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex; adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex); @@ -2389,6 +2410,8 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block) amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); adev->gfx.compute_supported_reset = amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + if (!amdgpu_sriov_vf(adev)) + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); if (r) { @@ -3890,55 +3913,23 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore) static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; - int r; - - ring = &adev->gfx.kiq[0].ring; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (unlikely(r != 0)) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } - - gfx_v9_0_kiq_init_queue(ring); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); + gfx_v9_0_kiq_init_queue(&adev->gfx.kiq[0].ring); return 0; } static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = NULL; - int r = 0, i; + int i, r; gfx_v9_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto done; - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v9_0_kcq_init_queue(ring, false); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v9_0_kcq_init_queue(&adev->gfx.compute_ring[i], false); if (r) - goto done; + return r; } - r = amdgpu_gfx_enable_kcq(adev, 0); -done: - return r; + return amdgpu_gfx_enable_kcq(adev, 0); } static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) @@ -5472,16 +5463,8 @@ static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring, payload_size = sizeof(struct v9_ce_ib_state); - if (ring->is_mes_queue) { - payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); - } else { - payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; - } + payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size); @@ -5504,16 +5487,8 @@ static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring, payload_size = sizeof(struct v9_de_ib_state); - if (ring->is_mes_queue) { - payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); - } else { - payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; - } + payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status = IB_COMPLETION_STATUS_PREEMPTED; @@ -5703,19 +5678,9 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) cnt = (sizeof(ce_payload) >> 2) + 4 - 2; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ce_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - } else { - offset = offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - } + offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | @@ -5801,28 +5766,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bo void *de_payload_cpu_addr; int cnt; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gds_backup) + - offsetof(struct v9_gfx_meta_data, de_payload); - gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - offset = offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + - AMDGPU_CSA_SIZE - adev->gds.gds_size, - PAGE_SIZE); - } + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); if (usegds) { de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); @@ -7223,53 +7173,9 @@ static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) amdgpu_ring_insert_nop(ring, num_nop - 1); } -static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) -{ - struct amdgpu_device *adev = ring->adev; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - struct amdgpu_ring *kiq_ring = &kiq->ring; - unsigned long flags; - u32 tmp; - int r; - - if (amdgpu_sriov_vf(adev)) - return -EINVAL; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - spin_lock_irqsave(&kiq->ring_lock, flags); - - if (amdgpu_ring_alloc(kiq_ring, 5)) { - spin_unlock_irqrestore(&kiq->ring_lock, flags); - return -ENOMEM; - } - - tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); - gfx_v9_0_ring_emit_wreg(kiq_ring, - SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp); - amdgpu_ring_commit(kiq_ring); - - spin_unlock_irqrestore(&kiq->ring_lock, flags); - - r = amdgpu_ring_test_ring(kiq_ring); - if (r) - return r; - - if (amdgpu_ring_alloc(ring, 7 + 7 + 5)) - return -ENOMEM; - gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr, - ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); - gfx_v9_0_ring_emit_reg_wait(ring, - SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff); - gfx_v9_0_ring_emit_wreg(ring, - SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0); - - return amdgpu_ring_test_ring(ring); -} - static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, - unsigned int vmid) + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; @@ -7277,12 +7183,11 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int i, r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + spin_lock_irqsave(&kiq->ring_lock, flags); if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { @@ -7319,20 +7224,9 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, return r; } - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)){ - dev_err(adev->dev, "fail to resv mqd_obj\n"); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v9_0_kcq_init_queue(ring, true); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v9_0_kcq_init_queue(ring, true); if (r) { - dev_err(adev->dev, "fail to unresv mqd_obj\n"); + dev_err(adev->dev, "fail to init kcq\n"); return r; } spin_lock_irqsave(&kiq->ring_lock, flags); @@ -7343,13 +7237,13 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, } kiq->pmf->kiq_map_queues(kiq_ring, ring); amdgpu_ring_commit(kiq_ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) { DRM_ERROR("fail to remap queue\n"); return r; } - return amdgpu_ring_test_ring(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) @@ -7381,9 +7275,14 @@ static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); for (reg = 0; reg < reg_count; reg++) { - drm_printf(p, "%-50s \t 0x%08x\n", - gc_cp_reg_list_9[reg].reg_name, - adev->gfx.ip_dump_compute_queues[index + reg]); + if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, "%-50s \t 0x%08x\n", + "mmCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues[index + reg]); + else + drm_printf(p, "%-50s \t 0x%08x\n", + gc_cp_reg_list_9[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); } index += reg_count; } @@ -7420,9 +7319,13 @@ static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block) soc15_grbm_select(adev, 1 + i, j, k, 0, 0); for (reg = 0; reg < reg_count; reg++) { - adev->gfx.ip_dump_compute_queues[index + reg] = - RREG32(SOC15_REG_ENTRY_OFFSET( - gc_cp_reg_list_9[reg])); + if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_9[reg])); } index += reg_count; } @@ -7436,8 +7339,14 @@ static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block) static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + /* Emit the cleaner shader */ - amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + else + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER_9_0, 0)); + amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ } @@ -7544,7 +7453,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, - .reset = gfx_v9_0_reset_kgq, .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index d81449f9d822..c48cd47b531f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -1547,7 +1547,7 @@ static void gfx_v9_4_2_log_utc_edc_count(struct amdgpu_device *adev, { uint32_t bank, way, mem; static const char * const vml2_way_str[] = { "BIGK", "4K" }; - static const char * const utcl2_rounter_str[] = { "VMC", "APT" }; + static const char * const utcl2_router_str[] = { "VMC", "APT" }; mem = instance % blk->num_mem_blocks; way = (instance / blk->num_mem_blocks) % blk->num_ways; @@ -1568,7 +1568,7 @@ static void gfx_v9_4_2_log_utc_edc_count(struct amdgpu_device *adev, dev_info( adev->dev, "GFX SubBlock UTCL2_ROUTER_IFIF%d_GROUP0_%s, SED %d, DED %d\n", - bank, utcl2_rounter_str[mem], sec_cnt, ded_cnt); + bank, utcl2_router_str[mem], sec_cnt, ded_cnt); break; case ATC_L2_CACHE_2M: dev_info( diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 736398b0d16d..51babf5c78c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -105,9 +105,6 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_SAFE_MODE), SOC15_REG_ENTRY_STR(GC, 0, regRLC_INT_STAT), SOC15_REG_ENTRY_STR(GC, 0, regRLC_GPM_GENERAL_6), - /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME2_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), @@ -154,6 +151,14 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GFX_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), }; struct amdgpu_gfx_ras gfx_v9_4_3_ras; @@ -867,13 +872,12 @@ static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle, switch (type) { case ACA_SMU_TYPE_UE: - bank->aca_err_type = ACA_BANK_ERR_UE_DE_DECODE(bank); + bank->aca_err_type = ACA_ERROR_TYPE_UE; ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, 1ULL); break; case ACA_SMU_TYPE_CE: - bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); - ret = aca_error_cache_log_bank_error(handle, &info, - bank->aca_err_type, + bank->aca_err_type = ACA_ERROR_TYPE_CE; + ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, ACA_REG__MISC0__ERRCNT(misc0)); break; default: @@ -1144,7 +1148,15 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): - if (adev->gfx.mec_fw_version >= 155) { + if ((adev->gfx.mec_fw_version >= 155) && + !amdgpu_sriov_vf(adev)) { + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE; + } + break; + case IP_VERSION(9, 5, 0): + if ((adev->gfx.mec_fw_version >= 21) && + !amdgpu_sriov_vf(adev)) { adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE; } @@ -1263,6 +1275,22 @@ static void gfx_v9_4_3_xcc_init_gds_vmid(struct amdgpu_device *adev, int xcc_id) } } +/* For ASICs that needs xnack chain and MEC version supports, set SG_CONFIG1 + * DISABLE_XNACK_CHECK_IN_RETRY_DISABLE bit and inform KFD to set xnack_chain + * bit in SET_RESOURCES + */ +static void gfx_v9_4_3_xcc_init_sq(struct amdgpu_device *adev, int xcc_id) +{ + uint32_t data; + + if (!(adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN)) + return; + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_CONFIG1); + data = REG_SET_FIELD(data, SQ_CONFIG1, DISABLE_XNACK_CHECK_IN_RETRY_DISABLE, 1); + WREG32_SOC15(GC, xcc_id, regSQ_CONFIG1, data); +} + static void gfx_v9_4_3_xcc_constants_init(struct amdgpu_device *adev, int xcc_id) { @@ -1307,6 +1335,7 @@ static void gfx_v9_4_3_xcc_constants_init(struct amdgpu_device *adev, gfx_v9_4_3_xcc_init_compute_vmid(adev, xcc_id); gfx_v9_4_3_xcc_init_gds_vmid(adev, xcc_id); + gfx_v9_4_3_xcc_init_sq(adev, xcc_id); } static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) @@ -1319,6 +1348,22 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) adev->gfx.config.db_debug2 = RREG32_SOC15(GC, GET_INST(GC, 0), regDB_DEBUG2); + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + /* ToDo: GC 9.4.4 */ + case IP_VERSION(9, 4, 3): + if (adev->gfx.mec_fw_version >= 184 && + (amdgpu_sriov_reg_access_sq_config(adev) || + !amdgpu_sriov_vf(adev))) + adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN; + break; + case IP_VERSION(9, 5, 0): + if (adev->gfx.mec_fw_version >= 23) + adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN; + break; + default: + break; + } + for (i = 0; i < num_xcc; i++) gfx_v9_4_3_xcc_constants_init(adev, i); } @@ -2168,55 +2213,27 @@ static int gfx_v9_4_3_xcc_kcq_fini_register(struct amdgpu_device *adev, int xcc_ static int gfx_v9_4_3_xcc_kiq_resume(struct amdgpu_device *adev, int xcc_id) { - struct amdgpu_ring *ring; - int r; - - ring = &adev->gfx.kiq[xcc_id].ring; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - return r; - - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (unlikely(r != 0)) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } - - gfx_v9_4_3_xcc_kiq_init_queue(ring, xcc_id); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); + gfx_v9_4_3_xcc_kiq_init_queue(&adev->gfx.kiq[xcc_id].ring, xcc_id); return 0; } static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id) { - struct amdgpu_ring *ring = NULL; - int r = 0, i; + struct amdgpu_ring *ring; + int i, r; gfx_v9_4_3_xcc_cp_compute_enable(adev, true, xcc_id); for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto done; - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + ring = &adev->gfx.compute_ring[i + xcc_id * + adev->gfx.num_compute_rings]; + + r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false); if (r) - goto done; + return r; } - r = amdgpu_gfx_enable_kcq(adev, xcc_id); -done: - return r; + return amdgpu_gfx_enable_kcq(adev, xcc_id); } static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id) @@ -3476,9 +3493,7 @@ static int gfx_v9_4_3_unmap_done(struct amdgpu_device *adev, uint32_t me, static bool gfx_v9_4_3_pipe_reset_support(struct amdgpu_device *adev) { - /*TODO: Need check gfx9.4.4 mec fw whether supports pipe reset as well.*/ - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) && - adev->gfx.mec_fw_version >= 0x0000009b) + if (!!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)) return true; else dev_warn_once(adev->dev, "Please use the latest MEC version to see whether support pipe reset\n"); @@ -3541,7 +3556,8 @@ static int gfx_v9_4_3_reset_hw_pipe(struct amdgpu_ring *ring) } static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, - unsigned int vmid) + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; struct amdgpu_kiq *kiq = &adev->gfx.kiq[ring->xcc_id]; @@ -3549,12 +3565,11 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + spin_lock_irqsave(&kiq->ring_lock, flags); if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { @@ -3580,7 +3595,9 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, dev_err(adev->dev, "fail to wait on hqd deactive and will try pipe reset\n"); pipe_reset: - if(r) { + if (r) { + if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)) + return -EOPNOTSUPP; r = gfx_v9_4_3_reset_hw_pipe(ring); dev_info(adev->dev, "ring: %s pipe reset :%s\n", ring->name, r ? "failed" : "successfully"); @@ -3588,20 +3605,9 @@ pipe_reset: return r; } - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)){ - dev_err(adev->dev, "fail to resv mqd_obj\n"); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } - amdgpu_bo_unreserve(ring->mqd_obj); + r = gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true); if (r) { - dev_err(adev->dev, "fail to unresv mqd_obj\n"); + dev_err(adev->dev, "fail to init kcq\n"); return r; } spin_lock_irqsave(&kiq->ring_lock, flags); @@ -3612,14 +3618,14 @@ pipe_reset: } kiq->pmf->kiq_map_queues(kiq_ring, ring); amdgpu_ring_commit(kiq_ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) { dev_err(adev->dev, "fail to remap queue\n"); return r; } - return amdgpu_ring_test_ring(ring); + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } enum amdgpu_gfx_cp_ras_mem_id { @@ -4598,12 +4604,21 @@ static void gfx_v9_4_3_ip_print(struct amdgpu_ip_block *ip_block, struct drm_pri "\nxcc:%d mec:%d, pipe:%d, queue:%d\n", xcc_id, i, j, k); for (reg = 0; reg < reg_count; reg++) { - drm_printf(p, - "%-50s \t 0x%08x\n", - gc_cp_reg_list_9_4_3[reg].reg_name, - adev->gfx.ip_dump_compute_queues - [xcc_offset + inst_offset + - reg]); + if (i && gc_cp_reg_list_9_4_3[reg].reg_offset == + regCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, + "%-50s \t 0x%08x\n", + "regCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues + [xcc_offset + inst_offset + + reg]); + else + drm_printf(p, + "%-50s \t 0x%08x\n", + gc_cp_reg_list_9_4_3[reg].reg_name, + adev->gfx.ip_dump_compute_queues + [xcc_offset + inst_offset + + reg]); } inst_offset += reg_count; } @@ -4652,12 +4667,20 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block) GET_INST(GC, xcc_id)); for (reg = 0; reg < reg_count; reg++) { - adev->gfx.ip_dump_compute_queues - [xcc_offset + - inst_offset + reg] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST( - gc_cp_reg_list_9_4_3[reg], - GET_INST(GC, xcc_id))); + if (i && gc_cp_reg_list_9_4_3[reg].reg_offset == + regCP_MEC_ME1_HEADER_DUMP) + adev->gfx.ip_dump_compute_queues + [xcc_offset + + inst_offset + reg] = + RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), + regCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues + [xcc_offset + + inst_offset + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST( + gc_cp_reg_list_9_4_3[reg], + GET_INST(GC, xcc_id))); } inst_offset += reg_count; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index cb25f7f0dfc1..6c03bf9f1ae8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -74,6 +74,8 @@ static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev, static void gfxhub_v1_2_xcc_init_gart_aperture_regs(struct amdgpu_device *adev, uint32_t xcc_mask) { + uint64_t gart_start = amdgpu_virt_xgmi_migrate_enabled(adev) ? + adev->gmc.vram_start : adev->gmc.fb_start; uint64_t pt_base; int i; @@ -91,10 +93,10 @@ static void gfxhub_v1_2_xcc_init_gart_aperture_regs(struct amdgpu_device *adev, if (adev->gmc.pdb0_bo) { WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, - (u32)(adev->gmc.fb_start >> 12)); + (u32)(gart_start >> 12)); WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, - (u32)(adev->gmc.fb_start >> 44)); + (u32)(gart_start >> 44)); WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, @@ -180,7 +182,7 @@ gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev, /* In the case squeezing vram into GART aperture, we don't use * FB aperture and AGP aperture. Disable them. */ - if (adev->gmc.pdb0_bo) { + if (adev->gmc.pdb0_bo && adev->gmc.xgmi.connected_to_cpu) { WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP, 0); WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF); WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 95d894a231fc..7923f491cf73 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -164,10 +164,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, entry->src_id, entry->ring_id, entry->vmid, entry->pasid); task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); if (task_info) { - dev_err(adev->dev, - " in process %s pid %d thread %s pid %d\n", - task_info->process_name, task_info->tgid, - task_info->task_name, task_info->pid); + amdgpu_vm_print_task_info(adev, task_info); amdgpu_vm_put_task_info(task_info); } @@ -268,7 +265,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal @@ -428,10 +425,6 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int struct amdgpu_device *adev = ring->adev; uint32_t reg; - /* MES fw manages IH_VMID_x_LUT updating */ - if (ring->is_mes_queue) - return; - if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else @@ -969,7 +962,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) adev->hdp.funcs->init_registers(adev); /* Flush HDP after it is initialized */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index ad099f136f84..f15d691e9a20 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -134,10 +134,7 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev, entry->src_id, entry->ring_id, entry->vmid, entry->pasid); task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); if (task_info) { - dev_err(adev->dev, - " in process %s pid %d thread %s pid %d)\n", - task_info->process_name, task_info->tgid, - task_info->task_name, task_info->pid); + amdgpu_vm_print_task_info(adev, task_info); amdgpu_vm_put_task_info(task_info); } @@ -229,7 +226,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal @@ -393,10 +390,6 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int struct amdgpu_device *adev = ring->adev; uint32_t reg; - /* MES fw manages IH_VMID_x_LUT updating */ - if (ring->is_mes_queue) - return; - if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; else @@ -752,6 +745,18 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gmc.vram_type = vram_type; adev->gmc.vram_vendor = vram_vendor; + /* The mall_size is already calculated as mall_size_per_umc * num_umc. + * However, for gfx1151, which features a 2-to-1 UMC mapping, + * the result must be multiplied by 2 to determine the actual mall size. + */ + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(11, 5, 1): + adev->gmc.mall_size *= 2; + break; + default: + break; + } + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): @@ -832,7 +837,7 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.first_kfd_vmid = 8; + adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8; amdgpu_vm_manager_init(adev); @@ -899,7 +904,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev) return r; /* Flush HDP after it is initialized */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index 05c026d0b0d9..de763105fdfd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -127,10 +127,7 @@ static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev, entry->src_id, entry->ring_id, entry->vmid, entry->pasid); task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); if (task_info) { - dev_err(adev->dev, - " in process %s pid %d thread %s pid %d)\n", - task_info->process_name, task_info->tgid, - task_info->task_name, task_info->pid); + amdgpu_vm_print_task_info(adev, task_info); amdgpu_vm_put_task_info(task_info); } @@ -297,7 +294,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, return; /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal @@ -413,10 +410,6 @@ static void gmc_v12_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid struct amdgpu_device *adev = ring->adev; uint32_t reg; - /* MES fw manages IH_VMID_x_LUT updating */ - if (ring->is_mes_queue) - return; - if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; else @@ -820,7 +813,7 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.first_kfd_vmid = 8; + adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8; amdgpu_vm_manager_init(adev); @@ -881,7 +874,7 @@ static int gmc_v12_0_gart_enable(struct amdgpu_device *adev) return r; /* Flush HDP after it is initialized */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index a992e79d9581..8030fcd64210 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -249,7 +249,7 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) /* disable VGA render */ tmp = RREG32(mmVGA_RENDER_CONTROL); - tmp &= ~VGA_VSTATUS_CNTL; + tmp &= VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK; WREG32(mmVGA_RENDER_CONTROL, tmp); } /* Update configuration */ @@ -627,17 +627,16 @@ static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev, "write" : "read", block, mc_client, mc_id); } -/* static const u32 mc_cg_registers[] = { - MC_HUB_MISC_HUB_CG, - MC_HUB_MISC_SIP_CG, - MC_HUB_MISC_VM_CG, - MC_XPB_CLK_GAT, - ATC_MISC_CG, - MC_CITF_MISC_WR_CG, - MC_CITF_MISC_RD_CG, - MC_CITF_MISC_VM_CG, - VM_L2_CG, + mmMC_HUB_MISC_HUB_CG, + mmMC_HUB_MISC_SIP_CG, + mmMC_HUB_MISC_VM_CG, + mmMC_XPB_CLK_GAT, + mmATC_MISC_CG, + mmMC_CITF_MISC_WR_CG, + mmMC_CITF_MISC_RD_CG, + mmMC_CITF_MISC_VM_CG, + mmVM_L2_CG, }; static const u32 mc_cg_ls_en[] = { @@ -672,7 +671,7 @@ static void gmc_v6_0_enable_mc_ls(struct amdgpu_device *adev, for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { orig = data = RREG32(mc_cg_registers[i]); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_LS)) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) data |= mc_cg_ls_en[i]; else data &= ~mc_cg_ls_en[i]; @@ -689,7 +688,7 @@ static void gmc_v6_0_enable_mc_mgcg(struct amdgpu_device *adev, for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { orig = data = RREG32(mc_cg_registers[i]); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_MGCG)) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) data |= mc_cg_en[i]; else data &= ~mc_cg_en[i]; @@ -705,7 +704,7 @@ static void gmc_v6_0_enable_bif_mgls(struct amdgpu_device *adev, orig = data = RREG32_PCIE(ixPCIE_CNTL2); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_BIF_LS)) { + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) { data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_LS_EN, 1); data = REG_SET_FIELD(data, PCIE_CNTL2, MST_MEM_LS_EN, 1); data = REG_SET_FIELD(data, PCIE_CNTL2, REPLAY_MEM_LS_EN, 1); @@ -728,7 +727,7 @@ static void gmc_v6_0_enable_hdp_mgcg(struct amdgpu_device *adev, orig = data = RREG32(mmHDP_HOST_PATH_CNTL); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_MGCG)) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG)) data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 0); else data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 1); @@ -744,7 +743,7 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev, orig = data = RREG32(mmHDP_MEM_POWER_LS); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_LS)) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 1); else data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 0); @@ -752,7 +751,6 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev, if (orig != data) WREG32(mmHDP_MEM_POWER_LS, data); } -*/ static int gmc_v6_0_convert_vram_type(int mc_seq_vram_type) { @@ -1098,6 +1096,20 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev, static int gmc_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { + struct amdgpu_device *adev = ip_block->adev; + bool gate = false; + + if (state == AMD_CG_STATE_GATE) + gate = true; + + if (!(adev->flags & AMD_IS_APU)) { + gmc_v6_0_enable_mc_mgcg(adev, gate); + gmc_v6_0_enable_mc_ls(adev, gate); + } + gmc_v6_0_enable_bif_mgls(adev, gate); + gmc_v6_0_enable_hdp_mgcg(adev, gate); + gmc_v6_0_enable_hdp_ls(adev, gate); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 83e39f16044a..a8d5795084fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1157,17 +1157,10 @@ static bool gmc_v7_0_is_idle(struct amdgpu_ip_block *ip_block) static int gmc_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned int i; - u32 tmp; struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - /* read MC_STATUS */ - tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK | - SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK | - SRBM_STATUS__MCC_BUSY_MASK | - SRBM_STATUS__MCD_BUSY_MASK | - SRBM_STATUS__VMC_BUSY_MASK); - if (!tmp) + if (gmc_v7_0_is_idle(ip_block)) return 0; udelay(1); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 99ca08e9bdb5..b45fa0cea9d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1458,9 +1458,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); if (task_info) { - dev_err(adev->dev, " for process %s pid %d thread %s pid %d\n", - task_info->process_name, task_info->tgid, - task_info->task_name, task_info->pid); + amdgpu_vm_print_task_info(adev, task_info); amdgpu_vm_put_task_info(task_info); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 783e0c3b86b4..c4d69cf4e06c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -78,8 +78,6 @@ #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2 0x05ea #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX 2 -#define MAX_MEM_RANGES 8 - static const char * const gfxhub_client_ids[] = { "CB", "DB", @@ -411,11 +409,6 @@ static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = { (0x001d43e0 + 0x00001800), }; -static inline bool gmc_v9_0_is_multi_chiplet(struct amdgpu_device *adev) -{ - return !!adev->aid_mask; -} - static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned int type, @@ -641,10 +634,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); if (task_info) { - dev_err(adev->dev, - " for process %s pid %d thread %s pid %d)\n", - task_info->process_name, task_info->tgid, - task_info->task_name, task_info->pid); + amdgpu_vm_print_task_info(adev, task_info); amdgpu_vm_put_task_info(task_info); } @@ -652,7 +642,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, addr, entry->client_id, soc15_ih_clientid_name[entry->client_id]); - if (gmc_v9_0_is_multi_chiplet(adev)) + if (amdgpu_is_multi_aid(adev)) dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n", node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4, node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? ".XCD1" : ""); @@ -801,7 +791,7 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, uint32_t vmhub) { if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || - gmc_v9_0_is_multi_chiplet(adev)) + amdgpu_is_multi_aid(adev)) return false; return ((vmhub == AMDGPU_MMHUB0(0) || @@ -1131,8 +1121,8 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, } static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, + struct amdgpu_vm *vm, struct amdgpu_bo *bo, - struct amdgpu_bo_va_mapping *mapping, uint64_t *flags) { struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -1142,7 +1132,6 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, AMDGPU_GEM_CREATE_EXT_COHERENT); bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT; bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; - struct amdgpu_vm *vm = mapping->bo_va->base.vm; unsigned int mtype_local, mtype; uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0); bool snoop = false; @@ -1172,7 +1161,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, mtype = MTYPE_UC; else mtype = MTYPE_NC; - if (mapping->bo_va->is_xgmi) + if (amdgpu_xgmi_same_hive(adev, bo_adev)) snoop = true; } } else { @@ -1213,10 +1202,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, if (uncached) { mtype = MTYPE_UC; } else if (ext_coherent) { - if (gc_ip_version == IP_VERSION(9, 5, 0) || adev->rev_id) - mtype = is_local ? MTYPE_CC : MTYPE_UC; - else - mtype = MTYPE_UC; + mtype = is_local ? MTYPE_CC : MTYPE_UC; } else if (adev->flags & AMD_IS_APU) { mtype = is_local ? mtype_local : MTYPE_NC; } else { @@ -1267,7 +1253,8 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, } if ((*flags & AMDGPU_PTE_VALID) && bo) - gmc_v9_0_get_coherence_flags(adev, bo, mapping, flags); + gmc_v9_0_get_coherence_flags(adev, mapping->bo_va->base.vm, bo, + flags); } static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, @@ -1336,7 +1323,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, mtype_local = MTYPE_CC; *flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype_local); - } else if (adev->rev_id) { + } else { /* MTYPE_UC case */ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC); } @@ -1388,46 +1375,6 @@ static unsigned int gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) return size; } -static enum amdgpu_memory_partition -gmc_v9_0_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes) -{ - enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE; - - if (adev->nbio.funcs->get_memory_partition_mode) - mode = adev->nbio.funcs->get_memory_partition_mode(adev, - supp_modes); - - return mode; -} - -static enum amdgpu_memory_partition -gmc_v9_0_query_vf_memory_partition(struct amdgpu_device *adev) -{ - switch (adev->gmc.num_mem_partitions) { - case 0: - return UNKNOWN_MEMORY_PARTITION_MODE; - case 1: - return AMDGPU_NPS1_PARTITION_MODE; - case 2: - return AMDGPU_NPS2_PARTITION_MODE; - case 4: - return AMDGPU_NPS4_PARTITION_MODE; - default: - return AMDGPU_NPS1_PARTITION_MODE; - } - - return AMDGPU_NPS1_PARTITION_MODE; -} - -static enum amdgpu_memory_partition -gmc_v9_0_query_memory_partition(struct amdgpu_device *adev) -{ - if (amdgpu_sriov_vf(adev)) - return gmc_v9_0_query_vf_memory_partition(adev); - - return gmc_v9_0_get_memory_partition(adev, NULL); -} - static bool gmc_v9_0_need_reset_on_init(struct amdgpu_device *adev) { if (adev->nbio.funcs && adev->nbio.funcs->is_nps_switch_requested && @@ -1449,7 +1396,7 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .get_vm_pte = gmc_v9_0_get_vm_pte, .override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags, .get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size, - .query_mem_partition_mode = &gmc_v9_0_query_memory_partition, + .query_mem_partition_mode = &amdgpu_gmc_query_memory_partition, .request_mem_partition_mode = &amdgpu_gmc_request_memory_partition, .need_reset_on_init = &gmc_v9_0_need_reset_on_init, }; @@ -1505,7 +1452,6 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.umc_inst_num = UMC_V12_0_UMC_INSTANCE_NUM; adev->umc.node_inst_num /= UMC_V12_0_UMC_INSTANCE_NUM; adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET; - adev->umc.retire_unit = UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) adev->umc.ras = &umc_v12_0_ras; break; @@ -1557,7 +1503,7 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev) { - if (gmc_v9_0_is_multi_chiplet(adev)) + if (amdgpu_is_multi_aid(adev)) adev->gfxhub.funcs = &gfxhub_v1_2_funcs; else adev->gfxhub.funcs = &gfxhub_v1_0_funcs; @@ -1603,7 +1549,7 @@ static void gmc_v9_0_init_nps_details(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU)) return; - mode = gmc_v9_0_get_memory_partition(adev, &supp_modes); + mode = amdgpu_gmc_get_memory_partition(adev, &supp_modes); /* Mode detected by hardware and supported modes available */ if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) && supp_modes) { @@ -1639,7 +1585,7 @@ static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block) */ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || - gmc_v9_0_is_multi_chiplet(adev)) + amdgpu_is_multi_aid(adev)) adev->gmc.xgmi.supported = true; if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(6, 1, 0)) { @@ -1726,7 +1672,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; - if (adev->gmc.xgmi.connected_to_cpu) { + if (amdgpu_gmc_is_pdb0_enabled(adev)) { amdgpu_gmc_sysvm_location(adev, mc); } else { amdgpu_gmc_vram_location(adev, mc, base); @@ -1841,7 +1787,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) return 0; } - if (adev->gmc.xgmi.connected_to_cpu) { + if (amdgpu_gmc_is_pdb0_enabled(adev)) { adev->gmc.vmid0_page_table_depth = 1; adev->gmc.vmid0_page_table_block_size = 12; } else { @@ -1867,7 +1813,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) if (r) return r; - if (adev->gmc.xgmi.connected_to_cpu) + if (amdgpu_gmc_is_pdb0_enabled(adev)) r = amdgpu_gmc_pdb0_alloc(adev); } @@ -1889,192 +1835,13 @@ static void gmc_v9_0_save_registers(struct amdgpu_device *adev) adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0); } -static bool gmc_v9_0_validate_partition_info(struct amdgpu_device *adev) -{ - enum amdgpu_memory_partition mode; - u32 supp_modes; - bool valid; - - mode = gmc_v9_0_get_memory_partition(adev, &supp_modes); - - /* Mode detected by hardware not present in supported modes */ - if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) && - !(BIT(mode - 1) & supp_modes)) - return false; - - switch (mode) { - case UNKNOWN_MEMORY_PARTITION_MODE: - case AMDGPU_NPS1_PARTITION_MODE: - valid = (adev->gmc.num_mem_partitions == 1); - break; - case AMDGPU_NPS2_PARTITION_MODE: - valid = (adev->gmc.num_mem_partitions == 2); - break; - case AMDGPU_NPS4_PARTITION_MODE: - valid = (adev->gmc.num_mem_partitions == 3 || - adev->gmc.num_mem_partitions == 4); - break; - default: - valid = false; - } - - return valid; -} - -static bool gmc_v9_0_is_node_present(int *node_ids, int num_ids, int nid) -{ - int i; - - /* Check if node with id 'nid' is present in 'node_ids' array */ - for (i = 0; i < num_ids; ++i) - if (node_ids[i] == nid) - return true; - - return false; -} - -static void -gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev, - struct amdgpu_mem_partition_info *mem_ranges) -{ - struct amdgpu_numa_info numa_info; - int node_ids[MAX_MEM_RANGES]; - int num_ranges = 0, ret; - int num_xcc, xcc_id; - uint32_t xcc_mask; - - num_xcc = NUM_XCC(adev->gfx.xcc_mask); - xcc_mask = (1U << num_xcc) - 1; - - for_each_inst(xcc_id, xcc_mask) { - ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info); - if (ret) - continue; - - if (numa_info.nid == NUMA_NO_NODE) { - mem_ranges[0].size = numa_info.size; - mem_ranges[0].numa.node = numa_info.nid; - num_ranges = 1; - break; - } - - if (gmc_v9_0_is_node_present(node_ids, num_ranges, - numa_info.nid)) - continue; - - node_ids[num_ranges] = numa_info.nid; - mem_ranges[num_ranges].numa.node = numa_info.nid; - mem_ranges[num_ranges].size = numa_info.size; - ++num_ranges; - } - - adev->gmc.num_mem_partitions = num_ranges; -} - -static void -gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev, - struct amdgpu_mem_partition_info *mem_ranges) -{ - enum amdgpu_memory_partition mode; - u32 start_addr = 0, size; - int i, r, l; - - mode = gmc_v9_0_query_memory_partition(adev); - - switch (mode) { - case UNKNOWN_MEMORY_PARTITION_MODE: - adev->gmc.num_mem_partitions = 0; - break; - case AMDGPU_NPS1_PARTITION_MODE: - adev->gmc.num_mem_partitions = 1; - break; - case AMDGPU_NPS2_PARTITION_MODE: - adev->gmc.num_mem_partitions = 2; - break; - case AMDGPU_NPS4_PARTITION_MODE: - if (adev->flags & AMD_IS_APU) - adev->gmc.num_mem_partitions = 3; - else - adev->gmc.num_mem_partitions = 4; - break; - default: - adev->gmc.num_mem_partitions = 1; - break; - } - - /* Use NPS range info, if populated */ - r = amdgpu_gmc_get_nps_memranges(adev, mem_ranges, - &adev->gmc.num_mem_partitions); - if (!r) { - l = 0; - for (i = 1; i < adev->gmc.num_mem_partitions; ++i) { - if (mem_ranges[i].range.lpfn > - mem_ranges[i - 1].range.lpfn) - l = i; - } - - } else { - if (!adev->gmc.num_mem_partitions) { - dev_err(adev->dev, - "Not able to detect NPS mode, fall back to NPS1"); - adev->gmc.num_mem_partitions = 1; - } - /* Fallback to sw based calculation */ - size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT; - size /= adev->gmc.num_mem_partitions; - - for (i = 0; i < adev->gmc.num_mem_partitions; ++i) { - mem_ranges[i].range.fpfn = start_addr; - mem_ranges[i].size = - ((u64)size << AMDGPU_GPU_PAGE_SHIFT); - mem_ranges[i].range.lpfn = start_addr + size - 1; - start_addr += size; - } - - l = adev->gmc.num_mem_partitions - 1; - } - - /* Adjust the last one */ - mem_ranges[l].range.lpfn = - (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1; - mem_ranges[l].size = - adev->gmc.real_vram_size - - ((u64)mem_ranges[l].range.fpfn << AMDGPU_GPU_PAGE_SHIFT); -} - -static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev) -{ - bool valid; - - adev->gmc.mem_partitions = kcalloc(MAX_MEM_RANGES, - sizeof(struct amdgpu_mem_partition_info), - GFP_KERNEL); - if (!adev->gmc.mem_partitions) - return -ENOMEM; - - /* TODO : Get the range from PSP/Discovery for dGPU */ - if (adev->gmc.is_app_apu) - gmc_v9_0_init_acpi_mem_ranges(adev, adev->gmc.mem_partitions); - else - gmc_v9_0_init_sw_mem_ranges(adev, adev->gmc.mem_partitions); - - if (amdgpu_sriov_vf(adev)) - valid = true; - else - valid = gmc_v9_0_validate_partition_info(adev); - if (!valid) { - /* TODO: handle invalid case */ - dev_WARN(adev->dev, - "Mem ranges not matching with hardware config"); - } - - return 0; -} - static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev) { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM; adev->gmc.vram_width = 128 * 64; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E; } static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) @@ -2089,7 +1856,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) spin_lock_init(&adev->gmc.invalidate_lock); - if (gmc_v9_0_is_multi_chiplet(adev)) { + if (amdgpu_is_multi_aid(adev)) { gmc_v9_4_3_init_vram_info(adev); } else if (!adev->bios) { if (adev->flags & AMD_IS_APU) { @@ -2239,8 +2006,8 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) amdgpu_gmc_get_vbios_allocations(adev); - if (gmc_v9_0_is_multi_chiplet(adev)) { - r = gmc_v9_0_init_mem_ranges(adev); + if (amdgpu_is_multi_aid(adev)) { + r = amdgpu_gmc_init_mem_ranges(adev); if (r) return r; } @@ -2268,7 +2035,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vm_manager.first_kfd_vmid = (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || - gmc_v9_0_is_multi_chiplet(adev)) ? + amdgpu_is_multi_aid(adev)) ? 3 : 8; @@ -2280,7 +2047,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - if (gmc_v9_0_is_multi_chiplet(adev)) + if (amdgpu_is_multi_aid(adev)) amdgpu_gmc_sysfs_init(adev); return 0; @@ -2290,7 +2057,7 @@ static int gmc_v9_0_sw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - if (gmc_v9_0_is_multi_chiplet(adev)) + if (amdgpu_is_multi_aid(adev)) amdgpu_gmc_sysfs_fini(adev); amdgpu_gmc_ras_fini(adev); @@ -2364,7 +2131,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) { int r; - if (adev->gmc.xgmi.connected_to_cpu) + if (amdgpu_gmc_is_pdb0_enabled(adev)) amdgpu_gmc_init_pdb0(adev); if (adev->gart.bo == NULL) { @@ -2411,13 +2178,6 @@ static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block) adev->gmc.flush_tlb_needs_extra_type_2 = amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) && adev->gmc.xgmi.num_physical_nodes; - /* - * TODO: This workaround is badly documented and had a buggy - * implementation. We should probably verify what we do here. - */ - adev->gmc.flush_tlb_needs_extra_type_0 = - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) && - adev->rev_id == 0; /* The sequence of these two function calls matters.*/ gmc_v9_0_init_golden_registers(adev); @@ -2435,7 +2195,7 @@ static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block) adev->hdp.funcs->init_registers(adev); /* After HDP is initialized, flush HDP.*/ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) value = false; @@ -2529,7 +2289,7 @@ static int gmc_v9_0_resume(struct amdgpu_ip_block *ip_block) * information again. */ if (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS) { - gmc_v9_0_init_sw_mem_ranges(adev, adev->gmc.mem_partitions); + amdgpu_gmc_init_sw_mem_ranges(adev, adev->gmc.mem_partitions); adev->gmc.reset_flags &= ~AMDGPU_GMC_INIT_RESET_NPS; } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index f1dc13b3ab38..e6c0d86d3486 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -36,17 +36,6 @@ #define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK 0x00020000L #define mmHDP_MEM_POWER_CTRL_BASE_IDX 0 -static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) { - WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); - } else { - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - } -} - static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -180,7 +169,7 @@ struct amdgpu_hdp_ras hdp_v4_0_ras = { }; const struct amdgpu_hdp_funcs hdp_v4_0_funcs = { - .flush_hdp = hdp_v4_0_flush_hdp, + .flush_hdp = amdgpu_hdp_generic_flush, .invalidate_hdp = hdp_v4_0_invalidate_hdp, .update_clock_gating = hdp_v4_0_update_clock_gating, .get_clock_gating_state = hdp_v4_0_get_clockgating_state, diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index 43195c079748..8bc001dc9f63 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -27,17 +27,6 @@ #include "hdp/hdp_5_0_0_sh_mask.h" #include <uapi/linux/kfd_ioctl.h> -static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) { - WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); - } else { - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - } -} - static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -217,7 +206,7 @@ static void hdp_v5_0_init_registers(struct amdgpu_device *adev) } const struct amdgpu_hdp_funcs hdp_v5_0_funcs = { - .flush_hdp = hdp_v5_0_flush_hdp, + .flush_hdp = amdgpu_hdp_generic_flush, .invalidate_hdp = hdp_v5_0_invalidate_hdp, .update_clock_gating = hdp_v5_0_update_clock_gating, .get_clock_gating_state = hdp_v5_0_get_clockgating_state, diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c index fcb8dd2876bc..40940b4ab400 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c @@ -33,7 +33,17 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev, if (!ring || !ring->funcs->emit_wreg) { WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + if (amdgpu_sriov_vf(adev)) { + /* this is fine because SR_IOV doesn't remap the register */ + RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + } else { + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); + } } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index a88d25a06c29..ec20daf4272c 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -30,17 +30,6 @@ #define regHDP_CLK_CNTL_V6_1 0xd5 #define regHDP_CLK_CNTL_V6_1_BASE_IDX 0 -static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) { - WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); - } else { - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - } -} - static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev, bool enable) { @@ -149,7 +138,7 @@ static void hdp_v6_0_get_clockgating_state(struct amdgpu_device *adev, } const struct amdgpu_hdp_funcs hdp_v6_0_funcs = { - .flush_hdp = hdp_v6_0_flush_hdp, + .flush_hdp = amdgpu_hdp_generic_flush, .update_clock_gating = hdp_v6_0_update_clock_gating, .get_clock_gating_state = hdp_v6_0_get_clockgating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c index 49f7eb4fbd11..ed1debc03507 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -27,17 +27,6 @@ #include "hdp/hdp_7_0_0_sh_mask.h" #include <uapi/linux/kfd_ioctl.h> -static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) { - WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); - } else { - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - } -} - static void hdp_v7_0_update_clock_gating(struct amdgpu_device *adev, bool enable) { @@ -137,7 +126,7 @@ static void hdp_v7_0_get_clockgating_state(struct amdgpu_device *adev, } const struct amdgpu_hdp_funcs hdp_v7_0_funcs = { - .flush_hdp = hdp_v7_0_flush_hdp, + .flush_hdp = amdgpu_hdp_generic_flush, .update_clock_gating = hdp_v7_0_update_clock_gating, .get_clock_gating_state = hdp_v7_0_get_clockgating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index eb4185dcbd1d..5900b560b7de 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -349,6 +349,7 @@ static int ih_v6_0_irq_init(struct amdgpu_device *adev) if (ret) return ret; } + ih[i]->overflow = false; } /* update doorbell range for ih ring 0 */ @@ -446,7 +447,10 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev, wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr); if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) goto out; - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + if (!amdgpu_sriov_vf(adev)) + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + else + ih->overflow = true; /* When a ring buffer overflow happen start parsing interrupt * from the last not overwritten vector (wptr + 32). Hopefully diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index cfa91d709d49..cc626036ed9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -32,6 +32,7 @@ #include "gc/gc_11_0_0_sh_mask.h" MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu_kicker.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin"); @@ -51,8 +52,12 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_imu.bin", ucode_prefix); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c index df898dbb746e..58cd87db8061 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c @@ -34,12 +34,13 @@ MODULE_FIRMWARE("amdgpu/gc_12_0_0_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_1_imu_kicker.bin"); #define TRANSFER_RAM_MASK 0x001c0000 static int imu_v12_0_init_microcode(struct amdgpu_device *adev) { - char ucode_prefix[15]; + char ucode_prefix[30]; int err; const struct imu_firmware_header_v1_0 *imu_hdr; struct amdgpu_firmware_info *info = NULL; @@ -47,8 +48,12 @@ static int imu_v12_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_imu.bin", ucode_prefix); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu.bin", ucode_prefix); if (err) goto out; @@ -362,7 +367,7 @@ static void program_imu_rlc_ram(struct amdgpu_device *adev, static void imu_v12_0_program_rlc_ram(struct amdgpu_device *adev) { u32 reg_data, size = 0; - const u32 *data; + const u32 *data = NULL; int r = -EINVAL; WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, 0x2); diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c index 69dd92f6e86d..a887df520414 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c @@ -25,9 +25,16 @@ * */ +#include <linux/gpio/machine.h> #include "amdgpu.h" #include "isp_v4_1_1.h" +#define ISP_PERFORMANCE_STATE_LOW 0 +#define ISP_PERFORMANCE_STATE_HIGH 1 + +#define ISP_HIGH_PERFORMANC_XCLK 788 +#define ISP_HIGH_PERFORMANC_ICLK 788 + static const unsigned int isp_4_1_1_int_srcid[MAX_ISP411_INT_SRC] = { ISP_4_1__SRCID__ISP_RINGBUFFER_WPT9, ISP_4_1__SRCID__ISP_RINGBUFFER_WPT10, @@ -39,22 +46,182 @@ static const unsigned int isp_4_1_1_int_srcid[MAX_ISP411_INT_SRC] = { ISP_4_1__SRCID__ISP_RINGBUFFER_WPT16 }; +static struct gpiod_lookup_table isp_gpio_table = { + .dev_id = "amd_isp_capture", + .table = { + GPIO_LOOKUP("AMDI0030:00", 85, "enable_isp", GPIO_ACTIVE_HIGH), + { } + }, +}; + +static struct gpiod_lookup_table isp_sensor_gpio_table = { + .dev_id = "i2c-ov05c10", + .table = { + GPIO_LOOKUP("amdisp-pinctrl", 0, "enable", GPIO_ACTIVE_HIGH), + { } + }, +}; + +static int isp_poweroff(struct generic_pm_domain *genpd) +{ + struct amdgpu_isp *isp = container_of(genpd, struct amdgpu_isp, ispgpd); + struct amdgpu_device *adev = isp->adev; + + return amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ISP, true, 0); +} + +static int isp_poweron(struct generic_pm_domain *genpd) +{ + struct amdgpu_isp *isp = container_of(genpd, struct amdgpu_isp, ispgpd); + struct amdgpu_device *adev = isp->adev; + + return amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ISP, false, 0); +} + +static int isp_set_performance_state(struct generic_pm_domain *genpd, + unsigned int state) +{ + struct amdgpu_isp *isp = container_of(genpd, struct amdgpu_isp, ispgpd); + struct amdgpu_device *adev = isp->adev; + u32 iclk, xclk; + int ret; + + switch (state) { + case ISP_PERFORMANCE_STATE_HIGH: + xclk = ISP_HIGH_PERFORMANC_XCLK; + iclk = ISP_HIGH_PERFORMANC_ICLK; + break; + case ISP_PERFORMANCE_STATE_LOW: + /* isp runs at default lowest clock-rate on power-on, do nothing */ + return 0; + default: + return -EINVAL; + } + + ret = amdgpu_dpm_set_soft_freq_range(adev, PP_ISPXCLK, xclk, 0); + if (ret) { + drm_err(&adev->ddev, "failed to set xclk %u to %u: %d\n", + xclk, state, ret); + return ret; + } + + ret = amdgpu_dpm_set_soft_freq_range(adev, PP_ISPICLK, iclk, 0); + if (ret) { + drm_err(&adev->ddev, "failed to set iclk %u to %u: %d\n", + iclk, state, ret); + return ret; + } + + return 0; +} + +static int isp_genpd_add_device(struct device *dev, void *data) +{ + struct generic_pm_domain *gpd = data; + struct platform_device *pdev = container_of(dev, struct platform_device, dev); + struct amdgpu_isp *isp = container_of(gpd, struct amdgpu_isp, ispgpd); + struct amdgpu_device *adev = isp->adev; + int ret; + + if (!pdev) + return -EINVAL; + + if (!dev->type->name) { + drm_dbg(&adev->ddev, "Invalid device type to add\n"); + goto exit; + } + + if (strcmp(dev->type->name, "mfd_device")) { + drm_dbg(&adev->ddev, "Invalid isp mfd device %s to add\n", pdev->mfd_cell->name); + goto exit; + } + + ret = pm_genpd_add_device(gpd, dev); + if (ret) { + drm_err(&adev->ddev, "Failed to add dev %s to genpd %d\n", + pdev->mfd_cell->name, ret); + return -ENODEV; + } + +exit: + /* Continue to add */ + return 0; +} + +static int isp_genpd_remove_device(struct device *dev, void *data) +{ + struct generic_pm_domain *gpd = data; + struct platform_device *pdev = container_of(dev, struct platform_device, dev); + struct amdgpu_isp *isp = container_of(gpd, struct amdgpu_isp, ispgpd); + struct amdgpu_device *adev = isp->adev; + int ret; + + if (!pdev) + return -EINVAL; + + if (!dev->type->name) { + drm_dbg(&adev->ddev, "Invalid device type to remove\n"); + goto exit; + } + + if (strcmp(dev->type->name, "mfd_device")) { + drm_dbg(&adev->ddev, "Invalid isp mfd device %s to remove\n", + pdev->mfd_cell->name); + goto exit; + } + + ret = pm_genpd_remove_device(dev); + if (ret) { + drm_err(&adev->ddev, "Failed to remove dev from genpd %d\n", ret); + return -ENODEV; + } + +exit: + /* Continue to remove */ + return 0; +} + static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) { + const struct software_node *amd_camera_node, *isp4_node; struct amdgpu_device *adev = isp->adev; + struct acpi_device *acpi_dev; int idx, int_idx, num_res, r; u64 isp_base; if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) return -EINVAL; + r = amdgpu_acpi_get_isp4_dev(&acpi_dev); + if (r) { + drm_dbg(&adev->ddev, "Invalid isp platform detected (%d)", r); + /* allow GPU init to progress */ + return 0; + } + + /* add GPIO resources required for OMNI5C10 sensor */ + if (!strcmp("OMNI5C10", acpi_device_hid(acpi_dev))) { + gpiod_add_lookup_table(&isp_gpio_table); + gpiod_add_lookup_table(&isp_sensor_gpio_table); + } + isp_base = adev->rmmio_base; + isp->ispgpd.name = "ISP_v_4_1_1"; + isp->ispgpd.power_off = isp_poweroff; + isp->ispgpd.power_on = isp_poweron; + isp->ispgpd.set_performance_state = isp_set_performance_state; + + r = pm_genpd_init(&isp->ispgpd, NULL, true); + if (r) { + drm_err(&adev->ddev, "failed to initialize genpd (%d)\n", r); + return -EINVAL; + } + isp->isp_cell = kcalloc(3, sizeof(struct mfd_cell), GFP_KERNEL); if (!isp->isp_cell) { r = -ENOMEM; - drm_err(&adev->ddev, - "%s: isp mfd cell alloc failed\n", __func__); + drm_err(&adev->ddev, "isp mfd cell alloc failed (%d)\n", r); goto failure; } @@ -64,19 +231,20 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) GFP_KERNEL); if (!isp->isp_res) { r = -ENOMEM; - drm_err(&adev->ddev, - "%s: isp mfd res alloc failed\n", __func__); + drm_err(&adev->ddev, "isp mfd resource alloc failed (%d)\n", r); goto failure; } isp->isp_pdata = kzalloc(sizeof(*isp->isp_pdata), GFP_KERNEL); if (!isp->isp_pdata) { r = -ENOMEM; - drm_err(&adev->ddev, - "%s: isp platform data alloc failed\n", __func__); + drm_err(&adev->ddev, "isp platform data alloc failed (%d)\n", r); goto failure; } + amd_camera_node = (const struct software_node *)acpi_dev->driver_data; + isp4_node = software_node_find_by_name(amd_camera_node, "isp4"); + /* initialize isp platform data */ isp->isp_pdata->adev = (void *)adev; isp->isp_pdata->asic_type = adev->asic_type; @@ -105,14 +273,14 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) isp->isp_cell[0].num_resources = num_res; isp->isp_cell[0].resources = &isp->isp_res[0]; isp->isp_cell[0].platform_data = isp->isp_pdata; + isp->isp_cell[0].swnode = isp4_node; isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data); /* initialize isp i2c platform data */ isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL); if (!isp->isp_i2c_res) { r = -ENOMEM; - drm_err(&adev->ddev, - "%s: isp mfd res alloc failed\n", __func__); + drm_err(&adev->ddev, "isp mfd res alloc failed (%d)\n", r); goto failure; } @@ -131,8 +299,7 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) isp->isp_gpio_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL); if (!isp->isp_gpio_res) { r = -ENOMEM; - drm_err(&adev->ddev, - "%s: isp gpio res alloc failed\n", __func__); + drm_err(&adev->ddev, "isp gpio resource alloc failed (%d)\n", r); goto failure; } @@ -148,10 +315,23 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) isp->isp_cell[2].platform_data = isp->isp_pdata; isp->isp_cell[2].pdata_size = sizeof(struct isp_platform_data); - r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 3); + /* add only amd_isp_capture and amd_isp_i2c_designware to genpd */ + r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 2); if (r) { - drm_err(&adev->ddev, - "%s: add mfd hotplug device failed\n", __func__); + drm_err(&adev->ddev, "add mfd hotplug device failed (%d)\n", r); + goto failure; + } + + r = device_for_each_child(isp->parent, &isp->ispgpd, + isp_genpd_add_device); + if (r) { + drm_err(&adev->ddev, "failed to add devices to genpd (%d)\n", r); + goto failure; + } + + r = mfd_add_hotplug_devices(isp->parent, &isp->isp_cell[2], 1); + if (r) { + drm_err(&adev->ddev, "add pinctl hotplug device failed (%d)\n", r); goto failure; } @@ -170,6 +350,9 @@ failure: static int isp_v4_1_1_hw_fini(struct amdgpu_isp *isp) { + device_for_each_child(isp->parent, NULL, + isp_genpd_remove_device); + mfd_remove_devices(isp->parent); kfree(isp->isp_res); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 4cde8a8bcc83..58239c405fda 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -118,7 +118,10 @@ static int jpeg_v2_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); return r; @@ -764,11 +767,20 @@ static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { - jpeg_v2_0_stop(ring->adev); - jpeg_v2_0_start(ring->adev); - return amdgpu_ring_test_helper(ring); + int r; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = jpeg_v2_0_stop(ring->adev); + if (r) + return r; + r = jpeg_v2_0_start(ring->adev); + if (r) + return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 8b39e114f3be..3e2c389242db 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -167,7 +167,10 @@ static int jpeg_v2_5_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); return r; @@ -643,11 +646,14 @@ static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev, return 0; } -static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { + amdgpu_ring_reset_helper_begin(ring, timedout_fence); jpeg_v2_5_stop_inst(ring->adev, ring->me); jpeg_v2_5_start_inst(ring->adev, ring->me); - return amdgpu_ring_test_helper(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 2f8510c2986b..a44eb2667664 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -132,7 +132,10 @@ static int jpeg_v3_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); return r; @@ -555,11 +558,20 @@ static int jpeg_v3_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { - jpeg_v3_0_stop(ring->adev); - jpeg_v3_0_start(ring->adev); - return amdgpu_ring_test_helper(ring); + int r; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = jpeg_v3_0_stop(ring->adev); + if (r) + return r; + r = jpeg_v3_0_start(ring->adev); + if (r) + return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index f17ec5414fd6..da3ee69f1a3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -143,7 +143,10 @@ static int jpeg_v4_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); return r; @@ -720,14 +723,20 @@ static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { - if (amdgpu_sriov_vf(ring->adev)) - return -EINVAL; + int r; - jpeg_v4_0_stop(ring->adev); - jpeg_v4_0_start(ring->adev); - return amdgpu_ring_test_helper(ring); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = jpeg_v4_0_stop(ring->adev); + if (r) + return r; + r = jpeg_v4_0_start(ring->adev); + if (r) + return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 5598a35f72af..b86288a69e7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -149,6 +149,18 @@ static int jpeg_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) return r; } + /* JPEG DJPEG POISON EVENT */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq); + if (r) + return r; + + /* JPEG EJPEG POISON EVENT */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq); + if (r) + return r; + r = amdgpu_jpeg_sw_init(adev); if (r) return r; @@ -204,12 +216,11 @@ static int jpeg_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - if (!amdgpu_sriov_vf(adev)) { - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; - r = amdgpu_jpeg_sysfs_reset_mask_init(adev); - if (r) - return r; - } + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + r = amdgpu_jpeg_sysfs_reset_mask_init(adev); return 0; } @@ -230,8 +241,7 @@ static int jpeg_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; - if (!amdgpu_sriov_vf(adev)) - amdgpu_jpeg_sysfs_reset_mask_fini(adev); + amdgpu_jpeg_sysfs_reset_mask_fini(adev); r = amdgpu_jpeg_sw_fini(adev); @@ -434,6 +444,9 @@ static int jpeg_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block) ret = jpeg_v4_0_3_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) + amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0); + return ret; } @@ -1041,6 +1054,14 @@ static int jpeg_v4_0_3_set_interrupt_state(struct amdgpu_device *adev, return 0; } +static int jpeg_v4_0_3_set_ras_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -1120,14 +1141,17 @@ static void jpeg_v4_0_3_core_stall_reset(struct amdgpu_ring *ring) WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 0x00); } -static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { if (amdgpu_sriov_vf(ring->adev)) return -EOPNOTSUPP; + amdgpu_ring_reset_helper_begin(ring, timedout_fence); jpeg_v4_0_3_core_stall_reset(ring); jpeg_v4_0_3_start_jrbc(ring); - return amdgpu_ring_test_helper(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = { @@ -1200,6 +1224,11 @@ static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = { .process = jpeg_v4_0_3_process_interrupt, }; +static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_ras_irq_funcs = { + .set = jpeg_v4_0_3_set_ras_interrupt_state, + .process = amdgpu_jpeg_process_poison_irq, +}; + static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) { int i; @@ -1208,6 +1237,9 @@ static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings; } adev->jpeg.inst->irq.funcs = &jpeg_v4_0_3_irq_funcs; + + adev->jpeg.inst->ras_poison_irq.num_types = 1; + adev->jpeg.inst->ras_poison_irq.funcs = &jpeg_v4_0_3_ras_irq_funcs; } const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block = { @@ -1304,9 +1336,47 @@ static void jpeg_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev) jpeg_v4_0_3_inst_reset_ras_error_count(adev, i); } +static uint32_t jpeg_v4_0_3_query_poison_by_instance(struct amdgpu_device *adev, + uint32_t instance, uint32_t sub_block) +{ + uint32_t poison_stat = 0, reg_value = 0; + + switch (sub_block) { + case AMDGPU_JPEG_V4_0_3_JPEG0: + reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG0_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF); + break; + case AMDGPU_JPEG_V4_0_3_JPEG1: + reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG1_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF); + break; + default: + break; + } + + if (poison_stat) + dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n", + instance, sub_block); + + return poison_stat; +} + +static bool jpeg_v4_0_3_query_ras_poison_status(struct amdgpu_device *adev) +{ + uint32_t inst = 0, sub = 0, poison_stat = 0; + + for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++) + for (sub = 0; sub < AMDGPU_JPEG_V4_0_3_MAX_SUB_BLOCK; sub++) + poison_stat += + jpeg_v4_0_3_query_poison_by_instance(adev, inst, sub); + + return !!poison_stat; +} + static const struct amdgpu_ras_block_hw_ops jpeg_v4_0_3_ras_hw_ops = { .query_ras_error_count = jpeg_v4_0_3_query_ras_error_count, .reset_ras_error_count = jpeg_v4_0_3_reset_ras_error_count, + .query_poison_status = jpeg_v4_0_3_query_ras_poison_status, }; static int jpeg_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, @@ -1328,7 +1398,7 @@ static int jpeg_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_ban 1ULL); break; case ACA_SMU_TYPE_CE: - bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, ACA_REG__MISC0__ERRCNT(misc0)); break; @@ -1383,6 +1453,13 @@ static int jpeg_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_comm if (r) return r; + if (amdgpu_ras_is_supported(adev, ras_block->block) && + adev->jpeg.inst->ras_poison_irq.funcs) { + r = amdgpu_irq_get(adev, &adev->jpeg.inst->ras_poison_irq, 0); + if (r) + goto late_fini; + } + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG, &jpeg_v4_0_3_aca_info, NULL); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h index a90bf370a002..2e110d04af84 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h @@ -46,6 +46,13 @@ #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 +enum amdgpu_jpeg_v4_0_3_sub_block { + AMDGPU_JPEG_V4_0_3_JPEG0 = 0, + AMDGPU_JPEG_V4_0_3_JPEG1, + + AMDGPU_JPEG_V4_0_3_MAX_SUB_BLOCK, +}; + extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block; void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 974030a5c03c..481d1a2dbe5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -174,9 +174,10 @@ static int jpeg_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - /* TODO: Add queue reset mask when FW fully supports it */ adev->jpeg.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); if (r) return r; @@ -767,6 +768,22 @@ static int jpeg_v4_0_5_process_interrupt(struct amdgpu_device *adev, return 0; } +static int jpeg_v4_0_5_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + int r; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = jpeg_v4_0_5_stop(ring->adev); + if (r) + return r; + r = jpeg_v4_0_5_start(ring->adev); + if (r) + return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = { .name = "jpeg_v4_0_5", .early_init = jpeg_v4_0_5_early_init, @@ -812,6 +829,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v4_0_5_ring_reset, }; static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index 31d213ccbe0a..e0a71909252b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -120,13 +120,13 @@ static int jpeg_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - /* TODO: Add queue reset mask when FW fully supports it */ adev->jpeg.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); - if (r) - return r; - return 0; + + return r; } /** @@ -644,6 +644,22 @@ static int jpeg_v5_0_0_process_interrupt(struct amdgpu_device *adev, return 0; } +static int jpeg_v5_0_0_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + int r; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = jpeg_v5_0_0_stop(ring->adev); + if (r) + return r; + r = jpeg_v5_0_0_start(ring->adev); + if (r) + return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = { .name = "jpeg_v5_0_0", .early_init = jpeg_v5_0_0_early_init, @@ -689,6 +705,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg, .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v5_0_0_ring_reset, }; static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index 218e16b68f1d..54523dc1f702 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -28,15 +28,18 @@ #include "soc15d.h" #include "jpeg_v4_0_3.h" #include "jpeg_v5_0_1.h" +#include "mmsch_v5_0.h" #include "vcn/vcn_5_0_0_offset.h" #include "vcn/vcn_5_0_0_sh_mask.h" #include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" +static int jpeg_v5_0_1_start_sriov(struct amdgpu_device *adev); static void jpeg_v5_0_1_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev); static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); +static void jpeg_v5_0_1_set_ras_funcs(struct amdgpu_device *adev); static void jpeg_v5_0_1_dec_ring_set_wptr(struct amdgpu_ring *ring); static int amdgpu_ih_srcid_jpeg[] = { @@ -118,6 +121,7 @@ static int jpeg_v5_0_1_early_init(struct amdgpu_ip_block *ip_block) adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS; jpeg_v5_0_1_set_dec_ring_funcs(adev); jpeg_v5_0_1_set_irq_funcs(adev); + jpeg_v5_0_1_set_ras_funcs(adev); return 0; } @@ -142,6 +146,17 @@ static int jpeg_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; } + /* JPEG DJPEG POISON EVENT */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_5_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq); + if (r) + return r; + + /* JPEG EJPEG POISON EVENT */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_5_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq); + if (r) + return r; r = amdgpu_jpeg_sw_init(adev); if (r) @@ -156,21 +171,16 @@ static int jpeg_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { ring = &adev->jpeg.inst[i].ring_dec[j]; - ring->use_doorbell = false; + ring->use_doorbell = true; ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id); if (!amdgpu_sriov_vf(adev)) { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + j + 11 * jpeg_inst; } else { - if (j < 4) - ring->doorbell_index = - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 4 + j + 32 * jpeg_inst; - else - ring->doorbell_index = - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 8 + j + 32 * jpeg_inst; + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 2 + j + 32 * jpeg_inst; } sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, @@ -190,14 +200,13 @@ static int jpeg_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - if (!amdgpu_sriov_vf(adev)) { - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; - r = amdgpu_jpeg_sysfs_reset_mask_init(adev); - if (r) - return r; - } + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + r = amdgpu_jpeg_sysfs_reset_mask_init(adev); - return 0; + return r; } /** @@ -216,8 +225,7 @@ static int jpeg_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; - if (!amdgpu_sriov_vf(adev)) - amdgpu_jpeg_sysfs_reset_mask_fini(adev); + amdgpu_jpeg_sysfs_reset_mask_fini(adev); r = amdgpu_jpeg_sw_fini(adev); @@ -237,7 +245,10 @@ static int jpeg_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) int i, j, r, jpeg_inst; if (amdgpu_sriov_vf(adev)) { - /* jpeg_v5_0_1_start_sriov(adev); */ + r = jpeg_v5_0_1_start_sriov(adev); + if (r) + return r; + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { ring = &adev->jpeg.inst[i].ring_dec[j]; @@ -264,7 +275,7 @@ static int jpeg_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) ring = &adev->jpeg.inst[i].ring_dec[j]; if (ring->use_doorbell) WREG32_SOC15_OFFSET(VCN, GET_INST(VCN, i), regVCN_JPEG_DB_CTRL, - (ring->pipe ? (ring->pipe - 0x15) : 0), + ring->pipe, ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | VCN_JPEG_DB_CTRL__EN_MASK); @@ -291,8 +302,13 @@ static int jpeg_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block) cancel_delayed_work_sync(&adev->jpeg.idle_work); - if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) - ret = jpeg_v5_0_1_set_powergating_state(ip_block, AMD_PG_STATE_GATE); + if (!amdgpu_sriov_vf(adev)) { + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) + ret = jpeg_v5_0_1_set_powergating_state(ip_block, AMD_PG_STATE_GATE); + } + + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) + amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0); return ret; } @@ -422,6 +438,119 @@ static void jpeg_v5_0_1_init_jrbc(struct amdgpu_ring *ring) reg_offset); } +static int jpeg_v5_0_1_start_sriov(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + uint64_t ctx_addr; + uint32_t param, resp, expected; + uint32_t tmp, timeout; + + struct amdgpu_mm_table *table = &adev->virt.mm_table; + uint32_t *table_loc; + uint32_t table_size; + uint32_t size, size_dw, item_offset; + uint32_t init_status; + int i, j, jpeg_inst; + + struct mmsch_v5_0_cmd_direct_write + direct_wt = { {0} }; + struct mmsch_v5_0_cmd_end end = { {0} }; + struct mmsch_v5_0_init_header header; + + direct_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_WRITE; + end.cmd_header.command_type = + MMSCH_COMMAND__END; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { + jpeg_inst = GET_INST(JPEG, i); + + memset(&header, 0, sizeof(struct mmsch_v5_0_init_header)); + header.version = MMSCH_VERSION; + header.total_size = sizeof(struct mmsch_v5_0_init_header) >> 2; + + table_loc = (uint32_t *)table->cpu_addr; + table_loc += header.total_size; + + item_offset = header.total_size; + + for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + table_size = 0; + + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW); + MMSCH_V5_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr)); + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH); + MMSCH_V5_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr)); + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC_RB_SIZE); + MMSCH_V5_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4); + + if (j < 5) { + header.mjpegdec0[j].table_offset = item_offset; + header.mjpegdec0[j].init_status = 0; + header.mjpegdec0[j].table_size = table_size; + } else { + header.mjpegdec1[j - 5].table_offset = item_offset; + header.mjpegdec1[j - 5].init_status = 0; + header.mjpegdec1[j - 5].table_size = table_size; + } + header.total_size += table_size; + item_offset += table_size; + } + + MMSCH_V5_0_INSERT_END(); + + /* send init table to MMSCH */ + size = sizeof(struct mmsch_v5_0_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy((void *)table_loc, &header, size); + + ctx_addr = table->gpu_addr; + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr)); + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr)); + + tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID); + tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp); + + size = header.total_size; + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size); + + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0); + + param = 0x00000001; + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param); + tmp = 0; + timeout = 1000; + resp = 0; + expected = MMSCH_VF_MAILBOX_RESP__OK; + init_status = + ((struct mmsch_v5_0_init_header *)(table_loc))->mjpegdec0[i].init_status; + while (resp != expected) { + resp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP); + + if (resp != 0) + break; + udelay(10); + tmp = tmp + 10; + if (tmp >= timeout) { + DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\ + " waiting for regMMSCH_VF_MAILBOX_RESP "\ + "(expected=0x%08x, readback=0x%08x)\n", + tmp, expected, resp); + return -EBUSY; + } + } + if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE && + init_status != MMSCH_VF_ENGINE_STATUS__PASS) + DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n", + resp, init_status); + + } + return 0; +} + /** * jpeg_v5_0_1_start - start JPEG block * @@ -581,6 +710,11 @@ static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block, struct amdgpu_device *adev = ip_block->adev; int ret; + if (amdgpu_sriov_vf(adev)) { + adev->jpeg.cur_state = AMD_PG_STATE_UNGATE; + return 0; + } + if (state == adev->jpeg.cur_state) return 0; @@ -603,6 +737,16 @@ static int jpeg_v5_0_1_set_interrupt_state(struct amdgpu_device *adev, return 0; } +static int jpeg_v5_0_1_set_ras_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + + + static int jpeg_v5_0_1_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -688,14 +832,14 @@ static void jpeg_v5_0_1_core_stall_reset(struct amdgpu_ring *ring) WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 0x00); } -static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { - if (amdgpu_sriov_vf(ring->adev)) - return -EOPNOTSUPP; - + amdgpu_ring_reset_helper_begin(ring, timedout_fence); jpeg_v5_0_1_core_stall_reset(ring); jpeg_v5_0_1_init_jrbc(ring); - return amdgpu_ring_test_helper(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = { @@ -772,6 +916,11 @@ static const struct amdgpu_irq_src_funcs jpeg_v5_0_1_irq_funcs = { .process = jpeg_v5_0_1_process_interrupt, }; +static const struct amdgpu_irq_src_funcs jpeg_v5_0_1_ras_irq_funcs = { + .set = jpeg_v5_0_1_set_ras_interrupt_state, + .process = amdgpu_jpeg_process_poison_irq, +}; + static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev) { int i; @@ -780,6 +929,10 @@ static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev) adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings; adev->jpeg.inst->irq.funcs = &jpeg_v5_0_1_irq_funcs; + + adev->jpeg.inst->ras_poison_irq.num_types = 1; + adev->jpeg.inst->ras_poison_irq.funcs = &jpeg_v5_0_1_ras_irq_funcs; + } const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block = { @@ -789,3 +942,150 @@ const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block = { .rev = 1, .funcs = &jpeg_v5_0_1_ip_funcs, }; + +static uint32_t jpeg_v5_0_1_query_poison_by_instance(struct amdgpu_device *adev, + uint32_t instance, uint32_t sub_block) +{ + uint32_t poison_stat = 0, reg_value = 0; + + switch (sub_block) { + case AMDGPU_JPEG_V5_0_1_JPEG0: + reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG0_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF); + break; + case AMDGPU_JPEG_V5_0_1_JPEG1: + reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG1_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF); + break; + default: + break; + } + + if (poison_stat) + dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n", + instance, sub_block); + + return poison_stat; +} + +static bool jpeg_v5_0_1_query_ras_poison_status(struct amdgpu_device *adev) +{ + uint32_t inst = 0, sub = 0, poison_stat = 0; + + for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++) + for (sub = 0; sub < AMDGPU_JPEG_V5_0_1_MAX_SUB_BLOCK; sub++) + poison_stat += + jpeg_v5_0_1_query_poison_by_instance(adev, inst, sub); + + return !!poison_stat; +} + +static const struct amdgpu_ras_block_hw_ops jpeg_v5_0_1_ras_hw_ops = { + .query_poison_status = jpeg_v5_0_1_query_ras_poison_status, +}; + +static int jpeg_v5_0_1_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct aca_bank_info info; + u64 misc0; + int ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + switch (type) { + case ACA_SMU_TYPE_UE: + bank->aca_err_type = ACA_ERROR_TYPE_UE; + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); + break; + case ACA_SMU_TYPE_CE: + bank->aca_err_type = ACA_ERROR_TYPE_CE; + ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, + ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; + } + + return ret; +} + +/* reference to smu driver if header file */ +static int jpeg_v5_0_1_err_codes[] = { + 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-7][S|D] */ + 24, 25, 26, 27, 28, 29, 30, 31 +}; + +static bool jpeg_v5_0_1_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + + if (instlo != mmSMNAID_AID0_MCA_SMU) + return false; + + if (aca_bank_check_error_codes(handle->adev, bank, + jpeg_v5_0_1_err_codes, + ARRAY_SIZE(jpeg_v5_0_1_err_codes))) + return false; + + return true; +} + +static const struct aca_bank_ops jpeg_v5_0_1_aca_bank_ops = { + .aca_bank_parser = jpeg_v5_0_1_aca_bank_parser, + .aca_bank_is_valid = jpeg_v5_0_1_aca_bank_is_valid, +}; + +static const struct aca_info jpeg_v5_0_1_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK, + .bank_ops = &jpeg_v5_0_1_aca_bank_ops, +}; + +static int jpeg_v5_0_1_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r; + + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + if (amdgpu_ras_is_supported(adev, ras_block->block) && + adev->jpeg.inst->ras_poison_irq.funcs) { + r = amdgpu_irq_get(adev, &adev->jpeg.inst->ras_poison_irq, 0); + if (r) + goto late_fini; + } + + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG, + &jpeg_v5_0_1_aca_info, NULL); + if (r) + goto late_fini; + + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + + return r; +} + +static struct amdgpu_jpeg_ras jpeg_v5_0_1_ras = { + .ras_block = { + .hw_ops = &jpeg_v5_0_1_ras_hw_ops, + .ras_late_init = jpeg_v5_0_1_ras_late_init, + }, +}; + +static void jpeg_v5_0_1_set_ras_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.ras = &jpeg_v5_0_1_ras; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h index efdab57324e4..a7e58d5fb246 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h @@ -26,6 +26,9 @@ extern const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block; +#define regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET 0x4094 +#define regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET 0x1bffe + #define regUVD_JRBC0_UVD_JRBC_RB_WPTR 0x0640 #define regUVD_JRBC0_UVD_JRBC_RB_WPTR_BASE_IDX 1 #define regUVD_JRBC0_UVD_JRBC_STATUS 0x0649 @@ -98,4 +101,11 @@ extern const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block; #define regVCN_RRMT_CNTL 0x0940 #define regVCN_RRMT_CNTL_BASE_IDX 1 +enum amdgpu_jpeg_v5_0_1_sub_block { + AMDGPU_JPEG_V5_0_1_JPEG0 = 0, + AMDGPU_JPEG_V5_0_1_JPEG1, + + AMDGPU_JPEG_V5_0_1_MAX_SUB_BLOCK, +}; + #endif /* __JPEG_V5_0_1_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c new file mode 100644 index 000000000000..d6f50b13e2ba --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -0,0 +1,355 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_gfx.h" +#include "mes_userqueue.h" +#include "amdgpu_userq_fence.h" + +#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE +#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE + +static int +mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo) +{ + int ret; + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + DRM_ERROR("Failed to reserve bo. ret %d\n", ret); + goto err_reserve_bo_failed; + } + + ret = amdgpu_ttm_alloc_gart(&bo->tbo); + if (ret) { + DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); + goto err_map_bo_gart_failed; + } + + amdgpu_bo_unreserve(bo); + bo = amdgpu_bo_ref(bo); + + return 0; + +err_map_bo_gart_failed: + amdgpu_bo_unreserve(bo); +err_reserve_bo_failed: + return ret; +} + +static int +mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + uint64_t wptr) +{ + struct amdgpu_bo_va_mapping *wptr_mapping; + struct amdgpu_vm *wptr_vm; + struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj; + int ret; + + wptr_vm = queue->vm; + ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); + if (ret) + return ret; + + wptr &= AMDGPU_GMC_HOLE_MASK; + wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT); + amdgpu_bo_unreserve(wptr_vm->root.bo); + if (!wptr_mapping) { + DRM_ERROR("Failed to lookup wptr bo\n"); + return -EINVAL; + } + + wptr_obj->obj = wptr_mapping->bo_va->base.bo; + if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { + DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n"); + return -EINVAL; + } + + ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj); + if (ret) { + DRM_ERROR("Failed to map wptr bo to GART\n"); + return ret; + } + + queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj); + return 0; +} + +static int convert_to_mes_priority(int priority) +{ + switch (priority) { + case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW: + default: + return AMDGPU_MES_PRIORITY_LEVEL_NORMAL; + case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW: + return AMDGPU_MES_PRIORITY_LEVEL_LOW; + case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH: + return AMDGPU_MES_PRIORITY_LEVEL_MEDIUM; + case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH: + return AMDGPU_MES_PRIORITY_LEVEL_HIGH; + } +} + +static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + struct amdgpu_mqd_prop *userq_props = queue->userq_prop; + struct mes_add_queue_input queue_input; + int r; + + memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); + + queue_input.process_va_start = 0; + queue_input.process_va_end = adev->vm_manager.max_pfn - 1; + + /* set process quantum to 10 ms and gang quantum to 1 ms as default */ + queue_input.process_quantum = 100000; + queue_input.gang_quantum = 10000; + queue_input.paging = false; + + queue_input.process_context_addr = ctx->gpu_addr; + queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; + queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; + queue_input.gang_global_priority_level = convert_to_mes_priority(queue->priority); + + queue_input.process_id = queue->vm->pasid; + queue_input.queue_type = queue->queue_type; + queue_input.mqd_addr = queue->mqd.gpu_addr; + queue_input.wptr_addr = userq_props->wptr_gpu_addr; + queue_input.queue_size = userq_props->queue_size >> 2; + queue_input.doorbell_offset = userq_props->doorbell_index; + queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo); + queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr; + + amdgpu_mes_lock(&adev->mes); + r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); + if (r) { + DRM_ERROR("Failed to map queue in HW, err (%d)\n", r); + return r; + } + + DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n", userq_props->doorbell_index); + return 0; +} + +static int mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct mes_remove_queue_input queue_input; + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + int r; + + memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input)); + queue_input.doorbell_offset = queue->doorbell_index; + queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; + + amdgpu_mes_lock(&adev->mes); + r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); + if (r) + DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", r); + return r; +} + +static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + struct drm_amdgpu_userq_in *mqd_user) +{ + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + int r, size; + + /* + * The FW expects at least one page space allocated for + * process ctx and gang ctx each. Create an object + * for the same. + */ + size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ; + r = amdgpu_userq_create_object(uq_mgr, ctx, size); + if (r) { + DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", r); + return r; + } + + return 0; +} + +static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, + struct drm_amdgpu_userq_in *args_in, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type]; + struct drm_amdgpu_userq_in *mqd_user = args_in; + struct amdgpu_mqd_prop *userq_props; + int r; + + /* Structure to initialize MQD for userqueue using generic MQD init function */ + userq_props = kzalloc(sizeof(struct amdgpu_mqd_prop), GFP_KERNEL); + if (!userq_props) { + DRM_ERROR("Failed to allocate memory for userq_props\n"); + return -ENOMEM; + } + + if (!mqd_user->wptr_va || !mqd_user->rptr_va || + !mqd_user->queue_va || mqd_user->queue_size == 0) { + DRM_ERROR("Invalid MQD parameters for userqueue\n"); + r = -EINVAL; + goto free_props; + } + + r = amdgpu_userq_create_object(uq_mgr, &queue->mqd, mqd_hw_default->mqd_size); + if (r) { + DRM_ERROR("Failed to create MQD object for userqueue\n"); + goto free_props; + } + + /* Initialize the MQD BO with user given values */ + userq_props->wptr_gpu_addr = mqd_user->wptr_va; + userq_props->rptr_gpu_addr = mqd_user->rptr_va; + userq_props->queue_size = mqd_user->queue_size; + userq_props->hqd_base_gpu_addr = mqd_user->queue_va; + userq_props->mqd_gpu_addr = queue->mqd.gpu_addr; + userq_props->use_doorbell = true; + userq_props->doorbell_index = queue->doorbell_index; + userq_props->fence_address = queue->fence_drv->gpu_addr; + + if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) { + struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd; + + if (mqd_user->mqd_size != sizeof(*compute_mqd)) { + DRM_ERROR("Invalid compute IP MQD size\n"); + r = -EINVAL; + goto free_mqd; + } + + compute_mqd = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(compute_mqd)) { + DRM_ERROR("Failed to read user MQD\n"); + r = -ENOMEM; + goto free_mqd; + } + + userq_props->eop_gpu_addr = compute_mqd->eop_va; + userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; + userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; + userq_props->hqd_active = false; + userq_props->tmz_queue = + mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE; + kfree(compute_mqd); + } else if (queue->queue_type == AMDGPU_HW_IP_GFX) { + struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11; + + if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) { + DRM_ERROR("Invalid GFX MQD\n"); + r = -EINVAL; + goto free_mqd; + } + + mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(mqd_gfx_v11)) { + DRM_ERROR("Failed to read user MQD\n"); + r = -ENOMEM; + goto free_mqd; + } + + userq_props->shadow_addr = mqd_gfx_v11->shadow_va; + userq_props->csa_addr = mqd_gfx_v11->csa_va; + userq_props->tmz_queue = + mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE; + kfree(mqd_gfx_v11); + } else if (queue->queue_type == AMDGPU_HW_IP_DMA) { + struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11; + + if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) { + DRM_ERROR("Invalid SDMA MQD\n"); + r = -EINVAL; + goto free_mqd; + } + + mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(mqd_sdma_v11)) { + DRM_ERROR("Failed to read sdma user MQD\n"); + r = -ENOMEM; + goto free_mqd; + } + + userq_props->csa_addr = mqd_sdma_v11->csa_va; + kfree(mqd_sdma_v11); + } + + queue->userq_prop = userq_props; + + r = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props); + if (r) { + DRM_ERROR("Failed to initialize MQD for userqueue\n"); + goto free_mqd; + } + + /* Create BO for FW operations */ + r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user); + if (r) { + DRM_ERROR("Failed to allocate BO for userqueue (%d)", r); + goto free_mqd; + } + + /* FW expects WPTR BOs to be mapped into GART */ + r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); + if (r) { + DRM_ERROR("Failed to create WPTR mapping\n"); + goto free_ctx; + } + + return 0; + +free_ctx: + amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj); + +free_mqd: + amdgpu_userq_destroy_object(uq_mgr, &queue->mqd); + +free_props: + kfree(userq_props); + + return r; +} + +static void +mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj); + kfree(queue->userq_prop); + amdgpu_userq_destroy_object(uq_mgr, &queue->mqd); +} + +const struct amdgpu_userq_funcs userq_mes_funcs = { + .mqd_create = mes_userq_mqd_create, + .mqd_destroy = mes_userq_mqd_destroy, + .unmap = mes_userq_unmap, + .map = mes_userq_map, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h new file mode 100644 index 000000000000..090ae8897770 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef MES_USERQ_H +#define MES_USERQ_H +#include "amdgpu_userq.h" + +extern const struct amdgpu_userq_funcs userq_mes_funcs; +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index e65916ada23b..28eb846280dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -287,6 +287,23 @@ static int convert_to_mes_queue_type(int queue_type) return -1; } +static int convert_to_mes_priority_level(int priority_level) +{ + switch (priority_level) { + case AMDGPU_MES_PRIORITY_LEVEL_LOW: + return AMD_PRIORITY_LEVEL_LOW; + case AMDGPU_MES_PRIORITY_LEVEL_NORMAL: + default: + return AMD_PRIORITY_LEVEL_NORMAL; + case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM: + return AMD_PRIORITY_LEVEL_MEDIUM; + case AMDGPU_MES_PRIORITY_LEVEL_HIGH: + return AMD_PRIORITY_LEVEL_HIGH; + case AMDGPU_MES_PRIORITY_LEVEL_REALTIME: + return AMD_PRIORITY_LEVEL_REALTIME; + } +} + static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, struct mes_add_queue_input *input) { @@ -310,9 +327,9 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gang_quantum = input->gang_quantum; mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; mes_add_queue_pkt.inprocess_gang_priority = - input->inprocess_gang_priority; + convert_to_mes_priority_level(input->inprocess_gang_priority); mes_add_queue_pkt.gang_global_priority_level = - input->gang_global_priority_level; + convert_to_mes_priority_level(input->gang_global_priority_level); mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; mes_add_queue_pkt.mqd_addr = input->mqd_addr; @@ -458,31 +475,6 @@ static int mes_v11_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_typ return r; } -static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes, - struct mes_reset_queue_input *input) -{ - if (input->use_mmio) - return mes_v11_0_reset_queue_mmio(mes, input->queue_type, - input->me_id, input->pipe_id, - input->queue_id, input->vmid); - - union MESAPI__RESET mes_reset_queue_pkt; - - memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); - - mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; - mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; - mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - - mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; - mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; - /*mes_reset_queue_pkt.reset_queue_only = 1;*/ - - return mes_v11_0_submit_pkt_and_poll_completion(mes, - &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), - offsetof(union MESAPI__REMOVE_QUEUE, api_status)); -} - static int mes_v11_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { @@ -649,7 +641,7 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, break; case MES_MISC_OP_CHANGE_CONFIG: if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) { - dev_err(mes->adev->dev, "MES FW versoin must be larger than 0x63 to support limit single process feature.\n"); + dev_err(mes->adev->dev, "MES FW version must be larger than 0x63 to support limit single process feature.\n"); return -EINVAL; } misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; @@ -694,7 +686,8 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes->compute_hqd_mask[i]; for (i = 0; i < MAX_GFX_PIPES; i++) - mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i]; + mes_set_hw_res_pkt.gfx_hqd_mask[i] = + mes->gfx_hqd_mask[i]; for (i = 0; i < MAX_SDMA_PIPES; i++) mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; @@ -723,7 +716,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes->event_log_gpu_addr; } - if (enforce_isolation) + if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) mes_set_hw_res_pkt.limit_single_process = 1; return mes_v11_0_submit_pkt_and_poll_completion(mes, @@ -753,8 +746,8 @@ static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes) offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } -static int mes_v11_0_reset_legacy_queue(struct amdgpu_mes *mes, - struct mes_reset_legacy_queue_input *input) +static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) { union MESAPI__RESET mes_reset_queue_pkt; @@ -772,7 +765,7 @@ static int mes_v11_0_reset_legacy_queue(struct amdgpu_mes *mes, mes_reset_queue_pkt.queue_type = convert_to_mes_queue_type(input->queue_type); - if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { + if (input->legacy_gfx) { mes_reset_queue_pkt.reset_legacy_gfx = 1; mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; mes_reset_queue_pkt.queue_id_lp = input->queue_id; @@ -798,7 +791,6 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .suspend_gang = mes_v11_0_suspend_gang, .resume_gang = mes_v11_0_resume_gang, .misc_op = mes_v11_0_misc_op, - .reset_legacy_queue = mes_v11_0_reset_legacy_queue, .reset_hw_queue = mes_v11_0_reset_hw_queue, }; @@ -894,6 +886,10 @@ static void mes_v11_0_get_fw_version(struct amdgpu_device *adev) { int pipe; + /* return early if we have already fetched these */ + if (adev->mes.sched_version && adev->mes.kiq_version) + return; + /* get MES scheduler/KIQ versions */ mutex_lock(&adev->srbm_mutex); @@ -1634,10 +1630,12 @@ static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block) if (r) goto failure; - r = mes_v11_0_set_hw_resources_1(&adev->mes); - if (r) { - DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); - goto failure; + if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x50) { + r = mes_v11_0_set_hw_resources_1(&adev->mes); + if (r) { + DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); + goto failure; + } } r = mes_v11_0_query_sched_status(&adev->mes); @@ -1697,22 +1695,10 @@ static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block) return 0; } -static int mes_v11_0_late_init(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - - /* it's only intended for use in mes_self_test case, not for s0ix and reset */ - if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend && - (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3))) - amdgpu_mes_self_test(adev); - - return 0; -} - static const struct amd_ip_funcs mes_v11_0_ip_funcs = { .name = "mes_v11_0", .early_init = mes_v11_0_early_init, - .late_init = mes_v11_0_late_init, + .late_init = NULL, .sw_init = mes_v11_0_sw_init, .sw_fini = mes_v11_0_sw_fini, .hw_init = mes_v11_0_hw_init, diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 183dd3346da5..6b222630f3fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -274,6 +274,23 @@ static int convert_to_mes_queue_type(int queue_type) return -1; } +static int convert_to_mes_priority_level(int priority_level) +{ + switch (priority_level) { + case AMDGPU_MES_PRIORITY_LEVEL_LOW: + return AMD_PRIORITY_LEVEL_LOW; + case AMDGPU_MES_PRIORITY_LEVEL_NORMAL: + default: + return AMD_PRIORITY_LEVEL_NORMAL; + case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM: + return AMD_PRIORITY_LEVEL_MEDIUM; + case AMDGPU_MES_PRIORITY_LEVEL_HIGH: + return AMD_PRIORITY_LEVEL_HIGH; + case AMDGPU_MES_PRIORITY_LEVEL_REALTIME: + return AMD_PRIORITY_LEVEL_REALTIME; + } +} + static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes, struct mes_add_queue_input *input) { @@ -297,9 +314,9 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gang_quantum = input->gang_quantum; mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; mes_add_queue_pkt.inprocess_gang_priority = - input->inprocess_gang_priority; + convert_to_mes_priority_level(input->inprocess_gang_priority); mes_add_queue_pkt.gang_global_priority_level = - input->gang_global_priority_level; + convert_to_mes_priority_level(input->gang_global_priority_level); mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; mes_add_queue_pkt.mqd_addr = input->mqd_addr; @@ -477,32 +494,6 @@ static int mes_v12_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_typ return r; } -static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes, - struct mes_reset_queue_input *input) -{ - union MESAPI__RESET mes_reset_queue_pkt; - int pipe; - - memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); - - mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; - mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; - mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - - mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; - mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; - /*mes_reset_queue_pkt.reset_queue_only = 1;*/ - - if (mes->adev->enable_uni_mes) - pipe = AMDGPU_MES_KIQ_PIPE; - else - pipe = AMDGPU_MES_SCHED_PIPE; - - return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, - &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), - offsetof(union MESAPI__REMOVE_QUEUE, api_status)); -} - static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { @@ -762,7 +753,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) pipe * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE); } - if (enforce_isolation) + if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) mes_set_hw_res_pkt.limit_single_process = 1; return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, @@ -845,8 +836,8 @@ static void mes_v12_0_enable_unmapped_doorbell_handling( WREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL, data); } -static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, - struct mes_reset_legacy_queue_input *input) +static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) { union MESAPI__RESET mes_reset_queue_pkt; int pipe; @@ -865,7 +856,7 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, mes_reset_queue_pkt.queue_type = convert_to_mes_queue_type(input->queue_type); - if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { + if (input->legacy_gfx) { mes_reset_queue_pkt.reset_legacy_gfx = 1; mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; mes_reset_queue_pkt.queue_id_lp = input->queue_id; @@ -878,7 +869,7 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; } - if (mes->adev->enable_uni_mes) + if (input->is_kq) pipe = AMDGPU_MES_KIQ_PIPE; else pipe = AMDGPU_MES_SCHED_PIPE; @@ -896,7 +887,6 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .suspend_gang = mes_v12_0_suspend_gang, .resume_gang = mes_v12_0_resume_gang, .misc_op = mes_v12_0_misc_op, - .reset_legacy_queue = mes_v12_0_reset_legacy_queue, .reset_hw_queue = mes_v12_0_reset_hw_queue, }; @@ -1392,17 +1382,20 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, mes_v12_0_queue_init_register(ring); } - /* get MES scheduler/KIQ versions */ - mutex_lock(&adev->srbm_mutex); - soc21_grbm_select(adev, 3, pipe, 0, 0); + if (((pipe == AMDGPU_MES_SCHED_PIPE) && !adev->mes.sched_version) || + ((pipe == AMDGPU_MES_KIQ_PIPE) && !adev->mes.kiq_version)) { + /* get MES scheduler/KIQ versions */ + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, 3, pipe, 0, 0); - if (pipe == AMDGPU_MES_SCHED_PIPE) - adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); - else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) - adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + if (pipe == AMDGPU_MES_SCHED_PIPE) + adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) + adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); - soc21_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } return 0; } @@ -1749,7 +1742,8 @@ static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block) if (r) goto failure; - mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); + if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x4b) + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); mes_v12_0_init_aggregated_doorbell(&adev->mes); @@ -1808,21 +1802,10 @@ static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block) return 0; } -static int mes_v12_0_late_init(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - - /* it's only intended for use in mes_self_test case, not for s0ix and reset */ - if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend) - amdgpu_mes_self_test(adev); - - return 0; -} - static const struct amd_ip_funcs mes_v12_0_ip_funcs = { .name = "mes_v12_0", .early_init = mes_v12_0_early_init, - .late_init = mes_v12_0_late_init, + .late_init = NULL, .sw_init = mes_v12_0_sw_init, .sw_fini = mes_v12_0_sw_fini, .hw_init = mes_v12_0_hw_init, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index a54e7b929295..cc688ae79e84 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -30,6 +30,7 @@ #include "soc15_common.h" #include "soc15.h" #include "amdgpu_ras.h" +#include "amdgpu_psp.h" #define regVM_L2_CNTL3_DEFAULT 0x80100007 #define regVM_L2_CNTL4_DEFAULT 0x000000c1 @@ -75,6 +76,8 @@ static void mmhub_v1_8_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmi static void mmhub_v1_8_init_gart_aperture_regs(struct amdgpu_device *adev) { + uint64_t gart_start = amdgpu_virt_xgmi_migrate_enabled(adev) ? + adev->gmc.vram_start : adev->gmc.fb_start; uint64_t pt_base; u32 inst_mask; int i; @@ -94,10 +97,10 @@ static void mmhub_v1_8_init_gart_aperture_regs(struct amdgpu_device *adev) if (adev->gmc.pdb0_bo) { WREG32_SOC15(MMHUB, i, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, - (u32)(adev->gmc.fb_start >> 12)); + (u32)(gart_start >> 12)); WREG32_SOC15(MMHUB, i, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, - (u32)(adev->gmc.fb_start >> 44)); + (u32)(gart_start >> 44)); WREG32_SOC15(MMHUB, i, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, @@ -192,10 +195,8 @@ static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev) uint32_t tmp, inst_mask; int i; - /* Setup TLB control */ - inst_mask = adev->aid_mask; - for_each_inst(i, inst_mask) { - tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL); + if (amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev)) { + tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); @@ -209,7 +210,26 @@ static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev) MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); - WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp); + psp_reg_program_no_ring(&adev->psp, tmp, PSP_REG_MMHUB_L1_TLB_CNTL); + } else { + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, + 1); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC);/* XXX for emulation. */ + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); + + WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp); + } } } @@ -221,6 +241,9 @@ static void mmhub_v1_8_init_snoop_override_regs(struct amdgpu_device *adev) uint32_t distance = regDAGB1_WRCLI_GPU_SNOOP_OVERRIDE - regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE; + if (amdgpu_sriov_vf(adev)) + return; + inst_mask = adev->aid_mask; for_each_inst(i, inst_mask) { for (j = 0; j < 5; j++) { /* DAGB instances */ @@ -454,6 +477,30 @@ static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev) return 0; } +static void mmhub_v1_8_disable_l1_tlb(struct amdgpu_device *adev) +{ + u32 tmp; + u32 i, inst_mask; + + if (amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev)) { + tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + psp_reg_program_no_ring(&adev->psp, tmp, PSP_REG_MMHUB_L1_TLB_CNTL); + } else { + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, + 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp); + } + } +} + static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub; @@ -467,15 +514,6 @@ static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev) for (i = 0; i < 16; i++) WREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT0_CNTL, i * hub->ctx_distance, 0); - - /* Setup TLB control */ - tmp = RREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, - 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, - ENABLE_ADVANCED_DRIVER_MODEL, 0); - WREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL, tmp); - if (!amdgpu_sriov_vf(adev)) { /* Setup L2 cache */ tmp = RREG32_SOC15(MMHUB, j, regVM_L2_CNTL); @@ -485,6 +523,8 @@ static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, j, regVM_L2_CNTL3, 0); } } + + mmhub_v1_8_disable_l1_tlb(adev); } /** @@ -751,7 +791,7 @@ static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank 1ULL); break; case ACA_SMU_TYPE_CE: - bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, ACA_REG__MISC0__ERRCNT(misc0)); break; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c index f2ab5001b492..951998454b25 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c @@ -37,39 +37,31 @@ static const char *mmhub_client_ids_v4_1_0[][2] = { [0][0] = "VMC", [4][0] = "DCEDMC", - [5][0] = "DCEVGA", [6][0] = "MP0", [7][0] = "MP1", [8][0] = "MPIO", - [16][0] = "HDP", - [17][0] = "LSDMA", - [18][0] = "JPEG", - [19][0] = "VCNU0", - [21][0] = "VSCH", - [22][0] = "VCNU1", - [23][0] = "VCN1", - [32+20][0] = "VCN0", - [2][1] = "DBGUNBIO", + [16][0] = "LSDMA", + [17][0] = "JPEG", + [19][0] = "VCNU", + [22][0] = "VSCH", + [23][0] = "HDP", + [32+23][0] = "VCNRD", [3][1] = "DCEDWB", [4][1] = "DCEDMC", - [5][1] = "DCEVGA", [6][1] = "MP0", [7][1] = "MP1", [8][1] = "MPIO", [10][1] = "DBGU0", [11][1] = "DBGU1", - [12][1] = "DBGU2", - [13][1] = "DBGU3", + [12][1] = "DBGUNBIO", [14][1] = "XDP", [15][1] = "OSSSYS", - [16][1] = "HDP", - [17][1] = "LSDMA", - [18][1] = "JPEG", - [19][1] = "VCNU0", - [20][1] = "VCN0", - [21][1] = "VSCH", - [22][1] = "VCNU1", - [23][1] = "VCN1", + [16][1] = "LSDMA", + [17][1] = "JPEG", + [18][1] = "VCNWR", + [19][1] = "VCNU", + [22][1] = "VSCH", + [23][1] = "HDP", }; static uint32_t mmhub_v4_1_0_get_invalidate_req(unsigned int vmid, diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h new file mode 100644 index 000000000000..6f749814929f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h @@ -0,0 +1,144 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MMSCH_V5_0_H__ +#define __MMSCH_V5_0_H__ + +#include "amdgpu_vcn.h" + +#define MMSCH_VERSION_MAJOR 5 +#define MMSCH_VERSION_MINOR 0 +#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR) + +#define RB_ENABLED (1 << 0) +#define RB4_ENABLED (1 << 1) + +#define MMSCH_VF_ENGINE_STATUS__PASS 0x1 + +#define MMSCH_VF_MAILBOX_RESP__OK 0x1 +#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2 +#define MMSCH_VF_MAILBOX_RESP__FAILED 0x3 +#define MMSCH_VF_MAILBOX_RESP__FAILED_SMALL_CTX_SIZE 0x4 +#define MMSCH_VF_MAILBOX_RESP__UNKNOWN_CMD 0x5 + +enum mmsch_v5_0_command_type { + MMSCH_COMMAND__DIRECT_REG_WRITE = 0, + MMSCH_COMMAND__DIRECT_REG_POLLING = 2, + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3, + MMSCH_COMMAND__INDIRECT_REG_WRITE = 8, + MMSCH_COMMAND__END = 0xf +}; + +struct mmsch_v5_0_table_info { + uint32_t init_status; + uint32_t table_offset; + uint32_t table_size; +}; + +struct mmsch_v5_0_init_header { + uint32_t version; + uint32_t total_size; + struct mmsch_v5_0_table_info vcn0; + struct mmsch_v5_0_table_info mjpegdec0[5]; + struct mmsch_v5_0_table_info mjpegdec1[5]; +}; + +struct mmsch_v5_0_cmd_direct_reg_header { + uint32_t reg_offset : 28; + uint32_t command_type : 4; +}; + +struct mmsch_v5_0_cmd_indirect_reg_header { + uint32_t reg_offset : 20; + uint32_t reg_idx_space : 8; + uint32_t command_type : 4; +}; + +struct mmsch_v5_0_cmd_direct_write { + struct mmsch_v5_0_cmd_direct_reg_header cmd_header; + uint32_t reg_value; +}; + +struct mmsch_v5_0_cmd_direct_read_modify_write { + struct mmsch_v5_0_cmd_direct_reg_header cmd_header; + uint32_t write_data; + uint32_t mask_value; +}; + +struct mmsch_v5_0_cmd_direct_polling { + struct mmsch_v5_0_cmd_direct_reg_header cmd_header; + uint32_t mask_value; + uint32_t wait_value; +}; + +struct mmsch_v5_0_cmd_end { + struct mmsch_v5_0_cmd_direct_reg_header cmd_header; +}; + +struct mmsch_v5_0_cmd_indirect_write { + struct mmsch_v5_0_cmd_indirect_reg_header cmd_header; + uint32_t reg_value; +}; + +#define MMSCH_V5_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \ + size = sizeof(struct mmsch_v5_0_cmd_direct_read_modify_write); \ + size_dw = size / 4; \ + direct_rd_mod_wt.cmd_header.reg_offset = reg; \ + direct_rd_mod_wt.mask_value = mask; \ + direct_rd_mod_wt.write_data = data; \ + memcpy((void *)table_loc, &direct_rd_mod_wt, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#define MMSCH_V5_0_INSERT_DIRECT_WT(reg, value) { \ + size = sizeof(struct mmsch_v5_0_cmd_direct_write); \ + size_dw = size / 4; \ + direct_wt.cmd_header.reg_offset = reg; \ + direct_wt.reg_value = value; \ + memcpy((void *)table_loc, &direct_wt, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#define MMSCH_V5_0_INSERT_DIRECT_POLL(reg, mask, wait) { \ + size = sizeof(struct mmsch_v5_0_cmd_direct_polling); \ + size_dw = size / 4; \ + direct_poll.cmd_header.reg_offset = reg; \ + direct_poll.mask_value = mask; \ + direct_poll.wait_value = wait; \ + memcpy((void *)table_loc, &direct_poll, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#define MMSCH_V5_0_INSERT_END() { \ + size = sizeof(struct mmsch_v5_0_cmd_end); \ + size_dw = size / 4; \ + memcpy((void *)table_loc, &end, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#endif + diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index f5411b798e11..48101a34e049 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -274,6 +274,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) { struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + struct amdgpu_reset_context reset_context = { 0 }; amdgpu_virt_fini_data_exchange(adev); @@ -281,8 +282,6 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) if (amdgpu_device_should_recover_gpu(adev) && (!amdgpu_device_has_job_running(adev) || adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) { - struct amdgpu_reset_context reset_context; - memset(&reset_context, 0, sizeof(reset_context)); reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; @@ -293,6 +292,19 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) } } +static void xgpu_ai_mailbox_bad_pages_work(struct work_struct *work) +{ + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, bad_pages_work); + struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + + if (down_read_trylock(&adev->reset_domain->sem)) { + amdgpu_virt_fini_data_exchange(adev); + amdgpu_virt_request_bad_pages(adev); + amdgpu_virt_init_data_exchange(adev); + up_read(&adev->reset_domain->sem); + } +} + static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, @@ -312,26 +324,42 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { enum idh_event event = xgpu_ai_mailbox_peek_msg(adev); + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); switch (event) { - case IDH_FLR_NOTIFICATION: + case IDH_RAS_BAD_PAGES_NOTIFICATION: + xgpu_ai_mailbox_send_ack(adev); + if (amdgpu_sriov_runtime(adev)) + schedule_work(&adev->virt.bad_pages_work); + break; + case IDH_UNRECOV_ERR_NOTIFICATION: + xgpu_ai_mailbox_send_ack(adev); + ras->is_rma = true; + dev_err(adev->dev, "VF is in an unrecoverable state. Runtime Services are halted.\n"); if (amdgpu_sriov_runtime(adev)) WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, - &adev->virt.flr_work), - "Failed to queue work! at %s", - __func__); + &adev->virt.flr_work), + "Failed to queue work! at %s", + __func__); break; - case IDH_QUERY_ALIVE: - xgpu_ai_mailbox_send_ack(adev); - break; - /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore - * it byfar since that polling thread will handle it, - * other msg like flr complete is not handled here. - */ - case IDH_CLR_MSG_BUF: - case IDH_FLR_NOTIFICATION_CMPL: - case IDH_READY_TO_ACCESS_GPU: - default: + case IDH_FLR_NOTIFICATION: + if (amdgpu_sriov_runtime(adev)) + WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->virt.flr_work), + "Failed to queue work! at %s", + __func__); + break; + case IDH_QUERY_ALIVE: + xgpu_ai_mailbox_send_ack(adev); + break; + /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore + * it byfar since that polling thread will handle it, + * other msg like flr complete is not handled here. + */ + case IDH_CLR_MSG_BUF: + case IDH_FLR_NOTIFICATION_CMPL: + case IDH_READY_TO_ACCESS_GPU: + default: break; } @@ -387,6 +415,7 @@ int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev) } INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work); + INIT_WORK(&adev->virt.bad_pages_work, xgpu_ai_mailbox_bad_pages_work); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index ed57cbc150af..874b9f8f9804 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -40,6 +40,7 @@ enum idh_request { IDH_LOG_VF_ERROR = 200, IDH_READY_TO_RESET = 201, IDH_RAS_POISON = 202, + IDH_REQ_RAS_BAD_PAGES = 205, }; enum idh_event { @@ -54,6 +55,9 @@ enum idh_event { IDH_RAS_POISON_READY, IDH_PF_SOFT_FLR_NOTIFICATION, IDH_RAS_ERROR_DETECTED, + IDH_RAS_BAD_PAGES_READY = 15, + IDH_RAS_BAD_PAGES_NOTIFICATION = 16, + IDH_UNRECOV_ERR_NOTIFICATION = 17, IDH_TEXT_MESSAGE = 255, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 5aadf24cb202..f6d8597452ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -67,6 +67,8 @@ static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev, reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0); if (reg == IDH_FAIL) r = -EINVAL; + if (reg == IDH_UNRECOV_ERR_NOTIFICATION) + r = -ENODEV; else if (reg != event) return -ENOENT; @@ -103,6 +105,7 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event) { int r; uint64_t timeout, now; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); now = (uint64_t)ktime_to_ms(ktime_get()); timeout = now + NV_MAILBOX_POLL_MSG_TIMEDOUT; @@ -110,8 +113,16 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event) do { r = xgpu_nv_mailbox_rcv_msg(adev, event); if (!r) { - dev_dbg(adev->dev, "rcv_msg 0x%x after %llu ms\n", event, NV_MAILBOX_POLL_MSG_TIMEDOUT - timeout + now); + dev_dbg(adev->dev, "rcv_msg 0x%x after %llu ms\n", + event, NV_MAILBOX_POLL_MSG_TIMEDOUT - timeout + now); return 0; + } else if (r == -ENODEV) { + if (!amdgpu_ras_is_rma(adev)) { + ras->is_rma = true; + dev_err(adev->dev, "VF is in an unrecoverable state. " + "Runtime Services are halted.\n"); + } + return r; } msleep(10); @@ -166,6 +177,10 @@ static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev, enum idh_event event = -1; send_request: + + if (amdgpu_ras_is_rma(adev)) + return -ENODEV; + xgpu_nv_mailbox_trans_msg(adev, req, data1, data2, data3); switch (req) { @@ -187,6 +202,9 @@ send_request: case IDH_REQ_RAS_CPER_DUMP: event = IDH_RAS_CPER_DUMP_READY; break; + case IDH_REQ_RAS_BAD_PAGES: + event = IDH_RAS_BAD_PAGES_READY; + break; default: break; } @@ -320,6 +338,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) { struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + struct amdgpu_reset_context reset_context = { 0 }; amdgpu_virt_fini_data_exchange(adev); @@ -330,8 +349,6 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT || adev->compute_timeout == MAX_SCHEDULE_TIMEOUT || adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) { - struct amdgpu_reset_context reset_context; - memset(&reset_context, 0, sizeof(reset_context)); reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; @@ -342,6 +359,19 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) } } +static void xgpu_nv_mailbox_bad_pages_work(struct work_struct *work) +{ + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, bad_pages_work); + struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + + if (down_read_trylock(&adev->reset_domain->sem)) { + amdgpu_virt_fini_data_exchange(adev); + amdgpu_virt_request_bad_pages(adev); + amdgpu_virt_init_data_exchange(adev); + up_read(&adev->reset_domain->sem); + } +} + static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, @@ -364,8 +394,27 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { enum idh_event event = xgpu_nv_mailbox_peek_msg(adev); + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); switch (event) { + case IDH_RAS_BAD_PAGES_NOTIFICATION: + xgpu_nv_mailbox_send_ack(adev); + if (amdgpu_sriov_runtime(adev)) + schedule_work(&adev->virt.bad_pages_work); + break; + case IDH_UNRECOV_ERR_NOTIFICATION: + xgpu_nv_mailbox_send_ack(adev); + if (!amdgpu_ras_is_rma(adev)) { + ras->is_rma = true; + dev_err(adev->dev, "VF is in an unrecoverable state. Runtime Services are halted.\n"); + } + + if (amdgpu_sriov_runtime(adev)) + WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->virt.flr_work), + "Failed to queue work! at %s", + __func__); + break; case IDH_FLR_NOTIFICATION: if (amdgpu_sriov_runtime(adev)) WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, @@ -436,6 +485,7 @@ int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev) } INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work); + INIT_WORK(&adev->virt.bad_pages_work, xgpu_nv_mailbox_bad_pages_work); return 0; } @@ -480,6 +530,11 @@ static int xgpu_nv_req_ras_cper_dump(struct amdgpu_device *adev, u64 vf_rptr) adev, IDH_REQ_RAS_CPER_DUMP, vf_rptr_hi, vf_rptr_lo, 0); } +static int xgpu_nv_req_ras_bad_pages(struct amdgpu_device *adev) +{ + return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_BAD_PAGES); +} + const struct amdgpu_virt_ops xgpu_nv_virt_ops = { .req_full_gpu = xgpu_nv_request_full_gpu_access, .rel_full_gpu = xgpu_nv_release_full_gpu_access, @@ -492,4 +547,5 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = { .rcvd_ras_intr = xgpu_nv_rcvd_ras_intr, .req_ras_err_count = xgpu_nv_req_ras_err_count, .req_ras_cper_dump = xgpu_nv_req_ras_cper_dump, + .req_bad_pages = xgpu_nv_req_ras_bad_pages, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index 72c9fceb9d79..5808689562cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -42,6 +42,7 @@ enum idh_request { IDH_RAS_POISON = 202, IDH_REQ_RAS_ERROR_COUNT = 203, IDH_REQ_RAS_CPER_DUMP = 204, + IDH_REQ_RAS_BAD_PAGES = 205, }; enum idh_event { @@ -58,6 +59,9 @@ enum idh_event { IDH_RAS_ERROR_DETECTED, IDH_RAS_ERROR_COUNT_READY = 11, IDH_RAS_CPER_DUMP_READY = 14, + IDH_RAS_BAD_PAGES_READY = 15, + IDH_RAS_BAD_PAGES_NOTIFICATION = 16, + IDH_UNRECOV_ERR_NOTIFICATION = 17, IDH_TEXT_MESSAGE = 255, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 2ece3ae75ec1..bed5ef4d8788 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -360,7 +360,7 @@ static void nbio_v7_11_get_clockgating_state(struct amdgpu_device *adev, *flags |= AMD_CG_SUPPORT_BIF_LS; } -#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) +#define MMIO_REG_HOLE_OFFSET 0x44000 static void nbio_v7_11_set_reg_remap(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index f23cb79110d6..1c22bc11c1f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -31,9 +31,6 @@ #define NPS_MODE_MASK 0x000000FFL -/* Core 0 Port 0 counter */ -#define smnPCIEP_NAK_COUNTER 0x1A340218 - static void nbio_v7_9_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, @@ -177,8 +174,12 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do { u32 doorbell_range = 0, doorbell_ctrl = 0; u32 aid_id = instance; + u32 range_size; if (use_doorbell) { + range_size = (amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(9, 5, 0)) ? + 0xb : 0x9; doorbell_range = REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0, BIF_DOORBELL0_RANGE_OFFSET_ENTRY, @@ -186,7 +187,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do doorbell_range = REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0, BIF_DOORBELL0_RANGE_SIZE_ENTRY, - 0x9); + range_size); if (aid_id) doorbell_range = REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0, @@ -204,7 +205,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x4); doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, - S2A_DOORBELL_PORT1_RANGE_SIZE, 0x9); + S2A_DOORBELL_PORT1_RANGE_SIZE, range_size); doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0x4); @@ -463,22 +464,6 @@ static void nbio_v7_9_init_registers(struct amdgpu_device *adev) } } -static u64 nbio_v7_9_get_pcie_replay_count(struct amdgpu_device *adev) -{ - u32 val, nak_r, nak_g; - - if (adev->flags & AMD_IS_APU) - return 0; - - /* Get the number of NAKs received and generated */ - val = RREG32_PCIE(smnPCIEP_NAK_COUNTER); - nak_r = val & 0xFFFF; - nak_g = val >> 16; - - /* Add the total number of NAKs, i.e the number of replays */ - return (nak_r + nak_g); -} - #define MMIO_REG_HOLE_OFFSET 0x1A000 static void nbio_v7_9_set_reg_remap(struct amdgpu_device *adev) @@ -520,7 +505,6 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode, .is_nps_switch_requested = nbio_v7_9_is_nps_switch_requested, .init_registers = nbio_v7_9_init_registers, - .get_pcie_replay_count = nbio_v7_9_get_pcie_replay_count, .set_reg_remap = nbio_v7_9_set_reg_remap, }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index f4a91b126c73..73f87131a7e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -106,7 +106,9 @@ enum psp_gfx_cmd_id /*IDs of performance monitoring/profiling*/ GFX_CMD_ID_CONFIG_SQ_PERFMON = 0x00000046, /* Config CGTT_SQ_CLK_CTRL */ /* Dynamic memory partitioninig (NPS mode change)*/ - GFX_CMD_ID_FB_NPS_MODE = 0x00000048, /* Configure memory partitioning mode */ + GFX_CMD_ID_FB_NPS_MODE = 0x00000048, /* Configure memory partitioning mode */ + GFX_CMD_ID_FB_FW_RESERV_ADDR = 0x00000050, /* Query FW reservation addr */ + GFX_CMD_ID_FB_FW_RESERV_EXT_ADDR = 0x00000051, /* Query FW reservation extended addr */ }; /* PSP boot config sub-commands */ @@ -404,11 +406,19 @@ struct psp_gfx_uresp_bootcfg { uint32_t boot_cfg; /* boot config data */ }; +/* Command-specific response for fw reserve info */ +struct psp_gfx_uresp_fw_reserve_info { + uint32_t reserve_base_address_hi; + uint32_t reserve_base_address_lo; + uint32_t reserve_size; +}; + /* Union of command-specific responses for GPCOM ring. */ union psp_gfx_uresp { struct psp_gfx_uresp_reserved reserved; struct psp_gfx_uresp_bootcfg boot_cfg; struct psp_gfx_uresp_fwar_db_info fwar_db_info; + struct psp_gfx_uresp_fw_reserve_info fw_reserve_info; }; /* Structure of GFX Response buffer. diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 145186a1e48f..3584b8c18fd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -94,7 +94,7 @@ static int psp_v10_0_ring_create(struct psp_context *psp, /* Wait for response flag (bit 31) in C2PMSG_64 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); return ret; } @@ -115,7 +115,7 @@ static int psp_v10_0_ring_stop(struct psp_context *psp, /* Wait for response flag (bit 31) in C2PMSG_64 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index bb5dfc410a66..6cc05d36e359 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -152,11 +152,9 @@ static int psp_v11_0_wait_for_bootloader(struct psp_context *psp) for (retry_loop = 0; retry_loop < 10; retry_loop++) { /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ - ret = psp_wait_for(psp, - SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, - 0x80000000, - false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE); if (ret == 0) return 0; @@ -252,8 +250,8 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) /* there might be handshake issue with hardware which needs delay */ mdelay(20); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), - RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), - 0, true); + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0, + PSP_WAITREG_CHANGED); return ret; } @@ -277,11 +275,13 @@ static int psp_v11_0_ring_stop(struct psp_context *psp, /* Wait for response flag (bit 31) */ if (amdgpu_sriov_vf(adev)) - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); else - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); return ret; } @@ -317,13 +317,15 @@ static int psp_v11_0_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_101 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Wait for sOS ready for ring creation */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0); if (ret) { DRM_ERROR("Failed to wait for sOS ready for ring creation\n"); return ret; @@ -347,8 +349,9 @@ static int psp_v11_0_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -381,7 +384,8 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp) offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64); - ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for(psp, offset, MBOX_TOS_READY_FLAG, + MBOX_TOS_READY_MASK, 0); if (ret) { DRM_INFO("psp is not working correctly before mode1 reset!\n"); @@ -395,7 +399,8 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp) offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); - ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false); + ret = psp_wait_for(psp, offset, MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, + 0); if (ret) { DRM_INFO("psp mode 1 reset failed!\n"); @@ -421,8 +426,9 @@ static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg) max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout; for (i = 0; i < max_wait; i++) { - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE); if (ret == 0) break; } @@ -533,7 +539,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); } else { @@ -601,7 +607,7 @@ static int psp_v11_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20)); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -638,7 +644,7 @@ static int psp_v11_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver) WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (!ret) *fw_ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c index 5697760a819b..93787a90d598 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c @@ -41,8 +41,9 @@ static int psp_v11_0_8_ring_stop(struct psp_context *psp, /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Write the ring destroy command*/ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, @@ -50,8 +51,9 @@ static int psp_v11_0_8_ring_stop(struct psp_context *psp, /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -87,13 +89,15 @@ static int psp_v11_0_8_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_101 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Wait for sOS ready for ring creation */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0); if (ret) { DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); return ret; @@ -117,8 +121,9 @@ static int psp_v11_0_8_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index fcd708eae75c..4c6450d62299 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -34,9 +34,6 @@ #include "sdma0/sdma0_4_0_offset.h" #include "nbio/nbio_7_4_offset.h" -#include "oss/osssys_4_0_offset.h" -#include "oss/osssys_4_0_sh_mask.h" - MODULE_FIRMWARE("amdgpu/renoir_asd.bin"); MODULE_FIRMWARE("amdgpu/renoir_ta.bin"); MODULE_FIRMWARE("amdgpu/green_sardine_asd.bin"); @@ -85,7 +82,7 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp) /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -99,11 +96,8 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp) WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); return ret; } @@ -124,7 +118,7 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp) /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -138,46 +132,13 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp) WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), - RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), - 0, true); + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0, + PSP_WAITREG_CHANGED); return ret; } -static void psp_v12_0_reroute_ih(struct psp_context *psp) -{ - struct amdgpu_device *adev = psp->adev; - uint32_t tmp; - - /* Change IH ring for VMC */ - tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); - - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); - - mdelay(20); - psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); - - /* Change IH ring for UMC */ - tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); - - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); - - mdelay(20); - psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); -} - static int psp_v12_0_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -186,49 +147,23 @@ static int psp_v12_0_ring_create(struct psp_context *psp, struct psp_ring *ring = &psp->km_ring; struct amdgpu_device *adev = psp->adev; - psp_v12_0_reroute_ih(psp); - - if (amdgpu_sriov_vf(psp->adev)) { - /* Write low address of the ring to C2PMSG_102 */ - psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); - /* Write high address of the ring to C2PMSG_103 */ - psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); - - /* Write the ring initialization command to C2PMSG_101 */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, - GFX_CTRL_CMD_ID_INIT_GPCOM_RING); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) in C2PMSG_101 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x8000FFFF, false); - - } else { - /* Write low address of the ring to C2PMSG_69 */ - psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); - /* Write high address of the ring to C2PMSG_70 */ - psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); - /* Write size of ring to C2PMSG_71 */ - psp_ring_reg = ring->ring_size; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); - /* Write the ring initialization command to C2PMSG_64 */ - psp_ring_reg = ring_type; - psp_ring_reg = psp_ring_reg << 16; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); - } + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); return ret; } @@ -247,16 +182,15 @@ static int psp_v12_0_ring_stop(struct psp_context *psp, WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_DESTROY_RINGS); - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - /* Wait for response flag (bit 31) */ if (amdgpu_sriov_vf(adev)) - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); else - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); return ret; } @@ -287,7 +221,8 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp) offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64); - ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for(psp, offset, MBOX_TOS_READY_FLAG, + MBOX_TOS_READY_MASK, 0); if (ret) { DRM_INFO("psp is not working correctly before mode1 reset!\n"); @@ -301,7 +236,8 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp) offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); - ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false); + ret = psp_wait_for(psp, offset, MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, + 0); if (ret) { DRM_INFO("psp mode 1 reset failed!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index cc621064610f..af4a7d7c4abd 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -42,7 +42,9 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin"); @@ -71,20 +73,13 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_4_ta.bin"); /* Retry times for vmbx ready wait */ #define PSP_VMBX_POLLING_LIMIT 3000 -/* VBIOS gfl defines */ -#define MBOX_READY_MASK 0x80000000 -#define MBOX_STATUS_MASK 0x0000FFFF -#define MBOX_COMMAND_MASK 0x00FF0000 -#define MBOX_READY_FLAG 0x80000000 -#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2 -#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3 -#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4 - /* memory training timeout define */ #define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000 #define regMP1_PUB_SCRATCH0 0x3b10090 +#define PSP13_BL_STATUS_SIZE 100 + static int psp_v13_0_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -151,6 +146,32 @@ static bool psp_v13_0_is_sos_alive(struct psp_context *psp) return sol_reg != 0x0; } +static void psp_v13_0_bootloader_print_status(struct psp_context *psp, + const char *msg) +{ + struct amdgpu_device *adev = psp->adev; + u32 bl_status_reg; + char bl_status_msg[PSP13_BL_STATUS_SIZE]; + int i, at; + + if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) { + at = 0; + for_each_inst(i, adev->aid_mask) { + bl_status_reg = + (SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_92) + << 2) + + adev->asic_funcs->encode_ext_smn_addressing(i); + at += snprintf(bl_status_msg + at, + PSP13_BL_STATUS_SIZE - at, + " status(%02i): 0x%08x", i, + RREG32_PCIE_EXT(bl_status_reg)); + } + dev_info(adev->dev, "%s - %s", msg, bl_status_msg); + } +} + static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -161,7 +182,7 @@ static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp) ready having bit 31 of C2PMSG_33 set to 1 */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_33), - 0x80000000, 0xffffffff, false); + 0x80000000, 0xffffffff, PSP_WAITREG_NOVERBOSE); if (ret == 0) break; @@ -192,10 +213,13 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) for (retry_loop = 0; retry_loop < retry_cnt; retry_loop++) { ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), - 0x80000000, 0xffffffff, false); + 0x80000000, 0xffffffff, PSP_WAITREG_NOVERBOSE); if (ret == 0) return 0; + if (retry_loop && !(retry_loop % 10)) + psp_v13_0_bootloader_print_status( + psp, "Waiting for bootloader completion"); } return ret; @@ -338,8 +362,8 @@ static int psp_v13_0_bootloader_load_sos(struct psp_context *psp) /* there might be handshake issue with hardware which needs delay */ mdelay(20); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_81), - RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), - 0, true); + RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), 0, + PSP_WAITREG_CHANGED); if (!ret) psp_v13_0_init_sos_version(psp); @@ -360,8 +384,9 @@ static int psp_v13_0_ring_stop(struct psp_context *psp, /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Write the ring destroy command*/ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64, @@ -369,8 +394,9 @@ static int psp_v13_0_ring_stop(struct psp_context *psp, /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -406,13 +432,15 @@ static int psp_v13_0_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_101 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Wait for sOS ready for ring creation */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), + MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0); if (ret) { DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); return ret; @@ -436,8 +464,9 @@ static int psp_v13_0_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -500,8 +529,9 @@ static int psp_v13_0_memory_training_send_msg(struct psp_context *psp, int msg) max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout; for (i = 0; i < max_wait; i++) { - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE); if (ret == 0) break; } @@ -610,7 +640,7 @@ static int psp_v13_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); } else { @@ -653,7 +683,7 @@ static int psp_v13_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20)); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -690,7 +720,7 @@ static int psp_v13_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver) WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (!ret) *fw_ver = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36); @@ -710,12 +740,14 @@ static int psp_v13_0_exec_spi_cmd(struct psp_context *psp, int cmd) /* Ring the doorbell */ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_73, 1); - if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE) + if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE || + cmd == C2PMSG_CMD_SPI_GET_FLASH_IMAGE) ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT); else - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), - MBOX_READY_FLAG, MBOX_READY_MASK, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, 0); if (ret) { dev_err(adev->dev, "SPI cmd %x timed out, ret = %d", cmd, ret); return ret; @@ -739,7 +771,7 @@ static int psp_v13_0_update_spirom(struct psp_context *psp, /* Confirm PSP is ready to start */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), - MBOX_READY_FLAG, MBOX_READY_MASK, false); + MBOX_READY_FLAG, MBOX_READY_MASK, 0); if (ret) { dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret); return ret; @@ -766,6 +798,37 @@ static int psp_v13_0_update_spirom(struct psp_context *psp, return 0; } +static int psp_v13_0_dump_spirom(struct psp_context *psp, + uint64_t fw_pri_mc_addr) +{ + struct amdgpu_device *adev = psp->adev; + int ret; + + /* Confirm PSP is ready to start */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, 0); + if (ret) { + dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret); + return ret; + } + + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, lower_32_bits(fw_pri_mc_addr)); + + ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_LO); + if (ret) + return ret; + + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, upper_32_bits(fw_pri_mc_addr)); + + ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_HI); + if (ret) + return ret; + + ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_GET_FLASH_IMAGE); + + return ret; +} + static int psp_v13_0_vbflash_status(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -858,6 +921,26 @@ static bool psp_v13_0_is_reload_needed(struct psp_context *psp) return false; } +static int psp_v13_0_reg_program_no_ring(struct psp_context *psp, uint32_t val, + enum psp_reg_prog_id id) +{ + struct amdgpu_device *adev = psp->adev; + int ret = -EOPNOTSUPP; + + /* PSP will broadcast the value to all instances */ + if (amdgpu_sriov_vf(adev)) { + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_GBR_IH_SET); + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, id); + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_103, val); + + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, 0); + } + + return ret; +} + static const struct psp_funcs psp_v13_0_funcs = { .init_microcode = psp_v13_0_init_microcode, .wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state, @@ -879,11 +962,13 @@ static const struct psp_funcs psp_v13_0_funcs = { .load_usbc_pd_fw = psp_v13_0_load_usbc_pd_fw, .read_usbc_pd_fw = psp_v13_0_read_usbc_pd_fw, .update_spirom = psp_v13_0_update_spirom, + .dump_spirom = psp_v13_0_dump_spirom, .vbflash_stat = psp_v13_0_vbflash_status, .fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk, .get_ras_capability = psp_v13_0_get_ras_capability, .is_aux_sos_load_required = psp_v13_0_is_aux_sos_load_required, .is_reload_needed = psp_v13_0_is_reload_needed, + .reg_program_no_ring = psp_v13_0_reg_program_no_ring, }; void psp_v13_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c index eaa5512a21da..5f39a2edcc95 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c @@ -76,11 +76,9 @@ static int psp_v13_0_4_wait_for_bootloader(struct psp_context *psp) for (retry_loop = 0; retry_loop < 10; retry_loop++) { /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ - ret = psp_wait_for(psp, - SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), - 0x80000000, - 0x80000000, - false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE); if (ret == 0) return 0; @@ -185,8 +183,8 @@ static int psp_v13_0_4_bootloader_load_sos(struct psp_context *psp) /* there might be handshake issue with hardware which needs delay */ mdelay(20); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_81), - RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), - 0, true); + RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), 0, + PSP_WAITREG_CHANGED); return ret; } @@ -204,8 +202,9 @@ static int psp_v13_0_4_ring_stop(struct psp_context *psp, /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Write the ring destroy command*/ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64, @@ -213,8 +212,9 @@ static int psp_v13_0_4_ring_stop(struct psp_context *psp, /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -250,13 +250,15 @@ static int psp_v13_0_4_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_101 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Wait for sOS ready for ring creation */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), + MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0); if (ret) { DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); return ret; @@ -280,8 +282,9 @@ static int psp_v13_0_4_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c index 7c49c3f3c388..38dfc5c19f2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -34,7 +34,9 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_2_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_5_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_5_ta.bin"); @@ -109,11 +111,9 @@ static int psp_v14_0_wait_for_bootloader(struct psp_context *psp) for (retry_loop = 0; retry_loop < 10; retry_loop++) { /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ - ret = psp_wait_for(psp, - SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), - 0x80000000, - 0x80000000, - false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE); if (ret == 0) return 0; @@ -228,9 +228,10 @@ static int psp_v14_0_bootloader_load_sos(struct psp_context *psp) /* there might be handshake issue with hardware which needs delay */ mdelay(20); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_81), - RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81), - 0, true); + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_81), + RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81), 0, + PSP_WAITREG_CHANGED); return ret; } @@ -248,8 +249,9 @@ static int psp_v14_0_ring_stop(struct psp_context *psp, /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Write the ring destroy command*/ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, @@ -257,8 +259,9 @@ static int psp_v14_0_ring_stop(struct psp_context *psp, /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -294,13 +297,15 @@ static int psp_v14_0_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_101 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Wait for sOS ready for ring creation */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0); if (ret) { DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); return ret; @@ -324,8 +329,9 @@ static int psp_v14_0_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -388,8 +394,9 @@ static int psp_v14_0_memory_training_send_msg(struct psp_context *psp, int msg) max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout; for (i = 0; i < max_wait; i++) { - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE); if (ret == 0) break; } @@ -498,7 +505,7 @@ static int psp_v14_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); } else { @@ -540,8 +547,9 @@ static int psp_v14_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc */ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20)); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -577,8 +585,9 @@ static int psp_v14_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver) WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, 0); if (!ret) *fw_ver = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36); @@ -602,11 +611,13 @@ static int psp_v14_0_exec_spi_cmd(struct psp_context *psp, int cmd) ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT); else - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), - MBOX_READY_FLAG, MBOX_READY_MASK, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, 0); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), - MBOX_READY_FLAG, MBOX_READY_MASK, false); + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, 0); if (ret) { dev_err(adev->dev, "SPI cmd %x timed out, ret = %d", cmd, ret); return ret; @@ -629,8 +640,9 @@ static int psp_v14_0_update_spirom(struct psp_context *psp, int ret; /* Confirm PSP is ready to start */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), - MBOX_READY_FLAG, MBOX_READY_MASK, false); + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, 0); if (ret) { dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index f6b75e3e47ff..833830bc3e2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -91,7 +91,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -109,7 +109,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) mdelay(20); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); return ret; } @@ -130,7 +130,7 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -147,8 +147,8 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) /* there might be handshake issue with hardware which needs delay */ mdelay(20); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), - RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), - 0, true); + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0, + PSP_WAITREG_CHANGED); return ret; } @@ -168,7 +168,7 @@ static void psp_v3_1_reroute_ih(struct psp_context *psp) mdelay(20); psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + 0x80000000, 0x8000FFFF, 0); /* Change IH ring for UMC */ tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b); @@ -180,7 +180,7 @@ static void psp_v3_1_reroute_ih(struct psp_context *psp) mdelay(20); psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + 0x80000000, 0x8000FFFF, 0); } static int psp_v3_1_ring_create(struct psp_context *psp, @@ -217,9 +217,9 @@ static int psp_v3_1_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_101 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, - mmMP0_SMN_C2PMSG_101), 0x80000000, - 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, 0); } else { /* Write low address of the ring to C2PMSG_69 */ @@ -240,10 +240,9 @@ static int psp_v3_1_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, - mmMP0_SMN_C2PMSG_64), 0x80000000, - 0x8000FFFF, false); - + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, 0); } return ret; } @@ -267,11 +266,13 @@ static int psp_v3_1_ring_stop(struct psp_context *psp, /* Wait for response flag (bit 31) */ if (amdgpu_sriov_vf(adev)) - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, 0); else - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, 0); return ret; } @@ -311,7 +312,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp) offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64); - ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, 0); if (ret) { DRM_INFO("psp is not working correctly before mode1 reset!\n"); @@ -325,7 +326,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp) offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); - ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false); + ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, 0); if (ret) { DRM_INFO("psp mode 1 reset failed!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 33ed2b158fcd..f38004e6064e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2187,7 +2187,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev, dev_dbg_ratelimited(adev->dev, " for process %s pid %d thread %s pid %d\n", task_info->process_name, task_info->tgid, - task_info->task_name, task_info->pid); + task_info->task.comm, task_info->task.pid); amdgpu_vm_put_task_info(task_info); } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index dc94d58d33a6..36b1ca73c2ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -31,7 +31,6 @@ #include "amdgpu_ucode.h" #include "amdgpu_trace.h" #include "amdgpu_reset.h" -#include "gc/gc_9_0_sh_mask.h" #include "sdma/sdma_4_4_2_offset.h" #include "sdma/sdma_4_4_2_sh_mask.h" @@ -46,6 +45,7 @@ #include "amdgpu_ras.h" MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin"); +MODULE_FIRMWARE("amdgpu/sdma_4_4_4.bin"); MODULE_FIRMWARE("amdgpu/sdma_4_4_5.bin"); static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_4_2[] = { @@ -107,8 +107,11 @@ static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev); -static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev); +static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring); +static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring); +static int sdma_v4_4_2_soft_reset_engine(struct amdgpu_device *adev, + u32 instance_id); static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 offset) @@ -490,7 +493,7 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, { struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 doorbell_offset, doorbell; - u32 rb_cntl, ib_cntl; + u32 rb_cntl, ib_cntl, sdma_cntl; int i; for_each_inst(i, inst_mask) { @@ -502,6 +505,9 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl); + sdma_cntl = RREG32_SDMA(i, regSDMA_CNTL); + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, UTC_L1_ENABLE, 0); + WREG32_SDMA(i, regSDMA_CNTL, sdma_cntl); if (sdma[i]->use_doorbell) { doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); @@ -995,6 +1001,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, /* set utc l1 enable flag always to 1 */ temp = RREG32_SDMA(i, regSDMA_CNTL); temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1); + WREG32_SDMA(i, regSDMA_CNTL, temp); if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) { /* enable context empty interrupt during initialization */ @@ -1291,71 +1298,21 @@ static void sdma_v4_4_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring) seq, 0xffffffff, 4); } -/* - * sdma_v4_4_2_get_invalidate_req - Construct the VM_INVALIDATE_ENG0_REQ register value - * @vmid: The VMID to invalidate - * @flush_type: The type of flush (0 = legacy, 1 = lightweight, 2 = heavyweight) - * - * This function constructs the VM_INVALIDATE_ENG0_REQ register value for the specified VMID - * and flush type. It ensures that all relevant page table cache levels (L1 PTEs, L2 PTEs, and - * L2 PDEs) are invalidated. - */ -static uint32_t sdma_v4_4_2_get_invalidate_req(unsigned int vmid, - uint32_t flush_type) -{ - u32 req = 0; - - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, - PER_VMID_INVALIDATE_REQ, 1 << vmid); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, - CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); - return req; -} - -/* - * sdma_v4_4_2_ring_emit_vm_flush - Emit VM flush commands for SDMA - * @ring: The SDMA ring - * @vmid: The VMID to flush - * @pd_addr: The page directory address +/** + * sdma_v4_4_2_ring_emit_vm_flush - vm flush using sDMA + * + * @ring: amdgpu_ring pointer + * @vmid: vmid number to use + * @pd_addr: address * - * This function emits the necessary register writes and waits to perform a VM flush for the - * specified VMID. It updates the PTB address registers and issues a VM invalidation request - * using the specified VM invalidation engine. + * Update the page table base and flush the VM TLB + * using sDMA. */ static void sdma_v4_4_2_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vmid, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { - struct amdgpu_device *adev = ring->adev; - uint32_t req = sdma_v4_4_2_get_invalidate_req(vmid, 0); - unsigned int eng = ring->vm_inv_eng; - struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub]; - - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + - (hub->ctx_addr_distance * vmid), - lower_32_bits(pd_addr)); - - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + - (hub->ctx_addr_distance * vmid), - upper_32_bits(pd_addr)); - /* - * Construct and emit the VM invalidation packet - */ - amdgpu_ring_write(ring, - SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_VM_INVALIDATE) | - SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATE) | - SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(0x1f) | - SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(0x1f) | - SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(eng)); - amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(req)); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(BIT(vmid))); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); } static void sdma_v4_4_2_ring_emit_wreg(struct amdgpu_ring *ring, @@ -1384,6 +1341,12 @@ static bool sdma_v4_4_2_fw_support_paging_queue(struct amdgpu_device *adev) } } +static const struct amdgpu_sdma_funcs sdma_v4_4_2_sdma_funcs = { + .stop_kernel_queue = &sdma_v4_4_2_stop_queue, + .start_kernel_queue = &sdma_v4_4_2_restore_queue, + .soft_reset_kernel_queue = &sdma_v4_4_2_soft_reset_engine, +}; + static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1402,8 +1365,6 @@ static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block) sdma_v4_4_2_set_vm_pte_funcs(adev); sdma_v4_4_2_set_irq_funcs(adev); sdma_v4_4_2_set_ras_funcs(adev); - sdma_v4_4_2_set_engine_reset_funcs(adev); - return 0; } @@ -1498,6 +1459,7 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block) /* Initialize guilty flags for GFX and PAGE queues */ adev->sdma.instance[i].gfx_guilty = false; adev->sdma.instance[i].page_guilty = false; + adev->sdma.instance[i].funcs = &sdma_v4_4_2_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; @@ -1694,46 +1656,26 @@ static bool sdma_v4_4_2_is_queue_selected(struct amdgpu_device *adev, uint32_t i return (context_status & SDMA_GFX_CONTEXT_STATUS__SELECTED_MASK) != 0; } -static bool sdma_v4_4_2_ring_is_guilty(struct amdgpu_ring *ring) +static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; - uint32_t instance_id = ring->me; - - return sdma_v4_4_2_is_queue_selected(adev, instance_id, false); -} - -static bool sdma_v4_4_2_page_ring_is_guilty(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t instance_id = ring->me; - - if (!adev->sdma.has_page_queue) - return false; - - return sdma_v4_4_2_is_queue_selected(adev, instance_id, true); -} - -static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) -{ - struct amdgpu_device *adev = ring->adev; - u32 id = GET_INST(SDMA0, ring->me); + u32 id = ring->me; int r; - if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - - amdgpu_amdkfd_suspend(adev, false); - r = amdgpu_sdma_reset_engine(adev, id); - amdgpu_amdkfd_resume(adev, false); - + amdgpu_amdkfd_suspend(adev, true); + r = amdgpu_sdma_reset_engine(adev, id, false); + amdgpu_amdkfd_resume(adev, true); return r; } -static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_id) +static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + u32 instance_id = ring->me; u32 inst_mask; uint64_t rptr; - struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring; if (amdgpu_sriov_vf(adev)) return -EINVAL; @@ -1766,11 +1708,11 @@ static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_ return 0; } -static int sdma_v4_4_2_restore_queue(struct amdgpu_device *adev, uint32_t instance_id) +static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring) { - int i; + struct amdgpu_device *adev = ring->adev; u32 inst_mask; - struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring; + int i, r; inst_mask = 1 << ring->me; udelay(50); @@ -1787,17 +1729,18 @@ static int sdma_v4_4_2_restore_queue(struct amdgpu_device *adev, uint32_t instan return -ETIMEDOUT; } - return sdma_v4_4_2_inst_start(adev, inst_mask, true); -} + r = sdma_v4_4_2_inst_start(adev, inst_mask, true); -static struct sdma_on_reset_funcs sdma_v4_4_2_engine_reset_funcs = { - .pre_reset = sdma_v4_4_2_stop_queue, - .post_reset = sdma_v4_4_2_restore_queue, -}; + return r; +} -static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev) +static int sdma_v4_4_2_soft_reset_engine(struct amdgpu_device *adev, + u32 instance_id) { - amdgpu_sdma_register_on_reset_callbacks(adev, &sdma_v4_4_2_engine_reset_funcs); + /* For SDMA 4.x, use the existing DPM interface for backward compatibility + * we need to convert the logical instance ID to physical instance ID before reset. + */ + return amdgpu_dpm_reset_sdma(adev, 1 << GET_INST(SDMA0, instance_id)); } static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev, @@ -1939,7 +1882,7 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev, if (task_info) { dev_dbg_ratelimited(adev->dev, " for process %s pid %d thread %s pid %d\n", task_info->process_name, task_info->tgid, - task_info->task_name, task_info->pid); + task_info->task.comm, task_info->task.pid); amdgpu_vm_put_task_info(task_info); } @@ -2177,7 +2120,8 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { 3 + /* hdp invalidate */ 6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */ /* sdma_v4_4_2_ring_emit_vm_flush */ - 4 + 2 * 3 + + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + 10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */ .emit_ib = sdma_v4_4_2_ring_emit_ib, @@ -2193,7 +2137,6 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, .reset = sdma_v4_4_2_reset_queue, - .is_guilty = sdma_v4_4_2_ring_is_guilty, }; static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = { @@ -2209,7 +2152,8 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = { 3 + /* hdp invalidate */ 6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */ /* sdma_v4_4_2_ring_emit_vm_flush */ - 4 + 2 * 3 + + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + 10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */ .emit_ib = sdma_v4_4_2_ring_emit_ib, @@ -2225,7 +2169,6 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = { .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, .reset = sdma_v4_4_2_reset_queue, - .is_guilty = sdma_v4_4_2_page_ring_is_guilty, }; static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev) @@ -2422,7 +2365,9 @@ static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev) adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; break; case IP_VERSION(9, 5, 0): - /*TODO: enable the queue reset flag until fw supported */ + if ((adev->gfx.mec_fw_version >= 0xf) && amdgpu_dpm_reset_sdma_is_supported(adev)) + adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + break; default: break; } @@ -2595,7 +2540,7 @@ static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_ban 1ULL); break; case ACA_SMU_TYPE_CE: - bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, ACA_REG__MISC0__ERRCNT(misc0)); break; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 0dce59f4f6e2..7dc67a22a7a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -112,6 +112,8 @@ static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev); +static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring); +static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring); static const struct soc15_reg_golden golden_settings_sdma_5[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107), @@ -369,67 +371,36 @@ static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; DRM_DEBUG("Setting write pointer\n"); - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - AMDGPU_MES_PRIORITY_LEVEL_NORMAL); - + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - *wptr_saved = ring->wptr << 2; - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, - ring->wptr << 2); - } + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("Not using doorbell -- " - "mmSDMA%i_GFX_RB_WPTR == 0x%08x " - "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, - ring->me, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, - ring->me, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); - } + DRM_DEBUG("Not using doorbell -- " + "mmSDMA%i_GFX_RB_WPTR == 0x%08x " + "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, + ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, + ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } } @@ -575,11 +546,9 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if (flags & AMDGPU_FENCE_FLAG_INT) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -588,15 +557,15 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se * sdma_v5_0_gfx_stop - stop the gfx async dma engines * * @adev: amdgpu_device pointer - * + * @inst_mask: mask of dma engine instances to be disabled * Stop the gfx async dma ring buffers (NAVI10). */ -static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev) +static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask) { u32 rb_cntl, ib_cntl; int i; - for (i = 0; i < adev->sdma.num_instances; i++) { + for_each_inst(i, inst_mask) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); @@ -688,9 +657,11 @@ static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable) { u32 f32_cntl; int i; + uint32_t inst_mask; + inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); if (!enable) { - sdma_v5_0_gfx_stop(adev); + sdma_v5_0_gfx_stop(adev, 1 << inst_mask); sdma_v5_0_rlc_stop(adev); } @@ -1046,33 +1017,22 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 20); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1085,10 +1045,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -1100,8 +1057,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1124,38 +1080,24 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 256, - AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, + AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | @@ -1183,10 +1125,7 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1197,8 +1136,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1389,6 +1327,36 @@ static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); } +static int sdma_v5_0_soft_reset_engine(struct amdgpu_device *adev, u32 instance_id) +{ + u32 grbm_soft_reset; + u32 tmp; + + grbm_soft_reset = REG_SET_FIELD(0, + GRBM_SOFT_RESET, SOFT_RESET_SDMA0, + 1); + grbm_soft_reset <<= instance_id; + + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + + udelay(50); + + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + return 0; +} + +static const struct amdgpu_sdma_funcs sdma_v5_0_sdma_funcs = { + .stop_kernel_queue = &sdma_v5_0_stop_queue, + .start_kernel_queue = &sdma_v5_0_restore_queue, + .soft_reset_kernel_queue = &sdma_v5_0_soft_reset_engine, +}; + static int sdma_v5_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1431,6 +1399,8 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block) return r; for (i = 0; i < adev->sdma.num_instances; i++) { + mutex_init(&adev->sdma.instance[i].engine_reset_mutex); + adev->sdma.instance[i].funcs = &sdma_v5_0_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; ring->use_doorbell = true; @@ -1458,7 +1428,8 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(5, 0, 0): case IP_VERSION(5, 0, 2): case IP_VERSION(5, 0, 5): - if (adev->sdma.instance[0].fw_version >= 35) + if ((adev->sdma.instance[0].fw_version >= 35) && + !amdgpu_sriov_vf(adev)) adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; break; default: @@ -1569,35 +1540,43 @@ static int sdma_v5_0_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) +static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; - int i, j, r; - u32 rb_cntl, ib_cntl, f32_cntl, freeze, cntl, preempt, soft_reset, stat1_reg; + int r; - if (amdgpu_sriov_vf(adev)) + if (ring->me >= adev->sdma.num_instances) { + dev_err(adev->dev, "sdma instance not found\n"); return -EINVAL; - - for (i = 0; i < adev->sdma.num_instances; i++) { - if (ring == &adev->sdma.instance[i].ring) - break; } - if (i == adev->sdma.num_instances) { - DRM_ERROR("sdma instance not found\n"); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + + amdgpu_amdkfd_suspend(adev, true); + r = amdgpu_sdma_reset_engine(adev, ring->me, true); + amdgpu_amdkfd_resume(adev, true); + if (r) + return r; + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + +static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring) +{ + u32 f32_cntl, freeze, cntl, stat1_reg; + struct amdgpu_device *adev = ring->adev; + int i, j, r = 0; + + if (amdgpu_sriov_vf(adev)) return -EINVAL; - } + i = ring->me; amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* stop queue */ - ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); - - rb_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + sdma_v5_0_gfx_stop(adev, 1 << i); /* engine stop SDMA1_F32_CNTL.HALT to 1 and SDMAx_FREEZE freeze bit to 1 */ freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE)); @@ -1628,31 +1607,27 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0); WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl); +err0: + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; +} - /* soft reset SDMA_GFX_PREEMPT.IB_PREEMPT = 0 mmGRBM_SOFT_RESET.SOFT_RESET_SDMA0/1 = 1 */ - preempt = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT)); - preempt = REG_SET_FIELD(preempt, SDMA0_GFX_PREEMPT, IB_PREEMPT, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT), preempt); - - soft_reset = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - soft_reset |= 1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i; - - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); - - udelay(50); - - soft_reset &= ~(1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i); - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); +static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 inst_id = ring->me; + u32 freeze; + int r; + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* unfreeze*/ - freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE)); + freeze = RREG32(sdma_v5_0_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE)); freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze); - - r = sdma_v5_0_gfx_resume_instance(adev, i, true); + WREG32(sdma_v5_0_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE), freeze); -err0: + r = sdma_v5_0_gfx_resume_instance(adev, inst_id, true); amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 2b39a03ff0c1..3bd44c24f692 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -113,6 +113,8 @@ static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev); +static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring); +static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring); static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) { @@ -394,11 +396,9 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if ((flags & AMDGPU_FENCE_FLAG_INT)) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -407,15 +407,15 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se * sdma_v5_2_gfx_stop - stop the gfx async dma engines * * @adev: amdgpu_device pointer - * + * @inst_mask: mask of dma engine instances to be disabled * Stop the gfx async dma ring buffers. */ -static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev) +static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask) { u32 rb_cntl, ib_cntl; int i; - for (i = 0; i < adev->sdma.num_instances; i++) { + for_each_inst(i, inst_mask) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); @@ -506,9 +506,11 @@ static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable) { u32 f32_cntl; int i; + uint32_t inst_mask; + inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); if (!enable) { - sdma_v5_2_gfx_stop(adev); + sdma_v5_2_gfx_stop(adev, inst_mask); sdma_v5_2_rlc_stop(adev); } @@ -761,37 +763,49 @@ static int sdma_v5_2_load_microcode(struct amdgpu_device *adev) return 0; } -static int sdma_v5_2_soft_reset(struct amdgpu_ip_block *ip_block) +static int sdma_v5_2_soft_reset_engine(struct amdgpu_device *adev, u32 instance_id) { - struct amdgpu_device *adev = ip_block->adev; u32 grbm_soft_reset; u32 tmp; - int i; - for (i = 0; i < adev->sdma.num_instances; i++) { - grbm_soft_reset = REG_SET_FIELD(0, - GRBM_SOFT_RESET, SOFT_RESET_SDMA0, - 1); - grbm_soft_reset <<= i; + grbm_soft_reset = REG_SET_FIELD(0, + GRBM_SOFT_RESET, SOFT_RESET_SDMA0, + 1); + grbm_soft_reset <<= instance_id; - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - udelay(50); + udelay(50); - tmp &= ~grbm_soft_reset; - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + return 0; +} +static int sdma_v5_2_soft_reset(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + sdma_v5_2_soft_reset_engine(adev, i); udelay(50); } return 0; } +static const struct amdgpu_sdma_funcs sdma_v5_2_sdma_funcs = { + .stop_kernel_queue = &sdma_v5_2_stop_queue, + .start_kernel_queue = &sdma_v5_2_restore_queue, + .soft_reset_kernel_queue = &sdma_v5_2_soft_reset_engine, +}; + /** * sdma_v5_2_start - setup and start the async dma engines * @@ -903,33 +917,22 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 20); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -942,10 +945,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -957,8 +957,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -981,37 +980,23 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | @@ -1039,10 +1024,7 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1053,8 +1035,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1337,6 +1318,8 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block) } for (i = 0; i < adev->sdma.num_instances; i++) { + mutex_init(&adev->sdma.instance[i].engine_reset_mutex); + adev->sdma.instance[i].funcs = &sdma_v5_2_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; ring->use_doorbell = true; @@ -1364,11 +1347,13 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(5, 2, 2): case IP_VERSION(5, 2, 3): case IP_VERSION(5, 2, 4): - if (adev->sdma.instance[0].fw_version >= 76) + if ((adev->sdma.instance[0].fw_version >= 76) && + !amdgpu_sriov_vf(adev)) adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; break; case IP_VERSION(5, 2, 5): - if (adev->sdma.instance[0].fw_version >= 34) + if ((adev->sdma.instance[0].fw_version >= 34) && + !amdgpu_sriov_vf(adev)) adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; break; default: @@ -1469,35 +1454,43 @@ static int sdma_v5_2_wait_for_idle(struct amdgpu_ip_block *ip_block) return -ETIMEDOUT; } -static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) +static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; - int i, j, r; - u32 rb_cntl, ib_cntl, f32_cntl, freeze, cntl, preempt, soft_reset, stat1_reg; + int r; - if (amdgpu_sriov_vf(adev)) + if (ring->me >= adev->sdma.num_instances) { + dev_err(adev->dev, "sdma instance not found\n"); return -EINVAL; - - for (i = 0; i < adev->sdma.num_instances; i++) { - if (ring == &adev->sdma.instance[i].ring) - break; } - if (i == adev->sdma.num_instances) { - DRM_ERROR("sdma instance not found\n"); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + + amdgpu_amdkfd_suspend(adev, true); + r = amdgpu_sdma_reset_engine(adev, ring->me, true); + amdgpu_amdkfd_resume(adev, true); + if (r) + return r; + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + +static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring) +{ + u32 f32_cntl, freeze, cntl, stat1_reg; + struct amdgpu_device *adev = ring->adev; + int i, j, r = 0; + + if (amdgpu_sriov_vf(adev)) return -EINVAL; - } + i = ring->me; amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* stop queue */ - ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); - - rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + sdma_v5_2_gfx_stop(adev, 1 << i); /*engine stop SDMA1_F32_CNTL.HALT to 1 and SDMAx_FREEZE freeze bit to 1 */ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE)); @@ -1530,32 +1523,28 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0); WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl); - /* soft reset SDMA_GFX_PREEMPT.IB_PREEMPT = 0 mmGRBM_SOFT_RESET.SOFT_RESET_SDMA0/1 = 1 */ - preempt = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT)); - preempt = REG_SET_FIELD(preempt, SDMA0_GFX_PREEMPT, IB_PREEMPT, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT), preempt); - - soft_reset = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - soft_reset |= 1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i; - - - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); - - udelay(50); - - soft_reset &= ~(1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i); +err0: + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; +} - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); +static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 inst_id = ring->me; + u32 freeze; + int r; + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* unfreeze and unhalt */ - freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE)); + freeze = RREG32(sdma_v5_2_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE)); freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze); + WREG32(sdma_v5_2_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE), freeze); - r = sdma_v5_2_gfx_resume_instance(adev, i, true); + r = sdma_v5_2_gfx_resume_instance(adev, inst_id, true); -err0: amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index c214c3d2149b..e6d8eddda2bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -43,6 +43,8 @@ #include "sdma_common.h" #include "sdma_v6_0.h" #include "v11_structs.h" +#include "mes_userqueue.h" +#include "amdgpu_userq_fence.h" MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin"); @@ -376,11 +378,9 @@ static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if (flags & AMDGPU_FENCE_FLAG_INT) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -891,6 +891,12 @@ static int sdma_v6_0_mqd_init(struct amdgpu_device *adev, void *mqd, m->sdmax_rlcx_rb_aql_cntl = regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT; m->sdmax_rlcx_dummy_reg = regSDMA0_QUEUE0_DUMMY_REG_DEFAULT; + m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr); + m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr); + + m->sdmax_rlcx_f32_dbg0 = lower_32_bits(prop->fence_address); + m->sdmax_rlcx_f32_dbg1 = upper_32_bits(prop->fence_address); + return 0; } @@ -917,33 +923,22 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -956,10 +951,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -971,8 +963,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -995,37 +986,23 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | @@ -1053,10 +1030,7 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1067,8 +1041,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1300,6 +1273,23 @@ static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int r; + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = true; + break; + case 1: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = false; + break; + case 2: + adev->sdma.no_user_submission = true; + adev->sdma.disable_uq = false; + break; + } + r = amdgpu_sdma_init_microcode(adev, 0, true); if (r) return r; @@ -1329,11 +1319,19 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; + /* SDMA user fence event */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, + GFX_11_0_0__SRCID__SDMA_FENCE, + &adev->sdma.fence_irq); + if (r) + return r; + for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; ring->use_doorbell = true; ring->me = i; + ring->no_user_submission = adev->sdma.no_user_submission; DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i, ring->use_doorbell?"true":"false"); @@ -1357,7 +1355,8 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(6, 0, 0): case IP_VERSION(6, 0, 2): case IP_VERSION(6, 0, 3): - if (adev->sdma.instance[0].fw_version >= 21) + if ((adev->sdma.instance[0].fw_version >= 21) && + !amdgpu_sriov_vf(adev)) adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; break; default: @@ -1376,6 +1375,43 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(6, 0, 0): + if ((adev->sdma.instance[0].fw_version >= 24) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 0, 1): + if ((adev->sdma.instance[0].fw_version >= 18) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 0, 2): + if ((adev->sdma.instance[0].fw_version >= 21) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 0, 3): + if ((adev->sdma.instance[0].fw_version >= 25) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 1, 0): + if ((adev->sdma.instance[0].fw_version >= 14) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 1, 1): + if ((adev->sdma.instance[0].fw_version >= 17) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 1, 2): + if ((adev->sdma.instance[0].fw_version >= 15) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 1, 3): + if ((adev->sdma.instance[0].fw_version >= 10) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + default: + break; + } + r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) return r; @@ -1399,11 +1435,39 @@ static int sdma_v6_0_sw_fini(struct amdgpu_ip_block *ip_block) return 0; } +static int sdma_v6_0_set_userq_trap_interrupts(struct amdgpu_device *adev, + bool enable) +{ + unsigned int irq_type; + int i, r; + + if (adev->userq_funcs[AMDGPU_HW_IP_DMA]) { + for (i = 0; i < adev->sdma.num_instances; i++) { + irq_type = AMDGPU_SDMA_IRQ_INSTANCE0 + i; + if (enable) + r = amdgpu_irq_get(adev, &adev->sdma.trap_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->sdma.trap_irq, + irq_type); + if (r) + return r; + } + } + + return 0; +} + static int sdma_v6_0_hw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + int r; - return sdma_v6_0_start(adev); + r = sdma_v6_0_start(adev); + if (r) + return r; + + return sdma_v6_0_set_userq_trap_interrupts(adev, true); } static int sdma_v6_0_hw_fini(struct amdgpu_ip_block *ip_block) @@ -1415,6 +1479,7 @@ static int sdma_v6_0_hw_fini(struct amdgpu_ip_block *ip_block) sdma_v6_0_ctxempty_int_enable(adev, false); sdma_v6_0_enable(adev, false); + sdma_v6_0_set_userq_trap_interrupts(adev, false); return 0; } @@ -1506,29 +1571,29 @@ static int sdma_v6_0_ring_preempt_ib(struct amdgpu_ring *ring) return r; } -static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) +static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; - int i, r; + int r; - if (amdgpu_sriov_vf(adev)) + if (ring->me >= adev->sdma.num_instances) { + dev_err(adev->dev, "sdma instance not found\n"); return -EINVAL; - - for (i = 0; i < adev->sdma.num_instances; i++) { - if (ring == &adev->sdma.instance[i].ring) - break; } - if (i == adev->sdma.num_instances) { - DRM_ERROR("sdma instance not found\n"); - return -EINVAL; - } + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true); if (r) return r; - return sdma_v6_0_gfx_resume_instance(adev, i, true); + r = sdma_v6_0_gfx_resume_instance(adev, ring->me, true); + if (r) + return r; + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static int sdma_v6_0_set_trap_irq_state(struct amdgpu_device *adev, @@ -1555,25 +1620,9 @@ static int sdma_v6_0_process_trap_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { int instances, queue; - uint32_t mes_queue_id = entry->src_data[0]; DRM_DEBUG("IH: SDMA trap\n"); - if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { - struct amdgpu_mes_queue *queue; - - mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; - - spin_lock(&adev->mes.queue_id_lock); - queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); - if (queue) { - DRM_DEBUG("process smda queue id = %d\n", mes_queue_id); - amdgpu_fence_process(queue->ring); - } - spin_unlock(&adev->mes.queue_id_lock); - return 0; - } - queue = entry->ring_id & 0xf; instances = (entry->ring_id & 0xf0) >> 4; if (instances > 1) { @@ -1595,6 +1644,29 @@ static int sdma_v6_0_process_trap_irq(struct amdgpu_device *adev, return 0; } +static int sdma_v6_0_process_fence_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + u32 doorbell_offset = entry->src_data[0]; + + if (adev->enable_mes && doorbell_offset) { + struct amdgpu_userq_fence_driver *fence_drv = NULL; + struct xarray *xa = &adev->userq_xa; + unsigned long flags; + + doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; + + xa_lock_irqsave(xa, flags); + fence_drv = xa_load(xa, doorbell_offset); + if (fence_drv) + amdgpu_userq_fence_driver_process(fence_drv); + xa_unlock_irqrestore(xa, flags); + } + + return 0; +} + static int sdma_v6_0_process_illegal_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -1731,6 +1803,10 @@ static const struct amdgpu_irq_src_funcs sdma_v6_0_trap_irq_funcs = { .process = sdma_v6_0_process_trap_irq, }; +static const struct amdgpu_irq_src_funcs sdma_v6_0_fence_irq_funcs = { + .process = sdma_v6_0_process_fence_irq, +}; + static const struct amdgpu_irq_src_funcs sdma_v6_0_illegal_inst_irq_funcs = { .process = sdma_v6_0_process_illegal_inst_irq, }; @@ -1740,6 +1816,7 @@ static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev) adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 + adev->sdma.num_instances; adev->sdma.trap_irq.funcs = &sdma_v6_0_trap_irq_funcs; + adev->sdma.fence_irq.funcs = &sdma_v6_0_fence_irq_funcs; adev->sdma.illegal_inst_irq.funcs = &sdma_v6_0_illegal_inst_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index b2706221df99..b8b06d4c5882 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -33,7 +33,7 @@ #include "gc/gc_12_0_0_offset.h" #include "gc/gc_12_0_0_sh_mask.h" #include "hdp/hdp_6_0_0_offset.h" -#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h" +#include "ivsrcid/gfx/irqsrcs_gfx_12_0_0.h" #include "soc15_common.h" #include "soc15.h" @@ -42,6 +42,8 @@ #include "sdma_common.h" #include "sdma_v7_0.h" #include "v12_structs.h" +#include "mes_userqueue.h" +#include "amdgpu_userq_fence.h" MODULE_FIRMWARE("amdgpu/sdma_7_0_0.bin"); MODULE_FIRMWARE("amdgpu/sdma_7_0_1.bin"); @@ -204,66 +206,39 @@ static uint64_t sdma_v7_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v7_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; DRM_DEBUG("Setting write pointer\n"); - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - *wptr_saved = ring->wptr << 2; - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("Not using doorbell -- " - "regSDMA%i_GFX_RB_WPTR == 0x%08x " - "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, - ring->me, - regSDMA0_QUEUE0_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, - ring->me, - regSDMA0_QUEUE0_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); - } + DRM_DEBUG("Not using doorbell -- " + "regSDMA%i_GFX_RB_WPTR == 0x%08x " + "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, + ring->me, + regSDMA0_QUEUE0_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, + ring->me, + regSDMA0_QUEUE0_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } } @@ -407,11 +382,9 @@ static void sdma_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if (flags & AMDGPU_FENCE_FLAG_INT) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -829,29 +802,29 @@ static bool sdma_v7_0_check_soft_reset(struct amdgpu_ip_block *ip_block) return false; } -static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) +static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; - int i, r; + int r; - if (amdgpu_sriov_vf(adev)) + if (ring->me >= adev->sdma.num_instances) { + dev_err(adev->dev, "sdma instance not found\n"); return -EINVAL; - - for (i = 0; i < adev->sdma.num_instances; i++) { - if (ring == &adev->sdma.instance[i].ring) - break; } - if (i == adev->sdma.num_instances) { - DRM_ERROR("sdma instance not found\n"); - return -EINVAL; - } + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true); if (r) return r; - return sdma_v7_0_gfx_resume_instance(adev, i, true); + r = sdma_v7_0_gfx_resume_instance(adev, ring->me, true); + if (r) + return r; + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } /** @@ -935,6 +908,12 @@ static int sdma_v7_0_mqd_init(struct amdgpu_device *adev, void *mqd, m->sdmax_rlcx_rb_aql_cntl = 0x4000; //regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT; m->sdmax_rlcx_dummy_reg = 0xf; //regSDMA0_QUEUE0_DUMMY_REG_DEFAULT; + m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr); + m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr); + + m->sdmax_rlcx_mcu_dbg0 = lower_32_bits(prop->fence_address); + m->sdmax_rlcx_mcu_dbg1 = upper_32_bits(prop->fence_address); + return 0; } @@ -961,33 +940,22 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1000,10 +968,7 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -1015,8 +980,7 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1039,37 +1003,23 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | @@ -1097,10 +1047,7 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1111,8 +1058,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1312,6 +1258,23 @@ static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int r; + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = true; + break; + case 1: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = false; + break; + case 2: + adev->sdma.no_user_submission = true; + adev->sdma.disable_uq = false; + break; + } + r = amdgpu_sdma_init_microcode(adev, 0, true); if (r) { DRM_ERROR("Failed to init sdma firmware!\n"); @@ -1337,16 +1300,24 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) /* SDMA trap event */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, - GFX_11_0_0__SRCID__SDMA_TRAP, + GFX_12_0_0__SRCID__SDMA_TRAP, &adev->sdma.trap_irq); if (r) return r; + /* SDMA user fence event */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, + GFX_12_0_0__SRCID__SDMA_FENCE, + &adev->sdma.fence_irq); + if (r) + return r; + for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; ring->use_doorbell = true; ring->me = i; + ring->no_user_submission = adev->sdma.no_user_submission; DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i, ring->use_doorbell?"true":"false"); @@ -1366,7 +1337,8 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) adev->sdma.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring); - adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + if (!amdgpu_sriov_vf(adev)) + adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) @@ -1378,6 +1350,16 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(7, 0, 0): + case IP_VERSION(7, 0, 1): + if ((adev->sdma.instance[0].fw_version >= 7836028) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + default: + break; + } + return r; } @@ -1400,11 +1382,39 @@ static int sdma_v7_0_sw_fini(struct amdgpu_ip_block *ip_block) return 0; } +static int sdma_v7_0_set_userq_trap_interrupts(struct amdgpu_device *adev, + bool enable) +{ + unsigned int irq_type; + int i, r; + + if (adev->userq_funcs[AMDGPU_HW_IP_DMA]) { + for (i = 0; i < adev->sdma.num_instances; i++) { + irq_type = AMDGPU_SDMA_IRQ_INSTANCE0 + i; + if (enable) + r = amdgpu_irq_get(adev, &adev->sdma.trap_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->sdma.trap_irq, + irq_type); + if (r) + return r; + } + } + + return 0; +} + static int sdma_v7_0_hw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + int r; - return sdma_v7_0_start(adev); + r = sdma_v7_0_start(adev); + if (r) + return r; + + return sdma_v7_0_set_userq_trap_interrupts(adev, true); } static int sdma_v7_0_hw_fini(struct amdgpu_ip_block *ip_block) @@ -1416,6 +1426,7 @@ static int sdma_v7_0_hw_fini(struct amdgpu_ip_block *ip_block) sdma_v7_0_ctx_switch_enable(adev, false); sdma_v7_0_enable(adev, false); + sdma_v7_0_set_userq_trap_interrupts(adev, false); return 0; } @@ -1533,25 +1544,9 @@ static int sdma_v7_0_process_trap_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { int instances, queue; - uint32_t mes_queue_id = entry->src_data[0]; DRM_DEBUG("IH: SDMA trap\n"); - if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { - struct amdgpu_mes_queue *queue; - - mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; - - spin_lock(&adev->mes.queue_id_lock); - queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); - if (queue) { - DRM_DEBUG("process smda queue id = %d\n", mes_queue_id); - amdgpu_fence_process(queue->ring); - } - spin_unlock(&adev->mes.queue_id_lock); - return 0; - } - queue = entry->ring_id & 0xf; instances = (entry->ring_id & 0xf0) >> 4; if (instances > 1) { @@ -1573,6 +1568,29 @@ static int sdma_v7_0_process_trap_irq(struct amdgpu_device *adev, return 0; } +static int sdma_v7_0_process_fence_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + u32 doorbell_offset = entry->src_data[0]; + + if (adev->enable_mes && doorbell_offset) { + struct amdgpu_userq_fence_driver *fence_drv = NULL; + struct xarray *xa = &adev->userq_xa; + unsigned long flags; + + doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; + + xa_lock_irqsave(xa, flags); + fence_drv = xa_load(xa, doorbell_offset); + if (fence_drv) + amdgpu_userq_fence_driver_process(fence_drv); + xa_unlock_irqrestore(xa, flags); + } + + return 0; +} + static int sdma_v7_0_process_illegal_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -1710,6 +1728,10 @@ static const struct amdgpu_irq_src_funcs sdma_v7_0_trap_irq_funcs = { .process = sdma_v7_0_process_trap_irq, }; +static const struct amdgpu_irq_src_funcs sdma_v7_0_fence_irq_funcs = { + .process = sdma_v7_0_process_fence_irq, +}; + static const struct amdgpu_irq_src_funcs sdma_v7_0_illegal_inst_irq_funcs = { .process = sdma_v7_0_process_illegal_inst_irq, }; @@ -1719,6 +1741,7 @@ static void sdma_v7_0_set_irq_funcs(struct amdgpu_device *adev) adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 + adev->sdma.num_instances; adev->sdma.trap_irq.funcs = &sdma_v7_0_trap_irq_funcs; + adev->sdma.fence_irq.funcs = &sdma_v7_0_fence_irq_funcs; adev->sdma.illegal_inst_irq.funcs = &sdma_v7_0_illegal_inst_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 2247f6a94858..e0f139de7991 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -35,6 +35,7 @@ #include "amdgpu_vce.h" #include "atom.h" #include "amd_pcie.h" + #include "si_dpm.h" #include "sid.h" #include "si_ih.h" @@ -44,17 +45,30 @@ #include "dce_v6_0.h" #include "si.h" #include "uvd_v3_1.h" -#include "amdgpu_vkms.h" + +#include "uvd/uvd_4_0_d.h" + +#include "smu/smu_6_0_d.h" +#include "smu/smu_6_0_sh_mask.h" + #include "gca/gfx_6_0_d.h" +#include "gca/gfx_6_0_sh_mask.h" + #include "oss/oss_1_0_d.h" #include "oss/oss_1_0_sh_mask.h" + #include "gmc/gmc_6_0_d.h" +#include"gmc/gmc_6_0_sh_mask.h" + #include "dce/dce_6_0_d.h" -#include "uvd/uvd_4_0_d.h" +#include "dce/dce_6_0_sh_mask.h" + #include "bif/bif_3_0_d.h" #include "bif/bif_3_0_sh_mask.h" +#include "si_enums.h" #include "amdgpu_dm.h" +#include "amdgpu_vkms.h" static const u32 tahiti_golden_registers[] = { @@ -1071,8 +1085,8 @@ static u32 si_smc_rreg(struct amdgpu_device *adev, u32 reg) u32 r; spin_lock_irqsave(&adev->smc_idx_lock, flags); - WREG32(SMC_IND_INDEX_0, (reg)); - r = RREG32(SMC_IND_DATA_0); + WREG32(mmSMC_IND_INDEX_0, (reg)); + r = RREG32(mmSMC_IND_DATA_0); spin_unlock_irqrestore(&adev->smc_idx_lock, flags); return r; } @@ -1082,8 +1096,8 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) unsigned long flags; spin_lock_irqsave(&adev->smc_idx_lock, flags); - WREG32(SMC_IND_INDEX_0, (reg)); - WREG32(SMC_IND_DATA_0, (v)); + WREG32(mmSMC_IND_INDEX_0, (reg)); + WREG32(mmSMC_IND_DATA_0, (v)); spin_unlock_irqrestore(&adev->smc_idx_lock, flags); } @@ -1110,20 +1124,20 @@ static void si_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v) } static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { - {GRBM_STATUS}, + {mmGRBM_STATUS}, {mmGRBM_STATUS2}, {mmGRBM_STATUS_SE0}, {mmGRBM_STATUS_SE1}, {mmSRBM_STATUS}, {mmSRBM_STATUS2}, - {DMA_STATUS_REG + DMA0_REGISTER_OFFSET}, - {DMA_STATUS_REG + DMA1_REGISTER_OFFSET}, + {mmDMA_STATUS_REG + DMA0_REGISTER_OFFSET}, + {mmDMA_STATUS_REG + DMA1_REGISTER_OFFSET}, {mmCP_STAT}, {mmCP_STALLED_STAT1}, {mmCP_STALLED_STAT2}, {mmCP_STALLED_STAT3}, - {GB_ADDR_CONFIG}, - {MC_ARB_RAMCFG}, + {mmGB_ADDR_CONFIG}, + {mmMC_ARB_RAMCFG}, {mmGB_TILE_MODE0}, {mmGB_TILE_MODE1}, {mmGB_TILE_MODE2}, @@ -1156,7 +1170,7 @@ static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { {mmGB_TILE_MODE29}, {mmGB_TILE_MODE30}, {mmGB_TILE_MODE31}, - {CC_RB_BACKEND_DISABLE, true}, + {mmCC_RB_BACKEND_DISABLE, true}, {mmGC_USER_RB_BACKEND_DISABLE, true}, {mmPA_SC_RASTER_CONFIG, true}, }; @@ -1264,37 +1278,37 @@ static bool si_read_disabled_bios(struct amdgpu_device *adev) u32 rom_cntl; bool r; - bus_cntl = RREG32(R600_BUS_CNTL); + bus_cntl = RREG32(mmBUS_CNTL); if (adev->mode_info.num_crtc) { - d1vga_control = RREG32(AVIVO_D1VGA_CONTROL); - d2vga_control = RREG32(AVIVO_D2VGA_CONTROL); - vga_render_control = RREG32(VGA_RENDER_CONTROL); + d1vga_control = RREG32(mmD1VGA_CONTROL); + d2vga_control = RREG32(mmD2VGA_CONTROL); + vga_render_control = RREG32(mmVGA_RENDER_CONTROL); } rom_cntl = RREG32(R600_ROM_CNTL); /* enable the rom */ - WREG32(R600_BUS_CNTL, (bus_cntl & ~R600_BIOS_ROM_DIS)); + WREG32(mmBUS_CNTL, (bus_cntl & ~BUS_CNTL__BIOS_ROM_DIS_MASK)); if (adev->mode_info.num_crtc) { /* Disable VGA mode */ - WREG32(AVIVO_D1VGA_CONTROL, - (d1vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE | - AVIVO_DVGA_CONTROL_TIMING_SELECT))); - WREG32(AVIVO_D2VGA_CONTROL, - (d2vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE | - AVIVO_DVGA_CONTROL_TIMING_SELECT))); - WREG32(VGA_RENDER_CONTROL, - (vga_render_control & C_000300_VGA_VSTATUS_CNTL)); + WREG32(mmD1VGA_CONTROL, + (d1vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK | + D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK))); + WREG32(mmD2VGA_CONTROL, + (d2vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK | + D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK))); + WREG32(mmVGA_RENDER_CONTROL, + (vga_render_control & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK)); } WREG32(R600_ROM_CNTL, rom_cntl | R600_SCK_OVERWRITE); r = amdgpu_read_bios(adev); /* restore regs */ - WREG32(R600_BUS_CNTL, bus_cntl); + WREG32(mmBUS_CNTL, bus_cntl); if (adev->mode_info.num_crtc) { - WREG32(AVIVO_D1VGA_CONTROL, d1vga_control); - WREG32(AVIVO_D2VGA_CONTROL, d2vga_control); - WREG32(VGA_RENDER_CONTROL, vga_render_control); + WREG32(mmD1VGA_CONTROL, d1vga_control); + WREG32(mmD2VGA_CONTROL, d2vga_control); + WREG32(mmVGA_RENDER_CONTROL, vga_render_control); } WREG32(R600_ROM_CNTL, rom_cntl); return r; @@ -1331,23 +1345,24 @@ static void si_set_clk_bypass_mode(struct amdgpu_device *adev) { u32 tmp, i; - tmp = RREG32(CG_SPLL_FUNC_CNTL); - tmp |= SPLL_BYPASS_EN; - WREG32(CG_SPLL_FUNC_CNTL, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL); + tmp |= CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN_MASK; + WREG32(mmCG_SPLL_FUNC_CNTL, tmp); - tmp = RREG32(CG_SPLL_FUNC_CNTL_2); - tmp |= SPLL_CTLREQ_CHG; - WREG32(CG_SPLL_FUNC_CNTL_2, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL_2); + tmp |= CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK; + WREG32(mmCG_SPLL_FUNC_CNTL_2, tmp); for (i = 0; i < adev->usec_timeout; i++) { - if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS) + if (RREG32(mmCG_SPLL_STATUS) & CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK) break; udelay(1); } - tmp = RREG32(CG_SPLL_FUNC_CNTL_2); - tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE); - WREG32(CG_SPLL_FUNC_CNTL_2, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL_2); + tmp &= ~(CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK | + CG_SPLL_FUNC_CNTL_2__SCLK_MUX_UPDATE_MASK); + WREG32(mmCG_SPLL_FUNC_CNTL_2, tmp); tmp = RREG32(MPLL_CNTL_MODE); tmp &= ~MPLL_MCLK_SEL; @@ -1358,21 +1373,21 @@ static void si_spll_powerdown(struct amdgpu_device *adev) { u32 tmp; - tmp = RREG32(SPLL_CNTL_MODE); - tmp |= SPLL_SW_DIR_CONTROL; - WREG32(SPLL_CNTL_MODE, tmp); + tmp = RREG32(mmSPLL_CNTL_MODE); + tmp |= SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK; + WREG32(mmSPLL_CNTL_MODE, tmp); - tmp = RREG32(CG_SPLL_FUNC_CNTL); - tmp |= SPLL_RESET; - WREG32(CG_SPLL_FUNC_CNTL, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL); + tmp |= CG_SPLL_FUNC_CNTL__SPLL_RESET_MASK; + WREG32(mmCG_SPLL_FUNC_CNTL, tmp); - tmp = RREG32(CG_SPLL_FUNC_CNTL); - tmp |= SPLL_SLEEP; - WREG32(CG_SPLL_FUNC_CNTL, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL); + tmp |= CG_SPLL_FUNC_CNTL__SPLL_SLEEP_MASK; + WREG32(mmCG_SPLL_FUNC_CNTL, tmp); - tmp = RREG32(SPLL_CNTL_MODE); - tmp &= ~SPLL_SW_DIR_CONTROL; - WREG32(SPLL_CNTL_MODE, tmp); + tmp = RREG32(mmSPLL_CNTL_MODE); + tmp &= ~SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK; + WREG32(mmSPLL_CNTL_MODE, tmp); } static int si_gpu_pci_config_reset(struct amdgpu_device *adev) @@ -1454,14 +1469,14 @@ static void si_vga_set_state(struct amdgpu_device *adev, bool state) { uint32_t temp; - temp = RREG32(CONFIG_CNTL); + temp = RREG32(mmCONFIG_CNTL); if (!state) { temp &= ~(1<<0); temp |= (1<<1); } else { temp &= ~(1<<1); } - WREG32(CONFIG_CNTL, temp); + WREG32(mmCONFIG_CNTL, temp); } static u32 si_get_xclk(struct amdgpu_device *adev) @@ -1469,12 +1484,12 @@ static u32 si_get_xclk(struct amdgpu_device *adev) u32 reference_clock = adev->clock.spll.reference_freq; u32 tmp; - tmp = RREG32(CG_CLKPIN_CNTL_2); - if (tmp & MUX_TCLK_TO_XCLK) + tmp = RREG32(mmCG_CLKPIN_CNTL_2); + if (tmp & CG_CLKPIN_CNTL_2__MUX_TCLK_TO_XCLK_MASK) return TCLK; - tmp = RREG32(CG_CLKPIN_CNTL); - if (tmp & XTALIN_DIVIDE) + tmp = RREG32(mmCG_CLKPIN_CNTL); + if (tmp & CG_CLKPIN_CNTL__XTALIN_DIVIDE_MASK) return reference_clock / 4; return reference_clock; @@ -1519,9 +1534,9 @@ static int si_get_pcie_lanes(struct amdgpu_device *adev) if (adev->flags & AMD_IS_APU) return 0; - link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); + link_width_cntl = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL); - switch ((link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT) { + switch ((link_width_cntl & PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK) >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT) { case LC_LINK_WIDTH_X1: return 1; case LC_LINK_WIDTH_X2: @@ -1568,13 +1583,13 @@ static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes) return; } - link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); - link_width_cntl &= ~LC_LINK_WIDTH_MASK; - link_width_cntl |= mask << LC_LINK_WIDTH_SHIFT; - link_width_cntl |= (LC_RECONFIG_NOW | - LC_RECONFIG_ARC_MISSING_ESCAPE); + link_width_cntl = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL); + link_width_cntl &= ~PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_MASK; + link_width_cntl |= mask << PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH__SHIFT; + link_width_cntl |= (PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_NOW_MASK | + PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_ARC_MISSING_ESCAPE_MASK); - WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); + WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); } static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, @@ -2018,7 +2033,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = static uint32_t si_get_rev_id(struct amdgpu_device *adev) { - return (RREG32(CC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK) + return (RREG32(mmCC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK) >> CC_DRM_ID_STRAPS__ATI_REV_ID__SHIFT; } @@ -2239,9 +2254,9 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3))) return; - speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); - current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >> - LC_CURRENT_DATA_RATE_SHIFT; + speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL); + current_data_rate = (speed_cntl & PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) >> + PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) { if (current_data_rate == 2) { DRM_INFO("PCIE gen 3 link speeds already enabled\n"); @@ -2268,17 +2283,17 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD); pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD); - tmp = RREG32_PCIE(PCIE_LC_STATUS1); - max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT; - current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT; + tmp = RREG32_PCIE(ixPCIE_LC_STATUS1); + max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >> PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH__SHIFT; + current_lw = (tmp & PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH_MASK) >> PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH__SHIFT; if (current_lw < max_lw) { - tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); - if (tmp & LC_RENEGOTIATION_SUPPORT) { - tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS); - tmp |= (max_lw << LC_LINK_WIDTH_SHIFT); - tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW; - WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp); + tmp = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL); + if (tmp & PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATION_SUPPORT_MASK) { + tmp &= ~(PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_DIS_MASK); + tmp |= (max_lw << PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH__SHIFT); + tmp |= PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_SUPPORT_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATE_EN_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_NOW_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, tmp); } } @@ -2301,13 +2316,13 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) PCI_EXP_LNKCTL2, &gpu_cfg2); - tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); - tmp |= LC_SET_QUIESCE; - WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); + tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4); + tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp); - tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); - tmp |= LC_REDO_EQ; - WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); + tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4); + tmp |= PCIE_LC_CNTL4__LC_REDO_EQ_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp); mdelay(100); @@ -2333,16 +2348,16 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) (PCI_EXP_LNKCTL2_ENTER_COMP | PCI_EXP_LNKCTL2_TX_MARGIN)); - tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); - tmp &= ~LC_SET_QUIESCE; - WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); + tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4); + tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp); } } } - speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE; - speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE; - WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); + speed_cntl |= PCIE_LC_SPEED_CNTL__LC_FORCE_EN_SW_SPEED_CHANGE_MASK | PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_HW_SPEED_CHANGE_MASK; + speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL, speed_cntl); tmp16 = 0; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) @@ -2354,13 +2369,13 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2, PCI_EXP_LNKCTL2_TLS, tmp16); - speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); - speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE; - WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); + speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL); + speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL, speed_cntl); for (i = 0; i < adev->usec_timeout; i++) { - speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); - if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0) + speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL); + if ((speed_cntl & PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK) == 0) break; udelay(1); } @@ -2418,121 +2433,121 @@ static void si_program_aspm(struct amdgpu_device *adev) if (!amdgpu_device_should_use_aspm(adev)) return; - orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL); - data &= ~LC_XMIT_N_FTS_MASK; - data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN; + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL); + data &= ~PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_MASK; + data |= (0x24 << PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS__SHIFT) | PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_OVERRIDE_EN_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL, data); - orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3); - data |= LC_GO_TO_RECOVERY; + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL3); + data |= PCIE_LC_CNTL3__LC_GO_TO_RECOVERY_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL3, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL3, data); - orig = data = RREG32_PCIE(PCIE_P_CNTL); - data |= P_IGNORE_EDB_ERR; + orig = data = RREG32_PCIE(ixPCIE_P_CNTL); + data |= PCIE_P_CNTL__P_IGNORE_EDB_ERR_MASK; if (orig != data) - WREG32_PCIE(PCIE_P_CNTL, data); + WREG32_PCIE(ixPCIE_P_CNTL, data); - orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL); - data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK); - data |= LC_PMI_TO_L1_DIS; + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL); + data &= ~(PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK | PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK); + data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; if (!disable_l0s) - data |= LC_L0S_INACTIVITY(7); + data |= (7 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT); if (!disable_l1) { - data |= LC_L1_INACTIVITY(7); - data &= ~LC_PMI_TO_L1_DIS; + data |= (7 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT); + data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data); if (!disable_plloff_in_l1) { bool clk_req_support; - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0); - data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK); - data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7); + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_0); + data &= ~(PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK | PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK); + data |= (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) | (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT); if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_0, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_0, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_1); - data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK); - data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7); + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_1); + data &= ~(PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK | PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK); + data |= (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) | (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT); if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_1, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_1, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_0); - data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK); - data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7); + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_0); + data &= ~(PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK | PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK); + data |= (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) | (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT); if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_0, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_0, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_1); - data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK); - data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7); + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_1); + data &= ~(PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK | PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK); + data |= (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) | (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT); if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_1, data); if ((adev->asic_type != CHIP_OLAND) && (adev->asic_type != CHIP_HAINAN)) { - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0); - data &= ~PLL_RAMP_UP_TIME_0_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_0); + data &= ~PB0_PIF_PWRDOWN_0__PLL_RAMP_UP_TIME_0_MASK; if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_0, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_0, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_1); - data &= ~PLL_RAMP_UP_TIME_1_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_1); + data &= ~PB0_PIF_PWRDOWN_1__PLL_RAMP_UP_TIME_1_MASK; if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_1, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_1, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_2); - data &= ~PLL_RAMP_UP_TIME_2_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_2); + data &= ~PB0_PIF_PWRDOWN_2__PLL_RAMP_UP_TIME_2_MASK; if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_2, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_2, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_3); - data &= ~PLL_RAMP_UP_TIME_3_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_3); + data &= ~PB0_PIF_PWRDOWN_3__PLL_RAMP_UP_TIME_3_MASK; if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_3, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_3, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_0); - data &= ~PLL_RAMP_UP_TIME_0_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_0); + data &= ~PB1_PIF_PWRDOWN_0__PLL_RAMP_UP_TIME_0_MASK; if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_0, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_0, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_1); - data &= ~PLL_RAMP_UP_TIME_1_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_1); + data &= ~PB1_PIF_PWRDOWN_1__PLL_RAMP_UP_TIME_1_MASK; if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_1, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_2); - data &= ~PLL_RAMP_UP_TIME_2_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_2); + data &= ~PB1_PIF_PWRDOWN_2__PLL_RAMP_UP_TIME_2_MASK; if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_2, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_2, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_3); - data &= ~PLL_RAMP_UP_TIME_3_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_3); + data &= ~PB1_PIF_PWRDOWN_3__PLL_RAMP_UP_TIME_3_MASK; if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_3, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_3, data); } - orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); - data &= ~LC_DYN_LANES_PWR_STATE_MASK; - data |= LC_DYN_LANES_PWR_STATE(3); + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL); + data &= ~PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE_MASK; + data |= (3 << PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE__SHIFT); if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_CNTL); - data &= ~LS2_EXIT_TIME_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_CNTL); + data &= ~PB0_PIF_CNTL__LS2_EXIT_TIME_MASK; if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN)) - data |= LS2_EXIT_TIME(5); + data |= (5 << PB0_PIF_CNTL__LS2_EXIT_TIME__SHIFT); if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_CNTL, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_CNTL, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_CNTL); - data &= ~LS2_EXIT_TIME_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_CNTL); + data &= ~PB1_PIF_CNTL__LS2_EXIT_TIME_MASK; if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN)) - data |= LS2_EXIT_TIME(5); + data |= (5 << PB1_PIF_CNTL__LS2_EXIT_TIME__SHIFT); if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_CNTL, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_CNTL, data); if (!disable_clkreq && !pci_is_root_bus(adev->pdev->bus)) { @@ -2548,64 +2563,64 @@ static void si_program_aspm(struct amdgpu_device *adev) } if (clk_req_support) { - orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2); - data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23; + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL2); + data |= PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK | PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL2, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL2, data); - orig = data = RREG32(THM_CLK_CNTL); - data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK); - data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1); + orig = data = RREG32(mmTHM_CLK_CNTL); + data &= ~(THM_CLK_CNTL__CMON_CLK_SEL_MASK | THM_CLK_CNTL__TMON_CLK_SEL_MASK); + data |= (1 << THM_CLK_CNTL__CMON_CLK_SEL__SHIFT) | (1 << THM_CLK_CNTL__TMON_CLK_SEL__SHIFT); if (orig != data) - WREG32(THM_CLK_CNTL, data); + WREG32(mmTHM_CLK_CNTL, data); - orig = data = RREG32(MISC_CLK_CNTL); - data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK); - data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1); + orig = data = RREG32(mmMISC_CLK_CNTL); + data &= ~(MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL_MASK | MISC_CLK_CNTL__ZCLK_SEL_MASK); + data |= (1 << MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL__SHIFT) | (1 << MISC_CLK_CNTL__ZCLK_SEL__SHIFT); if (orig != data) - WREG32(MISC_CLK_CNTL, data); + WREG32(mmMISC_CLK_CNTL, data); - orig = data = RREG32(CG_CLKPIN_CNTL); - data &= ~BCLK_AS_XCLK; + orig = data = RREG32(mmCG_CLKPIN_CNTL); + data &= ~CG_CLKPIN_CNTL__BCLK_AS_XCLK_MASK; if (orig != data) - WREG32(CG_CLKPIN_CNTL, data); + WREG32(mmCG_CLKPIN_CNTL, data); - orig = data = RREG32(CG_CLKPIN_CNTL_2); - data &= ~FORCE_BIF_REFCLK_EN; + orig = data = RREG32(mmCG_CLKPIN_CNTL_2); + data &= ~CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN_MASK; if (orig != data) - WREG32(CG_CLKPIN_CNTL_2, data); + WREG32(mmCG_CLKPIN_CNTL_2, data); - orig = data = RREG32(MPLL_BYPASSCLK_SEL); - data &= ~MPLL_CLKOUT_SEL_MASK; - data |= MPLL_CLKOUT_SEL(4); + orig = data = RREG32(mmMPLL_BYPASSCLK_SEL); + data &= ~MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK; + data |= 4 << MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT; if (orig != data) - WREG32(MPLL_BYPASSCLK_SEL, data); + WREG32(mmMPLL_BYPASSCLK_SEL, data); - orig = data = RREG32(SPLL_CNTL_MODE); - data &= ~SPLL_REFCLK_SEL_MASK; + orig = data = RREG32(mmSPLL_CNTL_MODE); + data &= ~SPLL_CNTL_MODE__SPLL_REFCLK_SEL_MASK; if (orig != data) - WREG32(SPLL_CNTL_MODE, data); + WREG32(mmSPLL_CNTL_MODE, data); } } } else { if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data); } - orig = data = RREG32_PCIE(PCIE_CNTL2); - data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN; + orig = data = RREG32_PCIE(ixPCIE_CNTL2); + data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK | PCIE_CNTL2__MST_MEM_LS_EN_MASK | PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK; if (orig != data) - WREG32_PCIE(PCIE_CNTL2, data); + WREG32_PCIE(ixPCIE_CNTL2, data); if (!disable_l0s) { - data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL); - if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) { - data = RREG32_PCIE(PCIE_LC_STATUS1); - if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) { - orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL); - data &= ~LC_L0S_INACTIVITY_MASK; + data = RREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL); + if((data & PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) == PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) { + data = RREG32_PCIE(ixPCIE_LC_STATUS1); + if ((data & PCIE_LC_STATUS1__LC_REVERSE_XMIT_MASK) && (data & PCIE_LC_STATUS1__LC_REVERSE_RCVR_MASK)) { + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL); + data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data); } } } diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index e2089c8da71b..7f18e4875287 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -27,6 +27,8 @@ #include "si.h" #include "sid.h" +#include "oss/oss_1_0_d.h" +#include "oss/oss_1_0_sh_mask.h" const u32 sdma_offsets[SDMA_MAX_INSTANCE] = { DMA0_REGISTER_OFFSET, @@ -38,17 +40,31 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev); static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev); static void si_dma_set_irq_funcs(struct amdgpu_device *adev); +/** + * si_dma_ring_get_rptr - get the current read pointer + * + * @ring: amdgpu ring pointer + * + * Get the current rptr from the hardware (SI). + */ static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring) { return *ring->rptr_cpu_addr; } +/** + * si_dma_ring_get_wptr - get the current write pointer + * + * @ring: amdgpu ring pointer + * + * Get the current wptr from the hardware (SI). + */ static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - return (RREG32(DMA_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2; + return (RREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2; } static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) @@ -56,7 +72,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - WREG32(DMA_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); + WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); } static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, @@ -117,9 +133,9 @@ static void si_dma_stop(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { /* dma0 */ - rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]); - rb_cntl &= ~DMA_RB_ENABLE; - WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl); + rb_cntl = RREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i]); + rb_cntl &= ~DMA_GFX_RB_CNTL__RB_ENABLE_MASK; + WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); } } @@ -133,44 +149,44 @@ static int si_dma_start(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; - WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0); - WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); + WREG32(mmDMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0); + WREG32(mmDMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); /* Set ring buffer size in dwords */ rb_bufsz = order_base_2(ring->ring_size / 4); rb_cntl = rb_bufsz << 1; #ifdef __BIG_ENDIAN - rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; + rb_cntl |= DMA_GFX_RB_CNTL__RB_SWAP_ENABLE_MASK | DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE_MASK; #endif - WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl); + WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); /* Initialize the ring buffer's read and write pointers */ - WREG32(DMA_RB_RPTR + sdma_offsets[i], 0); - WREG32(DMA_RB_WPTR + sdma_offsets[i], 0); + WREG32(mmDMA_GFX_RB_RPTR + sdma_offsets[i], 0); + WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], 0); rptr_addr = ring->rptr_gpu_addr; - WREG32(DMA_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr)); - WREG32(DMA_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF); + WREG32(mmDMA_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr)); + WREG32(mmDMA_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF); - rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; + rb_cntl |= DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK; - WREG32(DMA_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); + WREG32(mmDMA_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); /* enable DMA IBs */ - ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE; + ib_cntl = DMA_GFX_IB_CNTL__IB_ENABLE_MASK | DMA_GFX_IB_CNTL__CMD_VMID_FORCE_MASK; #ifdef __BIG_ENDIAN - ib_cntl |= DMA_IB_SWAP_ENABLE; + ib_cntl |= DMA_GFX_IB_CNTL__IB_SWAP_ENABLE_MASK; #endif - WREG32(DMA_IB_CNTL + sdma_offsets[i], ib_cntl); + WREG32(mmDMA_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); - dma_cntl = RREG32(DMA_CNTL + sdma_offsets[i]); - dma_cntl &= ~CTXEMPTY_INT_ENABLE; - WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl); + dma_cntl = RREG32(mmDMA_CNTL + sdma_offsets[i]); + dma_cntl &= ~DMA_CNTL__CTXEMPTY_INT_ENABLE_MASK; + WREG32(mmDMA_CNTL + sdma_offsets[i], dma_cntl); ring->wptr = 0; - WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2); - WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); + WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); + WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_GFX_RB_CNTL__RB_ENABLE_MASK); r = amdgpu_ring_test_helper(ring); if (r) @@ -461,7 +477,7 @@ static int si_dma_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - adev->sdma.num_instances = 2; + adev->sdma.num_instances = SDMA_MAX_INSTANCE; si_dma_set_ring_funcs(adev); si_dma_set_buffer_funcs(adev); @@ -545,9 +561,9 @@ static bool si_dma_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - u32 tmp = RREG32(SRBM_STATUS2); + u32 tmp = RREG32(mmSRBM_STATUS2); - if (tmp & (DMA_BUSY_MASK | DMA1_BUSY_MASK)) + if (tmp & (SRBM_STATUS2__DMA_BUSY_MASK | SRBM_STATUS2__DMA1_BUSY_MASK)) return false; return true; @@ -583,14 +599,14 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev, case AMDGPU_SDMA_IRQ_INSTANCE0: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET); - sdma_cntl &= ~TRAP_ENABLE; - WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET); + sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET); - sdma_cntl |= TRAP_ENABLE; - WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET); + sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); break; default: break; @@ -599,14 +615,14 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev, case AMDGPU_SDMA_IRQ_INSTANCE1: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET); - sdma_cntl &= ~TRAP_ENABLE; - WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET); + sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET); - sdma_cntl |= TRAP_ENABLE; - WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET); + sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); break; default: break; @@ -645,11 +661,11 @@ static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block, offset = DMA0_REGISTER_OFFSET; else offset = DMA1_REGISTER_OFFSET; - orig = data = RREG32(DMA_POWER_CNTL + offset); - data &= ~MEM_POWER_OVERRIDE; + orig = data = RREG32(mmDMA_POWER_CNTL + offset); + data &= ~DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (data != orig) - WREG32(DMA_POWER_CNTL + offset, data); - WREG32(DMA_CLK_CTRL + offset, 0x00000100); + WREG32(mmDMA_POWER_CNTL + offset, data); + WREG32(mmDMA_CLK_CTRL + offset, 0x00000100); } } else { for (i = 0; i < adev->sdma.num_instances; i++) { @@ -657,15 +673,15 @@ static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block, offset = DMA0_REGISTER_OFFSET; else offset = DMA1_REGISTER_OFFSET; - orig = data = RREG32(DMA_POWER_CNTL + offset); - data |= MEM_POWER_OVERRIDE; + orig = data = RREG32(mmDMA_POWER_CNTL + offset); + data |= DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (data != orig) - WREG32(DMA_POWER_CNTL + offset, data); + WREG32(mmDMA_POWER_CNTL + offset, data); - orig = data = RREG32(DMA_CLK_CTRL + offset); + orig = data = RREG32(mmDMA_CLK_CTRL + offset); data = 0xff000000; if (data != orig) - WREG32(DMA_CLK_CTRL + offset, data); + WREG32(mmDMA_CLK_CTRL + offset, data); } } @@ -679,11 +695,11 @@ static int si_dma_set_powergating_state(struct amdgpu_ip_block *ip_block, struct amdgpu_device *adev = ip_block->adev; - WREG32(DMA_PGFSM_WRITE, 0x00002000); - WREG32(DMA_PGFSM_CONFIG, 0x100010ff); + WREG32(mmDMA_PGFSM_WRITE, 0x00002000); + WREG32(mmDMA_PGFSM_CONFIG, 0x100010ff); for (tmp = 0; tmp < 5; tmp++) - WREG32(DMA_PGFSM_WRITE, 0); + WREG32(mmDMA_PGFSM_WRITE, 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h index d656ef1fa6e1..6da65778292b 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_enums.h +++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h @@ -23,115 +23,15 @@ #ifndef SI_ENUMS_H #define SI_ENUMS_H -#define VBLANK_INT_MASK (1 << 0) -#define DC_HPDx_INT_EN (1 << 16) -#define VBLANK_ACK (1 << 4) -#define VLINE_ACK (1 << 4) - -#define CURSOR_WIDTH 64 -#define CURSOR_HEIGHT 64 - -#define VGA_VSTATUS_CNTL 0xFFFCFFFF #define PRIORITY_MARK_MASK 0x7fff #define PRIORITY_OFF (1 << 16) #define PRIORITY_ALWAYS_ON (1 << 20) -#define INTERLEAVE_EN (1 << 0) - -#define LATENCY_WATERMARK_MASK(x) ((x) << 16) -#define DC_LB_MEMORY_CONFIG(x) ((x) << 20) -#define ICON_DEGAMMA_MODE(x) (((x) & 0x3) << 8) - -#define GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0) -#define GRPH_ENDIAN_NONE 0 -#define GRPH_ENDIAN_8IN16 1 -#define GRPH_ENDIAN_8IN32 2 -#define GRPH_ENDIAN_8IN64 3 -#define GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4) -#define GRPH_RED_SEL_R 0 -#define GRPH_RED_SEL_G 1 -#define GRPH_RED_SEL_B 2 -#define GRPH_RED_SEL_A 3 -#define GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6) -#define GRPH_GREEN_SEL_G 0 -#define GRPH_GREEN_SEL_B 1 -#define GRPH_GREEN_SEL_A 2 -#define GRPH_GREEN_SEL_R 3 -#define GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8) -#define GRPH_BLUE_SEL_B 0 -#define GRPH_BLUE_SEL_A 1 -#define GRPH_BLUE_SEL_R 2 -#define GRPH_BLUE_SEL_G 3 -#define GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10) -#define GRPH_ALPHA_SEL_A 0 -#define GRPH_ALPHA_SEL_R 1 -#define GRPH_ALPHA_SEL_G 2 -#define GRPH_ALPHA_SEL_B 3 - -#define GRPH_DEPTH(x) (((x) & 0x3) << 0) -#define GRPH_DEPTH_8BPP 0 -#define GRPH_DEPTH_16BPP 1 -#define GRPH_DEPTH_32BPP 2 - -#define GRPH_FORMAT(x) (((x) & 0x7) << 8) -#define GRPH_FORMAT_INDEXED 0 -#define GRPH_FORMAT_ARGB1555 0 -#define GRPH_FORMAT_ARGB565 1 -#define GRPH_FORMAT_ARGB4444 2 -#define GRPH_FORMAT_AI88 3 -#define GRPH_FORMAT_MONO16 4 -#define GRPH_FORMAT_BGRA5551 5 -#define GRPH_FORMAT_ARGB8888 0 -#define GRPH_FORMAT_ARGB2101010 1 -#define GRPH_FORMAT_32BPP_DIG 2 -#define GRPH_FORMAT_8B_ARGB2101010 3 -#define GRPH_FORMAT_BGRA1010102 4 -#define GRPH_FORMAT_8B_BGRA1010102 5 -#define GRPH_FORMAT_RGB111110 6 -#define GRPH_FORMAT_BGR101111 7 - -#define GRPH_NUM_BANKS(x) (((x) & 0x3) << 2) -#define GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20) -#define GRPH_ARRAY_LINEAR_GENERAL 0 -#define GRPH_ARRAY_LINEAR_ALIGNED 1 -#define GRPH_ARRAY_1D_TILED_THIN1 2 -#define GRPH_ARRAY_2D_TILED_THIN1 4 -#define GRPH_TILE_SPLIT(x) (((x) & 0x7) << 13) -#define GRPH_BANK_WIDTH(x) (((x) & 0x3) << 6) -#define GRPH_BANK_HEIGHT(x) (((x) & 0x3) << 11) -#define GRPH_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 18) -#define GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20) -#define GRPH_PIPE_CONFIG(x) (((x) & 0x1f) << 24) - -#define CURSOR_EN (1 << 0) -#define CURSOR_MODE(x) (((x) & 0x3) << 8) -#define CURSOR_MONO 0 -#define CURSOR_24_1 1 -#define CURSOR_24_8_PRE_MULT 2 -#define CURSOR_24_8_UNPRE_MULT 3 -#define CURSOR_2X_MAGNIFY (1 << 16) -#define CURSOR_FORCE_MC_ON (1 << 20) -#define CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24) -#define CURSOR_URGENT_ALWAYS 0 -#define CURSOR_URGENT_1_8 1 -#define CURSOR_URGENT_1_4 2 -#define CURSOR_URGENT_3_8 3 -#define CURSOR_URGENT_1_2 4 -#define CURSOR_UPDATE_PENDING (1 << 0) -#define CURSOR_UPDATE_TAKEN (1 << 1) -#define CURSOR_UPDATE_LOCK (1 << 16) -#define CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24) - - -#define ES_AND_GS_AUTO 3 -#define RADEON_PACKET_TYPE3 3 -#define CE_PARTITION_BASE 3 -#define BUF_SWAP_32BIT (2 << 16) #define GFX_POWER_STATUS (1 << 1) #define GFX_CLOCK_STATUS (1 << 2) #define GFX_LS_STATUS (1 << 3) -#define RLC_BUSY_STATUS (1 << 0) +#define RLC_BUSY_STATUS (1 << 0) #define RLC_PUD(x) ((x) << 0) #define RLC_PUD_MASK (0xff << 0) #define RLC_PDD(x) ((x) << 8) @@ -140,140 +40,8 @@ #define RLC_TTPD_MASK (0xff << 16) #define RLC_MSD(x) ((x) << 24) #define RLC_MSD_MASK (0xff << 24) -#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) -#define WRITE_DATA_DST_SEL(x) ((x) << 8) -#define EVENT_TYPE(x) ((x) << 0) -#define EVENT_INDEX(x) ((x) << 8) -#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4) -#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) -#define WAIT_REG_MEM_ENGINE(x) ((x) << 8) -#define GFX6_NUM_GFX_RINGS 1 -#define GFX6_NUM_COMPUTE_RINGS 2 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D -#define PACKET3(op, n) ((RADEON_PACKET_TYPE3 << 30) | \ - (((op) & 0xFF) << 8) | \ - ((n) & 0x3FFF) << 16) -#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1) -#define PACKET3_NOP 0x10 -#define PACKET3_SET_BASE 0x11 -#define PACKET3_BASE_INDEX(x) ((x) << 0) -#define PACKET3_CLEAR_STATE 0x12 -#define PACKET3_INDEX_BUFFER_SIZE 0x13 -#define PACKET3_DISPATCH_DIRECT 0x15 -#define PACKET3_DISPATCH_INDIRECT 0x16 -#define PACKET3_ALLOC_GDS 0x1B -#define PACKET3_WRITE_GDS_RAM 0x1C -#define PACKET3_ATOMIC_GDS 0x1D -#define PACKET3_ATOMIC 0x1E -#define PACKET3_OCCLUSION_QUERY 0x1F -#define PACKET3_SET_PREDICATION 0x20 -#define PACKET3_REG_RMW 0x21 -#define PACKET3_COND_EXEC 0x22 -#define PACKET3_PRED_EXEC 0x23 -#define PACKET3_DRAW_INDIRECT 0x24 -#define PACKET3_DRAW_INDEX_INDIRECT 0x25 -#define PACKET3_INDEX_BASE 0x26 -#define PACKET3_DRAW_INDEX_2 0x27 -#define PACKET3_CONTEXT_CONTROL 0x28 -#define PACKET3_INDEX_TYPE 0x2A -#define PACKET3_DRAW_INDIRECT_MULTI 0x2C -#define PACKET3_DRAW_INDEX_AUTO 0x2D -#define PACKET3_DRAW_INDEX_IMMD 0x2E -#define PACKET3_NUM_INSTANCES 0x2F -#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30 -#define PACKET3_INDIRECT_BUFFER_CONST 0x31 -#define PACKET3_INDIRECT_BUFFER 0x3F -#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34 -#define PACKET3_DRAW_INDEX_OFFSET_2 0x35 -#define PACKET3_DRAW_INDEX_MULTI_ELEMENT 0x36 -#define PACKET3_WRITE_DATA 0x37 -#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38 -#define PACKET3_MEM_SEMAPHORE 0x39 -#define PACKET3_MPEG_INDEX 0x3A -#define PACKET3_COPY_DW 0x3B -#define PACKET3_WAIT_REG_MEM 0x3C -#define PACKET3_MEM_WRITE 0x3D -#define PACKET3_COPY_DATA 0x40 -#define PACKET3_CP_DMA 0x41 -# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20) -# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27) -# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29) -# define PACKET3_CP_DMA_CP_SYNC (1 << 31) -# define PACKET3_CP_DMA_DIS_WC (1 << 21) -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22) -# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24) -# define PACKET3_CP_DMA_CMD_SAS (1 << 26) -# define PACKET3_CP_DMA_CMD_DAS (1 << 27) -# define PACKET3_CP_DMA_CMD_SAIC (1 << 28) -# define PACKET3_CP_DMA_CMD_DAIC (1 << 29) -# define PACKET3_CP_DMA_CMD_RAW_WAIT (1 << 30) -#define PACKET3_PFP_SYNC_ME 0x42 -#define PACKET3_SURFACE_SYNC 0x43 -# define PACKET3_DEST_BASE_0_ENA (1 << 0) -# define PACKET3_DEST_BASE_1_ENA (1 << 1) -# define PACKET3_CB0_DEST_BASE_ENA (1 << 6) -# define PACKET3_CB1_DEST_BASE_ENA (1 << 7) -# define PACKET3_CB2_DEST_BASE_ENA (1 << 8) -# define PACKET3_CB3_DEST_BASE_ENA (1 << 9) -# define PACKET3_CB4_DEST_BASE_ENA (1 << 10) -# define PACKET3_CB5_DEST_BASE_ENA (1 << 11) -# define PACKET3_CB6_DEST_BASE_ENA (1 << 12) -# define PACKET3_CB7_DEST_BASE_ENA (1 << 13) -# define PACKET3_DB_DEST_BASE_ENA (1 << 14) -# define PACKET3_DEST_BASE_2_ENA (1 << 19) -# define PACKET3_DEST_BASE_3_ENA (1 << 21) -# define PACKET3_TCL1_ACTION_ENA (1 << 22) -# define PACKET3_TC_ACTION_ENA (1 << 23) -# define PACKET3_CB_ACTION_ENA (1 << 25) -# define PACKET3_DB_ACTION_ENA (1 << 26) -# define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27) -# define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29) -#define PACKET3_ME_INITIALIZE 0x44 -#define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16) -#define PACKET3_COND_WRITE 0x45 -#define PACKET3_EVENT_WRITE 0x46 -#define PACKET3_EVENT_WRITE_EOP 0x47 -#define PACKET3_EVENT_WRITE_EOS 0x48 -#define PACKET3_PREAMBLE_CNTL 0x4A -# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28) -# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28) -#define PACKET3_ONE_REG_WRITE 0x57 -#define PACKET3_LOAD_CONFIG_REG 0x5F -#define PACKET3_LOAD_CONTEXT_REG 0x60 -#define PACKET3_LOAD_SH_REG 0x61 -#define PACKET3_SET_CONFIG_REG 0x68 -#define PACKET3_SET_CONFIG_REG_START 0x00002000 -#define PACKET3_SET_CONFIG_REG_END 0x00002c00 -#define PACKET3_SET_CONTEXT_REG 0x69 -#define PACKET3_SET_CONTEXT_REG_START 0x000a000 -#define PACKET3_SET_CONTEXT_REG_END 0x000a400 -#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73 -#define PACKET3_SET_RESOURCE_INDIRECT 0x74 -#define PACKET3_SET_SH_REG 0x76 -#define PACKET3_SET_SH_REG_START 0x00002c00 -#define PACKET3_SET_SH_REG_END 0x00003000 -#define PACKET3_SET_SH_REG_OFFSET 0x77 -#define PACKET3_ME_WRITE 0x7A -#define PACKET3_SCRATCH_RAM_WRITE 0x7D -#define PACKET3_SCRATCH_RAM_READ 0x7E -#define PACKET3_CE_WRITE 0x7F -#define PACKET3_LOAD_CONST_RAM 0x80 -#define PACKET3_WRITE_CONST_RAM 0x81 -#define PACKET3_WRITE_CONST_RAM_OFFSET 0x82 -#define PACKET3_DUMP_CONST_RAM 0x83 -#define PACKET3_INCREMENT_CE_COUNTER 0x84 -#define PACKET3_INCREMENT_DE_COUNTER 0x85 -#define PACKET3_WAIT_ON_CE_COUNTER 0x86 -#define PACKET3_WAIT_ON_DE_COUNTER 0x87 -#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 -#define PACKET3_SET_CE_DE_COUNTERS 0x89 -#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A -#define PACKET3_SWITCH_BUFFER 0x8B -#define PACKET3_SEM_WAIT_ON_SIGNAL (0x1 << 12) -#define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) -#define PACKET3_SEM_SEL_WAIT (0x7 << 29) - #endif diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 5c38e1fb1dca..1df00f8a2406 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -27,6 +27,7 @@ #include "amdgpu_ih.h" #include "sid.h" #include "si_ih.h" + #include "oss/oss_1_0_d.h" #include "oss/oss_1_0_sh_mask.h" @@ -213,7 +214,7 @@ static int si_ih_resume(struct amdgpu_ip_block *ip_block) static bool si_ih_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - u32 tmp = RREG32(SRBM_STATUS); + u32 tmp = RREG32(mmSRBM_STATUS); if (tmp & SRBM_STATUS__IH_BUSY_MASK) return false; @@ -239,23 +240,23 @@ static int si_ih_soft_reset(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset = 0; - u32 tmp = RREG32(SRBM_STATUS); + u32 tmp = RREG32(mmSRBM_STATUS); if (tmp & SRBM_STATUS__IH_BUSY_MASK) srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_IH_MASK; if (srbm_soft_reset) { - tmp = RREG32(SRBM_SOFT_RESET); + tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; - dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(SRBM_SOFT_RESET, tmp); - tmp = RREG32(SRBM_SOFT_RESET); + dev_info(adev->dev, "mmSRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); udelay(50); tmp &= ~srbm_soft_reset; - WREG32(SRBM_SOFT_RESET, tmp); - tmp = RREG32(SRBM_SOFT_RESET); + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); udelay(50); } diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index cbf232f5235b..cbd4f8951cfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -24,43 +24,12 @@ #ifndef SI_H #define SI_H -#define TAHITI_RB_BITMAP_WIDTH_PER_SH 2 - -#define SI_MAX_SH_GPRS 256 -#define SI_MAX_TEMP_GPRS 16 -#define SI_MAX_SH_THREADS 256 -#define SI_MAX_SH_STACK_ENTRIES 4096 -#define SI_MAX_FRC_EOV_CNT 16384 -#define SI_MAX_BACKENDS 8 -#define SI_MAX_BACKENDS_MASK 0xFF -#define SI_MAX_BACKENDS_PER_SE_MASK 0x0F -#define SI_MAX_SIMDS 12 -#define SI_MAX_SIMDS_MASK 0x0FFF -#define SI_MAX_SIMDS_PER_SE_MASK 0x00FF -#define SI_MAX_PIPES 8 -#define SI_MAX_PIPES_MASK 0xFF -#define SI_MAX_PIPES_PER_SIMD_MASK 0x3F -#define SI_MAX_LDS_NUM 0xFFFF -#define SI_MAX_TCC 16 -#define SI_MAX_TCC_MASK 0xFFFF #define SI_MAX_CTLACKS_ASSERTION_WAIT 100 -/* SMC IND accessor regs */ -#define SMC_IND_INDEX_0 0x80 -#define SMC_IND_DATA_0 0x81 - -#define SMC_IND_ACCESS_CNTL 0x8A -# define AUTO_INCREMENT_IND_0 (1 << 0) -#define SMC_MESSAGE_0 0x8B -#define SMC_RESP_0 0x8C - /* CG IND registers are accessed via SMC indirect space + SMC_CG_IND_START */ #define SMC_CG_IND_START 0xc0030000 #define SMC_CG_IND_END 0xc0040000 -#define CG_CGTT_LOCAL_0 0x400 -#define CG_CGTT_LOCAL_1 0x401 - /* SMC IND registers */ #define SMC_SYSCON_RESET_CNTL 0x80000000 # define RST_REG (1 << 0) @@ -68,9 +37,6 @@ # define CK_DISABLE (1 << 0) # define CKEN (1 << 24) -#define VGA_HDP_CONTROL 0xCA -#define VGA_MEMORY_DISABLE (1 << 4) - #define DCCG_DISP_SLOW_SELECT_REG 0x13F #define DCCG_DISP1_SLOW_SELECT(x) ((x) << 0) #define DCCG_DISP1_SLOW_SELECT_MASK (7 << 0) @@ -79,47 +45,6 @@ #define DCCG_DISP2_SLOW_SELECT_MASK (7 << 4) #define DCCG_DISP2_SLOW_SELECT_SHIFT 4 -#define CG_SPLL_FUNC_CNTL 0x180 -#define SPLL_RESET (1 << 0) -#define SPLL_SLEEP (1 << 1) -#define SPLL_BYPASS_EN (1 << 3) -#define SPLL_REF_DIV(x) ((x) << 4) -#define SPLL_REF_DIV_MASK (0x3f << 4) -#define SPLL_PDIV_A(x) ((x) << 20) -#define SPLL_PDIV_A_MASK (0x7f << 20) -#define SPLL_PDIV_A_SHIFT 20 -#define CG_SPLL_FUNC_CNTL_2 0x181 -#define SCLK_MUX_SEL(x) ((x) << 0) -#define SCLK_MUX_SEL_MASK (0x1ff << 0) -#define SPLL_CTLREQ_CHG (1 << 23) -#define SCLK_MUX_UPDATE (1 << 26) -#define CG_SPLL_FUNC_CNTL_3 0x182 -#define SPLL_FB_DIV(x) ((x) << 0) -#define SPLL_FB_DIV_MASK (0x3ffffff << 0) -#define SPLL_FB_DIV_SHIFT 0 -#define SPLL_DITHEN (1 << 28) -#define CG_SPLL_FUNC_CNTL_4 0x183 - -#define SPLL_STATUS 0x185 -#define SPLL_CHG_STATUS (1 << 1) -#define SPLL_CNTL_MODE 0x186 -#define SPLL_SW_DIR_CONTROL (1 << 0) -# define SPLL_REFCLK_SEL(x) ((x) << 26) -# define SPLL_REFCLK_SEL_MASK (3 << 26) - -#define CG_SPLL_SPREAD_SPECTRUM 0x188 -#define SSEN (1 << 0) -#define CLK_S(x) ((x) << 4) -#define CLK_S_MASK (0xfff << 4) -#define CLK_S_SHIFT 4 -#define CG_SPLL_SPREAD_SPECTRUM_2 0x189 -#define CLK_V(x) ((x) << 0) -#define CLK_V_MASK (0x3ffffff << 0) -#define CLK_V_SHIFT 0 - -#define CG_SPLL_AUTOSCALE_CNTL 0x18b -# define AUTOSCALE_ON_SS_CLEAR (1 << 9) - /* discrete uvd clocks */ #define CG_UPLL_FUNC_CNTL 0x18d # define UPLL_RESET_MASK 0x00000001 @@ -149,317 +74,13 @@ #define CG_UPLL_SPREAD_SPECTRUM 0x194 # define SSEN_MASK 0x00000001 -#define MPLL_BYPASSCLK_SEL 0x197 -# define MPLL_CLKOUT_SEL(x) ((x) << 8) -# define MPLL_CLKOUT_SEL_MASK 0xFF00 - -#define CG_CLKPIN_CNTL 0x198 -# define XTALIN_DIVIDE (1 << 1) -# define BCLK_AS_XCLK (1 << 2) -#define CG_CLKPIN_CNTL_2 0x199 -# define FORCE_BIF_REFCLK_EN (1 << 3) -# define MUX_TCLK_TO_XCLK (1 << 8) - -#define THM_CLK_CNTL 0x19b -# define CMON_CLK_SEL(x) ((x) << 0) -# define CMON_CLK_SEL_MASK 0xFF -# define TMON_CLK_SEL(x) ((x) << 8) -# define TMON_CLK_SEL_MASK 0xFF00 -#define MISC_CLK_CNTL 0x19c -# define DEEP_SLEEP_CLK_SEL(x) ((x) << 0) -# define DEEP_SLEEP_CLK_SEL_MASK 0xFF -# define ZCLK_SEL(x) ((x) << 8) -# define ZCLK_SEL_MASK 0xFF00 - -#define CG_THERMAL_CTRL 0x1c0 -#define DPM_EVENT_SRC(x) ((x) << 0) -#define DPM_EVENT_SRC_MASK (7 << 0) -#define DIG_THERM_DPM(x) ((x) << 14) -#define DIG_THERM_DPM_MASK 0x003FC000 -#define DIG_THERM_DPM_SHIFT 14 -#define CG_THERMAL_STATUS 0x1c1 -#define FDO_PWM_DUTY(x) ((x) << 9) -#define FDO_PWM_DUTY_MASK (0xff << 9) -#define FDO_PWM_DUTY_SHIFT 9 -#define CG_THERMAL_INT 0x1c2 -#define DIG_THERM_INTH(x) ((x) << 8) -#define DIG_THERM_INTH_MASK 0x0000FF00 -#define DIG_THERM_INTH_SHIFT 8 -#define DIG_THERM_INTL(x) ((x) << 16) -#define DIG_THERM_INTL_MASK 0x00FF0000 -#define DIG_THERM_INTL_SHIFT 16 -#define THERM_INT_MASK_HIGH (1 << 24) -#define THERM_INT_MASK_LOW (1 << 25) - -#define CG_MULT_THERMAL_CTRL 0x1c4 -#define TEMP_SEL(x) ((x) << 20) -#define TEMP_SEL_MASK (0xff << 20) -#define TEMP_SEL_SHIFT 20 -#define CG_MULT_THERMAL_STATUS 0x1c5 -#define ASIC_MAX_TEMP(x) ((x) << 0) -#define ASIC_MAX_TEMP_MASK 0x000001ff -#define ASIC_MAX_TEMP_SHIFT 0 -#define CTF_TEMP(x) ((x) << 9) -#define CTF_TEMP_MASK 0x0003fe00 -#define CTF_TEMP_SHIFT 9 - -#define CG_FDO_CTRL0 0x1d5 -#define FDO_STATIC_DUTY(x) ((x) << 0) -#define FDO_STATIC_DUTY_MASK 0x000000FF -#define FDO_STATIC_DUTY_SHIFT 0 -#define CG_FDO_CTRL1 0x1d6 -#define FMAX_DUTY100(x) ((x) << 0) -#define FMAX_DUTY100_MASK 0x000000FF -#define FMAX_DUTY100_SHIFT 0 -#define CG_FDO_CTRL2 0x1d7 -#define TMIN(x) ((x) << 0) -#define TMIN_MASK 0x000000FF -#define TMIN_SHIFT 0 -#define FDO_PWM_MODE(x) ((x) << 11) -#define FDO_PWM_MODE_MASK (7 << 11) -#define FDO_PWM_MODE_SHIFT 11 -#define TACH_PWM_RESP_RATE(x) ((x) << 25) -#define TACH_PWM_RESP_RATE_MASK (0x7f << 25) -#define TACH_PWM_RESP_RATE_SHIFT 25 - -#define CG_TACH_CTRL 0x1dc -# define EDGE_PER_REV(x) ((x) << 0) -# define EDGE_PER_REV_MASK (0x7 << 0) -# define EDGE_PER_REV_SHIFT 0 -# define TARGET_PERIOD(x) ((x) << 3) -# define TARGET_PERIOD_MASK 0xfffffff8 -# define TARGET_PERIOD_SHIFT 3 -#define CG_TACH_STATUS 0x1dd -# define TACH_PERIOD(x) ((x) << 0) -# define TACH_PERIOD_MASK 0xffffffff -# define TACH_PERIOD_SHIFT 0 - -#define GENERAL_PWRMGT 0x1e0 -# define GLOBAL_PWRMGT_EN (1 << 0) -# define STATIC_PM_EN (1 << 1) -# define THERMAL_PROTECTION_DIS (1 << 2) -# define THERMAL_PROTECTION_TYPE (1 << 3) -# define SW_SMIO_INDEX(x) ((x) << 6) -# define SW_SMIO_INDEX_MASK (1 << 6) -# define SW_SMIO_INDEX_SHIFT 6 -# define VOLT_PWRMGT_EN (1 << 10) -# define DYN_SPREAD_SPECTRUM_EN (1 << 23) -#define CG_TPC 0x1e1 -#define SCLK_PWRMGT_CNTL 0x1e2 -# define SCLK_PWRMGT_OFF (1 << 0) -# define SCLK_LOW_D1 (1 << 1) -# define FIR_RESET (1 << 4) -# define FIR_FORCE_TREND_SEL (1 << 5) -# define FIR_TREND_MODE (1 << 6) -# define DYN_GFX_CLK_OFF_EN (1 << 7) -# define GFX_CLK_FORCE_ON (1 << 8) -# define GFX_CLK_REQUEST_OFF (1 << 9) -# define GFX_CLK_FORCE_OFF (1 << 10) -# define GFX_CLK_OFF_ACPI_D1 (1 << 11) -# define GFX_CLK_OFF_ACPI_D2 (1 << 12) -# define GFX_CLK_OFF_ACPI_D3 (1 << 13) -# define DYN_LIGHT_SLEEP_EN (1 << 14) - -#define TARGET_AND_CURRENT_PROFILE_INDEX 0x1e6 -# define CURRENT_STATE_INDEX_MASK (0xf << 4) -# define CURRENT_STATE_INDEX_SHIFT 4 - -#define CG_FTV 0x1ef - -#define CG_FFCT_0 0x1f0 -# define UTC_0(x) ((x) << 0) -# define UTC_0_MASK (0x3ff << 0) -# define DTC_0(x) ((x) << 10) -# define DTC_0_MASK (0x3ff << 10) - -#define CG_BSP 0x1ff -# define BSP(x) ((x) << 0) -# define BSP_MASK (0xffff << 0) -# define BSU(x) ((x) << 16) -# define BSU_MASK (0xf << 16) -#define CG_AT 0x200 -# define CG_R(x) ((x) << 0) -# define CG_R_MASK (0xffff << 0) -# define CG_L(x) ((x) << 16) -# define CG_L_MASK (0xffff << 16) - -#define CG_GIT 0x201 -# define CG_GICST(x) ((x) << 0) -# define CG_GICST_MASK (0xffff << 0) -# define CG_GIPOT(x) ((x) << 16) -# define CG_GIPOT_MASK (0xffff << 16) - -#define CG_SSP 0x203 -# define SST(x) ((x) << 0) -# define SST_MASK (0xffff << 0) -# define SSTU(x) ((x) << 16) -# define SSTU_MASK (0xf << 16) - -#define CG_DISPLAY_GAP_CNTL 0x20a -# define DISP1_GAP(x) ((x) << 0) -# define DISP1_GAP_MASK (3 << 0) -# define DISP2_GAP(x) ((x) << 2) -# define DISP2_GAP_MASK (3 << 2) -# define VBI_TIMER_COUNT(x) ((x) << 4) -# define VBI_TIMER_COUNT_MASK (0x3fff << 4) -# define VBI_TIMER_UNIT(x) ((x) << 20) -# define VBI_TIMER_UNIT_MASK (7 << 20) -# define DISP1_GAP_MCHG(x) ((x) << 24) -# define DISP1_GAP_MCHG_MASK (3 << 24) -# define DISP2_GAP_MCHG(x) ((x) << 26) -# define DISP2_GAP_MCHG_MASK (3 << 26) - -#define CG_ULV_CONTROL 0x21e -#define CG_ULV_PARAMETER 0x21f - -#define SMC_SCRATCH0 0x221 - -#define CG_CAC_CTRL 0x22e -# define CAC_WINDOW(x) ((x) << 0) -# define CAC_WINDOW_MASK 0x00ffffff - -#define DMIF_ADDR_CONFIG 0x2F5 - -#define DMIF_ADDR_CALC 0x300 - -#define PIPE0_DMIF_BUFFER_CONTROL 0x0328 -# define DMIF_BUFFERS_ALLOCATED(x) ((x) << 0) -# define DMIF_BUFFERS_ALLOCATED_COMPLETED (1 << 4) - -#define SRBM_STATUS 0x394 -#define GRBM_RQ_PENDING (1 << 5) -#define VMC_BUSY (1 << 8) -#define MCB_BUSY (1 << 9) -#define MCB_NON_DISPLAY_BUSY (1 << 10) -#define MCC_BUSY (1 << 11) -#define MCD_BUSY (1 << 12) -#define SEM_BUSY (1 << 14) -#define IH_BUSY (1 << 17) - -#define SRBM_SOFT_RESET 0x398 -#define SOFT_RESET_BIF (1 << 1) -#define SOFT_RESET_DC (1 << 5) -#define SOFT_RESET_DMA1 (1 << 6) -#define SOFT_RESET_GRBM (1 << 8) -#define SOFT_RESET_HDP (1 << 9) -#define SOFT_RESET_IH (1 << 10) -#define SOFT_RESET_MC (1 << 11) -#define SOFT_RESET_ROM (1 << 14) -#define SOFT_RESET_SEM (1 << 15) -#define SOFT_RESET_VMC (1 << 17) -#define SOFT_RESET_DMA (1 << 20) -#define SOFT_RESET_TST (1 << 21) -#define SOFT_RESET_REGBB (1 << 22) -#define SOFT_RESET_ORB (1 << 23) - -#define CC_SYS_RB_BACKEND_DISABLE 0x3A0 -#define GC_USER_SYS_RB_BACKEND_DISABLE 0x3A1 - -#define SRBM_READ_ERROR 0x3A6 -#define SRBM_INT_CNTL 0x3A8 -#define SRBM_INT_ACK 0x3AA - -#define SRBM_STATUS2 0x3B1 -#define DMA_BUSY (1 << 5) -#define DMA1_BUSY (1 << 6) - -#define VM_L2_CNTL 0x500 -#define ENABLE_L2_CACHE (1 << 0) -#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1) -#define L2_CACHE_PTE_ENDIAN_SWAP_MODE(x) ((x) << 2) -#define L2_CACHE_PDE_ENDIAN_SWAP_MODE(x) ((x) << 4) -#define ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9) -#define ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE (1 << 10) -#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 15) -#define CONTEXT1_IDENTITY_ACCESS_MODE(x) (((x) & 3) << 19) -#define VM_L2_CNTL2 0x501 -#define INVALIDATE_ALL_L1_TLBS (1 << 0) -#define INVALIDATE_L2_CACHE (1 << 1) -#define INVALIDATE_CACHE_MODE(x) ((x) << 26) -#define INVALIDATE_PTE_AND_PDE_CACHES 0 -#define INVALIDATE_ONLY_PTE_CACHES 1 -#define INVALIDATE_ONLY_PDE_CACHES 2 -#define VM_L2_CNTL3 0x502 -#define BANK_SELECT(x) ((x) << 0) -#define L2_CACHE_UPDATE_MODE(x) ((x) << 6) -#define L2_CACHE_BIGK_FRAGMENT_SIZE(x) ((x) << 15) -#define L2_CACHE_BIGK_ASSOCIATIVITY (1 << 20) -#define VM_L2_STATUS 0x503 -#define L2_BUSY (1 << 0) -#define VM_CONTEXT0_CNTL 0x504 -#define ENABLE_CONTEXT (1 << 0) -#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1) -#define RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 3) -#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4) -#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 6) -#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 7) -#define PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 9) -#define PDE0_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 10) -#define VALID_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 12) -#define VALID_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 13) -#define READ_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 15) -#define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16) -#define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18) -#define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19) -#define PAGE_TABLE_BLOCK_SIZE(x) (((x) & 0xF) << 24) -#define VM_CONTEXT1_CNTL 0x505 -#define VM_CONTEXT0_CNTL2 0x50C -#define VM_CONTEXT1_CNTL2 0x50D -#define VM_CONTEXT8_PAGE_TABLE_BASE_ADDR 0x50E -#define VM_CONTEXT9_PAGE_TABLE_BASE_ADDR 0x50F -#define VM_CONTEXT10_PAGE_TABLE_BASE_ADDR 0x510 -#define VM_CONTEXT11_PAGE_TABLE_BASE_ADDR 0x511 -#define VM_CONTEXT12_PAGE_TABLE_BASE_ADDR 0x512 -#define VM_CONTEXT13_PAGE_TABLE_BASE_ADDR 0x513 -#define VM_CONTEXT14_PAGE_TABLE_BASE_ADDR 0x514 -#define VM_CONTEXT15_PAGE_TABLE_BASE_ADDR 0x515 - -#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x53f -#define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x537 -#define PROTECTIONS_MASK (0xf << 0) -#define PROTECTIONS_SHIFT 0 - /* bit 0: range - * bit 1: pde0 - * bit 2: valid - * bit 3: read - * bit 4: write - */ -#define MEMORY_CLIENT_ID_MASK (0xff << 12) -#define MEMORY_CLIENT_ID_SHIFT 12 -#define MEMORY_CLIENT_RW_MASK (1 << 24) -#define MEMORY_CLIENT_RW_SHIFT 24 -#define FAULT_VMID_MASK (0xf << 25) -#define FAULT_VMID_SHIFT 25 - #define VM_INVALIDATE_REQUEST 0x51E #define VM_INVALIDATE_RESPONSE 0x51F -#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x546 -#define VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR 0x547 - -#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x54F -#define VM_CONTEXT1_PAGE_TABLE_BASE_ADDR 0x550 -#define VM_CONTEXT2_PAGE_TABLE_BASE_ADDR 0x551 -#define VM_CONTEXT3_PAGE_TABLE_BASE_ADDR 0x552 -#define VM_CONTEXT4_PAGE_TABLE_BASE_ADDR 0x553 -#define VM_CONTEXT5_PAGE_TABLE_BASE_ADDR 0x554 -#define VM_CONTEXT6_PAGE_TABLE_BASE_ADDR 0x555 -#define VM_CONTEXT7_PAGE_TABLE_BASE_ADDR 0x556 -#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x557 -#define VM_CONTEXT1_PAGE_TABLE_START_ADDR 0x558 - -#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x55F -#define VM_CONTEXT1_PAGE_TABLE_END_ADDR 0x560 - #define VM_L2_CG 0x570 #define MC_CG_ENABLE (1 << 18) #define MC_LS_ENABLE (1 << 19) -#define MC_SHARED_CHMAP 0x801 -#define NOOFCHAN_SHIFT 12 -#define NOOFCHAN_MASK 0x0000f000 -#define MC_SHARED_CHREMAP 0x802 - #define MC_VM_FB_LOCATION 0x809 #define MC_VM_AGP_TOP 0x80A #define MC_VM_AGP_BOT 0x80B @@ -491,21 +112,6 @@ #define MC_CITF_MISC_WR_CG 0x993 #define MC_CITF_MISC_VM_CG 0x994 -#define MC_ARB_RAMCFG 0x9D8 -#define NOOFBANK_SHIFT 0 -#define NOOFBANK_MASK 0x00000003 -#define NOOFRANK_SHIFT 2 -#define NOOFRANK_MASK 0x00000004 -#define NOOFROWS_SHIFT 3 -#define NOOFROWS_MASK 0x00000038 -#define NOOFCOLS_SHIFT 6 -#define NOOFCOLS_MASK 0x000000C0 -#define CHANSIZE_SHIFT 8 -#define CHANSIZE_MASK 0x00000100 -#define CHANSIZE_OVERRIDE (1 << 11) -#define NOOFGROUPS_SHIFT 12 -#define NOOFGROUPS_MASK 0x00001000 - #define MC_ARB_DRAM_TIMING 0x9DD #define MC_ARB_DRAM_TIMING2 0x9DE @@ -631,20 +237,6 @@ #define CLKS(x) ((x) << 0) #define CLKS_MASK (0xfff << 0) -#define HDP_HOST_PATH_CNTL 0xB00 -#define CLOCK_GATING_DIS (1 << 23) -#define HDP_NONSURFACE_BASE 0xB01 -#define HDP_NONSURFACE_INFO 0xB02 -#define HDP_NONSURFACE_SIZE 0xB03 - -#define HDP_DEBUG0 0xBCC - -#define HDP_ADDR_CONFIG 0xBD2 -#define HDP_MISC_CNTL 0xBD3 -#define HDP_FLUSH_INVALIDATE_CACHE (1 << 0) -#define HDP_MEM_POWER_LS 0xBD4 -#define HDP_LS_ENABLE (1 << 0) - #define ATC_MISC_CG 0xCD4 #define IH_RB_CNTL 0xF80 @@ -674,8 +266,6 @@ # define MC_WR_CLEAN_CNT(x) ((x) << 20) # define MC_VMID(x) ((x) << 25) -#define CONFIG_MEMSIZE 0x150A - #define INTERRUPT_CNTL 0x151A # define IH_DUMMY_RD_OVERRIDE (1 << 0) # define IH_DUMMY_RD_EN (1 << 1) @@ -683,486 +273,22 @@ # define GEN_IH_INT_EN (1 << 8) #define INTERRUPT_CNTL2 0x151B -#define HDP_MEM_COHERENCY_FLUSH_CNTL 0x1520 - -#define BIF_FB_EN 0x1524 -#define FB_READ_EN (1 << 0) -#define FB_WRITE_EN (1 << 1) - -#define HDP_REG_COHERENCY_FLUSH_CNTL 0x1528 - -/* DCE6 ELD audio interface */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0 0x28 /* LPCM */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1 0x29 /* AC3 */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2 0x2A /* MPEG1 */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3 0x2B /* MP3 */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4 0x2C /* MPEG2 */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5 0x2D /* AAC */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6 0x2E /* DTS */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7 0x2F /* ATRAC */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR8 0x30 /* one bit audio - leave at 0 (default) */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9 0x31 /* Dolby Digital */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10 0x32 /* DTS-HD */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11 0x33 /* MAT-MLP */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR12 0x34 /* DTS */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13 0x35 /* WMA Pro */ -# define MAX_CHANNELS(x) (((x) & 0x7) << 0) -/* max channels minus one. 7 = 8 channels */ -# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8) -# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16) -# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */ -/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO - * bit0 = 32 kHz - * bit1 = 44.1 kHz - * bit2 = 48 kHz - * bit3 = 88.2 kHz - * bit4 = 96 kHz - * bit5 = 176.4 kHz - * bit6 = 192 kHz - */ - -#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC 0x37 -# define VIDEO_LIPSYNC(x) (((x) & 0xff) << 0) -# define AUDIO_LIPSYNC(x) (((x) & 0xff) << 8) -/* VIDEO_LIPSYNC, AUDIO_LIPSYNC - * 0 = invalid - * x = legal delay value - * 255 = sync not supported - */ -#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_HBR 0x38 -# define HBR_CAPABLE (1 << 0) /* enabled by default */ - -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO0 0x3a -# define MANUFACTURER_ID(x) (((x) & 0xffff) << 0) -# define PRODUCT_ID(x) (((x) & 0xffff) << 16) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO1 0x3b -# define SINK_DESCRIPTION_LEN(x) (((x) & 0xff) << 0) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO2 0x3c -# define PORT_ID0(x) (((x) & 0xffffffff) << 0) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO3 0x3d -# define PORT_ID1(x) (((x) & 0xffffffff) << 0) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO4 0x3e -# define DESCRIPTION0(x) (((x) & 0xff) << 0) -# define DESCRIPTION1(x) (((x) & 0xff) << 8) -# define DESCRIPTION2(x) (((x) & 0xff) << 16) -# define DESCRIPTION3(x) (((x) & 0xff) << 24) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO5 0x3f -# define DESCRIPTION4(x) (((x) & 0xff) << 0) -# define DESCRIPTION5(x) (((x) & 0xff) << 8) -# define DESCRIPTION6(x) (((x) & 0xff) << 16) -# define DESCRIPTION7(x) (((x) & 0xff) << 24) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO6 0x40 -# define DESCRIPTION8(x) (((x) & 0xff) << 0) -# define DESCRIPTION9(x) (((x) & 0xff) << 8) -# define DESCRIPTION10(x) (((x) & 0xff) << 16) -# define DESCRIPTION11(x) (((x) & 0xff) << 24) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO7 0x41 -# define DESCRIPTION12(x) (((x) & 0xff) << 0) -# define DESCRIPTION13(x) (((x) & 0xff) << 8) -# define DESCRIPTION14(x) (((x) & 0xff) << 16) -# define DESCRIPTION15(x) (((x) & 0xff) << 24) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO8 0x42 -# define DESCRIPTION16(x) (((x) & 0xff) << 0) -# define DESCRIPTION17(x) (((x) & 0xff) << 8) - -#define AZ_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL 0x54 -# define AUDIO_ENABLED (1 << 31) - -#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT 0x56 -#define PORT_CONNECTIVITY_MASK (3 << 30) -#define PORT_CONNECTIVITY_SHIFT 30 - -#define DC_LB_MEMORY_SPLIT 0x1AC3 -#define DC_LB_MEMORY_CONFIG(x) ((x) << 20) - -#define PRIORITY_A_CNT 0x1AC6 -#define PRIORITY_MARK_MASK 0x7fff -#define PRIORITY_OFF (1 << 16) -#define PRIORITY_ALWAYS_ON (1 << 20) -#define PRIORITY_B_CNT 0x1AC7 - -#define DPG_PIPE_ARBITRATION_CONTROL3 0x1B32 -# define LATENCY_WATERMARK_MASK(x) ((x) << 16) -#define DPG_PIPE_LATENCY_CONTROL 0x1B33 -# define LATENCY_LOW_WATERMARK(x) ((x) << 0) -# define LATENCY_HIGH_WATERMARK(x) ((x) << 16) - -/* 0x6bb8, 0x77b8, 0x103b8, 0x10fb8, 0x11bb8, 0x127b8 */ -#define VLINE_STATUS 0x1AEE -# define VLINE_OCCURRED (1 << 0) -# define VLINE_ACK (1 << 4) -# define VLINE_STAT (1 << 12) -# define VLINE_INTERRUPT (1 << 16) -# define VLINE_INTERRUPT_TYPE (1 << 17) -/* 0x6bbc, 0x77bc, 0x103bc, 0x10fbc, 0x11bbc, 0x127bc */ -#define VBLANK_STATUS 0x1AEF -# define VBLANK_OCCURRED (1 << 0) -# define VBLANK_ACK (1 << 4) -# define VBLANK_STAT (1 << 12) -# define VBLANK_INTERRUPT (1 << 16) -# define VBLANK_INTERRUPT_TYPE (1 << 17) - -/* 0x6b40, 0x7740, 0x10340, 0x10f40, 0x11b40, 0x12740 */ -#define INT_MASK 0x1AD0 -# define VBLANK_INT_MASK (1 << 0) -# define VLINE_INT_MASK (1 << 4) - -#define DISP_INTERRUPT_STATUS 0x183D -# define LB_D1_VLINE_INTERRUPT (1 << 2) -# define LB_D1_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD1_INTERRUPT (1 << 17) -# define DC_HPD1_RX_INTERRUPT (1 << 18) -# define DACA_AUTODETECT_INTERRUPT (1 << 22) -# define DACB_AUTODETECT_INTERRUPT (1 << 23) -# define DC_I2C_SW_DONE_INTERRUPT (1 << 24) -# define DC_I2C_HW_DONE_INTERRUPT (1 << 25) -#define DISP_INTERRUPT_STATUS_CONTINUE 0x183E -# define LB_D2_VLINE_INTERRUPT (1 << 2) -# define LB_D2_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD2_INTERRUPT (1 << 17) -# define DC_HPD2_RX_INTERRUPT (1 << 18) -# define DISP_TIMER_INTERRUPT (1 << 24) -#define DISP_INTERRUPT_STATUS_CONTINUE2 0x183F -# define LB_D3_VLINE_INTERRUPT (1 << 2) -# define LB_D3_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD3_INTERRUPT (1 << 17) -# define DC_HPD3_RX_INTERRUPT (1 << 18) -#define DISP_INTERRUPT_STATUS_CONTINUE3 0x1840 -# define LB_D4_VLINE_INTERRUPT (1 << 2) -# define LB_D4_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD4_INTERRUPT (1 << 17) -# define DC_HPD4_RX_INTERRUPT (1 << 18) -#define DISP_INTERRUPT_STATUS_CONTINUE4 0x1853 -# define LB_D5_VLINE_INTERRUPT (1 << 2) -# define LB_D5_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD5_INTERRUPT (1 << 17) -# define DC_HPD5_RX_INTERRUPT (1 << 18) -#define DISP_INTERRUPT_STATUS_CONTINUE5 0x1854 -# define LB_D6_VLINE_INTERRUPT (1 << 2) -# define LB_D6_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD6_INTERRUPT (1 << 17) -# define DC_HPD6_RX_INTERRUPT (1 << 18) - -/* 0x6858, 0x7458, 0x10058, 0x10c58, 0x11858, 0x12458 */ -#define GRPH_INT_STATUS 0x1A16 -# define GRPH_PFLIP_INT_OCCURRED (1 << 0) -# define GRPH_PFLIP_INT_CLEAR (1 << 8) -/* 0x685c, 0x745c, 0x1005c, 0x10c5c, 0x1185c, 0x1245c */ -#define GRPH_INT_CONTROL 0x1A17 -# define GRPH_PFLIP_INT_MASK (1 << 0) -# define GRPH_PFLIP_INT_TYPE (1 << 8) - -#define DAC_AUTODETECT_INT_CONTROL 0x19F2 - -#define DC_HPD1_INT_STATUS 0x1807 -#define DC_HPD2_INT_STATUS 0x180A -#define DC_HPD3_INT_STATUS 0x180D -#define DC_HPD4_INT_STATUS 0x1810 -#define DC_HPD5_INT_STATUS 0x1813 -#define DC_HPD6_INT_STATUS 0x1816 -# define DC_HPDx_INT_STATUS (1 << 0) -# define DC_HPDx_SENSE (1 << 1) -# define DC_HPDx_RX_INT_STATUS (1 << 8) - -#define DC_HPD1_INT_CONTROL 0x1808 -#define DC_HPD2_INT_CONTROL 0x180B -#define DC_HPD3_INT_CONTROL 0x180E -#define DC_HPD4_INT_CONTROL 0x1811 -#define DC_HPD5_INT_CONTROL 0x1814 -#define DC_HPD6_INT_CONTROL 0x1817 -# define DC_HPDx_INT_ACK (1 << 0) -# define DC_HPDx_INT_POLARITY (1 << 8) -# define DC_HPDx_INT_EN (1 << 16) -# define DC_HPDx_RX_INT_ACK (1 << 20) -# define DC_HPDx_RX_INT_EN (1 << 24) - -#define DC_HPD1_CONTROL 0x1809 -#define DC_HPD2_CONTROL 0x180C -#define DC_HPD3_CONTROL 0x180F -#define DC_HPD4_CONTROL 0x1812 -#define DC_HPD5_CONTROL 0x1815 -#define DC_HPD6_CONTROL 0x1818 -# define DC_HPDx_CONNECTION_TIMER(x) ((x) << 0) -# define DC_HPDx_RX_INT_TIMER(x) ((x) << 16) -# define DC_HPDx_EN (1 << 28) - -#define DPG_PIPE_STUTTER_CONTROL 0x1B35 -# define STUTTER_ENABLE (1 << 0) - -/* 0x6e98, 0x7a98, 0x10698, 0x11298, 0x11e98, 0x12a98 */ -#define CRTC_STATUS_FRAME_COUNT 0x1BA6 - -/* Audio clocks */ -#define DCCG_AUDIO_DTO0_PHASE 0x05b0 -#define DCCG_AUDIO_DTO0_MODULE 0x05b4 -#define DCCG_AUDIO_DTO1_PHASE 0x05c0 -#define DCCG_AUDIO_DTO1_MODULE 0x05c4 - -#define GRBM_CNTL 0x2000 -#define GRBM_READ_TIMEOUT(x) ((x) << 0) - -#define GRBM_STATUS2 0x2002 -#define RLC_RQ_PENDING (1 << 0) -#define RLC_BUSY (1 << 8) -#define TC_BUSY (1 << 9) - -#define GRBM_STATUS 0x2004 -#define CMDFIFO_AVAIL_MASK 0x0000000F -#define RING2_RQ_PENDING (1 << 4) -#define SRBM_RQ_PENDING (1 << 5) -#define RING1_RQ_PENDING (1 << 6) -#define CF_RQ_PENDING (1 << 7) -#define PF_RQ_PENDING (1 << 8) -#define GDS_DMA_RQ_PENDING (1 << 9) -#define GRBM_EE_BUSY (1 << 10) -#define DB_CLEAN (1 << 12) -#define CB_CLEAN (1 << 13) -#define TA_BUSY (1 << 14) -#define GDS_BUSY (1 << 15) -#define VGT_BUSY (1 << 17) -#define IA_BUSY_NO_DMA (1 << 18) -#define IA_BUSY (1 << 19) -#define SX_BUSY (1 << 20) -#define SPI_BUSY (1 << 22) -#define BCI_BUSY (1 << 23) -#define SC_BUSY (1 << 24) -#define PA_BUSY (1 << 25) -#define DB_BUSY (1 << 26) -#define CP_COHERENCY_BUSY (1 << 28) -#define CP_BUSY (1 << 29) -#define CB_BUSY (1 << 30) -#define GUI_ACTIVE (1 << 31) -#define GRBM_STATUS_SE0 0x2005 -#define GRBM_STATUS_SE1 0x2006 -#define SE_DB_CLEAN (1 << 1) -#define SE_CB_CLEAN (1 << 2) -#define SE_BCI_BUSY (1 << 22) -#define SE_VGT_BUSY (1 << 23) -#define SE_PA_BUSY (1 << 24) -#define SE_TA_BUSY (1 << 25) -#define SE_SX_BUSY (1 << 26) -#define SE_SPI_BUSY (1 << 27) -#define SE_SC_BUSY (1 << 29) -#define SE_DB_BUSY (1 << 30) -#define SE_CB_BUSY (1 << 31) - -#define GRBM_INT_CNTL 0x2018 -# define RDERR_INT_ENABLE (1 << 0) -# define GUI_IDLE_INT_ENABLE (1 << 19) - -#define CP_STRMOUT_CNTL 0x213F -#define SCRATCH_REG0 0x2140 -#define SCRATCH_REG1 0x2141 -#define SCRATCH_REG2 0x2142 -#define SCRATCH_REG3 0x2143 -#define SCRATCH_REG4 0x2144 -#define SCRATCH_REG5 0x2145 -#define SCRATCH_REG6 0x2146 -#define SCRATCH_REG7 0x2147 - -#define SCRATCH_UMSK 0x2150 -#define SCRATCH_ADDR 0x2151 - -#define CP_SEM_WAIT_TIMER 0x216F - -#define CP_SEM_INCOMPLETE_TIMER_CNTL 0x2172 - -#define CP_ME_CNTL 0x21B6 -#define CP_CE_HALT (1 << 24) -#define CP_PFP_HALT (1 << 26) -#define CP_ME_HALT (1 << 28) - -#define CP_COHER_CNTL2 0x217A - -#define CP_RB2_RPTR 0x21BE -#define CP_RB1_RPTR 0x21BF -#define CP_RB0_RPTR 0x21C0 -#define CP_RB_WPTR_DELAY 0x21C1 - -#define CP_QUEUE_THRESHOLDS 0x21D8 -#define ROQ_IB1_START(x) ((x) << 0) -#define ROQ_IB2_START(x) ((x) << 8) -#define CP_MEQ_THRESHOLDS 0x21D9 -#define MEQ1_START(x) ((x) << 0) -#define MEQ2_START(x) ((x) << 8) - -#define CP_PERFMON_CNTL 0x21FF - #define VGT_VTX_VECT_EJECT_REG 0x222C - #define VGT_ESGS_RING_SIZE 0x2232 #define VGT_GSVS_RING_SIZE 0x2233 - #define VGT_GS_VERTEX_REUSE 0x2235 - #define VGT_PRIMITIVE_TYPE 0x2256 #define VGT_INDEX_TYPE 0x2257 - #define VGT_NUM_INDICES 0x225C #define VGT_NUM_INSTANCES 0x225D - #define VGT_TF_RING_SIZE 0x2262 - #define VGT_HS_OFFCHIP_PARAM 0x226C - #define VGT_TF_MEMORY_BASE 0x226E -#define PA_CL_ENHANCE 0x2285 -#define CLIP_VTX_REORDER_ENA (1 << 0) -#define NUM_CLIP_SEQ(x) ((x) << 1) - -#define PA_SU_LINE_STIPPLE_VALUE 0x2298 - -#define PA_SC_LINE_STIPPLE_STATE 0x22C4 - -#define PA_SC_FORCE_EOV_MAX_CNTS 0x22C9 -#define FORCE_EOV_MAX_CLK_CNT(x) ((x) << 0) -#define FORCE_EOV_MAX_REZ_CNT(x) ((x) << 16) - -#define PA_SC_FIFO_SIZE 0x22F3 -#define SC_FRONTEND_PRIM_FIFO_SIZE(x) ((x) << 0) -#define SC_BACKEND_PRIM_FIFO_SIZE(x) ((x) << 6) -#define SC_HIZ_TILE_FIFO_SIZE(x) ((x) << 15) -#define SC_EARLYZ_TILE_FIFO_SIZE(x) ((x) << 23) - #define PA_SC_ENHANCE 0x22FC -#define SQ_CONFIG 0x2300 - -#define SQC_CACHES 0x2302 - -#define SQ_POWER_THROTTLE 0x2396 -#define MIN_POWER(x) ((x) << 0) -#define MIN_POWER_MASK (0x3fff << 0) -#define MIN_POWER_SHIFT 0 -#define MAX_POWER(x) ((x) << 16) -#define MAX_POWER_MASK (0x3fff << 16) -#define MAX_POWER_SHIFT 0 -#define SQ_POWER_THROTTLE2 0x2397 -#define MAX_POWER_DELTA(x) ((x) << 0) -#define MAX_POWER_DELTA_MASK (0x3fff << 0) -#define MAX_POWER_DELTA_SHIFT 0 -#define STI_SIZE(x) ((x) << 16) -#define STI_SIZE_MASK (0x3ff << 16) -#define STI_SIZE_SHIFT 16 -#define LTI_RATIO(x) ((x) << 27) -#define LTI_RATIO_MASK (0xf << 27) -#define LTI_RATIO_SHIFT 27 - -#define SX_DEBUG_1 0x2418 - -#define SPI_STATIC_THREAD_MGMT_1 0x2438 -#define SPI_STATIC_THREAD_MGMT_2 0x2439 -#define SPI_STATIC_THREAD_MGMT_3 0x243A -#define SPI_PS_MAX_WAVE_ID 0x243B - -#define SPI_CONFIG_CNTL 0x2440 - -#define SPI_CONFIG_CNTL_1 0x244F -#define VTX_DONE_DELAY(x) ((x) << 0) -#define INTERP_ONE_PRIM_PER_ROW (1 << 4) - -#define CGTS_TCC_DISABLE 0x2452 -#define CGTS_USER_TCC_DISABLE 0x2453 -#define TCC_DISABLE_MASK 0xFFFF0000 -#define TCC_DISABLE_SHIFT 16 -#define CGTS_SM_CTRL_REG 0x2454 -#define OVERRIDE (1 << 21) -#define LS_OVERRIDE (1 << 22) - -#define SPI_LB_CU_MASK 0x24D5 - #define TA_CNTL_AUX 0x2542 -#define CC_RB_BACKEND_DISABLE 0x263D -#define BACKEND_DISABLE(x) ((x) << 16) -#define GB_ADDR_CONFIG 0x263E -#define NUM_PIPES(x) ((x) << 0) -#define NUM_PIPES_MASK 0x00000007 -#define NUM_PIPES_SHIFT 0 -#define PIPE_INTERLEAVE_SIZE(x) ((x) << 4) -#define PIPE_INTERLEAVE_SIZE_MASK 0x00000070 -#define PIPE_INTERLEAVE_SIZE_SHIFT 4 -#define NUM_SHADER_ENGINES(x) ((x) << 12) -#define NUM_SHADER_ENGINES_MASK 0x00003000 -#define NUM_SHADER_ENGINES_SHIFT 12 -#define SHADER_ENGINE_TILE_SIZE(x) ((x) << 16) -#define SHADER_ENGINE_TILE_SIZE_MASK 0x00070000 -#define SHADER_ENGINE_TILE_SIZE_SHIFT 16 -#define NUM_GPUS(x) ((x) << 20) -#define NUM_GPUS_MASK 0x00700000 -#define NUM_GPUS_SHIFT 20 -#define MULTI_GPU_TILE_SIZE(x) ((x) << 24) -#define MULTI_GPU_TILE_SIZE_MASK 0x03000000 -#define MULTI_GPU_TILE_SIZE_SHIFT 24 -#define ROW_SIZE(x) ((x) << 28) -#define ROW_SIZE_MASK 0x30000000 -#define ROW_SIZE_SHIFT 28 - -#define CB_PERFCOUNTER0_SELECT0 0x2688 -#define CB_PERFCOUNTER0_SELECT1 0x2689 -#define CB_PERFCOUNTER1_SELECT0 0x268A -#define CB_PERFCOUNTER1_SELECT1 0x268B -#define CB_PERFCOUNTER2_SELECT0 0x268C -#define CB_PERFCOUNTER2_SELECT1 0x268D -#define CB_PERFCOUNTER3_SELECT0 0x268E -#define CB_PERFCOUNTER3_SELECT1 0x268F - -#define CB_CGTT_SCLK_CTRL 0x2698 - -#define TCP_CHAN_STEER_LO 0x2B03 -#define TCP_CHAN_STEER_HI 0x2B94 - -#define CP_RB0_BASE 0x3040 -#define CP_RB0_CNTL 0x3041 -#define RB_BUFSZ(x) ((x) << 0) -#define RB_BLKSZ(x) ((x) << 8) -#define BUF_SWAP_32BIT (2 << 16) -#define RB_NO_UPDATE (1 << 27) -#define RB_RPTR_WR_ENA (1 << 31) - -#define CP_RB0_RPTR_ADDR 0x3043 -#define CP_RB0_RPTR_ADDR_HI 0x3044 -#define CP_RB0_WPTR 0x3045 - -#define CP_PFP_UCODE_ADDR 0x3054 -#define CP_PFP_UCODE_DATA 0x3055 -#define CP_ME_RAM_RADDR 0x3056 -#define CP_ME_RAM_WADDR 0x3057 -#define CP_ME_RAM_DATA 0x3058 - -#define CP_CE_UCODE_ADDR 0x305A -#define CP_CE_UCODE_DATA 0x305B - -#define CP_RB1_BASE 0x3060 -#define CP_RB1_CNTL 0x3061 -#define CP_RB1_RPTR_ADDR 0x3062 -#define CP_RB1_RPTR_ADDR_HI 0x3063 -#define CP_RB1_WPTR 0x3064 -#define CP_RB2_BASE 0x3065 -#define CP_RB2_CNTL 0x3066 -#define CP_RB2_RPTR_ADDR 0x3067 -#define CP_RB2_RPTR_ADDR_HI 0x3068 -#define CP_RB2_WPTR 0x3069 -#define CP_INT_CNTL_RING0 0x306A -#define CP_INT_CNTL_RING1 0x306B -#define CP_INT_CNTL_RING2 0x306C -# define CNTX_BUSY_INT_ENABLE (1 << 19) -# define CNTX_EMPTY_INT_ENABLE (1 << 20) -# define WAIT_MEM_SEM_INT_ENABLE (1 << 21) -# define TIME_STAMP_INT_ENABLE (1 << 26) -# define CP_RINGID2_INT_ENABLE (1 << 29) -# define CP_RINGID1_INT_ENABLE (1 << 30) -# define CP_RINGID0_INT_ENABLE (1 << 31) -#define CP_INT_STATUS_RING0 0x306D -#define CP_INT_STATUS_RING1 0x306E -#define CP_INT_STATUS_RING2 0x306F -# define WAIT_MEM_SEM_INT_STAT (1 << 21) -# define TIME_STAMP_INT_STAT (1 << 26) -# define CP_RINGID2_INT_STAT (1 << 29) -# define CP_RINGID1_INT_STAT (1 << 30) -# define CP_RINGID0_INT_STAT (1 << 31) - // #define PA_SC_RASTER_CONFIG 0xA0D4 # define RB_XSEL2(x) ((x) << 4) # define RB_XSEL2_MASK (0x3 << 4) @@ -1185,171 +311,14 @@ # define SE_YSEL(x) ((x) << 28) # define SE_YSEL_MASK (0x3 << 28) -/* PIF PHY0 registers idx/data 0x8/0xc */ -#define PB0_PIF_CNTL 0x10 -# define LS2_EXIT_TIME(x) ((x) << 17) -# define LS2_EXIT_TIME_MASK (0x7 << 17) -# define LS2_EXIT_TIME_SHIFT 17 -#define PB0_PIF_PAIRING 0x11 -# define MULTI_PIF (1 << 25) -#define PB0_PIF_PWRDOWN_0 0x12 -# define PLL_POWER_STATE_IN_TXS2_0(x) ((x) << 7) -# define PLL_POWER_STATE_IN_TXS2_0_MASK (0x7 << 7) -# define PLL_POWER_STATE_IN_TXS2_0_SHIFT 7 -# define PLL_POWER_STATE_IN_OFF_0(x) ((x) << 10) -# define PLL_POWER_STATE_IN_OFF_0_MASK (0x7 << 10) -# define PLL_POWER_STATE_IN_OFF_0_SHIFT 10 -# define PLL_RAMP_UP_TIME_0(x) ((x) << 24) -# define PLL_RAMP_UP_TIME_0_MASK (0x7 << 24) -# define PLL_RAMP_UP_TIME_0_SHIFT 24 -#define PB0_PIF_PWRDOWN_1 0x13 -# define PLL_POWER_STATE_IN_TXS2_1(x) ((x) << 7) -# define PLL_POWER_STATE_IN_TXS2_1_MASK (0x7 << 7) -# define PLL_POWER_STATE_IN_TXS2_1_SHIFT 7 -# define PLL_POWER_STATE_IN_OFF_1(x) ((x) << 10) -# define PLL_POWER_STATE_IN_OFF_1_MASK (0x7 << 10) -# define PLL_POWER_STATE_IN_OFF_1_SHIFT 10 -# define PLL_RAMP_UP_TIME_1(x) ((x) << 24) -# define PLL_RAMP_UP_TIME_1_MASK (0x7 << 24) -# define PLL_RAMP_UP_TIME_1_SHIFT 24 - -#define PB0_PIF_PWRDOWN_2 0x17 -# define PLL_POWER_STATE_IN_TXS2_2(x) ((x) << 7) -# define PLL_POWER_STATE_IN_TXS2_2_MASK (0x7 << 7) -# define PLL_POWER_STATE_IN_TXS2_2_SHIFT 7 -# define PLL_POWER_STATE_IN_OFF_2(x) ((x) << 10) -# define PLL_POWER_STATE_IN_OFF_2_MASK (0x7 << 10) -# define PLL_POWER_STATE_IN_OFF_2_SHIFT 10 -# define PLL_RAMP_UP_TIME_2(x) ((x) << 24) -# define PLL_RAMP_UP_TIME_2_MASK (0x7 << 24) -# define PLL_RAMP_UP_TIME_2_SHIFT 24 -#define PB0_PIF_PWRDOWN_3 0x18 -# define PLL_POWER_STATE_IN_TXS2_3(x) ((x) << 7) -# define PLL_POWER_STATE_IN_TXS2_3_MASK (0x7 << 7) -# define PLL_POWER_STATE_IN_TXS2_3_SHIFT 7 -# define PLL_POWER_STATE_IN_OFF_3(x) ((x) << 10) -# define PLL_POWER_STATE_IN_OFF_3_MASK (0x7 << 10) -# define PLL_POWER_STATE_IN_OFF_3_SHIFT 10 -# define PLL_RAMP_UP_TIME_3(x) ((x) << 24) -# define PLL_RAMP_UP_TIME_3_MASK (0x7 << 24) -# define PLL_RAMP_UP_TIME_3_SHIFT 24 -/* PIF PHY1 registers idx/data 0x10/0x14 */ -#define PB1_PIF_CNTL 0x10 -#define PB1_PIF_PAIRING 0x11 -#define PB1_PIF_PWRDOWN_0 0x12 -#define PB1_PIF_PWRDOWN_1 0x13 - -#define PB1_PIF_PWRDOWN_2 0x17 -#define PB1_PIF_PWRDOWN_3 0x18 -/* PCIE registers idx/data 0x30/0x34 */ -#define PCIE_CNTL2 0x1c /* PCIE */ -# define SLV_MEM_LS_EN (1 << 16) -# define SLV_MEM_AGGRESSIVE_LS_EN (1 << 17) -# define MST_MEM_LS_EN (1 << 18) -# define REPLAY_MEM_LS_EN (1 << 19) -#define PCIE_LC_STATUS1 0x28 /* PCIE */ -# define LC_REVERSE_RCVR (1 << 0) -# define LC_REVERSE_XMIT (1 << 1) -# define LC_OPERATING_LINK_WIDTH_MASK (0x7 << 2) -# define LC_OPERATING_LINK_WIDTH_SHIFT 2 -# define LC_DETECTED_LINK_WIDTH_MASK (0x7 << 5) -# define LC_DETECTED_LINK_WIDTH_SHIFT 5 - -#define PCIE_P_CNTL 0x40 /* PCIE */ -# define P_IGNORE_EDB_ERR (1 << 6) - /* PCIE PORT registers idx/data 0x38/0x3c */ -#define PCIE_LC_CNTL 0xa0 -# define LC_L0S_INACTIVITY(x) ((x) << 8) -# define LC_L0S_INACTIVITY_MASK (0xf << 8) -# define LC_L0S_INACTIVITY_SHIFT 8 -# define LC_L1_INACTIVITY(x) ((x) << 12) -# define LC_L1_INACTIVITY_MASK (0xf << 12) -# define LC_L1_INACTIVITY_SHIFT 12 -# define LC_PMI_TO_L1_DIS (1 << 16) -# define LC_ASPM_TO_L1_DIS (1 << 24) -#define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */ -# define LC_LINK_WIDTH_SHIFT 0 -# define LC_LINK_WIDTH_MASK 0x7 +// #define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */ # define LC_LINK_WIDTH_X0 0 # define LC_LINK_WIDTH_X1 1 # define LC_LINK_WIDTH_X2 2 # define LC_LINK_WIDTH_X4 3 # define LC_LINK_WIDTH_X8 4 # define LC_LINK_WIDTH_X16 6 -# define LC_LINK_WIDTH_RD_SHIFT 4 -# define LC_LINK_WIDTH_RD_MASK 0x70 -# define LC_RECONFIG_ARC_MISSING_ESCAPE (1 << 7) -# define LC_RECONFIG_NOW (1 << 8) -# define LC_RENEGOTIATION_SUPPORT (1 << 9) -# define LC_RENEGOTIATE_EN (1 << 10) -# define LC_SHORT_RECONFIG_EN (1 << 11) -# define LC_UPCONFIGURE_SUPPORT (1 << 12) -# define LC_UPCONFIGURE_DIS (1 << 13) -# define LC_DYN_LANES_PWR_STATE(x) ((x) << 21) -# define LC_DYN_LANES_PWR_STATE_MASK (0x3 << 21) -# define LC_DYN_LANES_PWR_STATE_SHIFT 21 -#define PCIE_LC_N_FTS_CNTL 0xa3 /* PCIE_P */ -# define LC_XMIT_N_FTS(x) ((x) << 0) -# define LC_XMIT_N_FTS_MASK (0xff << 0) -# define LC_XMIT_N_FTS_SHIFT 0 -# define LC_XMIT_N_FTS_OVERRIDE_EN (1 << 8) -# define LC_N_FTS_MASK (0xff << 24) -#define PCIE_LC_SPEED_CNTL 0xa4 /* PCIE_P */ -# define LC_GEN2_EN_STRAP (1 << 0) -# define LC_GEN3_EN_STRAP (1 << 1) -# define LC_TARGET_LINK_SPEED_OVERRIDE_EN (1 << 2) -# define LC_TARGET_LINK_SPEED_OVERRIDE_MASK (0x3 << 3) -# define LC_TARGET_LINK_SPEED_OVERRIDE_SHIFT 3 -# define LC_FORCE_EN_SW_SPEED_CHANGE (1 << 5) -# define LC_FORCE_DIS_SW_SPEED_CHANGE (1 << 6) -# define LC_FORCE_EN_HW_SPEED_CHANGE (1 << 7) -# define LC_FORCE_DIS_HW_SPEED_CHANGE (1 << 8) -# define LC_INITIATE_LINK_SPEED_CHANGE (1 << 9) -# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_MASK (0x3 << 10) -# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_SHIFT 10 -# define LC_CURRENT_DATA_RATE_MASK (0x3 << 13) /* 0/1/2 = gen1/2/3 */ -# define LC_CURRENT_DATA_RATE_SHIFT 13 -# define LC_CLR_FAILED_SPD_CHANGE_CNT (1 << 16) -# define LC_OTHER_SIDE_EVER_SENT_GEN2 (1 << 18) -# define LC_OTHER_SIDE_SUPPORTS_GEN2 (1 << 19) -# define LC_OTHER_SIDE_EVER_SENT_GEN3 (1 << 20) -# define LC_OTHER_SIDE_SUPPORTS_GEN3 (1 << 21) - -#define PCIE_LC_CNTL2 0xb1 -# define LC_ALLOW_PDWN_IN_L1 (1 << 17) -# define LC_ALLOW_PDWN_IN_L23 (1 << 18) - -#define PCIE_LC_CNTL3 0xb5 /* PCIE_P */ -# define LC_GO_TO_RECOVERY (1 << 30) -#define PCIE_LC_CNTL4 0xb6 /* PCIE_P */ -# define LC_REDO_EQ (1 << 5) -# define LC_SET_QUIESCE (1 << 13) - -/* - * UVD - */ -#define UVD_UDEC_ADDR_CONFIG 0x3bd3 -#define UVD_UDEC_DB_ADDR_CONFIG 0x3bd4 -#define UVD_UDEC_DBW_ADDR_CONFIG 0x3bd5 -#define UVD_RBC_RB_RPTR 0x3da4 -#define UVD_RBC_RB_WPTR 0x3da5 -#define UVD_STATUS 0x3daf - -#define UVD_CGC_CTRL 0x3dc2 -# define DCM (1 << 0) -# define CG_DT(x) ((x) << 2) -# define CG_DT_MASK (0xf << 2) -# define CLK_OD(x) ((x) << 6) -# define CLK_OD_MASK (0x1f << 6) - - /* UVD CTX indirect */ -#define UVD_CGC_MEM_CTRL 0xC0 -#define UVD_CGC_CTRL2 0xC1 -# define DYN_OR_EN (1 << 0) -# define DYN_RR_EN (1 << 1) -# define G_DIV_ID(x) ((x) << 2) -# define G_DIV_ID_MASK (0x7 << 2) /* * PM4 @@ -1583,45 +552,7 @@ /* ASYNC DMA - first instance at 0xd000, second at 0xd800 */ #define DMA0_REGISTER_OFFSET 0x0 /* not a register */ #define DMA1_REGISTER_OFFSET 0x200 /* not a register */ - -#define DMA_RB_CNTL 0x3400 -# define DMA_RB_ENABLE (1 << 0) -# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */ -# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */ -# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12) -# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */ -# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */ -#define DMA_RB_BASE 0x3401 -#define DMA_RB_RPTR 0x3402 -#define DMA_RB_WPTR 0x3403 - -#define DMA_RB_RPTR_ADDR_HI 0x3407 -#define DMA_RB_RPTR_ADDR_LO 0x3408 - -#define DMA_IB_CNTL 0x3409 -# define DMA_IB_ENABLE (1 << 0) -# define DMA_IB_SWAP_ENABLE (1 << 4) -# define CMD_VMID_FORCE (1 << 31) -#define DMA_IB_RPTR 0x340a -#define DMA_CNTL 0x340b -# define TRAP_ENABLE (1 << 0) -# define SEM_INCOMPLETE_INT_ENABLE (1 << 1) -# define SEM_WAIT_INT_ENABLE (1 << 2) -# define DATA_SWAP_ENABLE (1 << 3) -# define FENCE_SWAP_ENABLE (1 << 4) -# define CTXEMPTY_INT_ENABLE (1 << 28) -#define DMA_STATUS_REG 0x340d -# define DMA_IDLE (1 << 0) -#define DMA_TILING_CONFIG 0x342e - -#define DMA_POWER_CNTL 0x342f -# define MEM_POWER_OVERRIDE (1 << 8) -#define DMA_CLK_CTRL 0x3430 - -#define DMA_PG 0x3435 -# define PG_CNTL_ENABLE (1 << 0) -#define DMA_PGFSM_CONFIG 0x3436 -#define DMA_PGFSM_WRITE 0x3437 +#define SDMA_MAX_INSTANCE 2 #define DMA_PACKET(cmd, b, t, s, n) ((((cmd) & 0xF) << 28) | \ (((b) & 0x1) << 26) | \ @@ -1650,6 +581,7 @@ #define DMA_PACKET_POLL_REG_MEM 0xe #define DMA_PACKET_NOP 0xf +/* VCE */ #define VCE_STATUS 0x20004 #define VCE_VCPU_CNTL 0x20014 #define VCE_CLK_EN (1 << 0) @@ -1726,378 +658,118 @@ #define CURSOR_WIDTH 64 #define CURSOR_HEIGHT 64 -#define AMDGPU_MM_INDEX 0x0000 -#define AMDGPU_MM_DATA 0x0001 - -#define VERDE_NUM_CRTC 6 -#define BLACKOUT_MODE_MASK 0x00000007 -#define VGA_RENDER_CONTROL 0xC0 -#define R_000300_VGA_RENDER_CONTROL 0xC0 -#define C_000300_VGA_VSTATUS_CNTL 0xFFFCFFFF -#define EVERGREEN_CRTC_STATUS 0x1BA3 -#define EVERGREEN_CRTC_V_BLANK (1 << 0) -#define EVERGREEN_CRTC_STATUS_POSITION 0x1BA4 -/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */ -#define EVERGREEN_CRTC_V_BLANK_START_END 0x1b8d -#define EVERGREEN_CRTC_CONTROL 0x1b9c -#define EVERGREEN_CRTC_MASTER_EN (1 << 0) -#define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24) -#define EVERGREEN_CRTC_BLANK_CONTROL 0x1b9d -#define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8) -#define EVERGREEN_CRTC_V_BLANK (1 << 0) -#define EVERGREEN_CRTC_STATUS_HV_COUNT 0x1ba8 -#define EVERGREEN_CRTC_UPDATE_LOCK 0x1bb5 -#define EVERGREEN_MASTER_UPDATE_LOCK 0x1bbd -#define EVERGREEN_MASTER_UPDATE_MODE 0x1bbe -#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16) -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08 -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x1a04 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x1a05 -#define EVERGREEN_GRPH_UPDATE 0x1a11 -#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0xc4 -#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0xc9 -#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2) - -#define EVERGREEN_DATA_FORMAT 0x1ac0 -# define EVERGREEN_INTERLEAVE_EN (1 << 0) - -#define R600_D1GRPH_ARRAY_MODE_LINEAR_GENERAL (0 << 20) -#define R600_D1GRPH_ARRAY_MODE_LINEAR_ALIGNED (1 << 20) -#define R600_D1GRPH_ARRAY_MODE_1D_TILED_THIN1 (2 << 20) -#define R600_D1GRPH_ARRAY_MODE_2D_TILED_THIN1 (4 << 20) - -#define R700_D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a45 -#define R700_D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1845 - -#define R700_D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1847 -#define R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a47 - -#define R600_D1GRPH_SWAP_CONTROL 0x1843 -#define R600_D1GRPH_SWAP_ENDIAN_NONE (0 << 0) -#define R600_D1GRPH_SWAP_ENDIAN_16BIT (1 << 0) -#define R600_D1GRPH_SWAP_ENDIAN_32BIT (2 << 0) -#define R600_D1GRPH_SWAP_ENDIAN_64BIT (3 << 0) - -#define AVIVO_D1VGA_CONTROL 0x00cc -# define AVIVO_DVGA_CONTROL_MODE_ENABLE (1 << 0) -# define AVIVO_DVGA_CONTROL_TIMING_SELECT (1 << 8) -# define AVIVO_DVGA_CONTROL_SYNC_POLARITY_SELECT (1 << 9) -# define AVIVO_DVGA_CONTROL_OVERSCAN_TIMING_SELECT (1 << 10) -# define AVIVO_DVGA_CONTROL_OVERSCAN_COLOR_EN (1 << 16) -# define AVIVO_DVGA_CONTROL_ROTATE (1 << 24) -#define AVIVO_D2VGA_CONTROL 0x00ce - -#define R600_BUS_CNTL 0x1508 -# define R600_BIOS_ROM_DIS (1 << 1) + #define R600_ROM_CNTL 0x580 # define R600_SCK_OVERWRITE (1 << 1) # define R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT 28 # define R600_SCK_PRESCALE_CRYSTAL_CLK_MASK (0xf << 28) -#define FMT_BIT_DEPTH_CONTROL 0x1bf2 -#define FMT_TRUNCATE_EN (1 << 0) -#define FMT_TRUNCATE_DEPTH (1 << 4) -#define FMT_SPATIAL_DITHER_EN (1 << 8) -#define FMT_SPATIAL_DITHER_MODE(x) ((x) << 9) -#define FMT_SPATIAL_DITHER_DEPTH (1 << 12) -#define FMT_FRAME_RANDOM_ENABLE (1 << 13) -#define FMT_RGB_RANDOM_ENABLE (1 << 14) -#define FMT_HIGHPASS_RANDOM_ENABLE (1 << 15) -#define FMT_TEMPORAL_DITHER_EN (1 << 16) -#define FMT_TEMPORAL_DITHER_DEPTH (1 << 20) -#define FMT_TEMPORAL_DITHER_OFFSET(x) ((x) << 21) -#define FMT_TEMPORAL_LEVEL (1 << 24) -#define FMT_TEMPORAL_DITHER_RESET (1 << 25) -#define FMT_25FRC_SEL(x) ((x) << 26) -#define FMT_50FRC_SEL(x) ((x) << 28) -#define FMT_75FRC_SEL(x) ((x) << 30) - -#define EVERGREEN_DC_LUT_CONTROL 0x1a80 -#define EVERGREEN_DC_LUT_BLACK_OFFSET_BLUE 0x1a81 -#define EVERGREEN_DC_LUT_BLACK_OFFSET_GREEN 0x1a82 -#define EVERGREEN_DC_LUT_BLACK_OFFSET_RED 0x1a83 -#define EVERGREEN_DC_LUT_WHITE_OFFSET_BLUE 0x1a84 -#define EVERGREEN_DC_LUT_WHITE_OFFSET_GREEN 0x1a85 -#define EVERGREEN_DC_LUT_WHITE_OFFSET_RED 0x1a86 -#define EVERGREEN_DC_LUT_30_COLOR 0x1a7c -#define EVERGREEN_DC_LUT_RW_INDEX 0x1a79 -#define EVERGREEN_DC_LUT_WRITE_EN_MASK 0x1a7e -#define EVERGREEN_DC_LUT_RW_MODE 0x1a78 - -#define EVERGREEN_GRPH_ENABLE 0x1a00 -#define EVERGREEN_GRPH_CONTROL 0x1a01 -#define EVERGREEN_GRPH_DEPTH(x) (((x) & 0x3) << 0) -#define EVERGREEN_GRPH_DEPTH_8BPP 0 -#define EVERGREEN_GRPH_DEPTH_16BPP 1 -#define EVERGREEN_GRPH_DEPTH_32BPP 2 -#define EVERGREEN_GRPH_NUM_BANKS(x) (((x) & 0x3) << 2) -#define EVERGREEN_ADDR_SURF_2_BANK 0 -#define EVERGREEN_ADDR_SURF_4_BANK 1 -#define EVERGREEN_ADDR_SURF_8_BANK 2 -#define EVERGREEN_ADDR_SURF_16_BANK 3 -#define EVERGREEN_GRPH_Z(x) (((x) & 0x3) << 4) -#define EVERGREEN_GRPH_BANK_WIDTH(x) (((x) & 0x3) << 6) -#define EVERGREEN_ADDR_SURF_BANK_WIDTH_1 0 -#define EVERGREEN_ADDR_SURF_BANK_WIDTH_2 1 -#define EVERGREEN_ADDR_SURF_BANK_WIDTH_4 2 -#define EVERGREEN_ADDR_SURF_BANK_WIDTH_8 3 -#define EVERGREEN_GRPH_FORMAT(x) (((x) & 0x7) << 8) - -#define EVERGREEN_GRPH_FORMAT_INDEXED 0 -#define EVERGREEN_GRPH_FORMAT_ARGB1555 0 -#define EVERGREEN_GRPH_FORMAT_ARGB565 1 -#define EVERGREEN_GRPH_FORMAT_ARGB4444 2 -#define EVERGREEN_GRPH_FORMAT_AI88 3 -#define EVERGREEN_GRPH_FORMAT_MONO16 4 -#define EVERGREEN_GRPH_FORMAT_BGRA5551 5 +#define GRPH_ARRAY_LINEAR_GENERAL 0 +#define GRPH_ARRAY_LINEAR_ALIGNED 1 +#define GRPH_ARRAY_1D_TILED_THIN1 2 +#define GRPH_ARRAY_2D_TILED_THIN1 4 + +#define ES_AND_GS_AUTO 3 +#define BUF_SWAP_32BIT (2 << 16) + +#define GRPH_DEPTH_8BPP 0 +#define GRPH_DEPTH_16BPP 1 +#define GRPH_DEPTH_32BPP 2 + +/* 8 BPP */ +#define GRPH_FORMAT_INDEXED 0 + +/* 16 BPP */ +#define GRPH_FORMAT_ARGB1555 0 +#define GRPH_FORMAT_ARGB565 1 +#define GRPH_FORMAT_ARGB4444 2 +#define GRPH_FORMAT_AI88 3 +#define GRPH_FORMAT_MONO16 4 +#define GRPH_FORMAT_BGRA5551 5 /* 32 BPP */ -#define EVERGREEN_GRPH_FORMAT_ARGB8888 0 -#define EVERGREEN_GRPH_FORMAT_ARGB2101010 1 -#define EVERGREEN_GRPH_FORMAT_32BPP_DIG 2 -#define EVERGREEN_GRPH_FORMAT_8B_ARGB2101010 3 -#define EVERGREEN_GRPH_FORMAT_BGRA1010102 4 -#define EVERGREEN_GRPH_FORMAT_8B_BGRA1010102 5 -#define EVERGREEN_GRPH_FORMAT_RGB111110 6 -#define EVERGREEN_GRPH_FORMAT_BGR101111 7 -#define EVERGREEN_GRPH_BANK_HEIGHT(x) (((x) & 0x3) << 11) -#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_1 0 -#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_2 1 -#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_4 2 -#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_8 3 -#define EVERGREEN_GRPH_TILE_SPLIT(x) (((x) & 0x7) << 13) -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_64B 0 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_128B 1 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_256B 2 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_512B 3 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_1KB 4 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_2KB 5 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_4KB 6 -#define EVERGREEN_GRPH_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 18) -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_1 0 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_2 1 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_4 2 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_8 3 -#define EVERGREEN_GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20) -#define EVERGREEN_GRPH_ARRAY_LINEAR_GENERAL 0 -#define EVERGREEN_GRPH_ARRAY_LINEAR_ALIGNED 1 -#define EVERGREEN_GRPH_ARRAY_1D_TILED_THIN1 2 -#define EVERGREEN_GRPH_ARRAY_2D_TILED_THIN1 4 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_1 0 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_2 1 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_4 2 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_8 3 - -#define EVERGREEN_GRPH_SWAP_CONTROL 0x1a03 -#define EVERGREEN_GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0) -# define EVERGREEN_GRPH_ENDIAN_NONE 0 -# define EVERGREEN_GRPH_ENDIAN_8IN16 1 -# define EVERGREEN_GRPH_ENDIAN_8IN32 2 -# define EVERGREEN_GRPH_ENDIAN_8IN64 3 -#define EVERGREEN_GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4) -# define EVERGREEN_GRPH_RED_SEL_R 0 -# define EVERGREEN_GRPH_RED_SEL_G 1 -# define EVERGREEN_GRPH_RED_SEL_B 2 -# define EVERGREEN_GRPH_RED_SEL_A 3 -#define EVERGREEN_GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6) -# define EVERGREEN_GRPH_GREEN_SEL_G 0 -# define EVERGREEN_GRPH_GREEN_SEL_B 1 -# define EVERGREEN_GRPH_GREEN_SEL_A 2 -# define EVERGREEN_GRPH_GREEN_SEL_R 3 -#define EVERGREEN_GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8) -# define EVERGREEN_GRPH_BLUE_SEL_B 0 -# define EVERGREEN_GRPH_BLUE_SEL_A 1 -# define EVERGREEN_GRPH_BLUE_SEL_R 2 -# define EVERGREEN_GRPH_BLUE_SEL_G 3 -#define EVERGREEN_GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10) -# define EVERGREEN_GRPH_ALPHA_SEL_A 0 -# define EVERGREEN_GRPH_ALPHA_SEL_R 1 -# define EVERGREEN_GRPH_ALPHA_SEL_G 2 -# define EVERGREEN_GRPH_ALPHA_SEL_B 3 - -#define EVERGREEN_D3VGA_CONTROL 0xf8 -#define EVERGREEN_D4VGA_CONTROL 0xf9 -#define EVERGREEN_D5VGA_CONTROL 0xfa -#define EVERGREEN_D6VGA_CONTROL 0xfb - -#define EVERGREEN_GRPH_SURFACE_ADDRESS_MASK 0xffffff00 - -#define EVERGREEN_GRPH_LUT_10BIT_BYPASS_CONTROL 0x1a02 -#define EVERGREEN_LUT_10BIT_BYPASS_EN (1 << 8) - -#define EVERGREEN_GRPH_PITCH 0x1a06 -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08 -#define EVERGREEN_GRPH_SURFACE_OFFSET_X 0x1a09 -#define EVERGREEN_GRPH_SURFACE_OFFSET_Y 0x1a0a -#define EVERGREEN_GRPH_X_START 0x1a0b -#define EVERGREEN_GRPH_Y_START 0x1a0c -#define EVERGREEN_GRPH_X_END 0x1a0d -#define EVERGREEN_GRPH_Y_END 0x1a0e -#define EVERGREEN_GRPH_UPDATE 0x1a11 -#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2) -#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16) -#define EVERGREEN_GRPH_FLIP_CONTROL 0x1a12 -#define EVERGREEN_GRPH_SURFACE_UPDATE_H_RETRACE_EN (1 << 0) - -#define EVERGREEN_VIEWPORT_START 0x1b5c -#define EVERGREEN_VIEWPORT_SIZE 0x1b5d -#define EVERGREEN_DESKTOP_HEIGHT 0x1ac1 - -/* CUR blocks at 0x6998, 0x7598, 0x10198, 0x10d98, 0x11998, 0x12598 */ -#define EVERGREEN_CUR_CONTROL 0x1a66 -# define EVERGREEN_CURSOR_EN (1 << 0) -# define EVERGREEN_CURSOR_MODE(x) (((x) & 0x3) << 8) -# define EVERGREEN_CURSOR_MONO 0 -# define EVERGREEN_CURSOR_24_1 1 -# define EVERGREEN_CURSOR_24_8_PRE_MULT 2 -# define EVERGREEN_CURSOR_24_8_UNPRE_MULT 3 -# define EVERGREEN_CURSOR_2X_MAGNIFY (1 << 16) -# define EVERGREEN_CURSOR_FORCE_MC_ON (1 << 20) -# define EVERGREEN_CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24) -# define EVERGREEN_CURSOR_URGENT_ALWAYS 0 -# define EVERGREEN_CURSOR_URGENT_1_8 1 -# define EVERGREEN_CURSOR_URGENT_1_4 2 -# define EVERGREEN_CURSOR_URGENT_3_8 3 -# define EVERGREEN_CURSOR_URGENT_1_2 4 -#define EVERGREEN_CUR_SURFACE_ADDRESS 0x1a67 -# define EVERGREEN_CUR_SURFACE_ADDRESS_MASK 0xfffff000 -#define EVERGREEN_CUR_SIZE 0x1a68 -#define EVERGREEN_CUR_SURFACE_ADDRESS_HIGH 0x1a69 -#define EVERGREEN_CUR_POSITION 0x1a6a -#define EVERGREEN_CUR_HOT_SPOT 0x1a6b -#define EVERGREEN_CUR_COLOR1 0x1a6c -#define EVERGREEN_CUR_COLOR2 0x1a6d -#define EVERGREEN_CUR_UPDATE 0x1a6e -# define EVERGREEN_CURSOR_UPDATE_PENDING (1 << 0) -# define EVERGREEN_CURSOR_UPDATE_TAKEN (1 << 1) -# define EVERGREEN_CURSOR_UPDATE_LOCK (1 << 16) -# define EVERGREEN_CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24) - - -#define NI_INPUT_CSC_CONTROL 0x1a35 -# define NI_INPUT_CSC_GRPH_MODE(x) (((x) & 0x3) << 0) -# define NI_INPUT_CSC_BYPASS 0 -# define NI_INPUT_CSC_PROG_COEFF 1 -# define NI_INPUT_CSC_PROG_SHARED_MATRIXA 2 -# define NI_INPUT_CSC_OVL_MODE(x) (((x) & 0x3) << 4) - -#define NI_OUTPUT_CSC_CONTROL 0x1a3c -# define NI_OUTPUT_CSC_GRPH_MODE(x) (((x) & 0x7) << 0) -# define NI_OUTPUT_CSC_BYPASS 0 -# define NI_OUTPUT_CSC_TV_RGB 1 -# define NI_OUTPUT_CSC_YCBCR_601 2 -# define NI_OUTPUT_CSC_YCBCR_709 3 -# define NI_OUTPUT_CSC_PROG_COEFF 4 -# define NI_OUTPUT_CSC_PROG_SHARED_MATRIXB 5 -# define NI_OUTPUT_CSC_OVL_MODE(x) (((x) & 0x7) << 4) - -#define NI_DEGAMMA_CONTROL 0x1a58 -# define NI_GRPH_DEGAMMA_MODE(x) (((x) & 0x3) << 0) -# define NI_DEGAMMA_BYPASS 0 -# define NI_DEGAMMA_SRGB_24 1 -# define NI_DEGAMMA_XVYCC_222 2 -# define NI_OVL_DEGAMMA_MODE(x) (((x) & 0x3) << 4) -# define NI_ICON_DEGAMMA_MODE(x) (((x) & 0x3) << 8) -# define NI_CURSOR_DEGAMMA_MODE(x) (((x) & 0x3) << 12) - -#define NI_GAMUT_REMAP_CONTROL 0x1a59 -# define NI_GRPH_GAMUT_REMAP_MODE(x) (((x) & 0x3) << 0) -# define NI_GAMUT_REMAP_BYPASS 0 -# define NI_GAMUT_REMAP_PROG_COEFF 1 -# define NI_GAMUT_REMAP_PROG_SHARED_MATRIXA 2 -# define NI_GAMUT_REMAP_PROG_SHARED_MATRIXB 3 -# define NI_OVL_GAMUT_REMAP_MODE(x) (((x) & 0x3) << 4) - -#define NI_REGAMMA_CONTROL 0x1aa0 -# define NI_GRPH_REGAMMA_MODE(x) (((x) & 0x7) << 0) -# define NI_REGAMMA_BYPASS 0 -# define NI_REGAMMA_SRGB_24 1 -# define NI_REGAMMA_XVYCC_222 2 -# define NI_REGAMMA_PROG_A 3 -# define NI_REGAMMA_PROG_B 4 -# define NI_OVL_REGAMMA_MODE(x) (((x) & 0x7) << 4) - - -#define NI_PRESCALE_GRPH_CONTROL 0x1a2d -# define NI_GRPH_PRESCALE_BYPASS (1 << 4) - -#define NI_PRESCALE_OVL_CONTROL 0x1a31 -# define NI_OVL_PRESCALE_BYPASS (1 << 4) - -#define NI_INPUT_GAMMA_CONTROL 0x1a10 -# define NI_GRPH_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 0) -# define NI_INPUT_GAMMA_USE_LUT 0 -# define NI_INPUT_GAMMA_BYPASS 1 -# define NI_INPUT_GAMMA_SRGB_24 2 -# define NI_INPUT_GAMMA_XVYCC_222 3 -# define NI_OVL_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 4) - -#define BLACKOUT_MODE_MASK 0x00000007 -#define VGA_RENDER_CONTROL 0xC0 -#define R_000300_VGA_RENDER_CONTROL 0xC0 -#define C_000300_VGA_VSTATUS_CNTL 0xFFFCFFFF -#define EVERGREEN_CRTC_STATUS 0x1BA3 -#define EVERGREEN_CRTC_V_BLANK (1 << 0) -#define EVERGREEN_CRTC_STATUS_POSITION 0x1BA4 -/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */ -#define EVERGREEN_CRTC_V_BLANK_START_END 0x1b8d -#define EVERGREEN_CRTC_CONTROL 0x1b9c -# define EVERGREEN_CRTC_MASTER_EN (1 << 0) -# define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24) -#define EVERGREEN_CRTC_BLANK_CONTROL 0x1b9d -# define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8) -# define EVERGREEN_CRTC_V_BLANK (1 << 0) -#define EVERGREEN_CRTC_STATUS_HV_COUNT 0x1ba8 -#define EVERGREEN_CRTC_UPDATE_LOCK 0x1bb5 -#define EVERGREEN_MASTER_UPDATE_LOCK 0x1bbd -#define EVERGREEN_MASTER_UPDATE_MODE 0x1bbe -#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16) -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08 -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x1a04 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x1a05 -#define EVERGREEN_GRPH_UPDATE 0x1a11 -#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0xc4 -#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0xc9 -#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2) - -#define mmVM_CONTEXT1_CNTL__xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x10 -#define mmVM_CONTEXT1_CNTL__xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x4 -#define mmVM_CONTEXT1_CNTL__xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x80 -#define mmVM_CONTEXT1_CNTL__xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x7 -#define mmVM_CONTEXT1_CNTL__xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x400 -#define mmVM_CONTEXT1_CNTL__xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0xa -#define mmVM_CONTEXT1_CNTL__xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x2000 -#define mmVM_CONTEXT1_CNTL__xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0xd -#define mmVM_CONTEXT1_CNTL__xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x10000 -#define mmVM_CONTEXT1_CNTL__xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x10 -#define mmVM_CONTEXT1_CNTL__xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x80000 -#define mmVM_CONTEXT1_CNTL__xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x13 - -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxVMID_MASK 0x1e000000 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxVMID__SHIFT 0x19 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxPROTECTIONS_MASK 0xff -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxPROTECTIONS__SHIFT 0x0 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_ID_MASK 0xff000 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_ID__SHIFT 0xc -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_RW_MASK 0x1000000 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_RW__SHIFT 0x18 - -#define mmMC_SHARED_BLACKOUT_CNTL__xxBLACKOUT_MODE_MASK 0x7 -#define mmMC_SHARED_BLACKOUT_CNTL__xxBLACKOUT_MODE__SHIFT 0x0 - -#define mmBIF_FB_EN__xxFB_READ_EN_MASK 0x1 -#define mmBIF_FB_EN__xxFB_READ_EN__SHIFT 0x0 -#define mmBIF_FB_EN__xxFB_WRITE_EN_MASK 0x2 -#define mmBIF_FB_EN__xxFB_WRITE_EN__SHIFT 0x1 - -#define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC_MASK 0x20000 -#define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC__SHIFT 0x11 -#define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC_MASK 0x800 -#define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC__SHIFT 0xb +#define GRPH_FORMAT_ARGB8888 0 +#define GRPH_FORMAT_ARGB2101010 1 +#define GRPH_FORMAT_32BPP_DIG 2 +#define GRPH_FORMAT_8B_ARGB2101010 3 +#define GRPH_FORMAT_BGRA1010102 4 +#define GRPH_FORMAT_8B_BGRA1010102 5 +#define GRPH_FORMAT_RGB111110 6 +#define GRPH_FORMAT_BGR101111 7 + +#define GRPH_ENDIAN_NONE 0 +#define GRPH_ENDIAN_8IN16 1 +#define GRPH_ENDIAN_8IN32 2 +#define GRPH_ENDIAN_8IN64 3 +#define GRPH_RED_SEL_R 0 +#define GRPH_RED_SEL_G 1 +#define GRPH_RED_SEL_B 2 +#define GRPH_RED_SEL_A 3 + +#define GRPH_GREEN_SEL_G 0 +#define GRPH_GREEN_SEL_B 1 +#define GRPH_GREEN_SEL_A 2 +#define GRPH_GREEN_SEL_R 3 + +#define GRPH_BLUE_SEL_B 0 +#define GRPH_BLUE_SEL_A 1 +#define GRPH_BLUE_SEL_R 2 +#define GRPH_BLUE_SEL_G 3 + +#define GRPH_ALPHA_SEL_A 0 +#define GRPH_ALPHA_SEL_R 1 +#define GRPH_ALPHA_SEL_G 2 +#define GRPH_ALPHA_SEL_B 3 + +/* CUR_CONTROL */ + #define CURSOR_MONO 0 + #define CURSOR_24_1 1 + #define CURSOR_24_8_PRE_MULT 2 + #define CURSOR_24_8_UNPRE_MULT 3 + #define CURSOR_URGENT_ALWAYS 0 + #define CURSOR_URGENT_1_8 1 + #define CURSOR_URGENT_1_4 2 + #define CURSOR_URGENT_3_8 3 + #define CURSOR_URGENT_1_2 4 + +/* INPUT_CSC_CONTROL */ +# define INPUT_CSC_BYPASS 0 +# define INPUT_CSC_PROG_COEFF 1 +# define INPUT_CSC_PROG_SHARED_MATRIXA 2 + +/* OUTPUT_CSC_CONTROL */ +# define OUTPUT_CSC_BYPASS 0 +# define OUTPUT_CSC_TV_RGB 1 +# define OUTPUT_CSC_YCBCR_601 2 +# define OUTPUT_CSC_YCBCR_709 3 +# define OUTPUT_CSC_PROG_COEFF 4 +# define OUTPUT_CSC_PROG_SHARED_MATRIXB 5 + +/* DEGAMMA_CONTROL */ +# define DEGAMMA_BYPASS 0 +# define DEGAMMA_SRGB_24 1 +# define DEGAMMA_XVYCC_222 2 + +/* GAMUT_REMAP_CONTROL */ +# define GAMUT_REMAP_BYPASS 0 +# define GAMUT_REMAP_PROG_COEFF 1 +# define GAMUT_REMAP_PROG_SHARED_MATRIXA 2 +# define GAMUT_REMAP_PROG_SHARED_MATRIXB 3 + +/* REGAMMA_CONTROL */ +# define REGAMMA_BYPASS 0 +# define REGAMMA_SRGB_24 1 +# define REGAMMA_XVYCC_222 2 +# define REGAMMA_PROG_A 3 +# define REGAMMA_PROG_B 4 + + +/* INPUT_GAMMA_CONTROL */ +# define INPUT_GAMMA_USE_LUT 0 +# define INPUT_GAMMA_BYPASS 1 +# define INPUT_GAMMA_SRGB_24 2 +# define INPUT_GAMMA_XVYCC_222 3 #define MC_SEQ_MISC0__MT__MASK 0xf0000000 #define MC_SEQ_MISC0__MT__GDDR1 0x10000000 @@ -2113,20 +785,9 @@ #define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) #define PACKET3_SEM_SEL_WAIT (0x7 << 29) -#define CONFIG_CNTL 0x1509 -#define CC_DRM_ID_STRAPS 0X1559 #define AMDGPU_PCIE_INDEX 0xc #define AMDGPU_PCIE_DATA 0xd -#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0x3411 -#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0x3412 -#define DMA_MODE 0x342f -#define DMA_RB_RPTR_ADDR_HI 0x3407 -#define DMA_RB_RPTR_ADDR_LO 0x3408 -#define DMA_BUSY_MASK 0x20 -#define DMA1_BUSY_MASK 0X40 -#define SDMA_MAX_INSTANCE 2 - #define PCIE_BUS_CLK 10000 #define TCLK (PCIE_BUS_CLK / 10) #define PCIE_PORT_INDEX 0xe @@ -2136,8 +797,6 @@ #define EVERGREEN_PIF_PHY1_INDEX 0x10 #define EVERGREEN_PIF_PHY1_DATA 0x14 -#define MC_VM_FB_OFFSET 0x81a - /* Discrete VCE clocks */ #define CG_VCEPLL_FUNC_CNTL 0xc0030600 #define VCEPLL_RESET_MASK 0x00000001 diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 659eab9b90be..c457be3a3c56 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -584,6 +584,8 @@ soc15_asic_reset_method(struct amdgpu_device *adev) * Enable triggering of GPU reset only if specified * by module parameter. */ + if (adev->pcie_reset_ctx.in_link_reset) + return AMD_RESET_METHOD_LINK; if (amdgpu_gpu_recovery == 4 || amdgpu_gpu_recovery == 5) return AMD_RESET_METHOD_MODE2; else if (!(adev->flags & AMD_IS_APU)) @@ -640,6 +642,9 @@ asic_reset: case AMD_RESET_METHOD_MODE2: dev_info(adev->dev, "MODE2 reset\n"); return amdgpu_dpm_mode2_reset(adev); + case AMD_RESET_METHOD_LINK: + dev_info(adev->dev, "Link reset\n"); + return amdgpu_device_link_reset(adev); default: dev_info(adev->dev, "MODE1 reset\n"); return amdgpu_device_mode1_reset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index ef7c603b50ae..c8ac11a9cdef 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -118,7 +118,6 @@ int vega10_reg_base_init(struct amdgpu_device *adev); int vega20_reg_base_init(struct amdgpu_device *adev); int arct_reg_base_init(struct amdgpu_device *adev); int aldebaran_reg_base_init(struct amdgpu_device *adev); -void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev); u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id); int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev); ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index a5000c171c02..cf93fa477674 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -552,6 +552,11 @@ # define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2) # define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) +#define PACKET3_RUN_CLEANER_SHADER_9_0 0xD7 +/* 1. header + * 2. RESERVED [31:0] + */ + #define PACKET3_RUN_CLEANER_SHADER 0xD2 /* 1. header * 2. RESERVED [31:0] diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index a3b5fda22432..8a3f326474e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -92,6 +92,9 @@ enum ta_ras_block { TA_RAS_BLOCK__MCA, TA_RAS_BLOCK__VCN, TA_RAS_BLOCK__JPEG, + TA_RAS_BLOCK__IH, + TA_RAS_BLOCK__MPIO, + TA_RAS_BLOCK__MMSCH, TA_NUM_BLOCK_MAX }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 74f57b2d30a5..e590cbdd8de9 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -85,7 +85,8 @@ bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_sta return (amdgpu_ras_is_poison_mode_supported(adev) && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)); + ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) || + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Poison) == 1))); } bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status) @@ -173,19 +174,76 @@ static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev, umc_v12_0_reset_error_count(adev); } +static void umc_v12_0_get_retire_flip_bits(struct amdgpu_device *adev) +{ + enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; + uint32_t vram_type = adev->gmc.vram_type; + struct amdgpu_umc_flip_bits *flip_bits = &(adev->umc.flip_bits); + + if (adev->gmc.gmc_funcs->query_mem_partition_mode) + nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + + /* default setting */ + flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_C2_BIT; + flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C3_BIT; + flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_C4_BIT; + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R13_BIT; + flip_bits->flip_row_bit = 13; + flip_bits->bit_num = 4; + flip_bits->r13_in_pa = UMC_V12_0_PA_R13_BIT; + + if (nps == AMDGPU_NPS2_PARTITION_MODE) { + flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH5_BIT; + flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C2_BIT; + flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B1_BIT; + flip_bits->r13_in_pa = UMC_V12_0_PA_R12_BIT; + } else if (nps == AMDGPU_NPS4_PARTITION_MODE) { + flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH4_BIT; + flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_CH5_BIT; + flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B0_BIT; + flip_bits->r13_in_pa = UMC_V12_0_PA_R11_BIT; + } + + switch (vram_type) { + case AMDGPU_VRAM_TYPE_HBM: + /* other nps modes are taken as nps1 */ + if (nps == AMDGPU_NPS2_PARTITION_MODE) + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT; + else if (nps == AMDGPU_NPS4_PARTITION_MODE) + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT; + + break; + case AMDGPU_VRAM_TYPE_HBM3E: + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT; + flip_bits->flip_row_bit = 12; + + if (nps == AMDGPU_NPS2_PARTITION_MODE) + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT; + else if (nps == AMDGPU_NPS4_PARTITION_MODE) + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R10_BIT; + + break; + default: + dev_warn(adev->dev, + "Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n"); + break; + } + + adev->umc.retire_unit = 0x1 << flip_bits->bit_num; +} + static int umc_v12_0_convert_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, struct ta_ras_query_address_input *addr_in, struct ta_ras_query_address_output *addr_out, bool dump_addr) { - uint32_t col, col_lower, row, row_lower, bank; + uint32_t col, col_lower, row, row_lower, row_high, bank; uint32_t channel_index = 0, umc_inst = 0; - uint32_t i, loop_bits[UMC_V12_0_RETIRE_LOOP_BITS]; + uint32_t i, bit_num, retire_unit, *flip_bits; uint64_t soc_pa, column, err_addr; struct ta_ras_query_address_output addr_out_tmp; struct ta_ras_query_address_output *paddr_out; - enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; int ret = 0; if (!addr_out) @@ -210,46 +268,46 @@ static int umc_v12_0_convert_error_address(struct amdgpu_device *adev, umc_inst = addr_in->ma.umc_inst; } - loop_bits[0] = UMC_V12_0_PA_C2_BIT; - loop_bits[1] = UMC_V12_0_PA_C3_BIT; - loop_bits[2] = UMC_V12_0_PA_C4_BIT; - loop_bits[3] = UMC_V12_0_PA_R13_BIT; - - if (adev->gmc.gmc_funcs->query_mem_partition_mode) - nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); - - /* other nps modes are taken as nps1 */ - if (nps == AMDGPU_NPS4_PARTITION_MODE) { - loop_bits[0] = UMC_V12_0_PA_CH4_BIT; - loop_bits[1] = UMC_V12_0_PA_CH5_BIT; - loop_bits[2] = UMC_V12_0_PA_B0_BIT; - loop_bits[3] = UMC_V12_0_PA_R11_BIT; - } + flip_bits = adev->umc.flip_bits.flip_bits_in_pa; + bit_num = adev->umc.flip_bits.bit_num; + retire_unit = adev->umc.retire_unit; soc_pa = paddr_out->pa.pa; channel_index = paddr_out->pa.channel_idx; /* clear loop bits in soc physical address */ - for (i = 0; i < UMC_V12_0_RETIRE_LOOP_BITS; i++) - soc_pa &= ~BIT_ULL(loop_bits[i]); + for (i = 0; i < bit_num; i++) + soc_pa &= ~BIT_ULL(flip_bits[i]); paddr_out->pa.pa = soc_pa; /* get column bit 0 and 1 in mca address */ col_lower = (err_addr >> 1) & 0x3ULL; - /* MA_R13_BIT will be handled later */ + /* extra row bit will be handled later */ row_lower = (err_addr >> UMC_V12_0_MA_R0_BIT) & 0x1fffULL; + row_lower &= ~BIT_ULL(adev->umc.flip_bits.flip_row_bit); + + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 5, 0)) { + row_high = (soc_pa >> adev->umc.flip_bits.r13_in_pa) & 0x3ULL; + /* it's 2.25GB in each channel, from MCA address to PA + * [R14 R13] is converted if the two bits value are 0x3, + * get them from PA instead of MCA address. + */ + row_lower |= (row_high << 13); + } if (!err_data && !dump_addr) goto out; /* loop for all possibilities of retired bits */ - for (column = 0; column < UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; column++) { + for (column = 0; column < retire_unit; column++) { soc_pa = paddr_out->pa.pa; - for (i = 0; i < UMC_V12_0_RETIRE_LOOP_BITS; i++) - soc_pa |= (((column >> i) & 0x1ULL) << loop_bits[i]); + for (i = 0; i < bit_num; i++) + soc_pa |= (((column >> i) & 0x1ULL) << flip_bits[i]); col = ((column & 0x7) << 2) | col_lower; - /* add row bit 13 */ - row = ((column >> 3) << 13) | row_lower; + /* handle extra row bit */ + if (bit_num == RETIRE_FLIP_BITS_NUM) + row = ((column >> 3) << adev->umc.flip_bits.flip_row_bit) | + row_lower; if (dump_addr) dev_info(adev->dev, @@ -427,8 +485,12 @@ static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank bank->regs[ACA_REG_IDX_ADDR]); ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status); - count = ext_error_code == 0 ? - ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL; + if (umc_v12_0_is_deferred_error(adev, status)) + count = ext_error_code == 0 ? + adev->umc.err_addr_cnt / adev->umc.retire_unit : 1ULL; + else + count = ext_error_code == 0 ? + ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL; return aca_error_cache_log_bank_error(handle, &info, err_type, count); } @@ -468,8 +530,7 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, uint64_t err_addr, pa_addr = 0; struct ras_ecc_err *ecc_err; struct ta_ras_query_address_output addr_out; - enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; - uint32_t shift_bit = UMC_V12_0_PA_C4_BIT; + uint32_t shift_bit = adev->umc.flip_bits.flip_bits_in_pa[2]; int count, ret, i; hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); @@ -514,11 +575,6 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, ecc_err->pa_pfn = pa_addr >> AMDGPU_GPU_PAGE_SHIFT; ecc_err->channel_idx = addr_out.pa.channel_idx; - if (adev->gmc.gmc_funcs->query_mem_partition_mode) - nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); - if (nps == AMDGPU_NPS4_PARTITION_MODE) - shift_bit = UMC_V12_0_PA_B0_BIT; - /* If converted pa_pfn is 0, use pa C4 pfn. */ if (!ecc_err->pa_pfn) ecc_err->pa_pfn = BIT_ULL(shift_bit) >> AMDGPU_GPU_PAGE_SHIFT; @@ -664,5 +720,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = { .update_ecc_status = umc_v12_0_update_ecc_status, .convert_ras_err_addr = umc_v12_0_convert_error_address, .get_die_id_from_pa = umc_v12_0_get_die_id, + .get_retire_flip_bits = umc_v12_0_get_retire_flip_bits, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index 9298018d938f..63b7e7254526 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -55,8 +55,6 @@ #define UMC_V12_0_NA_MAP_PA_NUM 8 /* R13 bit shift should be considered, double the number */ #define UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL (UMC_V12_0_NA_MAP_PA_NUM * 2) -/* C2, C3, C4, R13, four bits in MCA address are looped in retirement */ -#define UMC_V12_0_RETIRE_LOOP_BITS 4 /* column bits in SOC physical address */ #define UMC_V12_0_PA_C2_BIT 15 @@ -64,13 +62,16 @@ #define UMC_V12_0_PA_C4_BIT 21 /* row bits in SOC physical address */ #define UMC_V12_0_PA_R0_BIT 22 +#define UMC_V12_0_PA_R10_BIT 32 #define UMC_V12_0_PA_R11_BIT 33 +#define UMC_V12_0_PA_R12_BIT 34 #define UMC_V12_0_PA_R13_BIT 35 /* channel bit in SOC physical address */ #define UMC_V12_0_PA_CH4_BIT 12 #define UMC_V12_0_PA_CH5_BIT 13 /* bank bit in SOC physical address */ #define UMC_V12_0_PA_B0_BIT 19 +#define UMC_V12_0_PA_B1_BIT 20 /* row bits in MCA address */ #define UMC_V12_0_MA_R0_BIT 10 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 21b57c29bf7d..c74947705d77 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1009,6 +1009,11 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_vcn_inst *vinst) jpeg_v1_0_start(adev, 0); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(UVD, 0, mmUVD_STATUS); + return 0; } @@ -1154,6 +1159,11 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst) jpeg_v1_0_start(adev, 1); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(UVD, 0, mmUVD_STATUS); + return 0; } @@ -1216,6 +1226,12 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_vcn_inst *vinst) vcn_v1_0_enable_clock_gating(vinst); vcn_1_0_enable_static_power_gating(vinst); + + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(UVD, 0, mmUVD_STATUS); + return 0; } @@ -1250,6 +1266,11 @@ static int vcn_v1_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(UVD, 0, mmUVD_STATUS); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 8e7a36f26e9c..68b4371df0f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -39,6 +39,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x1fd #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x503 @@ -97,6 +98,8 @@ static int vcn_v2_0_set_pg_state(struct amdgpu_vcn_inst *vinst, static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, struct dpg_pause_state *new_state); static int vcn_v2_0_start_sriov(struct amdgpu_device *adev); +static int vcn_v2_0_reset(struct amdgpu_vcn_inst *vinst); + /** * vcn_v2_0_early_init - set function pointers and load microcode * @@ -212,6 +215,12 @@ static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block) } adev->vcn.inst[0].pause_dpg_mode = vcn_v2_0_pause_dpg_mode; + adev->vcn.inst[0].reset = vcn_v2_0_reset; + + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + if (!amdgpu_sriov_vf(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -232,6 +241,10 @@ static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.ip_dump = ptr; } + r = amdgpu_vcn_sysfs_reset_mask_init(adev); + if (r) + return r; + return 0; } @@ -259,6 +272,8 @@ static int vcn_v2_0_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; + amdgpu_vcn_sysfs_reset_mask_fini(adev); + r = amdgpu_vcn_sw_fini(adev, 0); kfree(adev->vcn.ip_dump); @@ -977,6 +992,12 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) /* Unstall DPG */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); + + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(UVD, 0, mmUVD_STATUS); + return 0; } @@ -1151,6 +1172,11 @@ static int vcn_v2_0_start(struct amdgpu_vcn_inst *vinst) WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET; + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(UVD, 0, mmUVD_STATUS); + return 0; } @@ -1182,6 +1208,11 @@ static int vcn_v2_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(UVD, 0, mmUVD_STATUS); + return 0; } @@ -1247,6 +1278,11 @@ static int vcn_v2_0_stop(struct amdgpu_vcn_inst *vinst) vcn_v2_0_enable_clock_gating(vinst); vcn_v2_0_enable_static_power_gating(vinst); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, 0, mmUVD_STATUS); + power_off: if (adev->pm.dpm_enabled) amdgpu_dpm_enable_vcn(adev, false, 0); @@ -1333,6 +1369,16 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, return 0; } +static int vcn_v2_0_reset(struct amdgpu_vcn_inst *vinst) +{ + int r; + + r = vcn_v2_0_stop(vinst); + if (r) + return r; + return vcn_v2_0_start(vinst); +} + static bool vcn_v2_0_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -2154,6 +2200,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = amdgpu_vcn_ring_reset, }; static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { @@ -2183,6 +2230,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = amdgpu_vcn_ring_reset, }; static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index d716510b8dd6..bc30a5326866 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -39,6 +39,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f @@ -101,6 +102,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, struct dpg_pause_state *new_state); static int vcn_v2_5_sriov_start(struct amdgpu_device *adev); static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev); +static int vcn_v2_5_reset(struct amdgpu_vcn_inst *vinst); static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, @@ -403,8 +405,14 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.inst[j].pause_dpg_mode = vcn_v2_5_pause_dpg_mode; + adev->vcn.inst[j].reset = vcn_v2_5_reset; } + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + if (!amdgpu_sriov_vf(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -424,6 +432,10 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.ip_dump = ptr; } + r = amdgpu_vcn_sysfs_reset_mask_init(adev); + if (r) + return r; + return 0; } @@ -454,6 +466,8 @@ static int vcn_v2_5_sw_fini(struct amdgpu_ip_block *ip_block) if (amdgpu_sriov_vf(adev)) amdgpu_virt_free_mm_table(adev); + amdgpu_vcn_sysfs_reset_mask_fini(adev); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { r = amdgpu_vcn_suspend(adev, i); if (r) @@ -1157,6 +1171,11 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS); + return 0; } @@ -1342,6 +1361,11 @@ static int vcn_v2_5_start(struct amdgpu_vcn_inst *vinst) WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4); fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET; + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, i, mmUVD_STATUS); + return 0; } @@ -1568,6 +1592,11 @@ static int vcn_v2_5_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS); + return 0; } @@ -1634,6 +1663,10 @@ static int vcn_v2_5_stop(struct amdgpu_vcn_inst *vinst) UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, i, mmUVD_STATUS); done: if (adev->pm.dpm_enabled) amdgpu_dpm_enable_vcn(adev, false, i); @@ -1796,6 +1829,7 @@ static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = amdgpu_vcn_ring_reset, }; /** @@ -1894,6 +1928,7 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = amdgpu_vcn_ring_reset, }; static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) @@ -1922,6 +1957,16 @@ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev) } } +static int vcn_v2_5_reset(struct amdgpu_vcn_inst *vinst) +{ + int r; + + r = vcn_v2_5_stop(vinst); + if (r) + return r; + return vcn_v2_5_start(vinst); +} + static bool vcn_v2_5_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 22ae1939476f..4b8f4407047f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -40,6 +40,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f @@ -109,6 +110,7 @@ static int vcn_v3_0_set_pg_state(struct amdgpu_vcn_inst *vinst, enum amd_powergating_state state); static int vcn_v3_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, struct dpg_pause_state *new_state); +static int vcn_v3_0_reset(struct amdgpu_vcn_inst *vinst); static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring); static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring); @@ -288,8 +290,14 @@ static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.inst[i].pause_dpg_mode = vcn_v3_0_pause_dpg_mode; + adev->vcn.inst[i].reset = vcn_v3_0_reset; } + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + if (!amdgpu_sriov_vf(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -305,6 +313,10 @@ static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.ip_dump = ptr; } + r = amdgpu_vcn_sysfs_reset_mask_init(adev); + if (r) + return r; + return 0; } @@ -337,6 +349,8 @@ static int vcn_v3_0_sw_fini(struct amdgpu_ip_block *ip_block) if (amdgpu_sriov_vf(adev)) amdgpu_virt_free_mm_table(adev); + amdgpu_vcn_sysfs_reset_mask_fini(adev); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { r = amdgpu_vcn_suspend(adev, i); if (r) @@ -1172,6 +1186,11 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS); + return 0; } @@ -1359,6 +1378,11 @@ static int vcn_v3_0_start(struct amdgpu_vcn_inst *vinst) fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); } + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, i, mmUVD_STATUS); + return 0; } @@ -1601,6 +1625,11 @@ static int vcn_v3_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS); + return 0; } @@ -1673,6 +1702,11 @@ static int vcn_v3_0_stop(struct amdgpu_vcn_inst *vinst) /* enable VCN power gating */ vcn_v3_0_enable_static_power_gating(vinst); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, i, mmUVD_STATUS); + done: if (adev->pm.dpm_enabled) amdgpu_dpm_enable_vcn(adev, false, i); @@ -2012,6 +2046,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = { .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = amdgpu_vcn_ring_reset, }; /** @@ -2110,6 +2145,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = amdgpu_vcn_ring_reset, }; static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev) @@ -2143,6 +2179,18 @@ static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev) } } +static int vcn_v3_0_reset(struct amdgpu_vcn_inst *vinst) +{ + int r; + + r = vcn_v3_0_stop(vinst); + if (r) + return r; + vcn_v3_0_enable_clock_gating(vinst); + vcn_v3_0_enable_static_power_gating(vinst); + return vcn_v3_0_start(vinst); +} + static bool vcn_v3_0_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index c6f6392c1c20..1924e075b66f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -46,6 +46,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define VCN_HARVEST_MMSCH 0 @@ -238,9 +239,10 @@ static int vcn_v4_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_pause_dpg_mode; } - /* TODO: Add queue reset mask when FW fully supports it */ adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + if (!amdgpu_sriov_vf(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); @@ -614,7 +616,8 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, /* VCN global tiling registers */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); } /** @@ -1120,6 +1123,11 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | VCN_RB1_DB_CTRL__EN_MASK); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, inst_idx, regUVD_STATUS); + return 0; } @@ -1301,6 +1309,11 @@ static int vcn_v4_0_start(struct amdgpu_vcn_inst *vinst) WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, i, regUVD_STATUS); + return 0; } @@ -1581,6 +1594,11 @@ static void vcn_v4_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) /* disable dynamic power gating mode */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, inst_idx, regUVD_STATUS); } /** @@ -1664,6 +1682,11 @@ static int vcn_v4_0_stop(struct amdgpu_vcn_inst *vinst) /* enable VCN power gating */ vcn_v4_0_enable_static_power_gating(vinst); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, i, regUVD_STATUS); + done: if (adev->pm.dpm_enabled) amdgpu_dpm_enable_vcn(adev, false, i); @@ -1945,6 +1968,24 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, return 0; } +static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + int r; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = vcn_v4_0_stop(vinst); + if (r) + return r; + r = vcn_v4_0_start(vinst); + if (r) + return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1974,6 +2015,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = vcn_v4_0_ring_reset, }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 7446ecc55714..2a3663b551af 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -45,6 +45,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = { SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), @@ -168,6 +169,10 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; + /* VCN POISON TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst->ras_poison_irq); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { r = amdgpu_vcn_sw_init(adev, i); @@ -287,6 +292,31 @@ static int vcn_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block) return 0; } +static int vcn_v4_0_3_hw_init_inst(struct amdgpu_vcn_inst *vinst) +{ + int vcn_inst; + struct amdgpu_device *adev = vinst->adev; + struct amdgpu_ring *ring; + int inst_idx = vinst->inst; + + vcn_inst = GET_INST(VCN, inst_idx); + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + if (ring->use_doorbell) { + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * vcn_inst, + adev->vcn.inst[inst_idx].aid_id); + + WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + /* Read DB_CTRL to flush the write DB_CTRL command. */ + RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL); + } + + return 0; +} + /** * vcn_v4_0_3_hw_init - start and test VCN block * @@ -298,7 +328,8 @@ static int vcn_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; - int i, r, vcn_inst; + struct amdgpu_vcn_inst *vinst; + int i, r; if (amdgpu_sriov_vf(adev)) { r = vcn_v4_0_3_start_sriov(adev); @@ -321,28 +352,9 @@ static int vcn_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { struct amdgpu_vcn4_fw_shared *fw_shared; - vcn_inst = GET_INST(VCN, i); ring = &adev->vcn.inst[i].ring_enc[0]; - - if (ring->use_doorbell) { - adev->nbio.funcs->vcn_doorbell_range( - adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * vcn_inst, - adev->vcn.inst[i].aid_id); - - WREG32_SOC15( - VCN, GET_INST(VCN, ring->me), - regVCN_RB1_DB_CTRL, - ring->doorbell_index - << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); - - /* Read DB_CTRL to flush the write DB_CTRL command. */ - RREG32_SOC15( - VCN, GET_INST(VCN, ring->me), - regVCN_RB1_DB_CTRL); - } + vinst = &adev->vcn.inst[i]; + vcn_v4_0_3_hw_init_inst(vinst); /* Re-init fw_shared when RAS fatal error occurred */ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; @@ -379,6 +391,9 @@ static int vcn_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block) vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); } + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) + amdgpu_irq_put(adev, &adev->vcn.inst->ras_poison_irq, 0); + return 0; } @@ -962,6 +977,11 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_vcn_inst *vinst, /*resetting done, fw can check RB ring */ fw_shared->sq.queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS); + return 0; } @@ -1355,6 +1375,12 @@ static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) /* disable dynamic power gating mode */ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS); + return 0; } @@ -1438,6 +1464,11 @@ static int vcn_v4_0_3_stop(struct amdgpu_vcn_inst *vinst) /* apply HW clock gating */ vcn_v4_0_3_enable_clock_gating(vinst); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS); + Done: return 0; } @@ -1563,6 +1594,34 @@ static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + int r = 0; + int vcn_inst; + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + + vcn_inst = GET_INST(VCN, ring->me); + r = amdgpu_dpm_reset_vcn(adev, 1 << vcn_inst); + + if (r) { + DRM_DEV_ERROR(adev->dev, "VCN reset fail : %d\n", r); + return r; + } + + /* This flag is not set for VF, assumed to be disabled always */ + if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100) + adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED); + vcn_v4_0_3_hw_init_inst(vinst); + vcn_v4_0_3_start_dpg_mode(vinst, adev->vcn.inst[ring->me].indirect_sram); + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1591,6 +1650,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg, .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = vcn_v4_0_3_ring_reset, }; /** @@ -1774,11 +1834,24 @@ static int vcn_v4_0_3_process_interrupt(struct amdgpu_device *adev, return 0; } +static int vcn_v4_0_3_set_ras_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + static const struct amdgpu_irq_src_funcs vcn_v4_0_3_irq_funcs = { .set = vcn_v4_0_3_set_interrupt_state, .process = vcn_v4_0_3_process_interrupt, }; +static const struct amdgpu_irq_src_funcs vcn_v4_0_3_ras_irq_funcs = { + .set = vcn_v4_0_3_set_ras_interrupt_state, + .process = amdgpu_vcn_process_poison_irq, +}; + /** * vcn_v4_0_3_set_irq_funcs - set VCN block interrupt irq functions * @@ -1794,6 +1867,9 @@ static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) adev->vcn.inst->irq.num_types++; } adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs; + + adev->vcn.inst->ras_poison_irq.num_types = 1; + adev->vcn.inst->ras_poison_irq.funcs = &vcn_v4_0_3_ras_irq_funcs; } static void vcn_v4_0_3_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) @@ -1941,9 +2017,44 @@ static void vcn_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev) vcn_v4_0_3_inst_reset_ras_error_count(adev, i); } +static uint32_t vcn_v4_0_3_query_poison_by_instance(struct amdgpu_device *adev, + uint32_t instance, uint32_t sub_block) +{ + uint32_t poison_stat = 0, reg_value = 0; + + switch (sub_block) { + case AMDGPU_VCN_V4_0_3_VCPU_VCODEC: + reg_value = RREG32_SOC15(VCN, instance, regUVD_RAS_VCPU_VCODEC_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF); + break; + default: + break; + } + + if (poison_stat) + dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n", + instance, sub_block); + + return poison_stat; +} + +static bool vcn_v4_0_3_query_poison_status(struct amdgpu_device *adev) +{ + uint32_t inst, sub; + uint32_t poison_stat = 0; + + for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) + for (sub = 0; sub < AMDGPU_VCN_V4_0_3_MAX_SUB_BLOCK; sub++) + poison_stat += + vcn_v4_0_3_query_poison_by_instance(adev, inst, sub); + + return !!poison_stat; +} + static const struct amdgpu_ras_block_hw_ops vcn_v4_0_3_ras_hw_ops = { .query_ras_error_count = vcn_v4_0_3_query_ras_error_count, .reset_ras_error_count = vcn_v4_0_3_reset_ras_error_count, + .query_poison_status = vcn_v4_0_3_query_poison_status, }; static int vcn_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, @@ -1965,7 +2076,7 @@ static int vcn_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank 1ULL); break; case ACA_SMU_TYPE_CE: - bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, ACA_REG__MISC0__ERRCNT(misc0)); break; @@ -2019,6 +2130,13 @@ static int vcn_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_commo if (r) return r; + if (amdgpu_ras_is_supported(adev, ras_block->block) && + adev->vcn.inst->ras_poison_irq.funcs) { + r = amdgpu_irq_get(adev, &adev->vcn.inst->ras_poison_irq, 0); + if (r) + goto late_fini; + } + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__VCN, &vcn_v4_0_3_aca_info, NULL); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h index 03572a1d0c9c..aeab89853a92 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h @@ -24,6 +24,12 @@ #ifndef __VCN_V4_0_3_H__ #define __VCN_V4_0_3_H__ +enum amdgpu_vcn_v4_0_3_sub_block { + AMDGPU_VCN_V4_0_3_VCPU_VCODEC = 0, + + AMDGPU_VCN_V4_0_3_MAX_SUB_BLOCK, +}; + extern const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block; void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index ba603b2246e2..caf2d95a85d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -46,6 +46,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 (0x48300 + 0x38000) +#define VCN1_AON_SOC_ADDRESS_3_0 (0x48000 + 0x38000) #define VCN_HARVEST_MMSCH 0 @@ -207,6 +208,10 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) if (amdgpu_sriov_vf(adev)) fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT; + fw_shared->drm_key_wa.method = + AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING; + if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); @@ -214,6 +219,14 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_5_pause_dpg_mode; } + adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + if (!amdgpu_sriov_vf(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + + r = amdgpu_vcn_sysfs_reset_mask_init(adev); + if (r) + return r; + if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -1022,6 +1035,10 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | VCN_RB1_DB_CTRL__EN_MASK); + /* Keeping one read-back to ensure all register writes are done, otherwise + * it may introduce race conditions */ + RREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL); + return 0; } @@ -1204,6 +1221,10 @@ static int vcn_v4_0_5_start(struct amdgpu_vcn_inst *vinst) WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + /* Keeping one read-back to ensure all register writes are done, otherwise + * it may introduce race conditions */ + RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + return 0; } @@ -1234,6 +1255,11 @@ static void vcn_v4_0_5_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) /* disable dynamic power gating mode */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, inst_idx, regUVD_STATUS); } /** @@ -1317,6 +1343,11 @@ static int vcn_v4_0_5_stop(struct amdgpu_vcn_inst *vinst) /* enable VCN power gating */ vcn_v4_0_5_enable_static_power_gating(vinst); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, i, regUVD_STATUS); + done: if (adev->pm.dpm_enabled) amdgpu_dpm_enable_vcn(adev, false, i); @@ -1435,6 +1466,24 @@ static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + int r; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = vcn_v4_0_5_stop(vinst); + if (r) + return r; + r = vcn_v4_0_5_start(vinst); + if (r) + return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1462,6 +1511,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = vcn_v4_0_5_ring_reset, }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index d99d05f42f1d..07a6e9582880 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -196,9 +196,10 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.inst[i].pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; } - /* TODO: Add queue reset mask when FW fully supports it */ adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + if (!amdgpu_sriov_vf(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; vcn_v5_0_0_alloc_ip_dump(adev); @@ -533,7 +534,8 @@ static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, /* VCN global tiling registers */ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); return; } @@ -793,6 +795,11 @@ static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | VCN_RB1_DB_CTRL__EN_MASK); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, inst_idx, regUVD_STATUS); + return 0; } @@ -945,6 +952,11 @@ static int vcn_v5_0_0_start(struct amdgpu_vcn_inst *vinst) WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, i, regUVD_STATUS); + return 0; } @@ -976,6 +988,11 @@ static void vcn_v5_0_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, inst_idx, regUVD_STATUS); + return; } @@ -1057,6 +1074,11 @@ static int vcn_v5_0_0_stop(struct amdgpu_vcn_inst *vinst) /* enable VCN power gating */ vcn_v5_0_0_enable_static_power_gating(vinst); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, i, regUVD_STATUS); + done: if (adev->pm.dpm_enabled) amdgpu_dpm_enable_vcn(adev, false, i); @@ -1171,6 +1193,24 @@ static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + int r; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = vcn_v5_0_0_stop(vinst); + if (r) + return r; + r = vcn_v5_0_0_start(vinst); + if (r) + return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1198,6 +1238,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = vcn_v5_0_0_ring_reset, }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 581d8629b9d9..cdefd7fcb0da 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -30,6 +30,7 @@ #include "soc15_hw_ip.h" #include "vcn_v2_0.h" #include "vcn_v4_0_3.h" +#include "mmsch_v5_0.h" #include "vcn/vcn_5_0_0_offset.h" #include "vcn/vcn_5_0_0_sh_mask.h" @@ -39,12 +40,13 @@ #include <drm/drm_drv.h> +static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev); static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v5_0_1_set_pg_state(struct amdgpu_vcn_inst *vinst, enum amd_powergating_state state); static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring); - +static void vcn_v5_0_1_set_ras_funcs(struct amdgpu_device *adev); /** * vcn_v5_0_1_early_init - set function pointers and load microcode * @@ -64,6 +66,7 @@ static int vcn_v5_0_1_early_init(struct amdgpu_ip_block *ip_block) vcn_v5_0_1_set_unified_ring_funcs(adev); vcn_v5_0_1_set_irq_funcs(adev); + vcn_v5_0_1_set_ras_funcs(adev); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { adev->vcn.inst[i].set_pg_state = vcn_v5_0_1_set_pg_state; @@ -111,6 +114,10 @@ static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; + /* VCN POISON TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_5_0__SRCID_UVD_POISON, &adev->vcn.inst->ras_poison_irq); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { vcn_inst = GET_INST(VCN, i); @@ -126,7 +133,14 @@ static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) ring = &adev->vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * vcn_inst; + if (!amdgpu_sriov_vf(adev)) + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 11 * vcn_inst; + else + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 32 * vcn_inst; ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); @@ -143,6 +157,12 @@ static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_alloc_mm_table(adev); + if (r) + return r; + } + vcn_v5_0_0_alloc_ip_dump(adev); return amdgpu_vcn_sysfs_reset_mask_init(adev); @@ -172,6 +192,9 @@ static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block) drm_dev_exit(idx); } + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_free_mm_table(adev); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { r = amdgpu_vcn_suspend(adev, i); if (r) @@ -204,24 +227,38 @@ static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_ring *ring; int i, r, vcn_inst; - if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100) - adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED); - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - vcn_inst = GET_INST(VCN, i); - ring = &adev->vcn.inst[i].ring_enc[0]; - - if (ring->use_doorbell) - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * vcn_inst), - adev->vcn.inst[i].aid_id); - - /* Re-init fw_shared, if required */ - vcn_v5_0_1_fw_shared_init(adev, i); - - r = amdgpu_ring_test_helper(ring); + if (amdgpu_sriov_vf(adev)) { + r = vcn_v5_0_1_start_sriov(adev); if (r) return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ring = &adev->vcn.inst[i].ring_enc[0]; + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v5_0_1_unified_ring_set_wptr(ring); + ring->sched.ready = true; + } + } else { + if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100) + adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + vcn_inst = GET_INST(VCN, i); + ring = &adev->vcn.inst[i].ring_enc[0]; + + if (ring->use_doorbell) + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 11 * vcn_inst), + adev->vcn.inst[i].aid_id); + + /* Re-init fw_shared, if required */ + vcn_v5_0_1_fw_shared_init(adev, i); + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } } return 0; @@ -247,6 +284,9 @@ static int vcn_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block) vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); } + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) + amdgpu_irq_put(adev, &adev->vcn.inst->ras_poison_irq, 0); + return 0; } @@ -503,6 +543,52 @@ static void vcn_v5_0_1_enable_clock_gating(struct amdgpu_vcn_inst *vinst) } /** + * vcn_v5_0_1_pause_dpg_mode - VCN pause with dpg mode + * + * @vinst: VCN instance + * @new_state: pause state + * + * Pause dpg mode for VCN block + */ +static int vcn_v5_0_1_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state) +{ + struct amdgpu_device *adev = vinst->adev; + uint32_t reg_data = 0; + int vcn_inst; + + vcn_inst = GET_INST(VCN, vinst->inst); + + /* pause/unpause if state is changed */ + if (vinst->pause_state.fw_based != new_state->fw_based) { + DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d %s\n", + vinst->pause_state.fw_based, new_state->fw_based, + new_state->fw_based ? "VCN_DPG_STATE__PAUSE" : "VCN_DPG_STATE__UNPAUSE"); + reg_data = RREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + } else { + /* unpause DPG, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); + } + vinst->pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + + +/** * vcn_v5_0_1_start_dpg_mode - VCN start with dpg mode * * @vinst: VCN instance @@ -518,6 +604,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, volatile struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE}; int vcn_inst; uint32_t tmp; @@ -582,6 +669,12 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, if (indirect) amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + /* resetting ring, fw should not check RB ring */ + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + + /* Pause dpg */ + vcn_v5_0_1_pause_dpg_mode(vinst, &state); + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr)); @@ -591,7 +684,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); - fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); @@ -602,6 +695,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); tmp |= VCN_RB_ENABLE__RB1_EN_MASK; WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + /* resetting done, fw can check RB ring */ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, @@ -613,6 +707,195 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, return 0; } +static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev) +{ + int i, vcn_inst; + struct amdgpu_ring *ring_enc; + uint64_t cache_addr; + uint64_t rb_enc_addr; + uint64_t ctx_addr; + uint32_t param, resp, expected; + uint32_t offset, cache_size; + uint32_t tmp, timeout; + + struct amdgpu_mm_table *table = &adev->virt.mm_table; + uint32_t *table_loc; + uint32_t table_size; + uint32_t size, size_dw; + uint32_t init_status; + uint32_t enabled_vcn; + + struct mmsch_v5_0_cmd_direct_write + direct_wt = { {0} }; + struct mmsch_v5_0_cmd_direct_read_modify_write + direct_rd_mod_wt = { {0} }; + struct mmsch_v5_0_cmd_end end = { {0} }; + struct mmsch_v5_0_init_header header; + + volatile struct amdgpu_vcn5_fw_shared *fw_shared; + volatile struct amdgpu_fw_shared_rb_setup *rb_setup; + + direct_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_WRITE; + direct_rd_mod_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; + end.cmd_header.command_type = MMSCH_COMMAND__END; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + vcn_inst = GET_INST(VCN, i); + + vcn_v5_0_1_fw_shared_init(adev, vcn_inst); + + memset(&header, 0, sizeof(struct mmsch_v5_0_init_header)); + header.version = MMSCH_VERSION; + header.total_size = sizeof(struct mmsch_v5_0_init_header) >> 2; + + table_loc = (uint32_t *)table->cpu_addr; + table_loc += header.total_size; + + table_size = 0; + + MMSCH_V5_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS), + ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); + + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo); + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi); + + offset = 0; + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_OFFSET0), 0); + } else { + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[i].gpu_addr)); + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[i].gpu_addr)); + offset = cache_size; + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_SIZE0), + cache_size); + + cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset; + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr)); + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr)); + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_OFFSET1), 0); + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE); + + cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE; + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr)); + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr)); + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_OFFSET2), 0); + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE); + + fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr; + rb_setup = &fw_shared->rb_setup; + + ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0]; + ring_enc->wptr = 0; + rb_enc_addr = ring_enc->gpu_addr; + + rb_setup->is_rb_enabled_flags |= RB_ENABLED; + rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr); + rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr); + rb_setup->rb_size = ring_enc->ring_size / 4; + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr)); + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr)); + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared))); + MMSCH_V5_0_INSERT_END(); + + header.vcn0.init_status = 0; + header.vcn0.table_offset = header.total_size; + header.vcn0.table_size = table_size; + header.total_size += table_size; + + /* Send init table to mmsch */ + size = sizeof(struct mmsch_v5_0_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy((void *)table_loc, &header, size); + + ctx_addr = table->gpu_addr; + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr)); + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr)); + + tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID); + tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp); + + size = header.total_size; + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size); + + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0); + + param = 0x00000001; + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param); + tmp = 0; + timeout = 1000; + resp = 0; + expected = MMSCH_VF_MAILBOX_RESP__OK; + while (resp != expected) { + resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP); + if (resp != 0) + break; + + udelay(10); + tmp = tmp + 10; + if (tmp >= timeout) { + DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\ + " waiting for regMMSCH_VF_MAILBOX_RESP "\ + "(expected=0x%08x, readback=0x%08x)\n", + tmp, expected, resp); + return -EBUSY; + } + } + + enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0; + init_status = ((struct mmsch_v5_0_init_header *)(table_loc))->vcn0.init_status; + if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE + && init_status != MMSCH_VF_ENGINE_STATUS__PASS) { + DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\ + "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status); + } + } + + return 0; +} + /** * vcn_v5_0_1_start - VCN start * @@ -759,6 +1042,11 @@ static int vcn_v5_0_1_start(struct amdgpu_vcn_inst *vinst) WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS); + return 0; } @@ -775,9 +1063,13 @@ static void vcn_v5_0_1_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) int inst_idx = vinst->inst; uint32_t tmp; int vcn_inst; + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; vcn_inst = GET_INST(VCN, inst_idx); + /* Unpause dpg */ + vcn_v5_0_1_pause_dpg_mode(vinst, &state); + /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); @@ -789,6 +1081,11 @@ static void vcn_v5_0_1_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) /* disable dynamic power gating mode */ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS); } /** @@ -864,6 +1161,11 @@ static int vcn_v5_0_1_stop(struct amdgpu_vcn_inst *vinst) /* clear status */ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0); + /* Keeping one read-back to ensure all register writes are done, + * otherwise it may introduce race conditions. + */ + RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS); + return 0; } @@ -1049,8 +1351,18 @@ static int vcn_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, static int vcn_v5_0_1_set_pg_state(struct amdgpu_vcn_inst *vinst, enum amd_powergating_state state) { + struct amdgpu_device *adev = vinst->adev; int ret = 0; + /* for SRIOV, guest should not control VCN Power-gating + * MMSCH FW should control Power-gating and clock-gating + * guest should avoid touching CGC and PG + */ + if (amdgpu_sriov_vf(adev)) { + vinst->cur_state = AMD_PG_STATE_UNGATE; + return 0; + } + if (state == vinst->cur_state) return 0; @@ -1106,10 +1418,24 @@ static int vcn_v5_0_1_process_interrupt(struct amdgpu_device *adev, struct amdgp return 0; } +static int vcn_v5_0_1_set_ras_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + static const struct amdgpu_irq_src_funcs vcn_v5_0_1_irq_funcs = { .process = vcn_v5_0_1_process_interrupt, }; +static const struct amdgpu_irq_src_funcs vcn_v5_0_1_ras_irq_funcs = { + .set = vcn_v5_0_1_set_ras_interrupt_state, + .process = amdgpu_vcn_process_poison_irq, +}; + + /** * vcn_v5_0_1_set_irq_funcs - set VCN block interrupt irq functions * @@ -1123,7 +1449,12 @@ static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) adev->vcn.inst->irq.num_types++; + adev->vcn.inst->irq.funcs = &vcn_v5_0_1_irq_funcs; + + adev->vcn.inst->ras_poison_irq.num_types = 1; + adev->vcn.inst->ras_poison_irq.funcs = &vcn_v5_0_1_ras_irq_funcs; + } static const struct amd_ip_funcs vcn_v5_0_1_ip_funcs = { @@ -1155,3 +1486,139 @@ const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block = { .rev = 1, .funcs = &vcn_v5_0_1_ip_funcs, }; + +static uint32_t vcn_v5_0_1_query_poison_by_instance(struct amdgpu_device *adev, + uint32_t instance, uint32_t sub_block) +{ + uint32_t poison_stat = 0, reg_value = 0; + + switch (sub_block) { + case AMDGPU_VCN_V5_0_1_VCPU_VCODEC: + reg_value = RREG32_SOC15(VCN, instance, regUVD_RAS_VCPU_VCODEC_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF); + break; + default: + break; + } + + if (poison_stat) + dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n", + instance, sub_block); + + return poison_stat; +} + +static bool vcn_v5_0_1_query_poison_status(struct amdgpu_device *adev) +{ + uint32_t inst, sub; + uint32_t poison_stat = 0; + + for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) + for (sub = 0; sub < AMDGPU_VCN_V5_0_1_MAX_SUB_BLOCK; sub++) + poison_stat += + vcn_v5_0_1_query_poison_by_instance(adev, inst, sub); + + return !!poison_stat; +} + +static const struct amdgpu_ras_block_hw_ops vcn_v5_0_1_ras_hw_ops = { + .query_poison_status = vcn_v5_0_1_query_poison_status, +}; + +static int vcn_v5_0_1_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct aca_bank_info info; + u64 misc0; + int ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + switch (type) { + case ACA_SMU_TYPE_UE: + bank->aca_err_type = ACA_ERROR_TYPE_UE; + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); + break; + case ACA_SMU_TYPE_CE: + bank->aca_err_type = ACA_ERROR_TYPE_CE; + ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, + ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; + } + + return ret; +} + +/* reference to smu driver if header file */ +static int vcn_v5_0_1_err_codes[] = { + 14, 15, /* VCN */ +}; + +static bool vcn_v5_0_1_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + + if (instlo != mmSMNAID_AID0_MCA_SMU) + return false; + + if (aca_bank_check_error_codes(handle->adev, bank, + vcn_v5_0_1_err_codes, + ARRAY_SIZE(vcn_v5_0_1_err_codes))) + return false; + + return true; +} + +static const struct aca_bank_ops vcn_v5_0_1_aca_bank_ops = { + .aca_bank_parser = vcn_v5_0_1_aca_bank_parser, + .aca_bank_is_valid = vcn_v5_0_1_aca_bank_is_valid, +}; + +static const struct aca_info vcn_v5_0_1_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK, + .bank_ops = &vcn_v5_0_1_aca_bank_ops, +}; + +static int vcn_v5_0_1_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r; + + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__VCN, + &vcn_v5_0_1_aca_info, NULL); + if (r) + goto late_fini; + + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + + return r; +} + +static struct amdgpu_vcn_ras vcn_v5_0_1_ras = { + .ras_block = { + .hw_ops = &vcn_v5_0_1_ras_hw_ops, + .ras_late_init = vcn_v5_0_1_ras_late_init, + }, +}; + +static void vcn_v5_0_1_set_ras_funcs(struct amdgpu_device *adev) +{ + adev->vcn.ras = &vcn_v5_0_1_ras; +} diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h index 8fd90bd10807..b72e4da68317 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h @@ -27,6 +27,13 @@ #define regVCN_RRMT_CNTL 0x0940 #define regVCN_RRMT_CNTL_BASE_IDX 1 + +enum amdgpu_vcn_v5_0_1_sub_block { + AMDGPU_VCN_V5_0_1_VCPU_VCODEC = 0, + + AMDGPU_VCN_V5_0_1_MAX_SUB_BLOCK, +}; + extern const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block; #endif /* __VCN_v5_0_1_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h index 3ca8a417c6d8..8de4ccce5e38 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h +++ b/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h @@ -64,9 +64,6 @@ #define HEADER_BARRIER 5 #define SDMA_OP_AQL_COPY 0 #define SDMA_OP_AQL_BARRIER_OR 0 -/* vm invalidation is only available for GC9.4.3/GC9.4.4/GC9.5.0 */ -#define SDMA_OP_VM_INVALIDATE 8 -#define SDMA_SUBOP_VM_INVALIDATE 4 /*define for op field*/ #define SDMA_PKT_HEADER_op_offset 0 @@ -3334,72 +3331,5 @@ #define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0 #define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift) -/* -** Definitions for SDMA_PKT_VM_INVALIDATION packet -*/ - -/*define for HEADER word*/ -/*define for op field*/ -#define SDMA_PKT_VM_INVALIDATION_HEADER_op_offset 0 -#define SDMA_PKT_VM_INVALIDATION_HEADER_op_mask 0x000000FF -#define SDMA_PKT_VM_INVALIDATION_HEADER_op_shift 0 -#define SDMA_PKT_VM_INVALIDATION_HEADER_OP(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_op_shift) - -/*define for sub_op field*/ -#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_offset 0 -#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask 0x000000FF -#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift 8 -#define SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift) - -/*define for xcc0_eng_id field*/ -#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_offset 0 -#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_mask 0x0000001F -#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_shift 16 -#define SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_shift) - -/*define for xcc1_eng_id field*/ -#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_offset 0 -#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_mask 0x0000001F -#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_shift 21 -#define SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_shift) - -/*define for mmhub_eng_id field*/ -#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_offset 0 -#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_mask 0x0000001F -#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_shift 26 -#define SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_shift) - -/*define for INVALIDATEREQ word*/ -/*define for invalidatereq field*/ -#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_offset 1 -#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask 0xFFFFFFFF -#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift 0 -#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(x) ((x & SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask) << SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift) - -/*define for ADDRESSRANGELO word*/ -/*define for addressrangelo field*/ -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_offset 2 -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask 0xFFFFFFFF -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift 0 -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_ADDRESSRANGELO(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift) - -/*define for ADDRESSRANGEHI word*/ -/*define for invalidateack field*/ -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_offset 3 -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask 0x0000FFFF -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift 0 -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift) - -/*define for addressrangehi field*/ -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_offset 3 -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask 0x0000001F -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift 16 -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift) - -/*define for reserved field*/ -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_offset 3 -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask 0x000001FF -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift 23 -#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_RESERVED(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift) #endif /* __SDMA_PKT_OPEN_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index faa0dd75dd6d..85846fd08ce4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -350,6 +350,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) if (ret) return ret; } + ih[i]->overflow = false; } if (!amdgpu_sriov_vf(adev)) @@ -437,7 +438,10 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) goto out; - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + if (!amdgpu_sriov_vf(adev)) + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + else + ih->overflow = true; /* When a ring buffer overflow happen start parsing interrupt * from the last not overwritten vector (wptr + 32). Hopefully diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 86d8bc10d90a..9b3510e53112 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -239,6 +239,13 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] = .max_pixels_per_frame = 4096 * 4096, .max_level = 186, }, + { + .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, + .max_width = 4096, + .max_height = 4096, + .max_pixels_per_frame = 4096 * 4096, + .max_level = 0, + }, }; static const struct amdgpu_video_codecs cz_video_codecs_decode = |