diff options
Diffstat (limited to 'drivers/gpu/drm')
42 files changed, 498 insertions, 132 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index fbe7616555c8..a2879d2b7c8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -250,16 +250,24 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc) { - if (adev->kfd.dev) - kgd2kfd_suspend(adev->kfd.dev, suspend_proc); + if (adev->kfd.dev) { + if (adev->in_s0ix) + kgd2kfd_stop_sched_all_nodes(adev->kfd.dev); + else + kgd2kfd_suspend(adev->kfd.dev, suspend_proc); + } } int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc) { int r = 0; - if (adev->kfd.dev) - r = kgd2kfd_resume(adev->kfd.dev, resume_proc); + if (adev->kfd.dev) { + if (adev->in_s0ix) + r = kgd2kfd_start_sched_all_nodes(adev->kfd.dev); + else + r = kgd2kfd_resume(adev->kfd.dev, resume_proc); + } return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 33eb4826b58b..aa88bad7416b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -426,7 +426,9 @@ void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask); int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd); void kgd2kfd_unlock_kfd(struct kfd_dev *kfd); int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id); +int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd); int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id); +int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd); bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id); bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry, bool retry_fault); @@ -516,11 +518,21 @@ static inline int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id) return 0; } +static inline int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd) +{ + return 0; +} + static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) { return 0; } +static inline int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd) +{ + return 0; +} + static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) { return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 01d234cf8156..c8459337fcb8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5136,7 +5136,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) adev->in_suspend = true; if (amdgpu_sriov_vf(adev)) { - if (!adev->in_s0ix && !adev->in_runpm) + if (!adev->in_runpm) amdgpu_amdkfd_suspend_process(adev); amdgpu_virt_fini_data_exchange(adev); r = amdgpu_virt_request_full_gpu(adev, false); @@ -5156,10 +5156,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) amdgpu_device_ip_suspend_phase1(adev); - if (!adev->in_s0ix) { - amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); - amdgpu_userq_suspend(adev); - } + amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); + amdgpu_userq_suspend(adev); r = amdgpu_device_evict_resources(adev); if (r) @@ -5254,15 +5252,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients) goto exit; } - if (!adev->in_s0ix) { - r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); - if (r) - goto exit; + r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); + if (r) + goto exit; - r = amdgpu_userq_resume(adev); - if (r) - goto exit; - } + r = amdgpu_userq_resume(adev); + if (r) + goto exit; r = amdgpu_device_ip_late_init(adev); if (r) @@ -5275,7 +5271,7 @@ exit: amdgpu_virt_init_data_exchange(adev); amdgpu_virt_release_full_gpu(adev, true); - if (!adev->in_s0ix && !r && !adev->in_runpm) + if (!r && !adev->in_runpm) r = amdgpu_amdkfd_resume_process(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index c85de8c8f6f5..c37527704d43 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1654,6 +1654,21 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) } } break; + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.pfp_fw_version >= 102 && + adev->gfx.mec_fw_version >= 66 && + adev->mes.fw_version[0] >= 128) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 7e749f9b6d69..349c351e242b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1550,6 +1550,25 @@ int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id) return ret; } +int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd) +{ + struct kfd_node *node; + int i, r; + + if (!kfd->init_complete) + return 0; + + for (i = 0; i < kfd->num_nodes; i++) { + node = kfd->nodes[i]; + r = node->dqm->ops.unhalt(node->dqm); + if (r) { + dev_err(kfd_device, "Error in starting scheduler\n"); + return r; + } + } + return 0; +} + int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) { struct kfd_node *node; @@ -1567,6 +1586,23 @@ int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) return node->dqm->ops.halt(node->dqm); } +int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd) +{ + struct kfd_node *node; + int i, r; + + if (!kfd->init_complete) + return 0; + + for (i = 0; i < kfd->num_nodes; i++) { + node = kfd->nodes[i]; + r = node->dqm->ops.halt(node->dqm); + if (r) + return r; + } + return 0; +} + bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) { struct kfd_node *node; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4e86370ae705..ef026143dc1c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2037,6 +2037,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) dc_hardware_init(adev->dm.dc); + adev->dm.restore_backlight = true; + adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev); if (!adev->dm.hpd_rx_offload_wq) { drm_err(adev_to_drm(adev), "failed to create hpd rx offload workqueue.\n"); @@ -3399,6 +3401,7 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); dc_resume(dm->dc); + adev->dm.restore_backlight = true; amdgpu_dm_irq_resume_early(adev); @@ -8717,7 +8720,16 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev, static void manage_dm_interrupts(struct amdgpu_device *adev, struct amdgpu_crtc *acrtc, struct dm_crtc_state *acrtc_state) -{ +{ /* + * We cannot be sure that the frontend index maps to the same + * backend index - some even map to more than one. + * So we have to go through the CRTC to find the right IRQ. + */ + int irq_type = amdgpu_display_crtc_idx_to_irq_type( + adev, + acrtc->crtc_id); + struct drm_device *dev = adev_to_drm(adev); + struct drm_vblank_crtc_config config = {0}; struct dc_crtc_timing *timing; int offdelay; @@ -8770,7 +8782,35 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, drm_crtc_vblank_on_config(&acrtc->base, &config); + /* Allow RX6xxx, RX7700, RX7800 GPUs to call amdgpu_irq_get.*/ + switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { + case IP_VERSION(3, 0, 0): + case IP_VERSION(3, 0, 2): + case IP_VERSION(3, 0, 3): + case IP_VERSION(3, 2, 0): + if (amdgpu_irq_get(adev, &adev->pageflip_irq, irq_type)) + drm_err(dev, "DM_IRQ: Cannot get pageflip irq!\n"); +#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) + if (amdgpu_irq_get(adev, &adev->vline0_irq, irq_type)) + drm_err(dev, "DM_IRQ: Cannot get vline0 irq!\n"); +#endif + } + } else { + /* Allow RX6xxx, RX7700, RX7800 GPUs to call amdgpu_irq_put.*/ + switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { + case IP_VERSION(3, 0, 0): + case IP_VERSION(3, 0, 2): + case IP_VERSION(3, 0, 3): + case IP_VERSION(3, 2, 0): +#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) + if (amdgpu_irq_put(adev, &adev->vline0_irq, irq_type)) + drm_err(dev, "DM_IRQ: Cannot put vline0 irq!\n"); +#endif + if (amdgpu_irq_put(adev, &adev->pageflip_irq, irq_type)) + drm_err(dev, "DM_IRQ: Cannot put pageflip irq!\n"); + } + drm_crtc_vblank_off(&acrtc->base); } } @@ -9792,7 +9832,6 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, bool mode_set_reset_required = false; u32 i; struct dc_commit_streams_params params = {dc_state->streams, dc_state->stream_count}; - bool set_backlight_level = false; /* Disable writeback */ for_each_old_connector_in_state(state, connector, old_con_state, i) { @@ -9912,7 +9951,6 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, acrtc->hw_mode = new_crtc_state->mode; crtc->hwmode = new_crtc_state->mode; mode_set_reset_required = true; - set_backlight_level = true; } else if (modereset_required(new_crtc_state)) { drm_dbg_atomic(dev, "Atomic commit: RESET. crtc id %d:[%p]\n", @@ -9969,13 +10007,16 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, * to fix a flicker issue. * It will cause the dm->actual_brightness is not the current panel brightness * level. (the dm->brightness is the correct panel level) - * So we set the backlight level with dm->brightness value after set mode + * So we set the backlight level with dm->brightness value after initial + * set mode. Use restore_backlight flag to avoid setting backlight level + * for every subsequent mode set. */ - if (set_backlight_level) { + if (dm->restore_backlight) { for (i = 0; i < dm->num_of_edps; i++) { if (dm->backlight_dev[i]) amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]); } + dm->restore_backlight = false; } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index b937da0a4e4a..6aae51c1beb3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -611,6 +611,13 @@ struct amdgpu_display_manager { u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP]; /** + * @restore_backlight: + * + * Flag to indicate whether to restore backlight after modeset. + */ + bool restore_backlight; + + /** * @aux_hpd_discon_quirk: * * quirk for hpd discon while aux is on-going. diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index ebabfe3a512f..c0dfe2d8b3be 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -821,7 +821,7 @@ int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev, struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); const struct drm_color_lut *shaper = NULL, *lut3d = NULL; uint32_t exp_size, size, dim_size = MAX_COLOR_3DLUT_SIZE; - bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut; + bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut || adev->dm.dc->caps.color.mpc.preblend; /* shaper LUT is only available if 3D LUT color caps */ exp_size = has_3dlut ? MAX_COLOR_LUT_ENTRIES : 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index eef51652ca35..3d2f8eedeef2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1633,7 +1633,7 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, drm_object_attach_property(&plane->base, dm->adev->mode_info.plane_ctm_property, 0); - if (dpp_color_caps.hw_3d_lut) { + if (dpp_color_caps.hw_3d_lut || dm->dc->caps.color.mpc.preblend) { drm_object_attach_property(&plane->base, mode_info.plane_shaper_lut_property, 0); drm_object_attach_property(&plane->base, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index bb1ac12a2b09..0e638bc6bf77 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -587,9 +587,118 @@ bool dcn35_are_clock_states_equal(struct dc_clocks *a, return true; } -static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, +static void dcn35_save_clk_registers_internal(struct dcn35_clk_internal *internal, struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + // read dtbclk + internal->CLK1_CLK4_CURRENT_CNT = REG_READ(CLK1_CLK4_CURRENT_CNT); + internal->CLK1_CLK4_BYPASS_CNTL = REG_READ(CLK1_CLK4_BYPASS_CNTL); + + // read dcfclk + internal->CLK1_CLK3_CURRENT_CNT = REG_READ(CLK1_CLK3_CURRENT_CNT); + internal->CLK1_CLK3_BYPASS_CNTL = REG_READ(CLK1_CLK3_BYPASS_CNTL); + + // read dcf deep sleep divider + internal->CLK1_CLK3_DS_CNTL = REG_READ(CLK1_CLK3_DS_CNTL); + internal->CLK1_CLK3_ALLOW_DS = REG_READ(CLK1_CLK3_ALLOW_DS); + + // read dppclk + internal->CLK1_CLK1_CURRENT_CNT = REG_READ(CLK1_CLK1_CURRENT_CNT); + internal->CLK1_CLK1_BYPASS_CNTL = REG_READ(CLK1_CLK1_BYPASS_CNTL); + + // read dprefclk + internal->CLK1_CLK2_CURRENT_CNT = REG_READ(CLK1_CLK2_CURRENT_CNT); + internal->CLK1_CLK2_BYPASS_CNTL = REG_READ(CLK1_CLK2_BYPASS_CNTL); + + // read dispclk + internal->CLK1_CLK0_CURRENT_CNT = REG_READ(CLK1_CLK0_CURRENT_CNT); + internal->CLK1_CLK0_BYPASS_CNTL = REG_READ(CLK1_CLK0_BYPASS_CNTL); +} + +static void dcn35_save_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, struct clk_mgr_dcn35 *clk_mgr) { + struct dcn35_clk_internal internal = {0}; + char *bypass_clks[5] = {"0x0 DFS", "0x1 REFCLK", "0x2 ERROR", "0x3 400 FCH", "0x4 600 FCH"}; + + dcn35_save_clk_registers_internal(&internal, &clk_mgr->base.base); + + regs_and_bypass->dcfclk = internal.CLK1_CLK3_CURRENT_CNT / 10; + regs_and_bypass->dcf_deep_sleep_divider = internal.CLK1_CLK3_DS_CNTL / 10; + regs_and_bypass->dcf_deep_sleep_allow = internal.CLK1_CLK3_ALLOW_DS; + regs_and_bypass->dprefclk = internal.CLK1_CLK2_CURRENT_CNT / 10; + regs_and_bypass->dispclk = internal.CLK1_CLK0_CURRENT_CNT / 10; + regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10; + regs_and_bypass->dtbclk = internal.CLK1_CLK4_CURRENT_CNT / 10; + + regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dppclk_bypass < 0 || regs_and_bypass->dppclk_bypass > 4) + regs_and_bypass->dppclk_bypass = 0; + regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dcfclk_bypass < 0 || regs_and_bypass->dcfclk_bypass > 4) + regs_and_bypass->dcfclk_bypass = 0; + regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dispclk_bypass < 0 || regs_and_bypass->dispclk_bypass > 4) + regs_and_bypass->dispclk_bypass = 0; + regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dprefclk_bypass < 0 || regs_and_bypass->dprefclk_bypass > 4) + regs_and_bypass->dprefclk_bypass = 0; + + if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) { + DC_LOG_SMU("clk_type,clk_value,deepsleep_cntl,deepsleep_allow,bypass\n"); + + DC_LOG_SMU("dcfclk,%d,%d,%d,%s\n", + regs_and_bypass->dcfclk, + regs_and_bypass->dcf_deep_sleep_divider, + regs_and_bypass->dcf_deep_sleep_allow, + bypass_clks[(int) regs_and_bypass->dcfclk_bypass]); + + DC_LOG_SMU("dprefclk,%d,N/A,N/A,%s\n", + regs_and_bypass->dprefclk, + bypass_clks[(int) regs_and_bypass->dprefclk_bypass]); + + DC_LOG_SMU("dispclk,%d,N/A,N/A,%s\n", + regs_and_bypass->dispclk, + bypass_clks[(int) regs_and_bypass->dispclk_bypass]); + + // REGISTER VALUES + DC_LOG_SMU("reg_name,value,clk_type"); + + DC_LOG_SMU("CLK1_CLK3_CURRENT_CNT,%d,dcfclk", + internal.CLK1_CLK3_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK4_CURRENT_CNT,%d,dtbclk", + internal.CLK1_CLK4_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK3_DS_CNTL,%d,dcf_deep_sleep_divider", + internal.CLK1_CLK3_DS_CNTL); + + DC_LOG_SMU("CLK1_CLK3_ALLOW_DS,%d,dcf_deep_sleep_allow", + internal.CLK1_CLK3_ALLOW_DS); + + DC_LOG_SMU("CLK1_CLK2_CURRENT_CNT,%d,dprefclk", + internal.CLK1_CLK2_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK0_CURRENT_CNT,%d,dispclk", + internal.CLK1_CLK0_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK1_CURRENT_CNT,%d,dppclk", + internal.CLK1_CLK1_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK3_BYPASS_CNTL,%d,dcfclk_bypass", + internal.CLK1_CLK3_BYPASS_CNTL); + + DC_LOG_SMU("CLK1_CLK2_BYPASS_CNTL,%d,dprefclk_bypass", + internal.CLK1_CLK2_BYPASS_CNTL); + + DC_LOG_SMU("CLK1_CLK0_BYPASS_CNTL,%d,dispclk_bypass", + internal.CLK1_CLK0_BYPASS_CNTL); + + DC_LOG_SMU("CLK1_CLK1_BYPASS_CNTL,%d,dppclk_bypass", + internal.CLK1_CLK1_BYPASS_CNTL); + + } } static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base) @@ -623,6 +732,7 @@ static void init_clk_states(struct clk_mgr *clk_mgr) void dcn35_init_clocks(struct clk_mgr *clk_mgr) { struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); + struct clk_mgr_dcn35 *clk_mgr_dcn35 = TO_CLK_MGR_DCN35(clk_mgr_int); init_clk_states(clk_mgr); @@ -633,6 +743,13 @@ void dcn35_init_clocks(struct clk_mgr *clk_mgr) else clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz; + dcn35_save_clk_registers(&clk_mgr->boot_snapshot, clk_mgr_dcn35); + + clk_mgr->clks.ref_dtbclk_khz = clk_mgr->boot_snapshot.dtbclk * 10; + if (clk_mgr->boot_snapshot.dtbclk > 59000) { + /*dtbclk enabled based on */ + clk_mgr->clks.dtbclk_en = true; + } } static struct clk_bw_params dcn35_bw_params = { .vram_type = Ddr4MemType, @@ -1323,7 +1440,7 @@ void dcn35_clk_mgr_construct( dcn35_bw_params.wm_table = ddr5_wm_table; } /* Saved clocks configured at boot for debug purposes */ - dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, clk_mgr); + dcn35_save_clk_registers(&clk_mgr->base.base.boot_snapshot, clk_mgr); clk_mgr->base.base.dprefclk_khz = dcn35_smu_get_dprefclk(&clk_mgr->base); clk_mgr->base.base.clks.ref_dtbclk_khz = 600000; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index f24e1da68269..8c230cf8939b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1348,7 +1348,6 @@ union surface_update_flags { uint32_t in_transfer_func_change:1; uint32_t input_csc_change:1; uint32_t coeff_reduction_change:1; - uint32_t output_tf_change:1; uint32_t pixel_format_change:1; uint32_t plane_size_change:1; uint32_t gamut_remap_change:1; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index b7c2d3095b25..5e57bd1a08e7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1982,10 +1982,8 @@ static void dcn20_program_pipe( * updating on slave planes */ if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->update_flags.bits.plane_changed || - pipe_ctx->stream->update_flags.bits.out_tf || - (pipe_ctx->plane_state && - pipe_ctx->plane_state->update_flags.bits.output_tf_change)) + pipe_ctx->update_flags.bits.plane_changed || + pipe_ctx->stream->update_flags.bits.out_tf) hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); /* If the pipe has been enabled or has a different opp, we diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index cc9f40d97af2..61167c19359d 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -2019,10 +2019,8 @@ void dcn401_program_pipe( * updating on slave planes */ if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->update_flags.bits.plane_changed || - pipe_ctx->stream->update_flags.bits.out_tf || - (pipe_ctx->plane_state && - pipe_ctx->plane_state->update_flags.bits.output_tf_change)) + pipe_ctx->update_flags.bits.plane_changed || + pipe_ctx->stream->update_flags.bits.out_tf) hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); /* If the pipe has been enabled or has a different opp, we diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index b47cb4a5f488..408f05dfab90 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2236,7 +2236,7 @@ static int smu_resume(struct amdgpu_ip_block *ip_block) return ret; } - if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { + if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL && smu->od_enabled) { ret = smu_od_edit_dpm_table(smu, PP_OD_COMMIT_DPM_TABLE, NULL, 0); if (ret) return ret; diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c index 19c04687b0fe..8e650a02c528 100644 --- a/drivers/gpu/drm/ast/ast_dp.c +++ b/drivers/gpu/drm/ast/ast_dp.c @@ -134,7 +134,7 @@ static int ast_astdp_read_edid_block(void *data, u8 *buf, unsigned int block, si * 3. The Delays are often longer a lot when system resume from S3/S4. */ if (j) - mdelay(j + 1); + msleep(j + 1); /* Wait for EDID offset to show up in mirror register */ vgacrd7 = ast_get_index_reg(ast, AST_IO_VGACRI, 0xd7); diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index c0ad8f59e483..8b3304dedcd9 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -2677,7 +2677,7 @@ static int anx7625_i2c_probe(struct i2c_client *client) ret = devm_request_threaded_irq(dev, platform->pdata.intp_irq, NULL, anx7625_intr_hpd_isr, IRQF_TRIGGER_FALLING | - IRQF_ONESHOT, + IRQF_ONESHOT | IRQF_NO_AUTOEN, "anx7625-intp", platform); if (ret) { DRM_DEV_ERROR(dev, "fail to request irq\n"); @@ -2746,8 +2746,10 @@ static int anx7625_i2c_probe(struct i2c_client *client) } /* Add work function */ - if (platform->pdata.intp_irq) + if (platform->pdata.intp_irq) { + enable_irq(platform->pdata.intp_irq); queue_work(platform->workqueue, &platform->work); + } if (platform->pdata.audio_en) anx7625_register_audio(dev, platform); diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c index a614d1384f71..38726ae1bf15 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c @@ -1984,8 +1984,10 @@ static void cdns_mhdp_atomic_enable(struct drm_bridge *bridge, mhdp_state = to_cdns_mhdp_bridge_state(new_state); mhdp_state->current_mode = drm_mode_duplicate(bridge->dev, mode); - if (!mhdp_state->current_mode) - return; + if (!mhdp_state->current_mode) { + ret = -EINVAL; + goto out; + } drm_mode_set_name(mhdp_state->current_mode); diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index db9b089ef62c..86853535fb7b 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -2432,8 +2432,6 @@ static const struct drm_gpuvm_ops lock_ops = { * * The expected usage is:: * - * .. code-block:: c - * * vm_bind { * struct drm_exec exec; * diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c index 1cf394369127..c0feca58511d 100644 --- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c +++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c @@ -726,8 +726,8 @@ void oaktrail_hdmi_teardown(struct drm_device *dev) if (hdmi_dev) { pdev = hdmi_dev->dev; - pci_set_drvdata(pdev, NULL); oaktrail_hdmi_i2c_exit(pdev); + pci_set_drvdata(pdev, NULL); iounmap(hdmi_dev->regs); kfree(hdmi_dev); pci_dev_put(pdev); diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 0405396c7750..9ecbb4b99c37 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -596,8 +596,9 @@ intel_ddi_transcoder_func_reg_val_get(struct intel_encoder *encoder, enum transcoder master; master = crtc_state->mst_master_transcoder; - drm_WARN_ON(display->drm, - master == INVALID_TRANSCODER); + if (drm_WARN_ON(display->drm, + master == INVALID_TRANSCODER)) + master = TRANSCODER_A; temp |= TRANS_DDI_MST_TRANSPORT_SELECT(master); } } else { diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 41228478b21c..0a3a3f6a5f9d 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -546,7 +546,7 @@ static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, luminance_range->max_luminance, panel->vbt.backlight.pwm_freq_hz, intel_dp->edp_dpcd, ¤t_level, ¤t_mode, - false); + panel->backlight.edp.vesa.luminance_control_support); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index e3d188455f67..b9dae15c1d16 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -514,6 +514,13 @@ static int __create_shmem(struct drm_i915_private *i915, if (IS_ERR(filp)) return PTR_ERR(filp); + /* + * Prevent -EFBIG by allowing large writes beyond MAX_NON_LFS on shmem + * objects by setting O_LARGEFILE. + */ + if (force_o_largefile()) + filp->f_flags |= O_LARGEFILE; + obj->filp = filp; return 0; } diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 8f17394cc82a..df76653e649a 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -886,8 +886,7 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue * if (IS_ERR_OR_NULL(queue)) return; - if (queue->entity.fence_context) - drm_sched_entity_destroy(&queue->entity); + drm_sched_entity_destroy(&queue->entity); if (queue->scheduler.ops) drm_sched_fini(&queue->scheduler); @@ -3558,11 +3557,6 @@ int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle) if (!group) return -EINVAL; - for (u32 i = 0; i < group->queue_count; i++) { - if (group->queues[i]) - drm_sched_entity_destroy(&group->queues[i]->entity); - } - mutex_lock(&sched->reset.lock); mutex_lock(&sched->lock); group->destroyed = true; diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index 81eb046aeebf..b9f67d7a00d8 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -117,6 +117,7 @@ enum xe_guc_action { XE_GUC_ACTION_ENTER_S_STATE = 0x501, XE_GUC_ACTION_EXIT_S_STATE = 0x502, XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506, + XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV = 0x509, XE_GUC_ACTION_SCHED_CONTEXT = 0x1000, XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001, XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 0366a9da5977..d7719d0e36ca 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -17,6 +17,7 @@ * | 0 | 31:16 | **KEY** - KLV key identifier | * | | | - `GuC Self Config KLVs`_ | * | | | - `GuC Opt In Feature KLVs`_ | + * | | | - `GuC Scheduling Policies KLVs`_ | * | | | - `GuC VGT Policy KLVs`_ | * | | | - `GuC VF Configuration KLVs`_ | * | | | | @@ -153,6 +154,30 @@ enum { #define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_LEN 0u /** + * DOC: GuC Scheduling Policies KLVs + * + * `GuC KLV`_ keys available for use with UPDATE_SCHEDULING_POLICIES_KLV. + * + * _`GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD` : 0x1001 + * Some platforms do not allow concurrent execution of RCS and CCS + * workloads from different address spaces. By default, the GuC prioritizes + * RCS submissions over CCS ones, which can lead to CCS workloads being + * significantly (or completely) starved of execution time. This KLV allows + * the driver to specify a quantum (in ms) and a ratio (percentage value + * between 0 and 100), and the GuC will prioritize the CCS for that + * percentage of each quantum. For example, specifying 100ms and 30% will + * make the GuC prioritize the CCS for 30ms of every 100ms. + * Note that this does not necessarly mean that RCS and CCS engines will + * only be active for their percentage of the quantum, as the restriction + * only kicks in if both classes are fully busy with non-compatible address + * spaces; i.e., if one engine is idle or running the same address space, + * a pending job on the other engine will still be submitted to the HW no + * matter what the ratio is + */ +#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_KEY 0x1001 +#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_LEN 2u + +/** * DOC: GuC VGT Policy KLVs * * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY. diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 7484ce55a303..d5dbc51e8612 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -158,8 +158,8 @@ int xe_bo_evict_all(struct xe_device *xe) if (ret) return ret; - ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, - &xe->pinned.late.evicted, xe_bo_evict_pinned); + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external, + &xe->pinned.late.external, xe_bo_evict_pinned); if (!ret) ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index e9b46a2d0019..58c1f397c68c 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -404,7 +404,7 @@ int __init xe_configfs_init(void) return 0; } -void __exit xe_configfs_exit(void) +void xe_configfs_exit(void) { configfs_unregister_subsystem(&xe_configfs); } diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index bd9015761aa0..927ee7991696 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -308,15 +308,19 @@ int xe_device_sysfs_init(struct xe_device *xe) return ret; } - if (xe->info.platform == XE_BATTLEMAGE) { + if (xe->info.platform == XE_BATTLEMAGE && !IS_SRIOV_VF(xe)) { ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs); if (ret) - return ret; + goto cleanup; ret = late_bind_create_files(dev); if (ret) - return ret; + goto cleanup; } return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe); + +cleanup: + xe_device_sysfs_fini(xe); + return ret; } diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 8991b4aed440..c07edcda99c5 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -151,6 +151,16 @@ err_lrc: return err; } +static void __xe_exec_queue_fini(struct xe_exec_queue *q) +{ + int i; + + q->ops->fini(q); + + for (i = 0; i < q->width; ++i) + xe_lrc_put(q->lrc[i]); +} + struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, u16 width, struct xe_hw_engine *hwe, u32 flags, @@ -181,11 +191,13 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v if (xe_exec_queue_uses_pxp(q)) { err = xe_pxp_exec_queue_add(xe->pxp, q); if (err) - goto err_post_alloc; + goto err_post_init; } return q; +err_post_init: + __xe_exec_queue_fini(q); err_post_alloc: __xe_exec_queue_free(q); return ERR_PTR(err); @@ -283,13 +295,11 @@ void xe_exec_queue_destroy(struct kref *ref) xe_exec_queue_put(eq); } - q->ops->fini(q); + q->ops->destroy(q); } void xe_exec_queue_fini(struct xe_exec_queue *q) { - int i; - /* * Before releasing our ref to lrc and xef, accumulate our run ticks * and wakeup any waiters. @@ -298,9 +308,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal)) wake_up_var(&q->xef->exec_queue.pending_removal); - for (i = 0; i < q->width; ++i) - xe_lrc_put(q->lrc[i]); - + __xe_exec_queue_fini(q); __xe_exec_queue_free(q); } diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index cc1cffb5c87f..1c9d03f2a3e5 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -166,8 +166,14 @@ struct xe_exec_queue_ops { int (*init)(struct xe_exec_queue *q); /** @kill: Kill inflight submissions for backend */ void (*kill)(struct xe_exec_queue *q); - /** @fini: Fini exec queue for submission backend */ + /** @fini: Undoes the init() for submission backend */ void (*fini)(struct xe_exec_queue *q); + /** + * @destroy: Destroy exec queue for submission backend. The backend + * function must call xe_exec_queue_fini() (which will in turn call the + * fini() backend function) to ensure the queue is properly cleaned up. + */ + void (*destroy)(struct xe_exec_queue *q); /** @set_priority: Set priority for exec queue */ int (*set_priority)(struct xe_exec_queue *q, enum xe_exec_queue_priority priority); diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 788f56b066b6..f83d421ac9d3 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -385,10 +385,20 @@ err_free: return err; } -static void execlist_exec_queue_fini_async(struct work_struct *w) +static void execlist_exec_queue_fini(struct xe_exec_queue *q) +{ + struct xe_execlist_exec_queue *exl = q->execlist; + + drm_sched_entity_fini(&exl->entity); + drm_sched_fini(&exl->sched); + + kfree(exl); +} + +static void execlist_exec_queue_destroy_async(struct work_struct *w) { struct xe_execlist_exec_queue *ee = - container_of(w, struct xe_execlist_exec_queue, fini_async); + container_of(w, struct xe_execlist_exec_queue, destroy_async); struct xe_exec_queue *q = ee->q; struct xe_execlist_exec_queue *exl = q->execlist; struct xe_device *xe = gt_to_xe(q->gt); @@ -401,10 +411,6 @@ static void execlist_exec_queue_fini_async(struct work_struct *w) list_del(&exl->active_link); spin_unlock_irqrestore(&exl->port->lock, flags); - drm_sched_entity_fini(&exl->entity); - drm_sched_fini(&exl->sched); - kfree(exl); - xe_exec_queue_fini(q); } @@ -413,10 +419,10 @@ static void execlist_exec_queue_kill(struct xe_exec_queue *q) /* NIY */ } -static void execlist_exec_queue_fini(struct xe_exec_queue *q) +static void execlist_exec_queue_destroy(struct xe_exec_queue *q) { - INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async); - queue_work(system_unbound_wq, &q->execlist->fini_async); + INIT_WORK(&q->execlist->destroy_async, execlist_exec_queue_destroy_async); + queue_work(system_unbound_wq, &q->execlist->destroy_async); } static int execlist_exec_queue_set_priority(struct xe_exec_queue *q, @@ -467,6 +473,7 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = { .init = execlist_exec_queue_init, .kill = execlist_exec_queue_kill, .fini = execlist_exec_queue_fini, + .destroy = execlist_exec_queue_destroy, .set_priority = execlist_exec_queue_set_priority, .set_timeslice = execlist_exec_queue_set_timeslice, .set_preempt_timeout = execlist_exec_queue_set_preempt_timeout, diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h index 415140936f11..92c4ba52db0c 100644 --- a/drivers/gpu/drm/xe/xe_execlist_types.h +++ b/drivers/gpu/drm/xe/xe_execlist_types.h @@ -42,7 +42,7 @@ struct xe_execlist_exec_queue { bool has_run; - struct work_struct fini_async; + struct work_struct destroy_async; enum xe_exec_queue_priority active_priority; struct list_head active_link; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index c8eda36546d3..17634195cdc2 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -41,6 +41,7 @@ #include "xe_gt_topology.h" #include "xe_guc_exec_queue_types.h" #include "xe_guc_pc.h" +#include "xe_guc_submit.h" #include "xe_hw_fence.h" #include "xe_hw_engine_class_sysfs.h" #include "xe_irq.h" @@ -97,7 +98,7 @@ void xe_gt_sanitize(struct xe_gt *gt) * FIXME: if xe_uc_sanitize is called here, on TGL driver will not * reload */ - gt->uc.guc.submission_state.enabled = false; + xe_guc_submit_disable(>->uc.guc); } static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 494909f74eb2..d84831a03610 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1632,7 +1632,6 @@ static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs) u64 fair; fair = div_u64(available, num_vfs); - fair = rounddown_pow_of_two(fair); /* XXX: ttm_vram_mgr & drm_buddy limitation */ fair = ALIGN_DOWN(fair, alignment); #ifdef MAX_FAIR_LMEM fair = min_t(u64, MAX_FAIR_LMEM, fair); diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index b1d1d6da3758..270fc3792493 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -880,9 +880,7 @@ int xe_guc_post_load_init(struct xe_guc *guc) return ret; } - guc->submission_state.enabled = true; - - return 0; + return xe_guc_submit_enable(guc); } int xe_guc_reset(struct xe_guc *guc) @@ -1579,7 +1577,7 @@ void xe_guc_sanitize(struct xe_guc *guc) { xe_uc_fw_sanitize(&guc->fw); xe_guc_ct_disable(&guc->ct); - guc->submission_state.enabled = false; + xe_guc_submit_disable(guc); } int xe_guc_reset_prepare(struct xe_guc *guc) diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h index a3f421e2adc0..c30c0e3ccbbb 100644 --- a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h @@ -35,8 +35,8 @@ struct xe_guc_exec_queue { struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE]; /** @lr_tdr: long running TDR worker */ struct work_struct lr_tdr; - /** @fini_async: do final fini async from this worker */ - struct work_struct fini_async; + /** @destroy_async: do final destroy async from this worker */ + struct work_struct destroy_async; /** @resume_time: time of last resume */ u64 resume_time; /** @state: GuC specific state for this xe_exec_queue */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index cafb47711e9b..0104afbc941c 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -32,6 +32,7 @@ #include "xe_guc_ct.h" #include "xe_guc_exec_queue_types.h" #include "xe_guc_id_mgr.h" +#include "xe_guc_klv_helpers.h" #include "xe_guc_submit_types.h" #include "xe_hw_engine.h" #include "xe_hw_fence.h" @@ -316,6 +317,71 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); } +/* + * Given that we want to guarantee enough RCS throughput to avoid missing + * frames, we set the yield policy to 20% of each 80ms interval. + */ +#define RC_YIELD_DURATION 80 /* in ms */ +#define RC_YIELD_RATIO 20 /* in percent */ +static u32 *emit_render_compute_yield_klv(u32 *emit) +{ + *emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD); + *emit++ = RC_YIELD_DURATION; + *emit++ = RC_YIELD_RATIO; + + return emit; +} + +#define SCHEDULING_POLICY_MAX_DWORDS 16 +static int guc_init_global_schedule_policy(struct xe_guc *guc) +{ + u32 data[SCHEDULING_POLICY_MAX_DWORDS]; + u32 *emit = data; + u32 count = 0; + int ret; + + if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) + return 0; + + *emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; + + if (CCS_MASK(guc_to_gt(guc))) + emit = emit_render_compute_yield_klv(emit); + + count = emit - data; + if (count > 1) { + xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS); + + ret = xe_guc_ct_send_block(&guc->ct, data, count); + if (ret < 0) { + xe_gt_err(guc_to_gt(guc), + "failed to enable GuC sheduling policies: %pe\n", + ERR_PTR(ret)); + return ret; + } + } + + return 0; +} + +int xe_guc_submit_enable(struct xe_guc *guc) +{ + int ret; + + ret = guc_init_global_schedule_policy(guc); + if (ret) + return ret; + + guc->submission_state.enabled = true; + + return 0; +} + +void xe_guc_submit_disable(struct xe_guc *guc) +{ + guc->submission_state.enabled = false; +} + static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count) { int i; @@ -1277,48 +1343,57 @@ rearm: return DRM_GPU_SCHED_STAT_NO_HANG; } -static void __guc_exec_queue_fini_async(struct work_struct *w) +static void guc_exec_queue_fini(struct xe_exec_queue *q) +{ + struct xe_guc_exec_queue *ge = q->guc; + struct xe_guc *guc = exec_queue_to_guc(q); + + release_guc_id(guc, q); + xe_sched_entity_fini(&ge->entity); + xe_sched_fini(&ge->sched); + + /* + * RCU free due sched being exported via DRM scheduler fences + * (timeline name). + */ + kfree_rcu(ge, rcu); +} + +static void __guc_exec_queue_destroy_async(struct work_struct *w) { struct xe_guc_exec_queue *ge = - container_of(w, struct xe_guc_exec_queue, fini_async); + container_of(w, struct xe_guc_exec_queue, destroy_async); struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); xe_pm_runtime_get(guc_to_xe(guc)); trace_xe_exec_queue_destroy(q); - release_guc_id(guc, q); if (xe_exec_queue_is_lr(q)) cancel_work_sync(&ge->lr_tdr); /* Confirm no work left behind accessing device structures */ cancel_delayed_work_sync(&ge->sched.base.work_tdr); - xe_sched_entity_fini(&ge->entity); - xe_sched_fini(&ge->sched); - /* - * RCU free due sched being exported via DRM scheduler fences - * (timeline name). - */ - kfree_rcu(ge, rcu); xe_exec_queue_fini(q); + xe_pm_runtime_put(guc_to_xe(guc)); } -static void guc_exec_queue_fini_async(struct xe_exec_queue *q) +static void guc_exec_queue_destroy_async(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); - INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); + INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async); /* We must block on kernel engines so slabs are empty on driver unload */ if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) - __guc_exec_queue_fini_async(&q->guc->fini_async); + __guc_exec_queue_destroy_async(&q->guc->destroy_async); else - queue_work(xe->destroy_wq, &q->guc->fini_async); + queue_work(xe->destroy_wq, &q->guc->destroy_async); } -static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) +static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q) { /* * Might be done from within the GPU scheduler, need to do async as we @@ -1327,7 +1402,7 @@ static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) * this we and don't really care when everything is fini'd, just that it * is. */ - guc_exec_queue_fini_async(q); + guc_exec_queue_destroy_async(q); } static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) @@ -1341,7 +1416,7 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) if (exec_queue_registered(q)) disable_scheduling_deregister(guc, q); else - __guc_exec_queue_fini(guc, q); + __guc_exec_queue_destroy(guc, q); } static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) @@ -1574,14 +1649,14 @@ static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, #define STATIC_MSG_CLEANUP 0 #define STATIC_MSG_SUSPEND 1 #define STATIC_MSG_RESUME 2 -static void guc_exec_queue_fini(struct xe_exec_queue *q) +static void guc_exec_queue_destroy(struct xe_exec_queue *q) { struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) guc_exec_queue_add_msg(q, msg, CLEANUP); else - __guc_exec_queue_fini(exec_queue_to_guc(q), q); + __guc_exec_queue_destroy(exec_queue_to_guc(q), q); } static int guc_exec_queue_set_priority(struct xe_exec_queue *q, @@ -1711,6 +1786,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = { .init = guc_exec_queue_init, .kill = guc_exec_queue_kill, .fini = guc_exec_queue_fini, + .destroy = guc_exec_queue_destroy, .set_priority = guc_exec_queue_set_priority, .set_timeslice = guc_exec_queue_set_timeslice, .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, @@ -1732,7 +1808,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) xe_exec_queue_put(q); else if (exec_queue_destroyed(q)) - __guc_exec_queue_fini(guc, q); + __guc_exec_queue_destroy(guc, q); } if (q->guc->suspend_pending) { set_exec_queue_suspended(q); @@ -1989,7 +2065,7 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) xe_exec_queue_put(q); else - __guc_exec_queue_fini(guc, q); + __guc_exec_queue_destroy(guc, q); } int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index 9b71a986c6ca..0d126b807c10 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -13,6 +13,8 @@ struct xe_exec_queue; struct xe_guc; int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids); +int xe_guc_submit_enable(struct xe_guc *guc); +void xe_guc_submit_disable(struct xe_guc *guc); int xe_guc_submit_reset_prepare(struct xe_guc *guc); void xe_guc_submit_reset_wait(struct xe_guc *guc); diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index c17ed1ae8649..c5b63e10bb91 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -286,7 +286,7 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg */ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *value) { - u64 reg_val = 0, min, max; + u32 reg_val = 0; struct xe_device *xe = hwmon->xe; struct xe_reg rapl_limit, pkg_power_sku; struct xe_mmio *mmio = xe_root_tile_mmio(xe); @@ -294,7 +294,7 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channe mutex_lock(&hwmon->hwmon_lock); if (hwmon->xe->info.has_mbx_power_limits) { - xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, (u32 *)®_val); + xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, ®_val); } else { rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); @@ -304,19 +304,21 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channe /* Check if PL limits are disabled. */ if (!(reg_val & PWR_LIM_EN)) { *value = PL_DISABLE; - drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%016llx\n", + drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%08x\n", PWR_ATTR_TO_STR(attr), channel, reg_val); goto unlock; } reg_val = REG_FIELD_GET(PWR_LIM_VAL, reg_val); - *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); + *value = mul_u32_u32(reg_val, SF_POWER) >> hwmon->scl_shift_power; /* For platforms with mailbox power limit support clamping would be done by pcode. */ if (!hwmon->xe->info.has_mbx_power_limits) { - reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku); - min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); - max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); + u64 pkg_pwr, min, max; + + pkg_pwr = xe_mmio_read64_2x32(mmio, pkg_power_sku); + min = REG_FIELD_GET(PKG_MIN_PWR, pkg_pwr); + max = REG_FIELD_GET(PKG_MAX_PWR, pkg_pwr); min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); if (min && max) @@ -493,8 +495,8 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at { struct xe_hwmon *hwmon = dev_get_drvdata(dev); struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); - u32 x, y, x_w = 2; /* 2 bits */ - u64 r, tau4, out; + u32 reg_val, x, y, x_w = 2; /* 2 bits */ + u64 tau4, out; int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD; u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR; @@ -505,23 +507,24 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at mutex_lock(&hwmon->hwmon_lock); if (hwmon->xe->info.has_mbx_power_limits) { - ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, (u32 *)&r); + ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, ®_val); if (ret) { drm_err(&hwmon->xe->drm, - "power interval read fail, ch %d, attr %d, r 0%llx, ret %d\n", - channel, power_attr, r, ret); - r = 0; + "power interval read fail, ch %d, attr %d, val 0x%08x, ret %d\n", + channel, power_attr, reg_val, ret); + reg_val = 0; } } else { - r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel)); + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, + channel)); } mutex_unlock(&hwmon->hwmon_lock); xe_pm_runtime_put(hwmon->xe); - x = REG_FIELD_GET(PWR_LIM_TIME_X, r); - y = REG_FIELD_GET(PWR_LIM_TIME_Y, r); + x = REG_FIELD_GET(PWR_LIM_TIME_X, reg_val); + y = REG_FIELD_GET(PWR_LIM_TIME_Y, reg_val); /* * tau = (1 + (x / 4)) * power(2,y), x = bits(23:22), y = bits(21:17) diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c index 61b0a1531a53..2cfe9eb67391 100644 --- a/drivers/gpu/drm/xe/xe_nvm.c +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -35,6 +35,10 @@ static const struct intel_dg_nvm_region regions[INTEL_DG_NVM_REGIONS] = { static void xe_nvm_release_dev(struct device *dev) { + struct auxiliary_device *aux = container_of(dev, struct auxiliary_device, dev); + struct intel_dg_nvm_dev *nvm = container_of(aux, struct intel_dg_nvm_dev, aux_dev); + + kfree(nvm); } static bool xe_nvm_non_posted_erase(struct xe_device *xe) @@ -162,6 +166,5 @@ void xe_nvm_fini(struct xe_device *xe) auxiliary_device_delete(&nvm->aux_dev); auxiliary_device_uninit(&nvm->aux_dev); - kfree(nvm); xe->nvm = NULL; } diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c index b804234a6551..9e1236a9ec67 100644 --- a/drivers/gpu/drm/xe/xe_tile_sysfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c @@ -44,16 +44,18 @@ int xe_tile_sysfs_init(struct xe_tile *tile) kt->tile = tile; err = kobject_add(&kt->base, &dev->kobj, "tile%d", tile->id); - if (err) { - kobject_put(&kt->base); - return err; - } + if (err) + goto err_object; tile->sysfs = &kt->base; err = xe_vram_freq_sysfs_init(tile); if (err) - return err; + goto err_object; return devm_add_action_or_reset(xe->drm.dev, tile_sysfs_fini, tile); + +err_object: + kobject_put(&kt->base); + return err; } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index dc4f61e56579..5146999d27fa 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -240,8 +240,8 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) pfence = xe_preempt_fence_create(q, q->lr.context, ++q->lr.seqno); - if (!pfence) { - err = -ENOMEM; + if (IS_ERR(pfence)) { + err = PTR_ERR(pfence); goto out_fini; } |