summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/nouveau/nvkm
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/nouveau/nvkm')
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/base.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/fw.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.c320
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.h24
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.c185
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.h18
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb100.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb202.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gh100.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c69
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c4
19 files changed, 625 insertions, 32 deletions
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 3375a59ebf1a..2517b65d8faa 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2280,6 +2280,7 @@ nv13b_chipset = {
.acr = { 0x00000001, gp10b_acr_new },
.bar = { 0x00000001, gm20b_bar_new },
.bus = { 0x00000001, gf100_bus_new },
+ .clk = { 0x00000001, gp10b_clk_new },
.fault = { 0x00000001, gp10b_fault_new },
.fb = { 0x00000001, gp10b_fb_new },
.fuse = { 0x00000001, gm107_fuse_new },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
index 114e50ca1827..03aa6f09ec89 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
@@ -259,6 +259,10 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func,
tdev->func = func;
tdev->pdev = pdev;
+ tdev->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(tdev->regs))
+ return PTR_ERR(tdev->regs);
+
if (func->require_vdd) {
tdev->vdd = devm_regulator_get(&pdev->dev, "vdd");
if (IS_ERR(tdev->vdd)) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c b/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c
index cac6d64ab67d..4e8b3f1c7e25 100644
--- a/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c
@@ -159,6 +159,8 @@ nvkm_falcon_fw_dtor(struct nvkm_falcon_fw *fw)
nvkm_memory_unref(&fw->inst);
nvkm_falcon_fw_dtor_sigs(fw);
nvkm_firmware_dtor(&fw->fw);
+ kfree(fw->boot);
+ fw->boot = NULL;
}
static const struct nvkm_firmware_func
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/Kbuild
index dcecd499d8df..be8f3283ee16 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/Kbuild
@@ -10,6 +10,8 @@ nvkm-y += nvkm/subdev/clk/gf100.o
nvkm-y += nvkm/subdev/clk/gk104.o
nvkm-y += nvkm/subdev/clk/gk20a.o
nvkm-y += nvkm/subdev/clk/gm20b.o
+nvkm-y += nvkm/subdev/clk/gp10b.o
+nvkm-$(CONFIG_PM_DEVFREQ) += nvkm/subdev/clk/gk20a_devfreq.o
nvkm-y += nvkm/subdev/clk/pllnv04.o
nvkm-y += nvkm/subdev/clk/pllgt215.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c
index d573fb0917fc..65f5d0f1f3bf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c
@@ -23,6 +23,7 @@
*
*/
#include "priv.h"
+#include "gk20a_devfreq.h"
#include "gk20a.h"
#include <core/tegra.h>
@@ -589,6 +590,10 @@ gk20a_clk_init(struct nvkm_clk *base)
return ret;
}
+ ret = gk20a_devfreq_init(base, &clk->devfreq);
+ if (ret)
+ return ret;
+
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h
index 286413ff4a9e..ea5b0bab4cce 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h
@@ -118,6 +118,7 @@ struct gk20a_clk {
const struct gk20a_clk_pllg_params *params;
struct gk20a_pll pll;
u32 parent_rate;
+ struct gk20a_devfreq *devfreq;
u32 (*div_to_pl)(u32);
u32 (*pl_to_div)(u32);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.c
new file mode 100644
index 000000000000..41003cbcdbfa
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: MIT
+#include <linux/clk.h>
+#include <linux/math64.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+
+#include <drm/drm_managed.h>
+
+#include <subdev/clk.h>
+
+#include "nouveau_drv.h"
+#include "nouveau_chan.h"
+#include "priv.h"
+#include "gk20a_devfreq.h"
+#include "gk20a.h"
+#include "gp10b.h"
+
+#define PMU_BUSY_CYCLES_NORM_MAX 1000U
+
+#define PWR_PMU_IDLE_COUNTER_TOTAL 0U
+#define PWR_PMU_IDLE_COUNTER_BUSY 4U
+
+#define PWR_PMU_IDLE_COUNT_REG_OFFSET 0x0010A508U
+#define PWR_PMU_IDLE_COUNT_REG_SIZE 16U
+#define PWR_PMU_IDLE_COUNT_MASK 0x7FFFFFFFU
+#define PWR_PMU_IDLE_COUNT_RESET_VALUE (0x1U << 31U)
+
+#define PWR_PMU_IDLE_INTR_REG_OFFSET 0x0010A9E8U
+#define PWR_PMU_IDLE_INTR_ENABLE_VALUE 0U
+
+#define PWR_PMU_IDLE_INTR_STATUS_REG_OFFSET 0x0010A9ECU
+#define PWR_PMU_IDLE_INTR_STATUS_MASK 0x00000001U
+#define PWR_PMU_IDLE_INTR_STATUS_RESET_VALUE 0x1U
+
+#define PWR_PMU_IDLE_THRESHOLD_REG_OFFSET 0x0010A8A0U
+#define PWR_PMU_IDLE_THRESHOLD_REG_SIZE 4U
+#define PWR_PMU_IDLE_THRESHOLD_MAX_VALUE 0x7FFFFFFFU
+
+#define PWR_PMU_IDLE_CTRL_REG_OFFSET 0x0010A50CU
+#define PWR_PMU_IDLE_CTRL_REG_SIZE 16U
+#define PWR_PMU_IDLE_CTRL_VALUE_MASK 0x3U
+#define PWR_PMU_IDLE_CTRL_VALUE_BUSY 0x2U
+#define PWR_PMU_IDLE_CTRL_VALUE_ALWAYS 0x3U
+#define PWR_PMU_IDLE_CTRL_FILTER_MASK (0x1U << 2)
+#define PWR_PMU_IDLE_CTRL_FILTER_DISABLED 0x0U
+
+#define PWR_PMU_IDLE_MASK_REG_OFFSET 0x0010A504U
+#define PWR_PMU_IDLE_MASK_REG_SIZE 16U
+#define PWM_PMU_IDLE_MASK_GR_ENABLED 0x1U
+#define PWM_PMU_IDLE_MASK_CE_2_ENABLED 0x200000U
+
+/**
+ * struct gk20a_devfreq - Device frequency management
+ */
+struct gk20a_devfreq {
+ /** @devfreq: devfreq device. */
+ struct devfreq *devfreq;
+
+ /** @regs: Device registers. */
+ void __iomem *regs;
+
+ /** @gov_data: Governor data. */
+ struct devfreq_simple_ondemand_data gov_data;
+
+ /** @busy_time: Busy time. */
+ ktime_t busy_time;
+
+ /** @total_time: Total time. */
+ ktime_t total_time;
+
+ /** @time_last_update: Last update time. */
+ ktime_t time_last_update;
+};
+
+static struct gk20a_devfreq *dev_to_gk20a_devfreq(struct device *dev)
+{
+ struct nouveau_drm *drm = dev_get_drvdata(dev);
+ struct nvkm_subdev *subdev = nvkm_device_subdev(drm->nvkm, NVKM_SUBDEV_CLK, 0);
+ struct nvkm_clk *base = nvkm_clk(subdev);
+
+ switch (drm->nvkm->chipset) {
+ case 0x13b: return gp10b_clk(base)->devfreq; break;
+ default: return gk20a_clk(base)->devfreq; break;
+ }
+}
+
+static void gk20a_pmu_init_perfmon_counter(struct gk20a_devfreq *gdevfreq)
+{
+ u32 data;
+
+ // Set pmu idle intr status bit on total counter overflow
+ writel(PWR_PMU_IDLE_INTR_ENABLE_VALUE,
+ gdevfreq->regs + PWR_PMU_IDLE_INTR_REG_OFFSET);
+
+ writel(PWR_PMU_IDLE_THRESHOLD_MAX_VALUE,
+ gdevfreq->regs + PWR_PMU_IDLE_THRESHOLD_REG_OFFSET +
+ (PWR_PMU_IDLE_COUNTER_TOTAL * PWR_PMU_IDLE_THRESHOLD_REG_SIZE));
+
+ // Setup counter for total cycles
+ data = readl(gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
+ (PWR_PMU_IDLE_COUNTER_TOTAL * PWR_PMU_IDLE_CTRL_REG_SIZE));
+ data &= ~(PWR_PMU_IDLE_CTRL_VALUE_MASK | PWR_PMU_IDLE_CTRL_FILTER_MASK);
+ data |= PWR_PMU_IDLE_CTRL_VALUE_ALWAYS | PWR_PMU_IDLE_CTRL_FILTER_DISABLED;
+ writel(data, gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
+ (PWR_PMU_IDLE_COUNTER_TOTAL * PWR_PMU_IDLE_CTRL_REG_SIZE));
+
+ // Setup counter for busy cycles
+ writel(PWM_PMU_IDLE_MASK_GR_ENABLED | PWM_PMU_IDLE_MASK_CE_2_ENABLED,
+ gdevfreq->regs + PWR_PMU_IDLE_MASK_REG_OFFSET +
+ (PWR_PMU_IDLE_COUNTER_BUSY * PWR_PMU_IDLE_MASK_REG_SIZE));
+
+ data = readl(gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
+ (PWR_PMU_IDLE_COUNTER_BUSY * PWR_PMU_IDLE_CTRL_REG_SIZE));
+ data &= ~(PWR_PMU_IDLE_CTRL_VALUE_MASK | PWR_PMU_IDLE_CTRL_FILTER_MASK);
+ data |= PWR_PMU_IDLE_CTRL_VALUE_BUSY | PWR_PMU_IDLE_CTRL_FILTER_DISABLED;
+ writel(data, gdevfreq->regs + PWR_PMU_IDLE_CTRL_REG_OFFSET +
+ (PWR_PMU_IDLE_COUNTER_BUSY * PWR_PMU_IDLE_CTRL_REG_SIZE));
+}
+
+static u32 gk20a_pmu_read_idle_counter(struct gk20a_devfreq *gdevfreq, u32 counter_id)
+{
+ u32 ret;
+
+ ret = readl(gdevfreq->regs + PWR_PMU_IDLE_COUNT_REG_OFFSET +
+ (counter_id * PWR_PMU_IDLE_COUNT_REG_SIZE));
+
+ return ret & PWR_PMU_IDLE_COUNT_MASK;
+}
+
+static void gk20a_pmu_reset_idle_counter(struct gk20a_devfreq *gdevfreq, u32 counter_id)
+{
+ writel(PWR_PMU_IDLE_COUNT_RESET_VALUE, gdevfreq->regs + PWR_PMU_IDLE_COUNT_REG_OFFSET +
+ (counter_id * PWR_PMU_IDLE_COUNT_REG_SIZE));
+}
+
+static u32 gk20a_pmu_read_idle_intr_status(struct gk20a_devfreq *gdevfreq)
+{
+ u32 ret;
+
+ ret = readl(gdevfreq->regs + PWR_PMU_IDLE_INTR_STATUS_REG_OFFSET);
+
+ return ret & PWR_PMU_IDLE_INTR_STATUS_MASK;
+}
+
+static void gk20a_pmu_clear_idle_intr_status(struct gk20a_devfreq *gdevfreq)
+{
+ writel(PWR_PMU_IDLE_INTR_STATUS_RESET_VALUE,
+ gdevfreq->regs + PWR_PMU_IDLE_INTR_STATUS_REG_OFFSET);
+}
+
+static void gk20a_devfreq_update_utilization(struct gk20a_devfreq *gdevfreq)
+{
+ ktime_t now, last;
+ u64 busy_cycles, total_cycles;
+ u32 norm, intr_status;
+
+ now = ktime_get();
+ last = gdevfreq->time_last_update;
+ gdevfreq->total_time = ktime_us_delta(now, last);
+
+ busy_cycles = gk20a_pmu_read_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_BUSY);
+ total_cycles = gk20a_pmu_read_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_TOTAL);
+ intr_status = gk20a_pmu_read_idle_intr_status(gdevfreq);
+
+ gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_BUSY);
+ gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_TOTAL);
+
+ if (intr_status != 0UL) {
+ norm = PMU_BUSY_CYCLES_NORM_MAX;
+ gk20a_pmu_clear_idle_intr_status(gdevfreq);
+ } else if (total_cycles == 0ULL || busy_cycles > total_cycles) {
+ norm = PMU_BUSY_CYCLES_NORM_MAX;
+ } else {
+ norm = (u32)div64_u64(busy_cycles * PMU_BUSY_CYCLES_NORM_MAX,
+ total_cycles);
+ }
+
+ gdevfreq->busy_time = div_u64(gdevfreq->total_time * norm, PMU_BUSY_CYCLES_NORM_MAX);
+ gdevfreq->time_last_update = now;
+}
+
+static int gk20a_devfreq_target(struct device *dev, unsigned long *freq,
+ u32 flags)
+{
+ struct nouveau_drm *drm = dev_get_drvdata(dev);
+ struct nvkm_subdev *subdev = nvkm_device_subdev(drm->nvkm, NVKM_SUBDEV_CLK, 0);
+ struct nvkm_clk *base = nvkm_clk(subdev);
+ struct nvkm_pstate *pstates = base->func->pstates;
+ int nr_pstates = base->func->nr_pstates;
+ int i, ret;
+
+ for (i = 0; i < nr_pstates - 1; i++)
+ if (pstates[i].base.domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV >= *freq)
+ break;
+
+ ret = nvkm_clk_ustate(base, pstates[i].pstate, 0);
+ ret |= nvkm_clk_ustate(base, pstates[i].pstate, 1);
+ if (ret) {
+ nvkm_error(subdev, "cannot update clock\n");
+ return ret;
+ }
+
+ *freq = pstates[i].base.domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV;
+
+ return 0;
+}
+
+static int gk20a_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
+{
+ struct nouveau_drm *drm = dev_get_drvdata(dev);
+ struct nvkm_subdev *subdev = nvkm_device_subdev(drm->nvkm, NVKM_SUBDEV_CLK, 0);
+ struct nvkm_clk *base = nvkm_clk(subdev);
+
+ *freq = nvkm_clk_read(base, nv_clk_src_gpc) * GK20A_CLK_GPC_MDIV;
+
+ return 0;
+}
+
+static void gk20a_devfreq_reset(struct gk20a_devfreq *gdevfreq)
+{
+ gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_BUSY);
+ gk20a_pmu_reset_idle_counter(gdevfreq, PWR_PMU_IDLE_COUNTER_TOTAL);
+ gk20a_pmu_clear_idle_intr_status(gdevfreq);
+
+ gdevfreq->busy_time = 0;
+ gdevfreq->total_time = 0;
+ gdevfreq->time_last_update = ktime_get();
+}
+
+static int gk20a_devfreq_get_dev_status(struct device *dev,
+ struct devfreq_dev_status *status)
+{
+ struct nouveau_drm *drm = dev_get_drvdata(dev);
+ struct gk20a_devfreq *gdevfreq = dev_to_gk20a_devfreq(dev);
+
+ gk20a_devfreq_get_cur_freq(dev, &status->current_frequency);
+
+ gk20a_devfreq_update_utilization(gdevfreq);
+
+ status->busy_time = ktime_to_ns(gdevfreq->busy_time);
+ status->total_time = ktime_to_ns(gdevfreq->total_time);
+
+ gk20a_devfreq_reset(gdevfreq);
+
+ NV_DEBUG(drm, "busy %lu total %lu %lu %% freq %lu MHz\n",
+ status->busy_time, status->total_time,
+ status->busy_time / (status->total_time / 100),
+ status->current_frequency / 1000 / 1000);
+
+ return 0;
+}
+
+static struct devfreq_dev_profile gk20a_devfreq_profile = {
+ .timer = DEVFREQ_TIMER_DELAYED,
+ .polling_ms = 50,
+ .target = gk20a_devfreq_target,
+ .get_cur_freq = gk20a_devfreq_get_cur_freq,
+ .get_dev_status = gk20a_devfreq_get_dev_status,
+};
+
+int gk20a_devfreq_init(struct nvkm_clk *base, struct gk20a_devfreq **gdevfreq)
+{
+ struct nvkm_device *device = base->subdev.device;
+ struct nouveau_drm *drm = dev_get_drvdata(device->dev);
+ struct nvkm_device_tegra *tdev = device->func->tegra(device);
+ struct nvkm_pstate *pstates = base->func->pstates;
+ int nr_pstates = base->func->nr_pstates;
+ struct gk20a_devfreq *new_gdevfreq;
+ int i;
+
+ new_gdevfreq = drmm_kzalloc(drm->dev, sizeof(struct gk20a_devfreq), GFP_KERNEL);
+ if (!new_gdevfreq)
+ return -ENOMEM;
+
+ new_gdevfreq->regs = tdev->regs;
+
+ for (i = 0; i < nr_pstates; i++)
+ dev_pm_opp_add(base->subdev.device->dev,
+ pstates[i].base.domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV, 0);
+
+ gk20a_pmu_init_perfmon_counter(new_gdevfreq);
+ gk20a_devfreq_reset(new_gdevfreq);
+
+ gk20a_devfreq_profile.initial_freq =
+ nvkm_clk_read(base, nv_clk_src_gpc) * GK20A_CLK_GPC_MDIV;
+
+ new_gdevfreq->gov_data.upthreshold = 45;
+ new_gdevfreq->gov_data.downdifferential = 5;
+
+ new_gdevfreq->devfreq = devm_devfreq_add_device(device->dev,
+ &gk20a_devfreq_profile,
+ DEVFREQ_GOV_SIMPLE_ONDEMAND,
+ &new_gdevfreq->gov_data);
+ if (IS_ERR(new_gdevfreq->devfreq))
+ return PTR_ERR(new_gdevfreq->devfreq);
+
+ *gdevfreq = new_gdevfreq;
+
+ return 0;
+}
+
+int gk20a_devfreq_resume(struct device *dev)
+{
+ struct gk20a_devfreq *gdevfreq = dev_to_gk20a_devfreq(dev);
+
+ if (!gdevfreq || !gdevfreq->devfreq)
+ return 0;
+
+ return devfreq_resume_device(gdevfreq->devfreq);
+}
+
+int gk20a_devfreq_suspend(struct device *dev)
+{
+ struct gk20a_devfreq *gdevfreq = dev_to_gk20a_devfreq(dev);
+
+ if (!gdevfreq || !gdevfreq->devfreq)
+ return 0;
+
+ return devfreq_suspend_device(gdevfreq->devfreq);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.h b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.h
new file mode 100644
index 000000000000..5b7ca8a7a5cd
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef __GK20A_DEVFREQ_H__
+#define __GK20A_DEVFREQ_H__
+
+#include <linux/devfreq.h>
+
+struct gk20a_devfreq;
+
+#if defined(CONFIG_PM_DEVFREQ)
+int gk20a_devfreq_init(struct nvkm_clk *base, struct gk20a_devfreq **devfreq);
+
+int gk20a_devfreq_resume(struct device *dev);
+int gk20a_devfreq_suspend(struct device *dev);
+#else
+static inline int gk20a_devfreq_init(struct nvkm_clk *base, struct gk20a_devfreq **devfreq)
+{
+ return 0;
+}
+
+static inline int gk20a_devfreq_resume(struct device dev) { return 0; }
+static inline int gk20a_devfreq_suspend(struct device *dev) { return 0; }
+#endif /* CONFIG_PM_DEVFREQ */
+
+#endif /* __GK20A_DEVFREQ_H__ */
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c
index 7c33542f651b..fa8ca53acbd1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c
@@ -27,6 +27,7 @@
#include <core/tegra.h>
#include "priv.h"
+#include "gk20a_devfreq.h"
#include "gk20a.h"
#define GPCPLL_CFG_SYNC_MODE BIT(2)
@@ -869,6 +870,10 @@ gm20b_clk_init(struct nvkm_clk *base)
return ret;
}
+ ret = gk20a_devfreq_init(base, &clk->devfreq);
+ if (ret)
+ return ret;
+
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.c
new file mode 100644
index 000000000000..492b62c0ee96
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: MIT
+#include <subdev/clk.h>
+#include <subdev/timer.h>
+#include <core/device.h>
+#include <core/tegra.h>
+
+#include "priv.h"
+#include "gk20a_devfreq.h"
+#include "gk20a.h"
+#include "gp10b.h"
+
+static int
+gp10b_clk_init(struct nvkm_clk *base)
+{
+ struct gp10b_clk *clk = gp10b_clk(base);
+ struct nvkm_subdev *subdev = &clk->base.subdev;
+ int ret;
+
+ /* Start with the highest frequency, matching the BPMP default */
+ base->func->calc(base, &base->func->pstates[base->func->nr_pstates - 1].base);
+ ret = base->func->prog(base);
+ if (ret) {
+ nvkm_error(subdev, "cannot initialize clock\n");
+ return ret;
+ }
+
+ ret = gk20a_devfreq_init(base, &clk->devfreq);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int
+gp10b_clk_read(struct nvkm_clk *base, enum nv_clk_src src)
+{
+ struct gp10b_clk *clk = gp10b_clk(base);
+ struct nvkm_subdev *subdev = &clk->base.subdev;
+
+ switch (src) {
+ case nv_clk_src_gpc:
+ return clk_get_rate(clk->clk) / GK20A_CLK_GPC_MDIV;
+ default:
+ nvkm_error(subdev, "invalid clock source %d\n", src);
+ return -EINVAL;
+ }
+}
+
+static int
+gp10b_clk_calc(struct nvkm_clk *base, struct nvkm_cstate *cstate)
+{
+ struct gp10b_clk *clk = gp10b_clk(base);
+ u32 target_rate = cstate->domain[nv_clk_src_gpc] * GK20A_CLK_GPC_MDIV;
+
+ clk->new_rate = clk_round_rate(clk->clk, target_rate) / GK20A_CLK_GPC_MDIV;
+
+ return 0;
+}
+
+static int
+gp10b_clk_prog(struct nvkm_clk *base)
+{
+ struct gp10b_clk *clk = gp10b_clk(base);
+ int ret;
+
+ ret = clk_set_rate(clk->clk, clk->new_rate * GK20A_CLK_GPC_MDIV);
+ if (ret < 0)
+ return ret;
+
+ clk->rate = clk_get_rate(clk->clk) / GK20A_CLK_GPC_MDIV;
+
+ return 0;
+}
+
+static struct nvkm_pstate
+gp10b_pstates[] = {
+ {
+ .base = {
+ .domain[nv_clk_src_gpc] = 114750,
+ },
+ },
+ {
+ .base = {
+ .domain[nv_clk_src_gpc] = 216750,
+ },
+ },
+ {
+ .base = {
+ .domain[nv_clk_src_gpc] = 318750,
+ },
+ },
+ {
+ .base = {
+ .domain[nv_clk_src_gpc] = 420750,
+ },
+ },
+ {
+ .base = {
+ .domain[nv_clk_src_gpc] = 522750,
+ },
+ },
+ {
+ .base = {
+ .domain[nv_clk_src_gpc] = 624750,
+ },
+ },
+ {
+ .base = {
+ .domain[nv_clk_src_gpc] = 726750,
+ },
+ },
+ {
+ .base = {
+ .domain[nv_clk_src_gpc] = 828750,
+ },
+ },
+ {
+ .base = {
+ .domain[nv_clk_src_gpc] = 930750,
+ },
+ },
+ {
+ .base = {
+ .domain[nv_clk_src_gpc] = 1032750,
+ },
+ },
+ {
+ .base = {
+ .domain[nv_clk_src_gpc] = 1134750,
+ },
+ },
+ {
+ .base = {
+ .domain[nv_clk_src_gpc] = 1236750,
+ },
+ },
+ {
+ .base = {
+ .domain[nv_clk_src_gpc] = 1300500,
+ },
+ },
+};
+
+static const struct nvkm_clk_func
+gp10b_clk = {
+ .init = gp10b_clk_init,
+ .read = gp10b_clk_read,
+ .calc = gp10b_clk_calc,
+ .prog = gp10b_clk_prog,
+ .tidy = gk20a_clk_tidy,
+ .pstates = gp10b_pstates,
+ .nr_pstates = ARRAY_SIZE(gp10b_pstates),
+ .domains = {
+ { nv_clk_src_gpc, 0xff, 0, "core", GK20A_CLK_GPC_MDIV },
+ { nv_clk_src_max }
+ }
+};
+
+int
+gp10b_clk_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
+ struct nvkm_clk **pclk)
+{
+ struct nvkm_device_tegra *tdev = device->func->tegra(device);
+ const struct nvkm_clk_func *func = &gp10b_clk;
+ struct gp10b_clk *clk;
+ int ret, i;
+
+ clk = kzalloc(sizeof(*clk), GFP_KERNEL);
+ if (!clk)
+ return -ENOMEM;
+ *pclk = &clk->base;
+ clk->clk = tdev->clk;
+
+ /* Finish initializing the pstates */
+ for (i = 0; i < func->nr_pstates; i++) {
+ INIT_LIST_HEAD(&func->pstates[i].list);
+ func->pstates[i].pstate = i + 1;
+ }
+
+ ret = nvkm_clk_ctor(func, device, type, inst, true, &clk->base);
+ if (ret)
+ return ret;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.h b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.h
new file mode 100644
index 000000000000..178e3bcdbbf7
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef __NVKM_CLK_GP10B_H__
+#define __NVKM_CLK_GP10B_H__
+
+struct gp10b_clk {
+ /* currently applied parameters */
+ struct nvkm_clk base;
+ struct gk20a_devfreq *devfreq;
+ struct clk *clk;
+ u32 rate;
+
+ /* new parameters to apply */
+ u32 new_rate;
+};
+
+#define gp10b_clk(p) container_of((p), struct gp10b_clk, base)
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
index 8a286a9349ac..7ce1b65e2c1c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
@@ -279,7 +279,7 @@ nvkm_fb_ctor(const struct nvkm_fb_func *func, struct nvkm_device *device,
mutex_init(&fb->tags.mutex);
if (func->sysmem.flush_page_init) {
- fb->sysmem.flush_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ fb->sysmem.flush_page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
if (!fb->sysmem.flush_page)
return -ENOMEM;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb100.c
index 1c78c8853617..170776cc82fb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb100.c
@@ -15,6 +15,9 @@ gb100_fb_sysmem_flush_page_init(struct nvkm_fb *fb)
const u32 hshub = DRF_LO(NV_PFB_HSHUB0);
struct nvkm_device *device = fb->subdev.device;
+ // Ensure that the address is within hardware limits
+ WARN_ON(fb->sysmem.flush_page_addr > DMA_BIT_MASK(52));
+
nvkm_wr32(device, hshub + NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_HI, addr_hi);
nvkm_wr32(device, hshub + NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_LO, addr_lo);
nvkm_wr32(device, hshub + NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_HI, addr_hi);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb202.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb202.c
index 848505026d02..a21bf19e1041 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb202.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb202.c
@@ -13,6 +13,9 @@ gb202_fb_sysmem_flush_page_init(struct nvkm_fb *fb)
struct nvkm_device *device = fb->subdev.device;
const u64 addr = fb->sysmem.flush_page_addr;
+ // Ensure that the address is within hardware limits
+ WARN_ON(fb->sysmem.flush_page_addr > DMA_BIT_MASK(52));
+
nvkm_wr32(device, NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI, upper_32_bits(addr));
nvkm_wr32(device, NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO, lower_32_bits(addr));
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
index 07db9b397ac1..64281a09fb39 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
@@ -80,6 +80,9 @@ gf100_fb_init_page(struct nvkm_fb *fb)
void
gf100_fb_sysmem_flush_page_init(struct nvkm_fb *fb)
{
+ // Ensure that the address can actually fit in the register
+ WARN_ON(fb->sysmem.flush_page_addr > DMA_BIT_MASK(40));
+
nvkm_wr32(fb->subdev.device, 0x100c10, fb->sysmem.flush_page_addr >> 8);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gh100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gh100.c
index 2d8c51f882d5..8c9394048f25 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gh100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gh100.c
@@ -13,6 +13,9 @@ gh100_fb_sysmem_flush_page_init(struct nvkm_fb *fb)
const u64 addr = fb->sysmem.flush_page_addr >> NV_PFB_NISO_FLUSH_SYSMEM_ADDR_SHIFT;
struct nvkm_device *device = fb->subdev.device;
+ // Ensure that the address is within hardware limits
+ WARN_ON(fb->sysmem.flush_page_addr > DMA_BIT_MASK(52));
+
nvkm_wr32(device, NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI, upper_32_bits(addr));
nvkm_wr32(device, NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_LO, lower_32_bits(addr));
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c
index a6efbd913c13..076d968b7297 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c
@@ -214,6 +214,9 @@ nv50_fb_tags(struct nvkm_fb *base)
static void
nv50_fb_sysmem_flush_page_init(struct nvkm_fb *fb)
{
+ // Ensure that the address can actually fit in the register
+ WARN_ON(fb->sysmem.flush_page_addr > DMA_BIT_MASK(40));
+
nvkm_wr32(fb->subdev.device, 0x100c08, fb->sysmem.flush_page_addr >> 8);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
index 851fd847a2a9..ed15a4475181 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
@@ -21,9 +21,7 @@
*/
#include "vmm.h"
-#include <core/client.h>
#include <subdev/fb.h>
-#include <subdev/ltc.h>
#include <subdev/timer.h>
#include <engine/gr.h>
@@ -111,13 +109,33 @@ gp100_vmm_pgt_pfn(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt,
nvkm_done(pt->memory);
}
+static inline u64
+gp100_vmm_comptag_nr(u64 size)
+{
+ return size >> 16; /* One comptag per 64KiB VRAM. */
+}
+
+static inline u64
+gp100_vmm_pte_comptagline_base(u64 addr)
+{
+ /* RM allocates enough comptags for all of VRAM, so use a 1:1 mapping. */
+ return (1 + gp100_vmm_comptag_nr(addr)) << 36; /* NV_MMU_VER2_PTE_COMPTAGLINE */
+}
+
+static inline u64
+gp100_vmm_pte_comptagline_incr(u32 page_size)
+{
+ return gp100_vmm_comptag_nr(page_size) << 36; /* NV_MMU_VER2_PTE_COMPTAGLINE */
+}
+
static inline void
gp100_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt,
u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr)
{
u64 data = (addr >> 4) | map->type;
- map->type += ptes * map->ctag;
+ if (map->ctag)
+ data |= gp100_vmm_pte_comptagline_base(addr);
while (ptes--) {
VMM_WO064(pt, vmm, ptei++ * 8, data);
@@ -142,7 +160,6 @@ gp100_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt,
while (ptes--) {
const u64 data = (*map->dma++ >> 4) | map->type;
VMM_WO064(pt, vmm, ptei++ * 8, data);
- map->type += map->ctag;
}
nvkm_done(pt->memory);
return;
@@ -200,7 +217,8 @@ gp100_vmm_pd0_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt,
{
u64 data = (addr >> 4) | map->type;
- map->type += ptes * map->ctag;
+ if (map->ctag)
+ data |= gp100_vmm_pte_comptagline_base(addr);
while (ptes--) {
VMM_WO128(pt, vmm, ptei++ * 0x10, data, 0ULL);
@@ -411,8 +429,6 @@ gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc,
struct gp100_vmm_map_vn vn;
struct gp100_vmm_map_v0 v0;
} *args = argv;
- struct nvkm_device *device = vmm->mmu->subdev.device;
- struct nvkm_memory *memory = map->memory;
u8 kind, kind_inv, priv, ro, vol;
int kindn, aper, ret = -ENOSYS;
const u8 *kindm;
@@ -449,29 +465,24 @@ gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc,
return -EINVAL;
}
+ /* Handle compression. */
if (kindm[kind] != kind) {
- u64 tags = nvkm_memory_size(memory) >> 16;
- if (aper != 0 || !(page->type & NVKM_VMM_PAGE_COMP)) {
- VMM_DEBUG(vmm, "comp %d %02x", aper, page->type);
- return -EINVAL;
- }
-
- if (!map->no_comp) {
- ret = nvkm_memory_tags_get(memory, device, tags,
- nvkm_ltc_tags_clear,
- &map->tags);
- if (ret) {
- VMM_DEBUG(vmm, "comp %d", ret);
- return ret;
+ struct nvkm_device *device = vmm->mmu->subdev.device;
+
+ /* Compression is only supported when using GSP-RM, as
+ * PMU firmware is required in order to initialise the
+ * compbit backing store.
+ */
+ if (nvkm_gsp_rm(device->gsp)) {
+ /* Turing GPUs require PTE_COMPTAGLINE to be filled,
+ * in addition to specifying a compressed kind.
+ */
+ if (device->card_type < GA100) {
+ map->ctag = gp100_vmm_pte_comptagline_incr(1 << map->page->shift);
+ map->next |= map->ctag;
}
- }
-
- if (!map->no_comp && map->tags->mn) {
- tags = map->tags->mn->offset + (map->offset >> 16);
- map->ctag |= ((1ULL << page->shift) >> 16) << 36;
- map->type |= tags << 36;
- map->next |= map->ctag;
} else {
+ /* Revert to non-compressed kind. */
kind = kindm[kind];
}
}
@@ -592,8 +603,8 @@ gp100_vmm = {
{ 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx },
{ 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx },
{ 29, &gp100_vmm_desc_16[2], NVKM_VMM_PAGE_Sxxx },
- { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SVxC },
- { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SVxC },
+ { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SVxx },
+ { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SVxx },
{ 12, &gp100_vmm_desc_12[0], NVKM_VMM_PAGE_SVHx },
{}
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c
index e081239afe58..5791d134962b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c
@@ -34,8 +34,8 @@ gp10b_vmm = {
{ 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx },
{ 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx },
{ 29, &gp100_vmm_desc_16[2], NVKM_VMM_PAGE_Sxxx },
- { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SxHC },
- { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SxHC },
+ { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SxHx },
+ { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SxHx },
{ 12, &gp100_vmm_desc_12[0], NVKM_VMM_PAGE_SxHx },
{}
}