summary | refs | log | tree | commit | diff
path: root/drivers/gpu/host1x
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/host1x')
-rw-r--r-- drivers/gpu/host1x/bus.c 12
-rw-r--r-- drivers/gpu/host1x/dev.c 20
-rw-r--r-- drivers/gpu/host1x/dev.h 3
-rw-r--r-- drivers/gpu/host1x/hw/channel_hw.c 106
-rw-r--r-- drivers/gpu/host1x/hw/intr_hw.c 56
-rw-r--r-- drivers/gpu/host1x/syncpt.c 4
6 files changed, 141 insertions, 60 deletions
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 344cc9e741c1..723a80895cd4 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -471,6 +471,18 @@ static int host1x_device_add(struct host1x *host1x,
mutex_unlock(&clients_lock);
+ /*
+ * Add device even if there are no subdevs to ensure syncpoint functionality
+ * is available regardless of whether any engine subdevices are present
+ */
+ if (list_empty(&device->subdevs)) {
+ err = device_add(&device->dev);
+ if (err < 0)
+ dev_err(&device->dev, "failed to add device: %d\n", err);
+ else
+ device->registered = true;
+ }
+
return 0;
}
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 1f93e5e276c0..3f475f0e6545 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -71,6 +71,15 @@ u32 host1x_sync_readl(struct host1x *host1x, u32 r)
return readl(sync_regs + r);
}
+#ifdef CONFIG_64BIT
+u64 host1x_sync_readq(struct host1x *host1x, u32 r)
+{
+ void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset;
+
+ return readq(sync_regs + r);
+}
+#endif
+
void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r)
{
writel(v, ch->regs + r);
@@ -585,14 +594,8 @@ static int host1x_probe(struct platform_device *pdev)
}
host->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(host->clk)) {
- err = PTR_ERR(host->clk);
-
- if (err != -EPROBE_DEFER)
- dev_err(&pdev->dev, "failed to get clock: %d\n", err);
-
- return err;
- }
+ if (IS_ERR(host->clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(host->clk), "failed to get clock\n");
err = host1x_get_resets(host);
if (err)
@@ -821,6 +824,7 @@ u64 host1x_get_dma_mask(struct host1x *host1x)
}
EXPORT_SYMBOL(host1x_get_dma_mask);
+MODULE_SOFTDEP("post: tegra-drm");
MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>");
MODULE_DESCRIPTION("Host1x driver for Tegra products");
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index d3855a1c6b47..ef44618ed88a 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -179,6 +179,9 @@ void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r);
u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r);
void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r);
u32 host1x_sync_readl(struct host1x *host1x, u32 r);
+#ifdef CONFIG_64BIT
+u64 host1x_sync_readq(struct host1x *host1x, u32 r);
+#endif
void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r);
u32 host1x_ch_readl(struct host1x_channel *ch, u32 r);
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index d44b8de890be..2df6a16d484e 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -47,24 +47,11 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
}
}
-static void submit_wait(struct host1x_job *job, u32 id, u32 threshold,
- u32 next_class)
+static void submit_wait(struct host1x_job *job, u32 id, u32 threshold)
{
struct host1x_cdma *cdma = &job->channel->cdma;
-#if HOST1X_HW >= 6
- u32 stream_id;
-
- /*
- * If a memory context has been set, use it. Otherwise
- * (if context isolation is disabled) use the engine's
- * firmware stream ID.
- */
- if (job->memory_context)
- stream_id = job->memory_context->stream_id;
- else
- stream_id = job->engine_fallback_streamid;
-
+#if HOST1X_HW >= 2
host1x_cdma_push_wide(cdma,
host1x_opcode_setclass(
HOST1X_CLASS_HOST1X,
@@ -76,23 +63,6 @@ static void submit_wait(struct host1x_job *job, u32 id, u32 threshold,
id,
HOST1X_OPCODE_NOP
);
- host1x_cdma_push_wide(&job->channel->cdma,
- host1x_opcode_setclass(job->class, 0, 0),
- host1x_opcode_setpayload(stream_id),
- host1x_opcode_setstreamid(job->engine_streamid_offset / 4),
- HOST1X_OPCODE_NOP);
-#elif HOST1X_HW >= 2
- host1x_cdma_push_wide(cdma,
- host1x_opcode_setclass(
- HOST1X_CLASS_HOST1X,
- HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32,
- /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */
- BIT(0) | BIT(2)
- ),
- threshold,
- id,
- host1x_opcode_setclass(next_class, 0, 0)
- );
#else
/* TODO add waitchk or use waitbases or other mitigation */
host1x_cdma_push(cdma,
@@ -103,6 +73,32 @@ static void submit_wait(struct host1x_job *job, u32 id, u32 threshold,
),
host1x_class_host_wait_syncpt(id, threshold)
);
+#endif
+}
+
+static void submit_setclass(struct host1x_job *job, u32 next_class)
+{
+ struct host1x_cdma *cdma = &job->channel->cdma;
+
+#if HOST1X_HW >= 6
+ u32 stream_id;
+
+ /*
+ * If a memory context has been set, use it. Otherwise
+ * (if context isolation is disabled) use the engine's
+ * firmware stream ID.
+ */
+ if (job->memory_context)
+ stream_id = job->memory_context->stream_id;
+ else
+ stream_id = job->engine_fallback_streamid;
+
+ host1x_cdma_push_wide(cdma,
+ host1x_opcode_setclass(next_class, 0, 0),
+ host1x_opcode_setpayload(stream_id),
+ host1x_opcode_setstreamid(job->engine_streamid_offset / 4),
+ HOST1X_OPCODE_NOP);
+#else
host1x_cdma_push(cdma,
host1x_opcode_setclass(next_class, 0, 0),
HOST1X_OPCODE_NOP
@@ -110,7 +106,8 @@ static void submit_wait(struct host1x_job *job, u32 id, u32 threshold,
#endif
}
-static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base)
+static void submit_gathers(struct host1x_job *job, struct host1x_job_cmd *cmds, u32 num_cmds,
+ u32 job_syncpt_base)
{
struct host1x_cdma *cdma = &job->channel->cdma;
#if HOST1X_HW < 6
@@ -119,8 +116,8 @@ static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base)
unsigned int i;
u32 threshold;
- for (i = 0; i < job->num_cmds; i++) {
- struct host1x_job_cmd *cmd = &job->cmds[i];
+ for (i = 0; i < num_cmds; i++) {
+ struct host1x_job_cmd *cmd = &cmds[i];
if (cmd->is_wait) {
if (cmd->wait.relative)
@@ -128,7 +125,8 @@ static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base)
else
threshold = cmd->wait.threshold;
- submit_wait(job, cmd->wait.id, threshold, cmd->wait.next_class);
+ submit_wait(job, cmd->wait.id, threshold);
+ submit_setclass(job, cmd->wait.next_class);
} else {
struct host1x_job_gather *g = &cmd->gather;
@@ -216,7 +214,34 @@ static void channel_program_cdma(struct host1x_job *job)
#if HOST1X_HW >= 6
u32 fence;
+ int i = 0;
+
+ if (job->num_cmds == 0)
+ goto prefences_done;
+ if (!job->cmds[0].is_wait || job->cmds[0].wait.relative)
+ goto prefences_done;
+
+ /* Enter host1x class with invalid stream ID for prefence waits. */
+ host1x_cdma_push_wide(cdma,
+ host1x_opcode_acquire_mlock(1),
+ host1x_opcode_setclass(1, 0, 0),
+ host1x_opcode_setpayload(0),
+ host1x_opcode_setstreamid(0x1fffff));
+
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_job_cmd *cmd = &job->cmds[i];
+
+ if (!cmd->is_wait || cmd->wait.relative)
+ break;
+
+ submit_wait(job, cmd->wait.id, cmd->wait.threshold);
+ }
+
+ host1x_cdma_push(cdma,
+ HOST1X_OPCODE_NOP,
+ host1x_opcode_release_mlock(1));
+prefences_done:
/* Enter engine class with invalid stream ID. */
host1x_cdma_push_wide(cdma,
host1x_opcode_acquire_mlock(job->class),
@@ -230,11 +255,12 @@ static void channel_program_cdma(struct host1x_job *job)
host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1),
HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) |
HOST1X_UCLASS_INCR_SYNCPT_COND_F(4));
- submit_wait(job, job->syncpt->id, fence, job->class);
+ submit_wait(job, job->syncpt->id, fence);
+ submit_setclass(job, job->class);
/* Submit work. */
job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs);
- submit_gathers(job, job->syncpt_end - job->syncpt_incrs);
+ submit_gathers(job, job->cmds + i, job->num_cmds - i, job->syncpt_end - job->syncpt_incrs);
/* Before releasing MLOCK, ensure engine is idle again. */
fence = host1x_syncpt_incr_max(sp, 1);
@@ -242,7 +268,7 @@ static void channel_program_cdma(struct host1x_job *job)
host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1),
HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) |
HOST1X_UCLASS_INCR_SYNCPT_COND_F(4));
- submit_wait(job, job->syncpt->id, fence, job->class);
+ submit_wait(job, job->syncpt->id, fence);
/* Release MLOCK. */
host1x_cdma_push(cdma,
@@ -272,7 +298,7 @@ static void channel_program_cdma(struct host1x_job *job)
job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs);
- submit_gathers(job, job->syncpt_end - job->syncpt_incrs);
+ submit_gathers(job, job->cmds, job->num_cmds, job->syncpt_end - job->syncpt_incrs);
#endif
}
diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
index 415f8d7e4202..bd5b5ef62f35 100644
--- a/drivers/gpu/host1x/hw/intr_hw.c
+++ b/drivers/gpu/host1x/hw/intr_hw.c
@@ -11,26 +11,64 @@
#include "../intr.h"
#include "../dev.h"
+static void process_32_syncpts(struct host1x *host, unsigned long val, u32 reg_offset)
+{
+ unsigned int id;
+
+ if (!val)
+ return;
+
+ host1x_sync_writel(host, val, HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(reg_offset));
+ host1x_sync_writel(host, val, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(reg_offset));
+
+ for_each_set_bit(id, &val, 32)
+ host1x_intr_handle_interrupt(host, reg_offset * 32 + id);
+}
+
static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
{
struct host1x_intr_irq_data *irq_data = dev_id;
struct host1x *host = irq_data->host;
unsigned long reg;
- unsigned int i, id;
+ unsigned int i;
+#if !defined(CONFIG_64BIT)
for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 32);
i += host->num_syncpt_irqs) {
reg = host1x_sync_readl(host,
HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
- host1x_sync_writel(host, reg,
- HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(i));
- host1x_sync_writel(host, reg,
+ process_32_syncpts(host, reg, i);
+ }
+#elif HOST1X_HW == 6 || HOST1X_HW == 7
+ /*
+ * Tegra186 and Tegra194 have the first INT_STATUS register not 64-bit aligned,
+ * and only have one interrupt line.
+ */
+ reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(0));
+ process_32_syncpts(host, reg, 0);
+
+ for (i = 1; i < (host->info->nb_pts / 32) - 1; i += 2) {
+ reg = host1x_sync_readq(host,
HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
- for_each_set_bit(id, &reg, 32)
- host1x_intr_handle_interrupt(host, i * 32 + id);
+ process_32_syncpts(host, lower_32_bits(reg), i);
+ process_32_syncpts(host, upper_32_bits(reg), i + 1);
+ }
+
+ reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
+ process_32_syncpts(host, reg, i);
+#else
+ /* All 64-bit capable SoCs have number of syncpoints divisible by 64 */
+ for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 64);
+ i += host->num_syncpt_irqs) {
+ reg = host1x_sync_readq(host,
+ HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i * 2));
+
+ process_32_syncpts(host, lower_32_bits(reg), i * 2 + 0);
+ process_32_syncpts(host, upper_32_bits(reg), i * 2 + 1);
}
+#endif
return IRQ_HANDLED;
}
@@ -68,12 +106,12 @@ host1x_intr_init_host_sync(struct host1x *host, u32 cpm)
/*
* Program threshold interrupt destination among 8 lines per VM,
- * per syncpoint. For each group of 32 syncpoints (corresponding to one
- * interrupt status register), direct to one interrupt line, going
+ * per syncpoint. For each group of 64 syncpoints (corresponding to two
+ * interrupt status registers), direct to one interrupt line, going
* around in a round robin fashion.
*/
for (id = 0; id < host->info->nb_pts; id++) {
- u32 reg_offset = id / 32;
+ u32 reg_offset = id / 64;
u32 irq_index = reg_offset % host->num_syncpt_irqs;
host1x_sync_writel(host, irq_index, HOST1X_SYNC_SYNCPT_INTR_DEST(id));
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index f63d14a57a1d..acc7d82e0585 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -345,8 +345,6 @@ static void syncpt_release(struct kref *ref)
sp->locked = false;
- mutex_lock(&sp->host->syncpt_mutex);
-
host1x_syncpt_base_free(sp->base);
kfree(sp->name);
sp->base = NULL;
@@ -369,7 +367,7 @@ void host1x_syncpt_put(struct host1x_syncpt *sp)
if (!sp)
return;
- kref_put(&sp->ref, syncpt_release);
+ kref_put_mutex(&sp->ref, syncpt_release, &sp->host->syncpt_mutex);
}
EXPORT_SYMBOL(host1x_syncpt_put);