diff options
Diffstat (limited to 'drivers/gpu/host1x')
| -rw-r--r-- | drivers/gpu/host1x/bus.c | 12 | ||||
| -rw-r--r-- | drivers/gpu/host1x/dev.c | 20 | ||||
| -rw-r--r-- | drivers/gpu/host1x/dev.h | 3 | ||||
| -rw-r--r-- | drivers/gpu/host1x/hw/channel_hw.c | 106 | ||||
| -rw-r--r-- | drivers/gpu/host1x/hw/intr_hw.c | 56 | ||||
| -rw-r--r-- | drivers/gpu/host1x/syncpt.c | 4 |
6 files changed, 141 insertions, 60 deletions
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index 344cc9e741c1..723a80895cd4 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -471,6 +471,18 @@ static int host1x_device_add(struct host1x *host1x, mutex_unlock(&clients_lock); + /* + * Add device even if there are no subdevs to ensure syncpoint functionality + * is available regardless of whether any engine subdevices are present + */ + if (list_empty(&device->subdevs)) { + err = device_add(&device->dev); + if (err < 0) + dev_err(&device->dev, "failed to add device: %d\n", err); + else + device->registered = true; + } + return 0; } diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 1f93e5e276c0..3f475f0e6545 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -71,6 +71,15 @@ u32 host1x_sync_readl(struct host1x *host1x, u32 r) return readl(sync_regs + r); } +#ifdef CONFIG_64BIT +u64 host1x_sync_readq(struct host1x *host1x, u32 r) +{ + void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset; + + return readq(sync_regs + r); +} +#endif + void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r) { writel(v, ch->regs + r); @@ -585,14 +594,8 @@ static int host1x_probe(struct platform_device *pdev) } host->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(host->clk)) { - err = PTR_ERR(host->clk); - - if (err != -EPROBE_DEFER) - dev_err(&pdev->dev, "failed to get clock: %d\n", err); - - return err; - } + if (IS_ERR(host->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(host->clk), "failed to get clock\n"); err = host1x_get_resets(host); if (err) @@ -821,6 +824,7 @@ u64 host1x_get_dma_mask(struct host1x *host1x) } EXPORT_SYMBOL(host1x_get_dma_mask); +MODULE_SOFTDEP("post: tegra-drm"); MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>"); MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>"); MODULE_DESCRIPTION("Host1x driver for Tegra products"); diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index d3855a1c6b47..ef44618ed88a 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -179,6 +179,9 @@ void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r); u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r); void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r); u32 host1x_sync_readl(struct host1x *host1x, u32 r); +#ifdef CONFIG_64BIT +u64 host1x_sync_readq(struct host1x *host1x, u32 r); +#endif void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r); u32 host1x_ch_readl(struct host1x_channel *ch, u32 r); diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index d44b8de890be..2df6a16d484e 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -47,24 +47,11 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo, } } -static void submit_wait(struct host1x_job *job, u32 id, u32 threshold, - u32 next_class) +static void submit_wait(struct host1x_job *job, u32 id, u32 threshold) { struct host1x_cdma *cdma = &job->channel->cdma; -#if HOST1X_HW >= 6 - u32 stream_id; - - /* - * If a memory context has been set, use it. Otherwise - * (if context isolation is disabled) use the engine's - * firmware stream ID. - */ - if (job->memory_context) - stream_id = job->memory_context->stream_id; - else - stream_id = job->engine_fallback_streamid; - +#if HOST1X_HW >= 2 host1x_cdma_push_wide(cdma, host1x_opcode_setclass( HOST1X_CLASS_HOST1X, @@ -76,23 +63,6 @@ static void submit_wait(struct host1x_job *job, u32 id, u32 threshold, id, HOST1X_OPCODE_NOP ); - host1x_cdma_push_wide(&job->channel->cdma, - host1x_opcode_setclass(job->class, 0, 0), - host1x_opcode_setpayload(stream_id), - host1x_opcode_setstreamid(job->engine_streamid_offset / 4), - HOST1X_OPCODE_NOP); -#elif HOST1X_HW >= 2 - host1x_cdma_push_wide(cdma, - host1x_opcode_setclass( - HOST1X_CLASS_HOST1X, - HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32, - /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */ - BIT(0) | BIT(2) - ), - threshold, - id, - host1x_opcode_setclass(next_class, 0, 0) - ); #else /* TODO add waitchk or use waitbases or other mitigation */ host1x_cdma_push(cdma, @@ -103,6 +73,32 @@ static void submit_wait(struct host1x_job *job, u32 id, u32 threshold, ), host1x_class_host_wait_syncpt(id, threshold) ); +#endif +} + +static void submit_setclass(struct host1x_job *job, u32 next_class) +{ + struct host1x_cdma *cdma = &job->channel->cdma; + +#if HOST1X_HW >= 6 + u32 stream_id; + + /* + * If a memory context has been set, use it. Otherwise + * (if context isolation is disabled) use the engine's + * firmware stream ID. + */ + if (job->memory_context) + stream_id = job->memory_context->stream_id; + else + stream_id = job->engine_fallback_streamid; + + host1x_cdma_push_wide(cdma, + host1x_opcode_setclass(next_class, 0, 0), + host1x_opcode_setpayload(stream_id), + host1x_opcode_setstreamid(job->engine_streamid_offset / 4), + HOST1X_OPCODE_NOP); +#else host1x_cdma_push(cdma, host1x_opcode_setclass(next_class, 0, 0), HOST1X_OPCODE_NOP @@ -110,7 +106,8 @@ static void submit_wait(struct host1x_job *job, u32 id, u32 threshold, #endif } -static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base) +static void submit_gathers(struct host1x_job *job, struct host1x_job_cmd *cmds, u32 num_cmds, + u32 job_syncpt_base) { struct host1x_cdma *cdma = &job->channel->cdma; #if HOST1X_HW < 6 @@ -119,8 +116,8 @@ static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base) unsigned int i; u32 threshold; - for (i = 0; i < job->num_cmds; i++) { - struct host1x_job_cmd *cmd = &job->cmds[i]; + for (i = 0; i < num_cmds; i++) { + struct host1x_job_cmd *cmd = &cmds[i]; if (cmd->is_wait) { if (cmd->wait.relative) @@ -128,7 +125,8 @@ static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base) else threshold = cmd->wait.threshold; - submit_wait(job, cmd->wait.id, threshold, cmd->wait.next_class); + submit_wait(job, cmd->wait.id, threshold); + submit_setclass(job, cmd->wait.next_class); } else { struct host1x_job_gather *g = &cmd->gather; @@ -216,7 +214,34 @@ static void channel_program_cdma(struct host1x_job *job) #if HOST1X_HW >= 6 u32 fence; + int i = 0; + + if (job->num_cmds == 0) + goto prefences_done; + if (!job->cmds[0].is_wait || job->cmds[0].wait.relative) + goto prefences_done; + + /* Enter host1x class with invalid stream ID for prefence waits. */ + host1x_cdma_push_wide(cdma, + host1x_opcode_acquire_mlock(1), + host1x_opcode_setclass(1, 0, 0), + host1x_opcode_setpayload(0), + host1x_opcode_setstreamid(0x1fffff)); + + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_cmd *cmd = &job->cmds[i]; + + if (!cmd->is_wait || cmd->wait.relative) + break; + + submit_wait(job, cmd->wait.id, cmd->wait.threshold); + } + + host1x_cdma_push(cdma, + HOST1X_OPCODE_NOP, + host1x_opcode_release_mlock(1)); +prefences_done: /* Enter engine class with invalid stream ID. */ host1x_cdma_push_wide(cdma, host1x_opcode_acquire_mlock(job->class), @@ -230,11 +255,12 @@ static void channel_program_cdma(struct host1x_job *job) host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1), HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) | HOST1X_UCLASS_INCR_SYNCPT_COND_F(4)); - submit_wait(job, job->syncpt->id, fence, job->class); + submit_wait(job, job->syncpt->id, fence); + submit_setclass(job, job->class); /* Submit work. */ job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs); - submit_gathers(job, job->syncpt_end - job->syncpt_incrs); + submit_gathers(job, job->cmds + i, job->num_cmds - i, job->syncpt_end - job->syncpt_incrs); /* Before releasing MLOCK, ensure engine is idle again. */ fence = host1x_syncpt_incr_max(sp, 1); @@ -242,7 +268,7 @@ static void channel_program_cdma(struct host1x_job *job) host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1), HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) | HOST1X_UCLASS_INCR_SYNCPT_COND_F(4)); - submit_wait(job, job->syncpt->id, fence, job->class); + submit_wait(job, job->syncpt->id, fence); /* Release MLOCK. */ host1x_cdma_push(cdma, @@ -272,7 +298,7 @@ static void channel_program_cdma(struct host1x_job *job) job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs); - submit_gathers(job, job->syncpt_end - job->syncpt_incrs); + submit_gathers(job, job->cmds, job->num_cmds, job->syncpt_end - job->syncpt_incrs); #endif } diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c index 415f8d7e4202..bd5b5ef62f35 100644 --- a/drivers/gpu/host1x/hw/intr_hw.c +++ b/drivers/gpu/host1x/hw/intr_hw.c @@ -11,26 +11,64 @@ #include "../intr.h" #include "../dev.h" +static void process_32_syncpts(struct host1x *host, unsigned long val, u32 reg_offset) +{ + unsigned int id; + + if (!val) + return; + + host1x_sync_writel(host, val, HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(reg_offset)); + host1x_sync_writel(host, val, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(reg_offset)); + + for_each_set_bit(id, &val, 32) + host1x_intr_handle_interrupt(host, reg_offset * 32 + id); +} + static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id) { struct host1x_intr_irq_data *irq_data = dev_id; struct host1x *host = irq_data->host; unsigned long reg; - unsigned int i, id; + unsigned int i; +#if !defined(CONFIG_64BIT) for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 32); i += host->num_syncpt_irqs) { reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); - host1x_sync_writel(host, reg, - HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(i)); - host1x_sync_writel(host, reg, + process_32_syncpts(host, reg, i); + } +#elif HOST1X_HW == 6 || HOST1X_HW == 7 + /* + * Tegra186 and Tegra194 have the first INT_STATUS register not 64-bit aligned, + * and only have one interrupt line. + */ + reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(0)); + process_32_syncpts(host, reg, 0); + + for (i = 1; i < (host->info->nb_pts / 32) - 1; i += 2) { + reg = host1x_sync_readq(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); - for_each_set_bit(id, ®, 32) - host1x_intr_handle_interrupt(host, i * 32 + id); + process_32_syncpts(host, lower_32_bits(reg), i); + process_32_syncpts(host, upper_32_bits(reg), i + 1); + } + + reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); + process_32_syncpts(host, reg, i); +#else + /* All 64-bit capable SoCs have number of syncpoints divisible by 64 */ + for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 64); + i += host->num_syncpt_irqs) { + reg = host1x_sync_readq(host, + HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i * 2)); + + process_32_syncpts(host, lower_32_bits(reg), i * 2 + 0); + process_32_syncpts(host, upper_32_bits(reg), i * 2 + 1); } +#endif return IRQ_HANDLED; } @@ -68,12 +106,12 @@ host1x_intr_init_host_sync(struct host1x *host, u32 cpm) /* * Program threshold interrupt destination among 8 lines per VM, - * per syncpoint. For each group of 32 syncpoints (corresponding to one - * interrupt status register), direct to one interrupt line, going + * per syncpoint. For each group of 64 syncpoints (corresponding to two + * interrupt status registers), direct to one interrupt line, going * around in a round robin fashion. */ for (id = 0; id < host->info->nb_pts; id++) { - u32 reg_offset = id / 32; + u32 reg_offset = id / 64; u32 irq_index = reg_offset % host->num_syncpt_irqs; host1x_sync_writel(host, irq_index, HOST1X_SYNC_SYNCPT_INTR_DEST(id)); diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index f63d14a57a1d..acc7d82e0585 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -345,8 +345,6 @@ static void syncpt_release(struct kref *ref) sp->locked = false; - mutex_lock(&sp->host->syncpt_mutex); - host1x_syncpt_base_free(sp->base); kfree(sp->name); sp->base = NULL; @@ -369,7 +367,7 @@ void host1x_syncpt_put(struct host1x_syncpt *sp) if (!sp) return; - kref_put(&sp->ref, syncpt_release); + kref_put_mutex(&sp->ref, syncpt_release, &sp->host->syncpt_mutex); } EXPORT_SYMBOL(host1x_syncpt_put); |
