From 9e410fe3dc9a938bc47f71dff254be7419bd40d2 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 3 Mar 2023 13:34:11 -0800 Subject: dmaengine: idxd: Add descriptor definitions for 16 bytes of pattern in memory fill operation The memory fill operation (0x04) can fill in memory with either 8 bytes or 16 bytes of pattern. To fill in memory with 16 bytes of pattern, the first 8 bytes are provided in pattern lower in bytes 16-23 and the next 8 bytes are in pattern upper in bytes 40-47 in the descriptor. Currently only 8 bytes of pattern is enabled. Add descriptor definitions for pattern lower and pattern upper so that user can use 16 bytes of pattern to fill memory. Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230303213413.3357431-2-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 1d553bedbdb5..c43d7df5fc15 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -180,6 +180,7 @@ struct dsa_hw_desc { uint64_t rdback_addr; uint64_t pattern; uint64_t desc_list_addr; + uint64_t pattern_lower; }; union { uint64_t dst_addr; @@ -244,6 +245,9 @@ struct dsa_hw_desc { uint16_t dest_app_tag_seed; }; + /* Fill */ + uint64_t pattern_upper; + uint8_t op_specific[24]; }; } __attribute__((packed)); -- cgit v1.3 From 12bbc2c2605516e781cd86e3cde9fe1f889b72cc Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 3 Mar 2023 13:34:12 -0800 Subject: dmaengine: idxd: Add descriptor definitions for DIX generate operation The Data Integrity Extension (DIX) generate operation (0x17) computes the Data Integrity Field (DIF) on the source data and writes only the computed DIF for each source block to the PI destination address. Add descriptor definitions for this operation so that user can use DSA to accelerate DIX generate operation. Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230303213413.3357431-3-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index c43d7df5fc15..4c12e93a6aa6 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -78,6 +78,7 @@ enum dsa_opcode { DSA_OPCODE_DIF_INS, DSA_OPCODE_DIF_STRP, DSA_OPCODE_DIF_UPDT, + DSA_OPCODE_DIX_GEN = 0x17, DSA_OPCODE_CFLUSH = 0x20, }; @@ -248,6 +249,17 @@ struct dsa_hw_desc { /* Fill */ uint64_t pattern_upper; + /* DIX generate */ + struct { + uint8_t dix_gen_res; + uint8_t dest_dif_flags; + uint8_t dif_flags; + uint8_t dix_gen_res2[13]; + uint32_t ref_tag_seed; + uint16_t app_tag_mask; + uint16_t app_tag_seed; + }; + uint8_t op_specific[24]; }; } __attribute__((packed)); @@ -326,6 +338,14 @@ struct dsa_completion_record { uint16_t dif_upd_dest_app_tag; }; + /* DIX generate */ + struct { + uint64_t dix_gen_res; + uint32_t dix_ref_tag; + uint16_t dix_app_tag_mask; + uint16_t dix_app_tag; + }; + uint8_t op_specific[16]; }; } __attribute__((packed)); -- cgit v1.3 From 6fec8938b7b4fe2b2c503fe87b2783a50bff0415 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 3 Mar 2023 13:34:13 -0800 Subject: dmaengine: idxd: Add descriptor definitions for translation fetch operation The translation fetch operation (0x0A) fetches address translations for the address range specified in the descriptor by issuing address translation (ATS) requests to the IOMMU. Add descriptor definitions for the operation so that user can use DSA to accelerate translation fetch. Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230303213413.3357431-4-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 4c12e93a6aa6..fc47635b57dc 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -72,6 +72,7 @@ enum dsa_opcode { DSA_OPCODE_CR_DELTA, DSA_OPCODE_AP_DELTA, DSA_OPCODE_DUALCAST, + DSA_OPCODE_TRANSL_FETCH, DSA_OPCODE_CRCGEN = 0x10, DSA_OPCODE_COPY_CRC, DSA_OPCODE_DIF_CHECK, @@ -182,6 +183,7 @@ struct dsa_hw_desc { uint64_t pattern; uint64_t desc_list_addr; uint64_t pattern_lower; + uint64_t transl_fetch_addr; }; union { uint64_t dst_addr; @@ -192,6 +194,7 @@ struct dsa_hw_desc { union { uint32_t xfer_size; uint32_t desc_count; + uint32_t region_size; }; uint16_t int_handle; uint16_t rsvd1; @@ -249,6 +252,12 @@ struct dsa_hw_desc { /* Fill */ uint64_t pattern_upper; + /* Translation fetch */ + struct { + uint64_t transl_fetch_res; + uint32_t region_stride; + }; + /* DIX generate */ struct { uint8_t dix_gen_res; -- cgit v1.3 From f806bea3093cb3568e01b4375d5e1d7c8c47c1d4 Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Thu, 23 Mar 2023 17:31:07 +0530 Subject: dmaengine: ti: k3-udma: Workaround errata i2234 Per [1], UDMA TR15 transactions may hang if ICNT0 is less than 64B Work around is to set EOL flag is to 1 for ICNT0. Since, there is no performance penalty / side effects of setting EOL flag event ICNTO > 64B, just set the flag for all UDMAP TR15 descriptors. [1] https://www.ti.com/lit/er/sprz455a/sprz455a.pdf Errata doc for J721E DRA829/TDA4VM Processors Silicon Revision 1.1/1.0 (Rev. A) Signed-off-by: Vignesh Raghavendra [j-choudhary@ti.com: minor cleanups] Signed-off-by: Jayesh Choudhary Link: https://lore.kernel.org/r/20230323120107.27638-1-j-choudhary@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma.c | 20 +++++++++++--------- include/linux/dma/ti-cppi5.h | 1 + 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index f652a217be76..c07feaff69c0 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -2966,6 +2966,7 @@ udma_prep_slave_sg_triggered_tr(struct udma_chan *uc, struct scatterlist *sgl, struct scatterlist *sgent; struct cppi5_tr_type15_t *tr_req = NULL; enum dma_slave_buswidth dev_width; + u32 csf = CPPI5_TR_CSF_SUPR_EVT; u16 tr_cnt0, tr_cnt1; dma_addr_t dev_addr; struct udma_desc *d; @@ -3036,6 +3037,7 @@ udma_prep_slave_sg_triggered_tr(struct udma_chan *uc, struct scatterlist *sgl, if (uc->ud->match_data->type == DMA_TYPE_UDMA) { asel = 0; + csf |= CPPI5_TR_CSF_EOL_ICNT0; } else { asel = (u64)uc->config.asel << K3_ADDRESS_ASEL_SHIFT; dev_addr |= asel; @@ -3059,7 +3061,7 @@ udma_prep_slave_sg_triggered_tr(struct udma_chan *uc, struct scatterlist *sgl, cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE15, false, true, CPPI5_TR_EVENT_SIZE_COMPLETION, 0); - cppi5_tr_csf_set(&tr_req[tr_idx].flags, CPPI5_TR_CSF_SUPR_EVT); + cppi5_tr_csf_set(&tr_req[tr_idx].flags, csf); cppi5_tr_set_trigger(&tr_req[tr_idx].flags, uc->config.tr_trigger_type, CPPI5_TR_TRIGGER_TYPE_ICNT2_DEC, 0, 0); @@ -3105,8 +3107,7 @@ udma_prep_slave_sg_triggered_tr(struct udma_chan *uc, struct scatterlist *sgl, cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE15, false, true, CPPI5_TR_EVENT_SIZE_COMPLETION, 0); - cppi5_tr_csf_set(&tr_req[tr_idx].flags, - CPPI5_TR_CSF_SUPR_EVT); + cppi5_tr_csf_set(&tr_req[tr_idx].flags, csf); cppi5_tr_set_trigger(&tr_req[tr_idx].flags, uc->config.tr_trigger_type, CPPI5_TR_TRIGGER_TYPE_ICNT2_DEC, @@ -3150,8 +3151,7 @@ udma_prep_slave_sg_triggered_tr(struct udma_chan *uc, struct scatterlist *sgl, d->residue += sg_len; } - cppi5_tr_csf_set(&tr_req[tr_idx - 1].flags, - CPPI5_TR_CSF_SUPR_EVT | CPPI5_TR_CSF_EOP); + cppi5_tr_csf_set(&tr_req[tr_idx - 1].flags, csf | CPPI5_TR_CSF_EOP); return d; } @@ -3680,6 +3680,7 @@ udma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, int num_tr; size_t tr_size = sizeof(struct cppi5_tr_type15_t); u16 tr0_cnt0, tr0_cnt1, tr1_cnt0; + u32 csf = CPPI5_TR_CSF_SUPR_EVT; if (uc->config.dir != DMA_MEM_TO_MEM) { dev_err(chan->device->dev, @@ -3710,13 +3711,15 @@ udma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (uc->ud->match_data->type != DMA_TYPE_UDMA) { src |= (u64)uc->ud->asel << K3_ADDRESS_ASEL_SHIFT; dest |= (u64)uc->ud->asel << K3_ADDRESS_ASEL_SHIFT; + } else { + csf |= CPPI5_TR_CSF_EOL_ICNT0; } tr_req = d->hwdesc[0].tr_req_base; cppi5_tr_init(&tr_req[0].flags, CPPI5_TR_TYPE15, false, true, CPPI5_TR_EVENT_SIZE_COMPLETION, 0); - cppi5_tr_csf_set(&tr_req[0].flags, CPPI5_TR_CSF_SUPR_EVT); + cppi5_tr_csf_set(&tr_req[0].flags, csf); tr_req[0].addr = src; tr_req[0].icnt0 = tr0_cnt0; @@ -3735,7 +3738,7 @@ udma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (num_tr == 2) { cppi5_tr_init(&tr_req[1].flags, CPPI5_TR_TYPE15, false, true, CPPI5_TR_EVENT_SIZE_COMPLETION, 0); - cppi5_tr_csf_set(&tr_req[1].flags, CPPI5_TR_CSF_SUPR_EVT); + cppi5_tr_csf_set(&tr_req[1].flags, csf); tr_req[1].addr = src + tr0_cnt1 * tr0_cnt0; tr_req[1].icnt0 = tr1_cnt0; @@ -3750,8 +3753,7 @@ udma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, tr_req[1].dicnt3 = 1; } - cppi5_tr_csf_set(&tr_req[num_tr - 1].flags, - CPPI5_TR_CSF_SUPR_EVT | CPPI5_TR_CSF_EOP); + cppi5_tr_csf_set(&tr_req[num_tr - 1].flags, csf | CPPI5_TR_CSF_EOP); if (uc->config.metadata_size) d->vd.tx.metadata_ops = &metadata_ops; diff --git a/include/linux/dma/ti-cppi5.h b/include/linux/dma/ti-cppi5.h index efa2f0309f00..c53c0f6e3b1a 100644 --- a/include/linux/dma/ti-cppi5.h +++ b/include/linux/dma/ti-cppi5.h @@ -616,6 +616,7 @@ static inline void *cppi5_hdesc_get_swdata(struct cppi5_host_desc_t *desc) #define CPPI5_TR_CSF_SUPR_EVT BIT(2) #define CPPI5_TR_CSF_EOL_ADV_SHIFT (4U) #define CPPI5_TR_CSF_EOL_ADV_MASK GENMASK(6, 4) +#define CPPI5_TR_CSF_EOL_ICNT0 BIT(4) #define CPPI5_TR_CSF_EOP BIT(7) /** -- cgit v1.3 From 244da66cda359227d80ccb41dbcb99da40eae186 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:30 -0700 Subject: dmaengine: idxd: setup event log configuration Add setup of event log feature for supported device. Event log addresses error reporting that was lacking in gen 1 DSA devices where a second error event does not get reported when a first event is pending software handling. The event log allows a circular buffer that the device can push error events to. It is up to the user to create a large enough event log ring in order to capture the expected events. The evl size can be set in the device sysfs attribute. By default 64 entries are supported as minimal when event log is enabled. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-4-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 89 +++++++++++++++++++++++++++++++++++++++++++- drivers/dma/idxd/idxd.h | 19 ++++++++++ drivers/dma/idxd/init.c | 1 + drivers/dma/idxd/registers.h | 72 ++++++++++++++++++++++++++++++++++- drivers/dma/idxd/sysfs.c | 3 +- include/uapi/linux/idxd.h | 1 + 6 files changed, 181 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 5f321f3b4242..230fe9bb56ae 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -752,6 +752,83 @@ void idxd_device_clear_state(struct idxd_device *idxd) spin_unlock(&idxd->dev_lock); } +static int idxd_device_evl_setup(struct idxd_device *idxd) +{ + union gencfg_reg gencfg; + union evlcfg_reg evlcfg; + union genctrl_reg genctrl; + struct device *dev = &idxd->pdev->dev; + void *addr; + dma_addr_t dma_addr; + int size; + struct idxd_evl *evl = idxd->evl; + + if (!evl) + return 0; + + size = evl_size(idxd); + /* + * Address needs to be page aligned. However, dma_alloc_coherent() provides + * at minimal page size aligned address. No manual alignment required. + */ + addr = dma_alloc_coherent(dev, size, &dma_addr, GFP_KERNEL); + if (!addr) + return -ENOMEM; + + memset(addr, 0, size); + + spin_lock(&evl->lock); + evl->log = addr; + evl->dma = dma_addr; + evl->log_size = size; + + memset(&evlcfg, 0, sizeof(evlcfg)); + evlcfg.bits[0] = dma_addr & GENMASK(63, 12); + evlcfg.size = evl->size; + + iowrite64(evlcfg.bits[0], idxd->reg_base + IDXD_EVLCFG_OFFSET); + iowrite64(evlcfg.bits[1], idxd->reg_base + IDXD_EVLCFG_OFFSET + 8); + + genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET); + genctrl.evl_int_en = 1; + iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET); + + gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); + gencfg.evl_en = 1; + iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); + + spin_unlock(&evl->lock); + return 0; +} + +static void idxd_device_evl_free(struct idxd_device *idxd) +{ + union gencfg_reg gencfg; + union genctrl_reg genctrl; + struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl = idxd->evl; + + gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); + if (!gencfg.evl_en) + return; + + spin_lock(&evl->lock); + gencfg.evl_en = 0; + iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); + + genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET); + genctrl.evl_int_en = 0; + iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET); + + iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET); + iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET + 8); + + dma_free_coherent(dev, evl->log_size, evl->log, evl->dma); + evl->log = NULL; + evl->size = IDXD_EVL_SIZE_MIN; + spin_unlock(&evl->lock); +} + static void idxd_group_config_write(struct idxd_group *group) { struct idxd_device *idxd = group->idxd; @@ -1451,15 +1528,24 @@ int idxd_device_drv_probe(struct idxd_dev *idxd_dev) if (rc < 0) return -ENXIO; + rc = idxd_device_evl_setup(idxd); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_DEV_EVL_ERR; + return rc; + } + /* Start device */ rc = idxd_device_enable(idxd); - if (rc < 0) + if (rc < 0) { + idxd_device_evl_free(idxd); return rc; + } /* Setup DMA device without channels */ rc = idxd_register_dma_device(idxd); if (rc < 0) { idxd_device_disable(idxd); + idxd_device_evl_free(idxd); idxd->cmd_status = IDXD_SCMD_DEV_DMA_ERR; return rc; } @@ -1488,6 +1574,7 @@ void idxd_device_drv_remove(struct idxd_dev *idxd_dev) idxd_device_disable(idxd); if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) idxd_device_reset(idxd); + idxd_device_evl_free(idxd); } static enum idxd_dev_type dev_types[] = { diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 2a71273f1822..c74681f02b18 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -262,7 +262,15 @@ struct idxd_driver_data { }; struct idxd_evl { + /* Lock to protect event log access. */ + spinlock_t lock; + void *log; + dma_addr_t dma; + /* Total size of event log = number of entries * entry size. */ + unsigned int log_size; + /* The number of entries in the event log. */ u16 size; + u16 head; }; struct idxd_device { @@ -324,6 +332,17 @@ struct idxd_device { struct idxd_evl *evl; }; +static inline unsigned int evl_ent_size(struct idxd_device *idxd) +{ + return idxd->hw.gen_cap.evl_support ? + (32 * (1 << idxd->hw.gen_cap.evl_support)) : 0; +} + +static inline unsigned int evl_size(struct idxd_device *idxd) +{ + return idxd->evl->size * evl_ent_size(idxd); +} + /* IDXD software descriptor */ struct idxd_desc { union { diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 525de59df82a..f44719a11c95 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -343,6 +343,7 @@ static int idxd_init_evl(struct idxd_device *idxd) if (!evl) return -ENOMEM; + spin_lock_init(&evl->lock); evl->size = IDXD_EVL_SIZE_MIN; idxd->evl = evl; return 0; diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index ea3a499a3c3c..11bb97cf7481 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -3,6 +3,8 @@ #ifndef _IDXD_REGISTERS_H_ #define _IDXD_REGISTERS_H_ +#include + /* PCI Config */ #define PCI_DEVICE_ID_INTEL_DSA_SPR0 0x0b25 #define PCI_DEVICE_ID_INTEL_IAX_SPR0 0x0cfe @@ -119,7 +121,8 @@ union gencfg_reg { u32 rdbuf_limit:8; u32 rsvd:4; u32 user_int_en:1; - u32 rsvd2:19; + u32 evl_en:1; + u32 rsvd2:18; }; u32 bits; } __packed; @@ -129,7 +132,8 @@ union genctrl_reg { struct { u32 softerr_int_en:1; u32 halt_int_en:1; - u32 rsvd:30; + u32 evl_int_en:1; + u32 rsvd:29; }; u32 bits; } __packed; @@ -299,6 +303,21 @@ union iaa_cap_reg { #define IDXD_IAACAP_OFFSET 0x180 +#define IDXD_EVLCFG_OFFSET 0xe0 +union evlcfg_reg { + struct { + u64 pasid_en:1; + u64 priv:1; + u64 rsvd:10; + u64 base_addr:52; + + u64 size:16; + u64 pasid:20; + u64 rsvd2:28; + }; + u64 bits[2]; +} __packed; + #define IDXD_EVL_SIZE_MIN 0x0040 #define IDXD_EVL_SIZE_MAX 0xffff @@ -539,4 +558,53 @@ union filter_cfg { u64 val; } __packed; +struct __evl_entry { + u64 rsvd:2; + u64 desc_valid:1; + u64 wq_idx_valid:1; + u64 batch:1; + u64 fault_rw:1; + u64 priv:1; + u64 err_info_valid:1; + u64 error:8; + u64 wq_idx:8; + u64 batch_id:8; + u64 operation:8; + u64 pasid:20; + u64 rsvd2:4; + + u16 batch_idx; + u16 rsvd3; + union { + /* Invalid Flags 0x11 */ + u32 invalid_flags; + /* Invalid Int Handle 0x19 */ + /* Page fault 0x1a */ + /* Page fault 0x06, 0x1f, only operand_id */ + /* Page fault before drain or in batch, 0x26, 0x27 */ + struct { + u16 int_handle; + u16 rci:1; + u16 ims:1; + u16 rcr:1; + u16 first_err_in_batch:1; + u16 rsvd4_2:9; + u16 operand_id:3; + }; + }; + u64 fault_addr; + u64 rsvd5; +} __packed; + +struct dsa_evl_entry { + struct __evl_entry e; + struct dsa_completion_record cr; +} __packed; + +struct iax_evl_entry { + struct __evl_entry e; + u64 rsvd[4]; + struct iax_completion_record cr; +} __packed; + #endif diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 85644e5bde83..163fdfaa5022 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1605,7 +1605,8 @@ static ssize_t event_log_size_store(struct device *dev, if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) return -EPERM; - if (val < IDXD_EVL_SIZE_MIN || val > IDXD_EVL_SIZE_MAX) + if (val < IDXD_EVL_SIZE_MIN || val > IDXD_EVL_SIZE_MAX || + (val * evl_ent_size(idxd) > ULONG_MAX - idxd->evl->dma)) return -EINVAL; idxd->evl->size = val; diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index fc47635b57dc..5d05bf12f2bd 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -30,6 +30,7 @@ enum idxd_scmd_stat { IDXD_SCMD_WQ_NO_PRIV = 0x800f0000, IDXD_SCMD_WQ_IRQ_ERR = 0x80100000, IDXD_SCMD_WQ_USER_NO_IOMMU = 0x80110000, + IDXD_SCMD_DEV_EVL_ERR = 0x80120000, }; #define IDXD_SCMD_SOFTERR_MASK 0x80000000 -- cgit v1.3 From 2f431ba908d2ef05da478d10925207728f1ff483 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:31 -0700 Subject: dmaengine: idxd: add interrupt handling for event log An event log interrupt is raised in the misc interrupt INTCAUSE register when an event is written by the hardware. Add basic event log processing support to the interrupt handler. The event log is a ring where the hardware owns the tail and the software owns the head. The hardware will advance the tail index when an additional event has been pushed to memory. The software will process the log entry and then advances the head. The log is full when (tail + 1) % log_size = head. The hardware will stop writing when the log is full. The user is expected to create a log size large enough to handle all the expected events. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-5-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/irq.c | 48 ++++++++++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/registers.h | 19 ++++++++++++++++++ include/uapi/linux/idxd.h | 1 + 3 files changed, 68 insertions(+) (limited to 'include') diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 0d639303b515..52b8b7d9db22 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -217,6 +217,49 @@ static void idxd_int_handle_revoke(struct work_struct *work) kfree(revoke); } +static void process_evl_entry(struct idxd_device *idxd, struct __evl_entry *entry_head) +{ + struct device *dev = &idxd->pdev->dev; + u8 status; + + status = DSA_COMP_STATUS(entry_head->error); + dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n", + status, entry_head->operation, entry_head->fault_addr); +} + +static void process_evl_entries(struct idxd_device *idxd) +{ + union evl_status_reg evl_status; + unsigned int h, t; + struct idxd_evl *evl = idxd->evl; + struct __evl_entry *entry_head; + unsigned int ent_size = evl_ent_size(idxd); + u32 size; + + evl_status.bits = 0; + evl_status.int_pending = 1; + + spin_lock(&evl->lock); + /* Clear interrupt pending bit */ + iowrite32(evl_status.bits_upper32, + idxd->reg_base + IDXD_EVLSTATUS_OFFSET + sizeof(u32)); + h = evl->head; + evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + t = evl_status.tail; + size = idxd->evl->size; + + while (h != t) { + entry_head = (struct __evl_entry *)(evl->log + (h * ent_size)); + process_evl_entry(idxd, entry_head); + h = (h + 1) % size; + } + + evl->head = h; + evl_status.head = h; + iowrite32(evl_status.bits_lower32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + spin_unlock(&evl->lock); +} + irqreturn_t idxd_misc_thread(int vec, void *data) { struct idxd_irq_entry *irq_entry = data; @@ -304,6 +347,11 @@ irqreturn_t idxd_misc_thread(int vec, void *data) perfmon_counter_overflow(idxd); } + if (cause & IDXD_INTC_EVL) { + val |= IDXD_INTC_EVL; + process_evl_entries(idxd); + } + val ^= cause; if (val) dev_warn_once(dev, "Unexpected interrupt cause bits set: %#x\n", diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 11bb97cf7481..148db94f9373 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -168,6 +168,7 @@ enum idxd_device_reset_type { #define IDXD_INTC_OCCUPY 0x04 #define IDXD_INTC_PERFMON_OVFL 0x08 #define IDXD_INTC_HALT_STATE 0x10 +#define IDXD_INTC_EVL 0x20 #define IDXD_INTC_INT_HANDLE_REVOKED 0x80000000 #define IDXD_CMD_OFFSET 0xa0 @@ -558,6 +559,24 @@ union filter_cfg { u64 val; } __packed; +#define IDXD_EVLSTATUS_OFFSET 0xf0 + +union evl_status_reg { + struct { + u32 head:16; + u32 rsvd:16; + u32 tail:16; + u32 rsvd2:14; + u32 int_pending:1; + u32 rsvd3:1; + }; + struct { + u32 bits_lower32; + u32 bits_upper32; + }; + u64 bits; +} __packed; + struct __evl_entry { u64 rsvd:2; u64 desc_valid:1; diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 5d05bf12f2bd..0bc8eea18586 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -170,6 +170,7 @@ enum iax_completion_status { #define DSA_COMP_STATUS_MASK 0x7f #define DSA_COMP_STATUS_WRITE 0x80 +#define DSA_COMP_STATUS(status) ((status) & DSA_COMP_STATUS_MASK) struct dsa_hw_desc { uint32_t pasid:20; -- cgit v1.3 From 5fbe6503b52f5665560584f62adab5db70ac910e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:32 -0700 Subject: dmanegine: idxd: add debugfs for event log dump Add debugfs entry to dump the content of the event log for debugging. The function will dump all non-zero entries in the event log. It will note which entries are processed and which entries are still pending processing at the time of the dump. The entries may not always be in chronological order due to the log is a circular buffer. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-6-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/Makefile | 2 +- drivers/dma/idxd/debugfs.c | 138 +++++++++++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/idxd.h | 9 +++ drivers/dma/idxd/init.c | 12 ++++ include/uapi/linux/idxd.h | 6 +- 5 files changed, 164 insertions(+), 3 deletions(-) create mode 100644 drivers/dma/idxd/debugfs.c (limited to 'include') diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile index a1e9f2b3a37c..dc096839ac63 100644 --- a/drivers/dma/idxd/Makefile +++ b/drivers/dma/idxd/Makefile @@ -1,7 +1,7 @@ ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=IDXD obj-$(CONFIG_INTEL_IDXD) += idxd.o -idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o +idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o debugfs.o idxd-$(CONFIG_INTEL_IDXD_PERFMON) += perfmon.o diff --git a/drivers/dma/idxd/debugfs.c b/drivers/dma/idxd/debugfs.c new file mode 100644 index 000000000000..9cfbd9b14c4c --- /dev/null +++ b/drivers/dma/idxd/debugfs.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */ +#include +#include +#include +#include +#include +#include +#include +#include "idxd.h" +#include "registers.h" + +static struct dentry *idxd_debugfs_dir; + +static void dump_event_entry(struct idxd_device *idxd, struct seq_file *s, + u16 index, int *count, bool processed) +{ + struct idxd_evl *evl = idxd->evl; + struct dsa_evl_entry *entry; + struct dsa_completion_record *cr; + u64 *raw; + int i; + int evl_strides = evl_ent_size(idxd) / sizeof(u64); + + entry = (struct dsa_evl_entry *)evl->log + index; + + if (!entry->e.desc_valid) + return; + + seq_printf(s, "Event Log entry %d (real index %u) processed: %u\n", + *count, index, processed); + + seq_printf(s, "desc valid %u wq idx valid %u\n" + "batch %u fault rw %u priv %u error 0x%x\n" + "wq idx %u op %#x pasid %u batch idx %u\n" + "fault addr %#llx\n", + entry->e.desc_valid, entry->e.wq_idx_valid, + entry->e.batch, entry->e.fault_rw, entry->e.priv, + entry->e.error, entry->e.wq_idx, entry->e.operation, + entry->e.pasid, entry->e.batch_idx, entry->e.fault_addr); + + cr = &entry->cr; + seq_printf(s, "status %#x result %#x fault_info %#x bytes_completed %u\n" + "fault addr %#llx inv flags %#x\n\n", + cr->status, cr->result, cr->fault_info, cr->bytes_completed, + cr->fault_addr, cr->invalid_flags); + + raw = (u64 *)entry; + + for (i = 0; i < evl_strides; i++) + seq_printf(s, "entry[%d] = %#llx\n", i, raw[i]); + + seq_puts(s, "\n"); + *count += 1; +} + +static int debugfs_evl_show(struct seq_file *s, void *d) +{ + struct idxd_device *idxd = s->private; + struct idxd_evl *evl = idxd->evl; + union evl_status_reg evl_status; + u16 h, t, evl_size, i; + int count = 0; + bool processed = true; + + if (!evl || !evl->log) + return 0; + + spin_lock(&evl->lock); + + h = evl->head; + evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + t = evl_status.tail; + evl_size = evl->size; + + seq_printf(s, "Event Log head %u tail %u interrupt pending %u\n\n", + evl_status.head, evl_status.tail, evl_status.int_pending); + + i = t; + while (1) { + i = (i + 1) % evl_size; + if (i == t) + break; + + if (processed && i == h) + processed = false; + dump_event_entry(idxd, s, i, &count, processed); + } + + spin_unlock(&evl->lock); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(debugfs_evl); + +int idxd_device_init_debugfs(struct idxd_device *idxd) +{ + if (IS_ERR_OR_NULL(idxd_debugfs_dir)) + return 0; + + idxd->dbgfs_dir = debugfs_create_dir(dev_name(idxd_confdev(idxd)), idxd_debugfs_dir); + if (IS_ERR(idxd->dbgfs_dir)) + return PTR_ERR(idxd->dbgfs_dir); + + if (idxd->evl) { + idxd->dbgfs_evl_file = debugfs_create_file("event_log", 0400, + idxd->dbgfs_dir, idxd, + &debugfs_evl_fops); + if (IS_ERR(idxd->dbgfs_evl_file)) { + debugfs_remove_recursive(idxd->dbgfs_dir); + idxd->dbgfs_dir = NULL; + return PTR_ERR(idxd->dbgfs_evl_file); + } + } + + return 0; +} + +void idxd_device_remove_debugfs(struct idxd_device *idxd) +{ + debugfs_remove_recursive(idxd->dbgfs_dir); +} + +int idxd_init_debugfs(void) +{ + if (!debugfs_initialized()) + return 0; + + idxd_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); + if (IS_ERR(idxd_debugfs_dir)) + return PTR_ERR(idxd_debugfs_dir); + return 0; +} + +void idxd_remove_debugfs(void) +{ + debugfs_remove_recursive(idxd_debugfs_dir); +} diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index c74681f02b18..b923b90b7299 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -330,6 +330,9 @@ struct idxd_device { unsigned long *opcap_bmap; struct idxd_evl *evl; + + struct dentry *dbgfs_dir; + struct dentry *dbgfs_evl_file; }; static inline unsigned int evl_ent_size(struct idxd_device *idxd) @@ -704,4 +707,10 @@ static inline void perfmon_init(void) {} static inline void perfmon_exit(void) {} #endif +/* debugfs */ +int idxd_device_init_debugfs(struct idxd_device *idxd); +void idxd_device_remove_debugfs(struct idxd_device *idxd); +int idxd_init_debugfs(void); +void idxd_remove_debugfs(void); + #endif diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index f44719a11c95..e21c4b7a1d32 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -669,6 +669,10 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_dev_register; } + rc = idxd_device_init_debugfs(idxd); + if (rc) + dev_warn(dev, "IDXD debugfs failed to setup\n"); + dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n", idxd->hw.version); @@ -731,6 +735,7 @@ static void idxd_remove(struct pci_dev *pdev) idxd_shutdown(pdev); if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); + idxd_device_remove_debugfs(idxd); irq_entry = idxd_get_ie(idxd, 0); free_irq(irq_entry->vector, irq_entry); @@ -788,6 +793,10 @@ static int __init idxd_init_module(void) if (err) goto err_cdev_register; + err = idxd_init_debugfs(); + if (err) + goto err_debugfs; + err = pci_register_driver(&idxd_pci_driver); if (err) goto err_pci_register; @@ -795,6 +804,8 @@ static int __init idxd_init_module(void) return 0; err_pci_register: + idxd_remove_debugfs(); +err_debugfs: idxd_cdev_remove(); err_cdev_register: idxd_driver_unregister(&idxd_user_drv); @@ -815,5 +826,6 @@ static void __exit idxd_exit_module(void) pci_unregister_driver(&idxd_pci_driver); idxd_cdev_remove(); perfmon_exit(); + idxd_remove_debugfs(); } module_exit(idxd_exit_module); diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 0bc8eea18586..e86199d09a91 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -311,7 +311,8 @@ struct dsa_completion_record { uint8_t result; uint8_t dif_status; }; - uint16_t rsvd; + uint8_t fault_info; + uint8_t rsvd; uint32_t bytes_completed; uint64_t fault_addr; union { @@ -368,7 +369,8 @@ struct dsa_raw_completion_record { struct iax_completion_record { volatile uint8_t status; uint8_t error_code; - uint16_t rsvd; + uint8_t fault_info; + uint8_t rsvd; uint32_t bytes_completed; uint64_t fault_addr; uint32_t invalid_flags; -- cgit v1.3 From c40bd7d9737bdcfb02d42765bc6c59b338151123 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:36 -0700 Subject: dmaengine: idxd: process user page faults for completion record DSA supports page fault handling through PRS. However, the DMA engine that's processing the descriptor is blocked until the PRS response is received. Other workqueues sharing the engine are also blocked. Page fault handing by the driver with PRS disabled can be used to mitigate the stalling. With PRS disabled while ATS remain enabled, DSA handles page faults on a completion record by reporting an event in the event log. In this instance, the descriptor is completed and the event log contains the completion record address and the contents of the completion record. Add support to the event log handling code to fault in the completion record and copy the content of the completion record to user memory. A bitmap is introduced to keep track of discarded event log entries. When the user process initiates ->release() of the char device, it no longer is interested in any remaining event log entries tied to the relevant wq and PASID. The driver will mark the event log entry index in the bitmap. Upon encountering the entries during processing, the event log handler will just clear the bitmap bit and skip the entry rather than attempt to process the event log entry. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-10-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/cdev.c | 30 ++++++++++++++++ drivers/dma/idxd/device.c | 22 ++++++++++-- drivers/dma/idxd/idxd.h | 2 ++ drivers/dma/idxd/init.c | 2 ++ drivers/dma/idxd/irq.c | 87 ++++++++++++++++++++++++++++++++++++++++++++--- include/uapi/linux/idxd.h | 1 + 6 files changed, 137 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 8b8a0a0fb054..0a51c33198f6 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -164,6 +164,35 @@ failed: return rc; } +static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid) +{ + struct idxd_device *idxd = wq->idxd; + struct idxd_evl *evl = idxd->evl; + union evl_status_reg status; + u16 h, t, size; + int ent_size = evl_ent_size(idxd); + struct __evl_entry *entry_head; + + if (!evl) + return; + + spin_lock(&evl->lock); + status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + t = status.tail; + h = evl->head; + size = evl->size; + + while (h != t) { + entry_head = (struct __evl_entry *)(evl->log + (h * ent_size)); + if (entry_head->pasid == pasid && entry_head->wq_idx == wq->id) + set_bit(h, evl->bmap); + h = (h + 1) % size; + } + spin_unlock(&evl->lock); + + drain_workqueue(wq->wq); +} + static int idxd_cdev_release(struct inode *node, struct file *filep) { struct idxd_user_context *ctx = filep->private_data; @@ -190,6 +219,7 @@ static int idxd_cdev_release(struct inode *node, struct file *filep) } if (ctx->sva) { + idxd_cdev_evl_drain_pasid(wq, ctx->pasid); iommu_sva_unbind_device(ctx->sva); idxd_xa_pasid_remove(ctx); } diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 230fe9bb56ae..fd97b2b58734 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -762,18 +762,29 @@ static int idxd_device_evl_setup(struct idxd_device *idxd) dma_addr_t dma_addr; int size; struct idxd_evl *evl = idxd->evl; + unsigned long *bmap; + int rc; if (!evl) return 0; size = evl_size(idxd); + + bmap = bitmap_zalloc(size, GFP_KERNEL); + if (!bmap) { + rc = -ENOMEM; + goto err_bmap; + } + /* * Address needs to be page aligned. However, dma_alloc_coherent() provides * at minimal page size aligned address. No manual alignment required. */ addr = dma_alloc_coherent(dev, size, &dma_addr, GFP_KERNEL); - if (!addr) - return -ENOMEM; + if (!addr) { + rc = -ENOMEM; + goto err_alloc; + } memset(addr, 0, size); @@ -781,6 +792,7 @@ static int idxd_device_evl_setup(struct idxd_device *idxd) evl->log = addr; evl->dma = dma_addr; evl->log_size = size; + evl->bmap = bmap; memset(&evlcfg, 0, sizeof(evlcfg)); evlcfg.bits[0] = dma_addr & GENMASK(63, 12); @@ -799,6 +811,11 @@ static int idxd_device_evl_setup(struct idxd_device *idxd) spin_unlock(&evl->lock); return 0; + +err_alloc: + bitmap_free(bmap); +err_bmap: + return rc; } static void idxd_device_evl_free(struct idxd_device *idxd) @@ -824,6 +841,7 @@ static void idxd_device_evl_free(struct idxd_device *idxd) iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET + 8); dma_free_coherent(dev, evl->log_size, evl->log, evl->dma); + bitmap_free(evl->bmap); evl->log = NULL; evl->size = IDXD_EVL_SIZE_MIN; spin_unlock(&evl->lock); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index b3f9a12adce2..3963c83165a6 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -264,6 +264,7 @@ struct idxd_driver_data { struct device_type *dev_type; int compl_size; int align; + int evl_cr_off; }; struct idxd_evl { @@ -276,6 +277,7 @@ struct idxd_evl { /* The number of entries in the event log. */ u16 size; u16 head; + unsigned long *bmap; }; struct idxd_evl_fault { diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index e6faff58733d..18344593b83f 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -46,6 +46,7 @@ static struct idxd_driver_data idxd_driver_data[] = { .compl_size = sizeof(struct dsa_completion_record), .align = 32, .dev_type = &dsa_device_type, + .evl_cr_off = offsetof(struct dsa_evl_entry, cr), }, [IDXD_TYPE_IAX] = { .name_prefix = "iax", @@ -53,6 +54,7 @@ static struct idxd_driver_data idxd_driver_data[] = { .compl_size = sizeof(struct iax_completion_record), .align = 64, .dev_type = &iax_device_type, + .evl_cr_off = offsetof(struct iax_evl_entry, cr), }, }; diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 52b8b7d9db22..96983975f974 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include "../dmaengine.h" #include "idxd.h" @@ -217,14 +219,89 @@ static void idxd_int_handle_revoke(struct work_struct *work) kfree(revoke); } -static void process_evl_entry(struct idxd_device *idxd, struct __evl_entry *entry_head) +static void idxd_evl_fault_work(struct work_struct *work) +{ + struct idxd_evl_fault *fault = container_of(work, struct idxd_evl_fault, work); + struct idxd_wq *wq = fault->wq; + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + struct __evl_entry *entry_head = fault->entry; + void *cr = (void *)entry_head + idxd->data->evl_cr_off; + int cr_size = idxd->data->compl_size, copied; + + switch (fault->status) { + case DSA_COMP_CRA_XLAT: + case DSA_COMP_DRAIN_EVL: + /* + * Copy completion record to fault_addr in user address space + * that is found by wq and PASID. + */ + copied = idxd_copy_cr(wq, entry_head->pasid, + entry_head->fault_addr, + cr, cr_size); + /* + * The task that triggered the page fault is unknown currently + * because multiple threads may share the user address + * space or the task exits already before this fault. + * So if the copy fails, SIGSEGV can not be sent to the task. + * Just print an error for the failure. The user application + * waiting for the completion record will time out on this + * failure. + */ + if (copied != cr_size) { + dev_dbg_ratelimited(dev, "Failed to write to completion record. (%d:%d)\n", + cr_size, copied); + } + break; + default: + dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n", + DSA_COMP_STATUS(entry_head->error)); + break; + } + + kmem_cache_free(idxd->evl_cache, fault); +} + +static void process_evl_entry(struct idxd_device *idxd, + struct __evl_entry *entry_head, unsigned int index) { struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl = idxd->evl; u8 status; - status = DSA_COMP_STATUS(entry_head->error); - dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n", - status, entry_head->operation, entry_head->fault_addr); + if (test_bit(index, evl->bmap)) { + clear_bit(index, evl->bmap); + } else { + status = DSA_COMP_STATUS(entry_head->error); + + if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL) { + struct idxd_evl_fault *fault; + int ent_size = evl_ent_size(idxd); + + if (entry_head->rci) + dev_dbg(dev, "Completion Int Req set, ignoring!\n"); + + if (!entry_head->rcr && status == DSA_COMP_DRAIN_EVL) + return; + + fault = kmem_cache_alloc(idxd->evl_cache, GFP_ATOMIC); + if (fault) { + struct idxd_wq *wq = idxd->wqs[entry_head->wq_idx]; + + fault->wq = wq; + fault->status = status; + memcpy(&fault->entry, entry_head, ent_size); + INIT_WORK(&fault->work, idxd_evl_fault_work); + queue_work(wq->wq, &fault->work); + } else { + dev_warn(dev, "Failed to service fault work.\n"); + } + } else { + dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n", + status, entry_head->operation, + entry_head->fault_addr); + } + } } static void process_evl_entries(struct idxd_device *idxd) @@ -250,7 +327,7 @@ static void process_evl_entries(struct idxd_device *idxd) while (h != t) { entry_head = (struct __evl_entry *)(evl->log + (h * ent_size)); - process_evl_entry(idxd, entry_head); + process_evl_entry(idxd, entry_head, h); h = (h + 1) % size; } diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index e86199d09a91..4b584d5afd87 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -135,6 +135,7 @@ enum dsa_completion_status { DSA_COMP_HW_ERR1, DSA_COMP_HW_ERR_DRB, DSA_COMP_TRANSLATION_FAIL, + DSA_COMP_DRAIN_EVL = 0x26, }; enum iax_completion_status { -- cgit v1.3 From 6926987185a3ae92c31b99ce1bfdfb04e95057c0 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:37 -0700 Subject: dmaengine: idxd: add descs_completed field for completion record The descs_completed field for a completion record is part of a batch descriptor completion record. It takes the same location as bytes_completed in a normal descriptor field. Add to expose to user. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-11-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 4b584d5afd87..76ad71bf751e 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -314,7 +314,10 @@ struct dsa_completion_record { }; uint8_t fault_info; uint8_t rsvd; - uint32_t bytes_completed; + union { + uint32_t bytes_completed; + uint32_t descs_completed; + }; uint64_t fault_addr; union { /* common record */ -- cgit v1.3 From 2442b7473ad03671378d2d95651bd6bbe09a0943 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:38 -0700 Subject: dmaengine: idxd: process batch descriptor completion record faults Add event log processing for faulting of user batch descriptor completion record. When encountering an event log entry for a page fault on a completion record, the driver is expected to do the following: 1. If the "first error in batch" bit in event log entry error info is set, discard any previously recorded errors associated with the "batch identifier". 2. Fix the page fault according to the fault address in the event log. If successful, write the completion record to the fault address in user space. 3. If an error is encountered while writing the completion record and it is associated to a descriptor in the batch, the driver associates the error with the batch identifier of the event log entry and tracks it until the event log entry for the corresponding batch desc is encountered. While processing an event log entry for a batch descriptor with error indicating that one or more descs in the batch had event log entries, the driver will do the following before writing the batch completion record: 1. If the status field of the completion record is 0x1, the driver will change it to error code 0x5 (one or more operations in batch completed with status not successful) and changes the result field to 1. 2. If the status is error code 0x6 (page fault on batch descriptor list address), change the result field to 1. 3. If status is any other value, the completion record is not changed. 4. Clear the recorded error in preparation for next batch with same batch identifier. The result field is for user software to determine whether to set the "Batch Error" flag bit in the descriptor for continuation of partial batch descriptor completion. See DSA spec 2.0 for additional information. If no error has been recorded for the batch, the batch completion record is written to user space as is. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-12-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/idxd.h | 3 ++ drivers/dma/idxd/init.c | 4 ++ drivers/dma/idxd/irq.c | 91 ++++++++++++++++++++++++++++++++------------ drivers/dma/idxd/registers.h | 4 +- include/uapi/linux/idxd.h | 1 + 5 files changed, 78 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 3963c83165a6..4c4baa80c731 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -265,6 +265,8 @@ struct idxd_driver_data { int compl_size; int align; int evl_cr_off; + int cr_status_off; + int cr_result_off; }; struct idxd_evl { @@ -278,6 +280,7 @@ struct idxd_evl { u16 size; u16 head; unsigned long *bmap; + bool batch_fail[IDXD_MAX_BATCH_IDENT]; }; struct idxd_evl_fault { diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 18344593b83f..92f60d364392 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -47,6 +47,8 @@ static struct idxd_driver_data idxd_driver_data[] = { .align = 32, .dev_type = &dsa_device_type, .evl_cr_off = offsetof(struct dsa_evl_entry, cr), + .cr_status_off = offsetof(struct dsa_completion_record, status), + .cr_result_off = offsetof(struct dsa_completion_record, result), }, [IDXD_TYPE_IAX] = { .name_prefix = "iax", @@ -55,6 +57,8 @@ static struct idxd_driver_data idxd_driver_data[] = { .align = 64, .dev_type = &iax_device_type, .evl_cr_off = offsetof(struct iax_evl_entry, cr), + .cr_status_off = offsetof(struct iax_completion_record, status), + .cr_result_off = offsetof(struct iax_completion_record, error_code), }, }; diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 96983975f974..c660d63a3eb8 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -225,37 +225,79 @@ static void idxd_evl_fault_work(struct work_struct *work) struct idxd_wq *wq = fault->wq; struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl = idxd->evl; struct __evl_entry *entry_head = fault->entry; void *cr = (void *)entry_head + idxd->data->evl_cr_off; - int cr_size = idxd->data->compl_size, copied; + int cr_size = idxd->data->compl_size; + u8 *status = (u8 *)cr + idxd->data->cr_status_off; + u8 *result = (u8 *)cr + idxd->data->cr_result_off; + int copied, copy_size; + bool *bf; switch (fault->status) { case DSA_COMP_CRA_XLAT: - case DSA_COMP_DRAIN_EVL: - /* - * Copy completion record to fault_addr in user address space - * that is found by wq and PASID. - */ - copied = idxd_copy_cr(wq, entry_head->pasid, - entry_head->fault_addr, - cr, cr_size); - /* - * The task that triggered the page fault is unknown currently - * because multiple threads may share the user address - * space or the task exits already before this fault. - * So if the copy fails, SIGSEGV can not be sent to the task. - * Just print an error for the failure. The user application - * waiting for the completion record will time out on this - * failure. - */ - if (copied != cr_size) { - dev_dbg_ratelimited(dev, "Failed to write to completion record. (%d:%d)\n", - cr_size, copied); + if (entry_head->batch && entry_head->first_err_in_batch) + evl->batch_fail[entry_head->batch_id] = false; + + copy_size = cr_size; + break; + case DSA_COMP_BATCH_EVL_ERR: + bf = &evl->batch_fail[entry_head->batch_id]; + + copy_size = entry_head->rcr || *bf ? cr_size : 0; + if (*bf) { + if (*status == DSA_COMP_SUCCESS) + *status = DSA_COMP_BATCH_FAIL; + *result = 1; + *bf = false; } break; + case DSA_COMP_DRAIN_EVL: + copy_size = cr_size; + break; default: - dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n", - DSA_COMP_STATUS(entry_head->error)); + copy_size = 0; + dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n", fault->status); + break; + } + + if (copy_size == 0) + return; + + /* + * Copy completion record to fault_addr in user address space + * that is found by wq and PASID. + */ + copied = idxd_copy_cr(wq, entry_head->pasid, entry_head->fault_addr, + cr, copy_size); + /* + * The task that triggered the page fault is unknown currently + * because multiple threads may share the user address + * space or the task exits already before this fault. + * So if the copy fails, SIGSEGV can not be sent to the task. + * Just print an error for the failure. The user application + * waiting for the completion record will time out on this + * failure. + */ + switch (fault->status) { + case DSA_COMP_CRA_XLAT: + if (copied != copy_size) { + dev_dbg_ratelimited(dev, "Failed to write to completion record: (%d:%d)\n", + copy_size, copied); + if (entry_head->batch) + evl->batch_fail[entry_head->batch_id] = true; + } + break; + case DSA_COMP_BATCH_EVL_ERR: + if (copied != copy_size) { + dev_dbg_ratelimited(dev, "Failed to write to batch completion record: (%d:%d)\n", + copy_size, copied); + } + break; + case DSA_COMP_DRAIN_EVL: + if (copied != copy_size) + dev_dbg_ratelimited(dev, "Failed to write to drain completion record: (%d:%d)\n", + copy_size, copied); break; } @@ -274,7 +316,8 @@ static void process_evl_entry(struct idxd_device *idxd, } else { status = DSA_COMP_STATUS(entry_head->error); - if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL) { + if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL || + status == DSA_COMP_BATCH_EVL_ERR) { struct idxd_evl_fault *fault; int ent_size = evl_ent_size(idxd); diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 148db94f9373..9f3959d001b6 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -35,7 +35,7 @@ union gen_cap_reg { u64 drain_readback:1; u64 rsvd2:3; u64 evl_support:2; - u64 rsvd4:1; + u64 batch_continuation:1; u64 max_xfer_shift:5; u64 max_batch_shift:4; u64 max_ims_mult:6; @@ -577,6 +577,8 @@ union evl_status_reg { u64 bits; } __packed; +#define IDXD_MAX_BATCH_IDENT 256 + struct __evl_entry { u64 rsvd:2; u64 desc_valid:1; diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 76ad71bf751e..606b52e88ce3 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -136,6 +136,7 @@ enum dsa_completion_status { DSA_COMP_HW_ERR_DRB, DSA_COMP_TRANSLATION_FAIL, DSA_COMP_DRAIN_EVL = 0x26, + DSA_COMP_BATCH_EVL_ERR, }; enum iax_completion_status { -- cgit v1.3