From aa207a05f95abc3530b7415232f0f73278336bd3 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 18 Aug 2020 14:51:45 +0300 Subject: mei: add connect with vtag ioctl This IOCTL is used to associate the current file descriptor with a FW Client (given by UUID), and virtual tag (vtag). The IOCTL opens a communication channel between a host client and a FW client on a tagged channel. From this point on, every reader and write will communicate with the associated FW client on the tagged channel. Upon close() the communication is terminated. The IOCTL argument is a struct with a union that contains the input parameter and the output parameter for this IOCTL. The input parameter is UUID of the FW Client, a vtag [0,255] The output parameter is the properties of the FW client Clients that do not support tagged connection will respond with -EOPNOTSUPP Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20200818115147.2567012-12-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/mei.h | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mei.h b/include/uapi/linux/mei.h index c6aec86cc5de..4f3638489d01 100644 --- a/include/uapi/linux/mei.h +++ b/include/uapi/linux/mei.h @@ -66,4 +66,53 @@ struct mei_connect_client_data { */ #define IOCTL_MEI_NOTIFY_GET _IOR('H', 0x03, __u32) +/** + * struct mei_connect_client_vtag - mei client information struct with vtag + * + * @in_client_uuid: UUID of client to connect + * @vtag: virtual tag + * @reserved: reserved for future use + */ +struct mei_connect_client_vtag { + uuid_le in_client_uuid; + __u8 vtag; + __u8 reserved[3]; +}; + +/** + * struct mei_connect_client_data_vtag - IOCTL connect data union + * + * @connect: input connect data + * @out_client_properties: output client data + */ +struct mei_connect_client_data_vtag { + union { + struct mei_connect_client_vtag connect; + struct mei_client out_client_properties; + }; +}; + +/** + * DOC: + * This IOCTL is used to associate the current file descriptor with a + * FW Client (given by UUID), and virtual tag (vtag). + * The IOCTL opens a communication channel between a host client and + * a FW client on a tagged channel. From this point on, every read + * and write will communicate with the associated FW client with + * on the tagged channel. + * Upone close() the communication is terminated. + * + * The IOCTL argument is a struct with a union that contains + * the input parameter and the output parameter for this IOCTL. + * + * The input parameter is UUID of the FW Client, a vtag [0,255] + * The output parameter is the properties of the FW client + * (FW protocool version and max message size). + * + * Clients that do not support tagged connection + * will respond with -EOPNOTSUPP. + */ +#define IOCTL_MEI_CONNECT_CLIENT_VTAG \ + _IOWR('H', 0x04, struct mei_connect_client_data_vtag) + #endif /* _LINUX_MEI_H */ -- cgit v1.2.3 From 7c920da30e04c2dd78d988e0cefb8e5bd2e48b26 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Tue, 8 Sep 2020 09:10:09 -0400 Subject: misc: fastrpc: fix indentation error in uapi header Use tabs instead of spaces. Fixes: 2419e55e532d ("misc: fastrpc: add mmap/unmap support") Signed-off-by: Jonathan Marek Link: https://lore.kernel.org/r/20200908131013.19630-2-jonathan@marek.ca Signed-off-by: Greg Kroah-Hartman --- include/uapi/misc/fastrpc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index 07de2b7aac85..de31f0bd4779 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -10,8 +10,8 @@ #define FASTRPC_IOCTL_INVOKE _IOWR('R', 3, struct fastrpc_invoke) #define FASTRPC_IOCTL_INIT_ATTACH _IO('R', 4) #define FASTRPC_IOCTL_INIT_CREATE _IOWR('R', 5, struct fastrpc_init_create) -#define FASTRPC_IOCTL_MMAP _IOWR('R', 6, struct fastrpc_req_mmap) -#define FASTRPC_IOCTL_MUNMAP _IOWR('R', 7, struct fastrpc_req_munmap) +#define FASTRPC_IOCTL_MMAP _IOWR('R', 6, struct fastrpc_req_mmap) +#define FASTRPC_IOCTL_MUNMAP _IOWR('R', 7, struct fastrpc_req_munmap) struct fastrpc_invoke_args { __u64 ptr; -- cgit v1.2.3 From 6010d9befc8df899b61378adfd153f0b53075092 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Tue, 8 Sep 2020 09:10:11 -0400 Subject: misc: fastrpc: add ioctl for attaching to sensors pd Initializing sensors requires attaching to pd 2. Add an ioctl for that. This corresponds to FASTRPC_INIT_ATTACH_SENSORS in the downstream driver. Signed-off-by: Jonathan Marek Link: https://lore.kernel.org/r/20200908131013.19630-4-jonathan@marek.ca Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 9 ++++++--- include/uapi/misc/fastrpc.h | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index a7290ced73fb..994ab67bc2dc 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1281,7 +1281,7 @@ static int fastrpc_dmabuf_alloc(struct fastrpc_user *fl, char __user *argp) return 0; } -static int fastrpc_init_attach(struct fastrpc_user *fl) +static int fastrpc_init_attach(struct fastrpc_user *fl, int pd) { struct fastrpc_invoke_args args[1]; int tgid = fl->tgid; @@ -1292,7 +1292,7 @@ static int fastrpc_init_attach(struct fastrpc_user *fl) args[0].fd = -1; args[0].reserved = 0; sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_ATTACH, 1, 0); - fl->pd = AUDIO_PD; + fl->pd = pd; return fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, &args[0]); @@ -1482,7 +1482,10 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, err = fastrpc_invoke(fl, argp); break; case FASTRPC_IOCTL_INIT_ATTACH: - err = fastrpc_init_attach(fl); + err = fastrpc_init_attach(fl, AUDIO_PD); + break; + case FASTRPC_IOCTL_INIT_ATTACH_SNS: + err = fastrpc_init_attach(fl, SENSORS_PD); break; case FASTRPC_IOCTL_INIT_CREATE: err = fastrpc_init_create_process(fl, argp); diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index de31f0bd4779..0a89f95463f6 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -12,6 +12,7 @@ #define FASTRPC_IOCTL_INIT_CREATE _IOWR('R', 5, struct fastrpc_init_create) #define FASTRPC_IOCTL_MMAP _IOWR('R', 6, struct fastrpc_req_mmap) #define FASTRPC_IOCTL_MUNMAP _IOWR('R', 7, struct fastrpc_req_munmap) +#define FASTRPC_IOCTL_INIT_ATTACH_SNS _IO('R', 8) struct fastrpc_invoke_args { __u64 ptr; -- cgit v1.2.3 From 4af8b3d3eb5032fe6f4a8104c48c176bf68a6946 Mon Sep 17 00:00:00 2001 From: Tingwei Zhang Date: Wed, 16 Sep 2020 13:17:23 -0600 Subject: coresight: stm: Support marked packet STP_PACKET_MARKED is not supported by STM currently. Add STM_FLAG_MARKED to support marked packet in STM. Signed-off-by: Tingwei Zhang Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20200916191737.4001561-3-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-stm.c | 11 +++++++---- include/uapi/linux/coresight-stm.h | 1 + 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c index 673d2f56ed1e..2ba819a47cf6 100644 --- a/drivers/hwtracing/coresight/coresight-stm.c +++ b/drivers/hwtracing/coresight/coresight-stm.c @@ -412,6 +412,7 @@ static ssize_t notrace stm_generic_packet(struct stm_data *stm_data, void __iomem *ch_addr; struct stm_drvdata *drvdata = container_of(stm_data, struct stm_drvdata, stm); + unsigned int stm_flags; if (!(drvdata && local_read(&drvdata->mode))) return -EACCES; @@ -421,8 +422,9 @@ static ssize_t notrace stm_generic_packet(struct stm_data *stm_data, ch_addr = stm_channel_addr(drvdata, channel); - flags = (flags == STP_PACKET_TIMESTAMPED) ? STM_FLAG_TIMESTAMPED : 0; - flags |= test_bit(channel, drvdata->chs.guaranteed) ? + stm_flags = (flags & STP_PACKET_TIMESTAMPED) ? + STM_FLAG_TIMESTAMPED : 0; + stm_flags |= test_bit(channel, drvdata->chs.guaranteed) ? STM_FLAG_GUARANTEED : 0; if (size > drvdata->write_bytes) @@ -432,7 +434,7 @@ static ssize_t notrace stm_generic_packet(struct stm_data *stm_data, switch (packet) { case STP_PACKET_FLAG: - ch_addr += stm_channel_off(STM_PKT_TYPE_FLAG, flags); + ch_addr += stm_channel_off(STM_PKT_TYPE_FLAG, stm_flags); /* * The generic STM core sets a size of '0' on flag packets. @@ -444,7 +446,8 @@ static ssize_t notrace stm_generic_packet(struct stm_data *stm_data, break; case STP_PACKET_DATA: - ch_addr += stm_channel_off(STM_PKT_TYPE_DATA, flags); + stm_flags |= (flags & STP_PACKET_MARKED) ? STM_FLAG_MARKED : 0; + ch_addr += stm_channel_off(STM_PKT_TYPE_DATA, stm_flags); stm_send(ch_addr, payload, size, drvdata->write_bytes); break; diff --git a/include/uapi/linux/coresight-stm.h b/include/uapi/linux/coresight-stm.h index 8847dbf24151..7ff3709c01b8 100644 --- a/include/uapi/linux/coresight-stm.h +++ b/include/uapi/linux/coresight-stm.h @@ -5,6 +5,7 @@ #include #define STM_FLAG_TIMESTAMPED _BITUL(3) +#define STM_FLAG_MARKED _BITUL(4) #define STM_FLAG_GUARANTEED _BITUL(7) /* -- cgit v1.2.3 From 15b760c37ad3c3f2b922506eaca4ca8b4292e621 Mon Sep 17 00:00:00 2001 From: Andra Paraschiv Date: Mon, 21 Sep 2020 15:17:15 +0300 Subject: nitro_enclaves: Add ioctl interface definition The Nitro Enclaves driver handles the enclave lifetime management. This includes enclave creation, termination and setting up its resources such as memory and CPU. An enclave runs alongside the VM that spawned it. It is abstracted as a process running in the VM that launched it. The process interacts with the NE driver, that exposes an ioctl interface for creating an enclave and setting up its resources. Changelog v9 -> v10 * Update commit message to include the changelog before the SoB tag(s). v8 -> v9 * No changes. v7 -> v8 * Add NE custom error codes for user space memory regions not backed by pages multiple of 2 MiB, invalid flags and enclave CID. * Add max flag value for enclave image load info. v6 -> v7 * Clarify in the ioctls documentation that the return value is -1 and errno is set on failure. * Update the error code value for NE_ERR_INVALID_MEM_REGION_SIZE as it gets in user space as value 25 (ENOTTY) instead of 515. Update the NE custom error codes values range to not be the same as the ones defined in include/linux/errno.h, although these are not propagated to user space. v5 -> v6 * Fix typo in the description about the NE CPU pool. * Update documentation to kernel-doc format. * Remove the ioctl to query API version. v4 -> v5 * Add more details about the ioctl calls usage e.g. error codes, file descriptors used. * Update the ioctl to set an enclave vCPU to not return a file descriptor. * Add specific NE error codes. v3 -> v4 * Decouple NE ioctl interface from KVM API. * Add NE API version and the corresponding ioctl call. * Add enclave / image load flags options. v2 -> v3 * Remove the GPL additional wording as SPDX-License-Identifier is already in place. v1 -> v2 * Add ioctl for getting enclave image load metadata. * Update NE_ENCLAVE_START ioctl name to NE_START_ENCLAVE. * Add entry in Documentation/userspace-api/ioctl/ioctl-number.rst for NE ioctls. * Update NE ioctls definition based on the updated ioctl range for major and minor. Reviewed-by: Alexander Graf Reviewed-by: Stefan Hajnoczi Signed-off-by: Alexandru Vasile Signed-off-by: Andra Paraschiv Link: https://lore.kernel.org/r/20200921121732.44291-2-andraprs@amazon.com Signed-off-by: Greg Kroah-Hartman --- Documentation/userspace-api/ioctl/ioctl-number.rst | 5 +- include/linux/nitro_enclaves.h | 11 + include/uapi/linux/nitro_enclaves.h | 359 +++++++++++++++++++++ 3 files changed, 374 insertions(+), 1 deletion(-) create mode 100644 include/linux/nitro_enclaves.h create mode 100644 include/uapi/linux/nitro_enclaves.h (limited to 'include/uapi') diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 2a198838fca9..5f7ff00f394e 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -328,8 +328,11 @@ Code Seq# Include File Comments 0xAC 00-1F linux/raw.h 0xAD 00 Netfilter device in development: -0xAE all linux/kvm.h Kernel-based Virtual Machine +0xAE 00-1F linux/kvm.h Kernel-based Virtual Machine +0xAE 40-FF linux/kvm.h Kernel-based Virtual Machine + +0xAE 20-3F linux/nitro_enclaves.h Nitro Enclaves 0xAF 00-1F linux/fsl_hypervisor.h Freescale hypervisor 0xB0 all RATIO devices in development: diff --git a/include/linux/nitro_enclaves.h b/include/linux/nitro_enclaves.h new file mode 100644 index 000000000000..d91ef2bfdf47 --- /dev/null +++ b/include/linux/nitro_enclaves.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + */ + +#ifndef _LINUX_NITRO_ENCLAVES_H_ +#define _LINUX_NITRO_ENCLAVES_H_ + +#include + +#endif /* _LINUX_NITRO_ENCLAVES_H_ */ diff --git a/include/uapi/linux/nitro_enclaves.h b/include/uapi/linux/nitro_enclaves.h new file mode 100644 index 000000000000..b945073fe544 --- /dev/null +++ b/include/uapi/linux/nitro_enclaves.h @@ -0,0 +1,359 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + */ + +#ifndef _UAPI_LINUX_NITRO_ENCLAVES_H_ +#define _UAPI_LINUX_NITRO_ENCLAVES_H_ + +#include + +/** + * DOC: Nitro Enclaves (NE) Kernel Driver Interface + */ + +/** + * NE_CREATE_VM - The command is used to create a slot that is associated with + * an enclave VM. + * The generated unique slot id is an output parameter. + * The ioctl can be invoked on the /dev/nitro_enclaves fd, before + * setting any resources, such as memory and vCPUs, for an + * enclave. Memory and vCPUs are set for the slot mapped to an enclave. + * A NE CPU pool has to be set before calling this function. The + * pool can be set after the NE driver load, using + * /sys/module/nitro_enclaves/parameters/ne_cpus. + * Its format is the detailed in the cpu-lists section: + * https://www.kernel.org/doc/html/latest/admin-guide/kernel-parameters.html + * CPU 0 and its siblings have to remain available for the + * primary / parent VM, so they cannot be set for enclaves. Full + * CPU core(s), from the same NUMA node, need(s) to be included + * in the CPU pool. + * + * Context: Process context. + * Return: + * * Enclave file descriptor - Enclave file descriptor used with + * ioctl calls to set vCPUs and memory + * regions, then start the enclave. + * * -1 - There was a failure in the ioctl logic. + * On failure, errno is set to: + * * EFAULT - copy_to_user() failure. + * * ENOMEM - Memory allocation failure for internal + * bookkeeping variables. + * * NE_ERR_NO_CPUS_AVAIL_IN_POOL - No NE CPU pool set / no CPUs available + * in the pool. + * * Error codes from get_unused_fd_flags() and anon_inode_getfile(). + * * Error codes from the NE PCI device request. + */ +#define NE_CREATE_VM _IOR(0xAE, 0x20, __u64) + +/** + * NE_ADD_VCPU - The command is used to set a vCPU for an enclave. The vCPU can + * be auto-chosen from the NE CPU pool or it can be set by the + * caller, with the note that it needs to be available in the NE + * CPU pool. Full CPU core(s), from the same NUMA node, need(s) to + * be associated with an enclave. + * The vCPU id is an input / output parameter. If its value is 0, + * then a CPU is chosen from the enclave CPU pool and returned via + * this parameter. + * The ioctl can be invoked on the enclave fd, before an enclave + * is started. + * + * Context: Process context. + * Return: + * * 0 - Logic succesfully completed. + * * -1 - There was a failure in the ioctl logic. + * On failure, errno is set to: + * * EFAULT - copy_from_user() / copy_to_user() failure. + * * ENOMEM - Memory allocation failure for internal + * bookkeeping variables. + * * EIO - Current task mm is not the same as the one + * that created the enclave. + * * NE_ERR_NO_CPUS_AVAIL_IN_POOL - No CPUs available in the NE CPU pool. + * * NE_ERR_VCPU_ALREADY_USED - The provided vCPU is already used. + * * NE_ERR_VCPU_NOT_IN_CPU_POOL - The provided vCPU is not available in the + * NE CPU pool. + * * NE_ERR_VCPU_INVALID_CPU_CORE - The core id of the provided vCPU is invalid + * or out of range. + * * NE_ERR_NOT_IN_INIT_STATE - The enclave is not in init state + * (init = before being started). + * * NE_ERR_INVALID_VCPU - The provided vCPU is not in the available + * CPUs range. + * * Error codes from the NE PCI device request. + */ +#define NE_ADD_VCPU _IOWR(0xAE, 0x21, __u32) + +/** + * NE_GET_IMAGE_LOAD_INFO - The command is used to get information needed for + * in-memory enclave image loading e.g. offset in + * enclave memory to start placing the enclave image. + * The image load info is an input / output parameter. + * It includes info provided by the caller - flags - + * and returns the offset in enclave memory where to + * start placing the enclave image. + * The ioctl can be invoked on the enclave fd, before + * an enclave is started. + * + * Context: Process context. + * Return: + * * 0 - Logic succesfully completed. + * * -1 - There was a failure in the ioctl logic. + * On failure, errno is set to: + * * EFAULT - copy_from_user() / copy_to_user() failure. + * * NE_ERR_NOT_IN_INIT_STATE - The enclave is not in init state (init = + * before being started). + * * NE_ERR_INVALID_FLAG_VALUE - The value of the provided flag is invalid. + */ +#define NE_GET_IMAGE_LOAD_INFO _IOWR(0xAE, 0x22, struct ne_image_load_info) + +/** + * NE_SET_USER_MEMORY_REGION - The command is used to set a memory region for an + * enclave, given the allocated memory from the + * userspace. Enclave memory needs to be from the + * same NUMA node as the enclave CPUs. + * The user memory region is an input parameter. It + * includes info provided by the caller - flags, + * memory size and userspace address. + * The ioctl can be invoked on the enclave fd, + * before an enclave is started. + * + * Context: Process context. + * Return: + * * 0 - Logic succesfully completed. + * * -1 - There was a failure in the ioctl logic. + * On failure, errno is set to: + * * EFAULT - copy_from_user() failure. + * * EINVAL - Invalid physical memory region(s) e.g. + * unaligned address. + * * EIO - Current task mm is not the same as + * the one that created the enclave. + * * ENOMEM - Memory allocation failure for internal + * bookkeeping variables. + * * NE_ERR_NOT_IN_INIT_STATE - The enclave is not in init state + * (init = before being started). + * * NE_ERR_INVALID_MEM_REGION_SIZE - The memory size of the region is not + * multiple of 2 MiB. + * * NE_ERR_INVALID_MEM_REGION_ADDR - Invalid user space address given. + * * NE_ERR_UNALIGNED_MEM_REGION_ADDR - Unaligned user space address given. + * * NE_ERR_MEM_REGION_ALREADY_USED - The memory region is already used. + * * NE_ERR_MEM_NOT_HUGE_PAGE - The memory region is not backed by + * huge pages. + * * NE_ERR_MEM_DIFFERENT_NUMA_NODE - The memory region is not from the same + * NUMA node as the CPUs. + * * NE_ERR_MEM_MAX_REGIONS - The number of memory regions set for + * the enclave reached maximum. + * * NE_ERR_INVALID_PAGE_SIZE - The memory region is not backed by + * pages multiple of 2 MiB. + * * NE_ERR_INVALID_FLAG_VALUE - The value of the provided flag is invalid. + * * Error codes from get_user_pages(). + * * Error codes from the NE PCI device request. + */ +#define NE_SET_USER_MEMORY_REGION _IOW(0xAE, 0x23, struct ne_user_memory_region) + +/** + * NE_START_ENCLAVE - The command is used to trigger enclave start after the + * enclave resources, such as memory and CPU, have been set. + * The enclave start info is an input / output parameter. It + * includes info provided by the caller - enclave cid and + * flags - and returns the cid (if input cid is 0). + * The ioctl can be invoked on the enclave fd, after an + * enclave slot is created and resources, such as memory and + * vCPUs are set for an enclave. + * + * Context: Process context. + * Return: + * * 0 - Logic succesfully completed. + * * -1 - There was a failure in the ioctl logic. + * On failure, errno is set to: + * * EFAULT - copy_from_user() / copy_to_user() failure. + * * NE_ERR_NOT_IN_INIT_STATE - The enclave is not in init state + * (init = before being started). + * * NE_ERR_NO_MEM_REGIONS_ADDED - No memory regions are set. + * * NE_ERR_NO_VCPUS_ADDED - No vCPUs are set. + * * NE_ERR_FULL_CORES_NOT_USED - Full core(s) not set for the enclave. + * * NE_ERR_ENCLAVE_MEM_MIN_SIZE - Enclave memory is less than minimum + * memory size (64 MiB). + * * NE_ERR_INVALID_FLAG_VALUE - The value of the provided flag is invalid. + * * NE_ERR_INVALID_ENCLAVE_CID - The provided enclave CID is invalid. + * * Error codes from the NE PCI device request. + */ +#define NE_START_ENCLAVE _IOWR(0xAE, 0x24, struct ne_enclave_start_info) + +/** + * DOC: NE specific error codes + */ + +/** + * NE_ERR_VCPU_ALREADY_USED - The provided vCPU is already used. + */ +#define NE_ERR_VCPU_ALREADY_USED (256) +/** + * NE_ERR_VCPU_NOT_IN_CPU_POOL - The provided vCPU is not available in the + * NE CPU pool. + */ +#define NE_ERR_VCPU_NOT_IN_CPU_POOL (257) +/** + * NE_ERR_VCPU_INVALID_CPU_CORE - The core id of the provided vCPU is invalid + * or out of range of the NE CPU pool. + */ +#define NE_ERR_VCPU_INVALID_CPU_CORE (258) +/** + * NE_ERR_INVALID_MEM_REGION_SIZE - The user space memory region size is not + * multiple of 2 MiB. + */ +#define NE_ERR_INVALID_MEM_REGION_SIZE (259) +/** + * NE_ERR_INVALID_MEM_REGION_ADDR - The user space memory region address range + * is invalid. + */ +#define NE_ERR_INVALID_MEM_REGION_ADDR (260) +/** + * NE_ERR_UNALIGNED_MEM_REGION_ADDR - The user space memory region address is + * not aligned. + */ +#define NE_ERR_UNALIGNED_MEM_REGION_ADDR (261) +/** + * NE_ERR_MEM_REGION_ALREADY_USED - The user space memory region is already used. + */ +#define NE_ERR_MEM_REGION_ALREADY_USED (262) +/** + * NE_ERR_MEM_NOT_HUGE_PAGE - The user space memory region is not backed by + * contiguous physical huge page(s). + */ +#define NE_ERR_MEM_NOT_HUGE_PAGE (263) +/** + * NE_ERR_MEM_DIFFERENT_NUMA_NODE - The user space memory region is backed by + * pages from different NUMA nodes than the CPUs. + */ +#define NE_ERR_MEM_DIFFERENT_NUMA_NODE (264) +/** + * NE_ERR_MEM_MAX_REGIONS - The supported max memory regions per enclaves has + * been reached. + */ +#define NE_ERR_MEM_MAX_REGIONS (265) +/** + * NE_ERR_NO_MEM_REGIONS_ADDED - The command to start an enclave is triggered + * and no memory regions are added. + */ +#define NE_ERR_NO_MEM_REGIONS_ADDED (266) +/** + * NE_ERR_NO_VCPUS_ADDED - The command to start an enclave is triggered and no + * vCPUs are added. + */ +#define NE_ERR_NO_VCPUS_ADDED (267) +/** + * NE_ERR_ENCLAVE_MEM_MIN_SIZE - The enclave memory size is lower than the + * minimum supported. + */ +#define NE_ERR_ENCLAVE_MEM_MIN_SIZE (268) +/** + * NE_ERR_FULL_CORES_NOT_USED - The command to start an enclave is triggered and + * full CPU cores are not set. + */ +#define NE_ERR_FULL_CORES_NOT_USED (269) +/** + * NE_ERR_NOT_IN_INIT_STATE - The enclave is not in init state when setting + * resources or triggering start. + */ +#define NE_ERR_NOT_IN_INIT_STATE (270) +/** + * NE_ERR_INVALID_VCPU - The provided vCPU is out of range of the available CPUs. + */ +#define NE_ERR_INVALID_VCPU (271) +/** + * NE_ERR_NO_CPUS_AVAIL_IN_POOL - The command to create an enclave is triggered + * and no CPUs are available in the pool. + */ +#define NE_ERR_NO_CPUS_AVAIL_IN_POOL (272) +/** + * NE_ERR_INVALID_PAGE_SIZE - The user space memory region is not backed by pages + * multiple of 2 MiB. + */ +#define NE_ERR_INVALID_PAGE_SIZE (273) +/** + * NE_ERR_INVALID_FLAG_VALUE - The provided flag value is invalid. + */ +#define NE_ERR_INVALID_FLAG_VALUE (274) +/** + * NE_ERR_INVALID_ENCLAVE_CID - The provided enclave CID is invalid, either + * being a well-known value or the CID of the + * parent / primary VM. + */ +#define NE_ERR_INVALID_ENCLAVE_CID (275) + +/** + * DOC: Image load info flags + */ + +/** + * NE_EIF_IMAGE - Enclave Image Format (EIF) + */ +#define NE_EIF_IMAGE (0x01) + +#define NE_IMAGE_LOAD_MAX_FLAG_VAL (0x02) + +/** + * struct ne_image_load_info - Info necessary for in-memory enclave image + * loading (in / out). + * @flags: Flags to determine the enclave image type + * (e.g. Enclave Image Format - EIF) (in). + * @memory_offset: Offset in enclave memory where to start placing the + * enclave image (out). + */ +struct ne_image_load_info { + __u64 flags; + __u64 memory_offset; +}; + +/** + * DOC: User memory region flags + */ + +/** + * NE_DEFAULT_MEMORY_REGION - Memory region for enclave general usage. + */ +#define NE_DEFAULT_MEMORY_REGION (0x00) + +#define NE_MEMORY_REGION_MAX_FLAG_VAL (0x01) + +/** + * struct ne_user_memory_region - Memory region to be set for an enclave (in). + * @flags: Flags to determine the usage for the memory region (in). + * @memory_size: The size, in bytes, of the memory region to be set for + * an enclave (in). + * @userspace_addr: The start address of the userspace allocated memory of + * the memory region to set for an enclave (in). + */ +struct ne_user_memory_region { + __u64 flags; + __u64 memory_size; + __u64 userspace_addr; +}; + +/** + * DOC: Enclave start info flags + */ + +/** + * NE_ENCLAVE_PRODUCTION_MODE - Start enclave in production mode. + */ +#define NE_ENCLAVE_PRODUCTION_MODE (0x00) +/** + * NE_ENCLAVE_DEBUG_MODE - Start enclave in debug mode. + */ +#define NE_ENCLAVE_DEBUG_MODE (0x01) + +#define NE_ENCLAVE_START_MAX_FLAG_VAL (0x02) + +/** + * struct ne_enclave_start_info - Setup info necessary for enclave start (in / out). + * @flags: Flags for the enclave to start with (e.g. debug mode) (in). + * @enclave_cid: Context ID (CID) for the enclave vsock device. If 0 as + * input, the CID is autogenerated by the hypervisor and + * returned back as output by the driver (in / out). + */ +struct ne_enclave_start_info { + __u64 flags; + __u64 enclave_cid; +}; + +#endif /* _UAPI_LINUX_NITRO_ENCLAVES_H_ */ -- cgit v1.2.3 From 0a068adde505a90ece23caaf19b77567e1d18298 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Tue, 21 Jul 2020 10:49:51 +0300 Subject: habanalabs: add information about PCIe controller Update firmware header with new API for getting pcie info such as tx/rx throughput and replay counter. These counters are needed by customers for monitor and maintenance of multiple devices. Add new opcodes to the INFO ioctl to retrieve these counters. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 48 +++++++++++++++++++++++ drivers/misc/habanalabs/common/habanalabs.h | 4 ++ drivers/misc/habanalabs/common/habanalabs_ioctl.c | 41 +++++++++++++++++++ drivers/misc/habanalabs/gaudi/gaudi.c | 4 ++ drivers/misc/habanalabs/goya/goya.c | 4 ++ drivers/misc/habanalabs/include/common/armcp_if.h | 10 +++++ include/uapi/misc/habanalabs.h | 27 +++++++++++++ 7 files changed, 138 insertions(+) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index f52bc690dfc5..61f5edc96e16 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -363,6 +363,54 @@ out: return rc; } +int hl_fw_armcp_pci_counters_get(struct hl_device *hdev, + struct hl_info_pci_counters *counters) +{ + struct armcp_packet pkt = {}; + long result; + int rc; + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_PCIE_THROUGHPUT_GET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + + /* Fetch PCI rx counter */ + pkt.index = cpu_to_le32(armcp_pcie_throughput_rx); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_ARMCP_INFO_TIMEOUT_USEC, &result); + if (rc) { + dev_err(hdev->dev, + "Failed to handle ArmCP PCI info pkt, error %d\n", rc); + return rc; + } + counters->rx_throughput = result; + + /* Fetch PCI tx counter */ + pkt.index = cpu_to_le32(armcp_pcie_throughput_tx); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_ARMCP_INFO_TIMEOUT_USEC, &result); + if (rc) { + dev_err(hdev->dev, + "Failed to handle ArmCP PCI info pkt, error %d\n", rc); + return rc; + } + counters->tx_throughput = result; + + /* Fetch PCI replay counter */ + pkt.ctl = cpu_to_le32(ARMCP_PACKET_PCIE_REPLAY_CNT_GET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_ARMCP_INFO_TIMEOUT_USEC, &result); + if (rc) { + dev_err(hdev->dev, + "Failed to handle ArmCP PCI info pkt, error %d\n", rc); + return rc; + } + counters->replay_cnt = (u32) result; + + return rc; +} + static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg) { u32 err_val; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index f97eebc64979..2c9fcb513215 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1483,6 +1483,7 @@ struct hl_device_idle_busy_ts { * @soft_reset_cnt: number of soft reset since the driver was loaded. * @hard_reset_cnt: number of hard reset since the driver was loaded. * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr + * @clk_throttling_reason: bitmask represents the current clk throttling reasons * @id: device minor. * @id_control: minor of the control device * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit @@ -1587,6 +1588,7 @@ struct hl_device { u32 soft_reset_cnt; u32 hard_reset_cnt; u32 idle_busy_ts_idx; + u32 clk_throttling_reason; u16 id; u16 id_control; u16 cpu_pci_msb_addr; @@ -1841,6 +1843,8 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, int hl_fw_send_heartbeat(struct hl_device *hdev); int hl_fw_armcp_info_get(struct hl_device *hdev); int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size); +int hl_fw_armcp_pci_counters_get(struct hl_device *hdev, + struct hl_info_pci_counters *counters); int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, u32 boot_err0_reg, bool skip_bmc, diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 5af1c03da473..4d838b1a3bbe 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -276,6 +276,41 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args) min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0; } +static int pci_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_info_pci_counters pci_counters = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int rc; + + if ((!max_size) || (!out)) + return -EINVAL; + + rc = hl_fw_armcp_pci_counters_get(hdev, &pci_counters); + if (rc) + return rc; + + return copy_to_user(out, &pci_counters, + min((size_t) max_size, sizeof(pci_counters))) ? -EFAULT : 0; +} + +static int clk_throttle_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_info_clk_throttle clk_throttle = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + clk_throttle.clk_throttling_reason = hdev->clk_throttling_reason; + + return copy_to_user(out, &clk_throttle, + min((size_t) max_size, sizeof(clk_throttle))) ? -EFAULT : 0; +} + static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) { struct hl_device *hdev = hpriv->hdev; @@ -360,6 +395,12 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_CS_COUNTERS: return cs_counters_info(hpriv, args); + case HL_INFO_PCI_COUNTERS: + return pci_counters_info(hpriv, args); + + case HL_INFO_CLK_THROTTLE_REASON: + return clk_throttle_info(hpriv, args); + default: dev_err(dev, "Invalid request %d\n", args->op); rc = -ENOTTY; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 4009b7df4caf..adb5c5594ac1 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -5653,21 +5653,25 @@ static void gaudi_print_clk_change_info(struct hl_device *hdev, { switch (event_type) { case GAUDI_EVENT_FIX_POWER_ENV_S: + hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER; dev_info_ratelimited(hdev->dev, "Clock throttling due to power consumption\n"); break; case GAUDI_EVENT_FIX_POWER_ENV_E: + hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER; dev_info_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n"); break; case GAUDI_EVENT_FIX_THERMAL_ENV_S: + hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL; dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n"); break; case GAUDI_EVENT_FIX_THERMAL_ENV_E: + hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL; dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n"); break; diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 33cd2ae653d2..954f2c022d33 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -4580,18 +4580,22 @@ static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type) { switch (event_type) { case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S: + hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER; dev_info_ratelimited(hdev->dev, "Clock throttling due to power consumption\n"); break; case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E: + hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER; dev_info_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n"); break; case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S: + hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL; dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n"); break; case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E: + hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL; dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n"); break; diff --git a/drivers/misc/habanalabs/include/common/armcp_if.h b/drivers/misc/habanalabs/include/common/armcp_if.h index 07f9972db28d..1403c937253c 100644 --- a/drivers/misc/habanalabs/include/common/armcp_if.h +++ b/drivers/misc/habanalabs/include/common/armcp_if.h @@ -243,6 +243,8 @@ enum armcp_packet_id { ARMCP_PACKET_TEMPERATURE_SET, /* sysfs */ ARMCP_PACKET_VOLTAGE_SET, /* sysfs */ ARMCP_PACKET_CURRENT_SET, /* sysfs */ + ARMCP_PACKET_PCIE_THROUGHPUT_GET, /* internal */ + ARMCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */ }; #define ARMCP_PACKET_FENCE_VAL 0xFE8CE7A5 @@ -277,6 +279,9 @@ struct armcp_packet { __u8 pad; /* unused */ }; + /* For any general request */ + __le32 index; + /* For frequency get/set */ __le32 pll_index; @@ -344,6 +349,11 @@ enum armcp_pwm_attributes { armcp_pwm_enable }; +enum armcp_pcie_throughput_attributes { + armcp_pcie_throughput_tx, + armcp_pcie_throughput_rx +}; + /* Event Queue Packets */ struct eq_generic_event { diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index d5c4f983b7a8..ee13b919db35 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -264,6 +264,8 @@ enum hl_device_status { * HL_INFO_TIME_SYNC - Retrieve the device's time alongside the host's time * for synchronization. * HL_INFO_CS_COUNTERS - Retrieve command submission counters + * HL_INFO_PCI_COUNTERS - Retrieve PCI counters + * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason */ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 @@ -276,6 +278,8 @@ enum hl_device_status { #define HL_INFO_RESET_COUNT 9 #define HL_INFO_TIME_SYNC 10 #define HL_INFO_CS_COUNTERS 11 +#define HL_INFO_PCI_COUNTERS 12 +#define HL_INFO_CLK_THROTTLE_REASON 13 #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 @@ -340,6 +344,29 @@ struct hl_info_time_sync { __u64 host_time; }; +/** + * struct hl_info_pci_counters - pci counters + * @rx_throughput: PCI rx throughput KBps + * @tx_throughput: PCI tx throughput KBps + * @replay_cnt: PCI replay counter + */ +struct hl_info_pci_counters { + __u64 rx_throughput; + __u64 tx_throughput; + __u64 replay_cnt; +}; + +#define HL_CLK_THROTTLE_POWER 0x1 +#define HL_CLK_THROTTLE_THERMAL 0x2 + +/** + * struct hl_info_clk_throttle - clock throttling reason + * @clk_throttling_reason: each bit represents a clk throttling reason + */ +struct hl_info_clk_throttle { + __u32 clk_throttling_reason; +}; + /** * struct hl_info_cs_counters - command submission counters * @out_of_mem_drop_cnt: dropped due to memory allocation issue -- cgit v1.2.3 From 843839bec3a304f8313d6ae554f618a91e52731a Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Sun, 19 Jul 2020 11:08:09 +0300 Subject: habanalabs: expose sync manager resources allocation in INFO IOCTL Although the driver defines the first user-available sync manager object and monitor in habanalabs.h, we would like to also expose this information via the INFO IOCTL so the runtime can get this information dynamically. This is because in future ASICs we won't need to define it statically. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 6 +++++ drivers/misc/habanalabs/common/habanalabs_ioctl.c | 30 ++++++++++++++++++++++- drivers/misc/habanalabs/gaudi/gaudi.c | 7 ++++++ include/uapi/misc/habanalabs.h | 23 +++++++++++++++++ 4 files changed, 65 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 2c9fcb513215..caced12f278f 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -65,6 +65,8 @@ #define HL_PCI_NUM_BARS 6 +#define HL_MAX_DCORES 4 + /** * struct pgt_info - MMU hop page info. * @node: hash linked-list node for the pgts shadow hash of pgts. @@ -291,6 +293,8 @@ struct hl_mmu_properties { * @max_queues: maximum amount of queues in the system * @sync_stream_first_sob: first sync object available for sync stream use * @sync_stream_first_mon: first monitor available for sync stream use + * @first_available_user_sob: first sob available for the user + * @first_available_user_mon: first monitor available for the user * @tpc_enabled_mask: which TPCs are enabled. * @completion_queues_count: number of completion queues. */ @@ -337,6 +341,8 @@ struct asic_fixed_properties { u32 max_queues; u16 sync_stream_first_sob; u16 sync_stream_first_mon; + u16 first_available_user_sob[HL_MAX_DCORES]; + u16 first_available_user_mon[HL_MAX_DCORES]; u8 tpc_enabled_mask; u8 completion_queues_count; }; diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 4d838b1a3bbe..fe6c5534d378 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -8,6 +8,7 @@ #include #include "habanalabs.h" +#include #include #include #include @@ -314,7 +315,7 @@ static int clk_throttle_info(struct hl_fpriv *hpriv, struct hl_info_args *args) static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) { struct hl_device *hdev = hpriv->hdev; - struct hl_info_cs_counters cs_counters = {0}; + struct hl_info_cs_counters cs_counters = { {0} }; u32 max_size = args->return_size; void __user *out = (void __user *) (uintptr_t) args->return_pointer; @@ -332,6 +333,30 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0; } +static int sync_manager_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_info_sync_manager sm_info = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + if (args->dcore_id >= HL_MAX_DCORES) + return -EINVAL; + + sm_info.first_available_sync_object = + prop->first_available_user_sob[args->dcore_id]; + sm_info.first_available_monitor = + prop->first_available_user_mon[args->dcore_id]; + + + return copy_to_user(out, &sm_info, min_t(size_t, (size_t) max_size, + sizeof(sm_info))) ? -EFAULT : 0; +} + static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, struct device *dev) { @@ -401,6 +426,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_CLK_THROTTLE_REASON: return clk_throttle_info(hpriv, args); + case HL_INFO_SYNC_MANAGER: + return sync_manager_info(hpriv, args); + default: dev_err(dev, "Invalid request %d\n", args->op); rc = -ENOTTY; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index adb5c5594ac1..45ba3a5f5b14 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -367,6 +367,7 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid); static int gaudi_get_fixed_properties(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; + u32 num_sync_stream_queues = 0; int i; prop->max_queues = GAUDI_QUEUE_ID_SIZE; @@ -383,6 +384,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) prop->hw_queues_props[i].driver_only = 0; prop->hw_queues_props[i].requires_kernel_cb = 1; prop->hw_queues_props[i].supports_sync_stream = 1; + num_sync_stream_queues++; } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) { prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; prop->hw_queues_props[i].driver_only = 1; @@ -469,6 +471,11 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) prop->max_pending_cs = GAUDI_MAX_PENDING_CS; + prop->first_available_user_sob[HL_GAUDI_WS_DCORE] = + num_sync_stream_queues * HL_RSVD_SOBS; + prop->first_available_user_mon[HL_GAUDI_WS_DCORE] = + num_sync_stream_queues * HL_RSVD_MONS; + return 0; } diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index ee13b919db35..ca6dc1fc250e 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -266,6 +266,7 @@ enum hl_device_status { * HL_INFO_CS_COUNTERS - Retrieve command submission counters * HL_INFO_PCI_COUNTERS - Retrieve PCI counters * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason + * HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore */ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 @@ -280,6 +281,7 @@ enum hl_device_status { #define HL_INFO_CS_COUNTERS 11 #define HL_INFO_PCI_COUNTERS 12 #define HL_INFO_CLK_THROTTLE_REASON 13 +#define HL_INFO_SYNC_MANAGER 14 #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 @@ -367,6 +369,16 @@ struct hl_info_clk_throttle { __u32 clk_throttling_reason; }; +/** + * struct hl_info_sync_manager - sync manager information + * @first_available_sync_object: first available sob + * @first_available_monitor: first available monitor + */ +struct hl_info_sync_manager { + __u32 first_available_sync_object; + __u32 first_available_monitor; +}; + /** * struct hl_info_cs_counters - command submission counters * @out_of_mem_drop_cnt: dropped due to memory allocation issue @@ -386,6 +398,13 @@ struct hl_info_cs_counters { struct hl_cs_counters ctx_cs_counters; }; +enum gaudi_dcores { + HL_GAUDI_WS_DCORE, + HL_GAUDI_WN_DCORE, + HL_GAUDI_EN_DCORE, + HL_GAUDI_ES_DCORE +}; + struct hl_info_args { /* Location of relevant struct in userspace */ __u64 return_pointer; @@ -402,6 +421,10 @@ struct hl_info_args { __u32 op; union { + /* Dcore id for which the information is relevant. + * For Gaudi refer to 'enum gaudi_dcores' + */ + __u32 dcore_id; /* Context ID - Currently not in use */ __u32 ctx_id; /* Period value for utilization rate (100ms - 1000ms, in 100ms -- cgit v1.2.3 From d90416c84d86ff78a2181f135d72d564430107b8 Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Wed, 12 Aug 2020 17:20:13 +0300 Subject: habanalabs: extend busy engines mask to 64 bits change busy engines bitmask to 64 bits in order to represent more engines, needed for future ASIC support. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 2 +- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 2 +- drivers/misc/habanalabs/gaudi/gaudi.c | 2 +- drivers/misc/habanalabs/goya/goya.c | 2 +- include/uapi/misc/habanalabs.h | 6 ++++++ 5 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 2fd268e4cf10..fbdf105c4bb2 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -754,7 +754,7 @@ struct hl_asic_funcs { void (*set_clock_gating)(struct hl_device *hdev); void (*disable_clock_gating)(struct hl_device *hdev); int (*debug_coresight)(struct hl_device *hdev, void *data); - bool (*is_device_idle)(struct hl_device *hdev, u32 *mask, + bool (*is_device_idle)(struct hl_device *hdev, u64 *mask, struct seq_file *s); int (*soft_reset_late_init)(struct hl_device *hdev); void (*hw_queues_lock)(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index fe6c5534d378..a94800014243 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -132,7 +132,7 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args) return -EINVAL; hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, - &hw_idle.busy_engines_mask, NULL); + &hw_idle.busy_engines_mask_ext, NULL); return copy_to_user(out, &hw_idle, min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index a0932261e67c..ba964a316b0b 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -6083,7 +6083,7 @@ static int gaudi_armcp_info_get(struct hl_device *hdev) return 0; } -static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask, +static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask, struct seq_file *s) { struct gaudi_device *gaudi = hdev->asic_specific; diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index ac4d44fa56e4..5fb3565c80c5 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -5148,7 +5148,7 @@ static void goya_disable_clock_gating(struct hl_device *hdev) /* clock gating not supported in Goya */ } -static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask, +static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask, struct seq_file *s) { const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n"; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index ca6dc1fc250e..693081728ef3 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -319,6 +319,12 @@ struct hl_info_hw_idle { * Bits definition is according to `enum _enging_id'. */ __u32 busy_engines_mask; + + /* + * Extended Bitmask of busy engines. + * Bits definition is according to `enum _enging_id'. + */ + __u64 busy_engines_mask_ext; }; struct hl_info_device_status { -- cgit v1.2.3 From 9f3064913e1b9b4153accbd33aaf1983be92c569 Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Sun, 9 Aug 2020 16:25:53 +0300 Subject: habanalabs: add support for getting device total energy Add driver implementation for reading the total energy consumption from the device ARM FW. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 24 +++++++++++++++++++++++ drivers/misc/habanalabs/common/habanalabs.h | 2 ++ drivers/misc/habanalabs/common/habanalabs_ioctl.c | 24 +++++++++++++++++++++++ drivers/misc/habanalabs/include/common/armcp_if.h | 1 + include/uapi/misc/habanalabs.h | 10 ++++++++++ 5 files changed, 61 insertions(+) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 61f5edc96e16..eb66ff532c6a 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -411,6 +411,30 @@ int hl_fw_armcp_pci_counters_get(struct hl_device *hdev, return rc; } +int hl_fw_armcp_total_energy_get(struct hl_device *hdev, + u64 *total_energy) +{ + struct armcp_packet pkt = {}; + long result; + int rc; + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_TOTAL_ENERGY_GET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_ARMCP_INFO_TIMEOUT_USEC, &result); + if (rc) { + dev_err(hdev->dev, + "Failed to handle ArmCP total energy pkt, error %d\n", + rc); + return rc; + } + + *total_energy = result; + + return rc; +} + static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg) { u32 err_val; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 5ef487a3b843..6577a73e3227 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1852,6 +1852,8 @@ int hl_fw_armcp_info_get(struct hl_device *hdev); int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size); int hl_fw_armcp_pci_counters_get(struct hl_device *hdev, struct hl_info_pci_counters *counters); +int hl_fw_armcp_total_energy_get(struct hl_device *hdev, + u64 *total_energy); int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, u32 boot_err0_reg, bool skip_bmc, diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index a94800014243..18ee14b4b0e1 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -357,6 +357,27 @@ static int sync_manager_info(struct hl_fpriv *hpriv, struct hl_info_args *args) sizeof(sm_info))) ? -EFAULT : 0; } +static int total_energy_consumption_info(struct hl_fpriv *hpriv, + struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_info_energy total_energy = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int rc; + + if ((!max_size) || (!out)) + return -EINVAL; + + rc = hl_fw_armcp_total_energy_get(hdev, + &total_energy.total_energy_consumption); + if (rc) + return rc; + + return copy_to_user(out, &total_energy, + min((size_t) max_size, sizeof(total_energy))) ? -EFAULT : 0; +} + static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, struct device *dev) { @@ -429,6 +450,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_SYNC_MANAGER: return sync_manager_info(hpriv, args); + case HL_INFO_TOTAL_ENERGY: + return total_energy_consumption_info(hpriv, args); + default: dev_err(dev, "Invalid request %d\n", args->op); rc = -ENOTTY; diff --git a/drivers/misc/habanalabs/include/common/armcp_if.h b/drivers/misc/habanalabs/include/common/armcp_if.h index 1403c937253c..4d78898524e9 100644 --- a/drivers/misc/habanalabs/include/common/armcp_if.h +++ b/drivers/misc/habanalabs/include/common/armcp_if.h @@ -245,6 +245,7 @@ enum armcp_packet_id { ARMCP_PACKET_CURRENT_SET, /* sysfs */ ARMCP_PACKET_PCIE_THROUGHPUT_GET, /* internal */ ARMCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */ + ARMCP_PACKET_TOTAL_ENERGY_GET, /* internal */ }; #define ARMCP_PACKET_FENCE_VAL 0xFE8CE7A5 diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 693081728ef3..6803991726e8 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -267,6 +267,7 @@ enum hl_device_status { * HL_INFO_PCI_COUNTERS - Retrieve PCI counters * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason * HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore + * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption */ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 @@ -282,6 +283,7 @@ enum hl_device_status { #define HL_INFO_PCI_COUNTERS 12 #define HL_INFO_CLK_THROTTLE_REASON 13 #define HL_INFO_SYNC_MANAGER 14 +#define HL_INFO_TOTAL_ENERGY 15 #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 @@ -375,6 +377,14 @@ struct hl_info_clk_throttle { __u32 clk_throttling_reason; }; +/** + * struct hl_info_energy - device energy information + * @total_energy_consumption: total device energy consumption + */ +struct hl_info_energy { + __u64 total_energy_consumption; +}; + /** * struct hl_info_sync_manager - sync manager information * @first_available_sync_object: first available sob -- cgit v1.2.3 From 2f55342c5e4d3ea94c0b8237f3ad26963269f90f Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 15 Aug 2020 16:28:10 +0300 Subject: habanalabs: replace armcp with the generic cpucp ArmCP mandates that the device CPU is always an ARM processor, which might be wrong in the future. Most of this change is an internal renaming of variables, functions and defines but there are two entries in sysfs which have armcp in their names. Add identical cpucp entries but don't remove yet the armcp entries. Those will be deprecated next year. Add the documentation about it in sysfs documentation. Signed-off-by: Moti Haimovski Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- Documentation/ABI/testing/sysfs-driver-habanalabs | 18 +- drivers/misc/habanalabs/common/debugfs.c | 18 +- drivers/misc/habanalabs/common/device.c | 2 +- drivers/misc/habanalabs/common/firmware_if.c | 127 ++++--- drivers/misc/habanalabs/common/habanalabs.h | 20 +- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 12 +- drivers/misc/habanalabs/common/hwmon.c | 60 ++-- drivers/misc/habanalabs/common/irq.c | 2 +- drivers/misc/habanalabs/common/sysfs.c | 60 ++-- drivers/misc/habanalabs/gaudi/gaudi.c | 30 +- drivers/misc/habanalabs/gaudi/gaudiP.h | 4 +- drivers/misc/habanalabs/goya/goya.c | 34 +- drivers/misc/habanalabs/goya/goyaP.h | 2 +- drivers/misc/habanalabs/include/common/armcp_if.h | 418 ---------------------- drivers/misc/habanalabs/include/common/cpucp_if.h | 417 +++++++++++++++++++++ include/uapi/misc/habanalabs.h | 4 +- 16 files changed, 631 insertions(+), 597 deletions(-) delete mode 100644 drivers/misc/habanalabs/include/common/armcp_if.h create mode 100644 drivers/misc/habanalabs/include/common/cpucp_if.h (limited to 'include/uapi') diff --git a/Documentation/ABI/testing/sysfs-driver-habanalabs b/Documentation/ABI/testing/sysfs-driver-habanalabs index 1a14bf9b22ba..169ae4b2a180 100644 --- a/Documentation/ABI/testing/sysfs-driver-habanalabs +++ b/Documentation/ABI/testing/sysfs-driver-habanalabs @@ -2,13 +2,17 @@ What: /sys/class/habanalabs/hl/armcp_kernel_ver Date: Jan 2019 KernelVersion: 5.1 Contact: oded.gabbay@gmail.com -Description: Version of the Linux kernel running on the device's CPU +Description: Version of the Linux kernel running on the device's CPU. + Will be DEPRECATED in Linux kernel version 5.10, and be + replaced with cpucp_kernel_ver What: /sys/class/habanalabs/hl/armcp_ver Date: Jan 2019 KernelVersion: 5.1 Contact: oded.gabbay@gmail.com Description: Version of the application running on the device's CPU + Will be DEPRECATED in Linux kernel version 5.10, and be + replaced with cpucp_ver What: /sys/class/habanalabs/hl/clk_max_freq_mhz Date: Jun 2019 @@ -33,6 +37,18 @@ KernelVersion: 5.1 Contact: oded.gabbay@gmail.com Description: Version of the Device's CPLD F/W +What: /sys/class/habanalabs/hl/cpucp_kernel_ver +Date: Oct 2020 +KernelVersion: 5.10 +Contact: oded.gabbay@gmail.com +Description: Version of the Linux kernel running on the device's CPU + +What: /sys/class/habanalabs/hl/cpucp_ver +Date: Oct 2020 +KernelVersion: 5.10 +Contact: oded.gabbay@gmail.com +Description: Version of the application running on the device's CPU + What: /sys/class/habanalabs/hl/device_type Date: Jan 2019 KernelVersion: 5.1 diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 4b416f64f6ec..c27c0f94c97a 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -21,7 +21,7 @@ static struct dentry *hl_debug_root; static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, u8 i2c_reg, long *val) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; if (hl_device_disabled_or_in_reset(hdev)) @@ -29,8 +29,8 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_RD << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_RD << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.i2c_bus = i2c_bus; pkt.i2c_addr = i2c_addr; pkt.i2c_reg = i2c_reg; @@ -47,7 +47,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, u8 i2c_reg, u32 val) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; if (hl_device_disabled_or_in_reset(hdev)) @@ -55,8 +55,8 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_WR << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_WR << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.i2c_bus = i2c_bus; pkt.i2c_addr = i2c_addr; pkt.i2c_reg = i2c_reg; @@ -73,7 +73,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; if (hl_device_disabled_or_in_reset(hdev)) @@ -81,8 +81,8 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state) memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_LED_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_LED_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.led_index = cpu_to_le32(led); pkt.value = cpu_to_le64(state); diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index dcb7f9ca7a67..6e916cc22a4c 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -871,7 +871,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, * so this message won't be sent */ if (hl_fw_send_pci_access_msg(hdev, - ARMCP_PACKET_DISABLE_PCI_ACCESS)) + CPUCP_PACKET_DISABLE_PCI_ACCESS)) dev_warn(hdev->dev, "Failed to disable PCI access by F/W\n"); } diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index eb66ff532c6a..f2a38e95359a 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -68,9 +68,9 @@ out: int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode) { - struct armcp_packet pkt = {}; + struct cpucp_packet pkt = {}; - pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(opcode << CPUCP_PKT_CTL_OPCODE_SHIFT); return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); @@ -79,7 +79,7 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode) int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, u16 len, u32 timeout, long *result) { - struct armcp_packet *pkt; + struct cpucp_packet *pkt; dma_addr_t pkt_dma_addr; u32 tmp; int rc = 0; @@ -111,7 +111,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, } rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp, - (tmp == ARMCP_PACKET_FENCE_VAL), 1000, + (tmp == CPUCP_PACKET_FENCE_VAL), 1000, timeout, true); hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); @@ -124,12 +124,12 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, tmp = le32_to_cpu(pkt->ctl); - rc = (tmp & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT; + rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT; if (rc) { dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n", rc, - (tmp & ARMCP_PKT_CTL_OPCODE_MASK) - >> ARMCP_PKT_CTL_OPCODE_SHIFT); + (tmp & CPUCP_PKT_CTL_OPCODE_MASK) + >> CPUCP_PKT_CTL_OPCODE_SHIFT); rc = -EIO; } else if (result) { *result = (long) le64_to_cpu(pkt->result); @@ -145,14 +145,14 @@ out: int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type) { - struct armcp_packet pkt; + struct cpucp_packet pkt; long result; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.value = cpu_to_le64(event_type); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), @@ -167,12 +167,12 @@ int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type) int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, size_t irq_arr_size) { - struct armcp_unmask_irq_arr_packet *pkt; + struct cpucp_unmask_irq_arr_packet *pkt; size_t total_pkt_size; long result; int rc; - total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) + + total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) + irq_arr_size; /* data should be aligned to 8 bytes in order to ArmCP to copy it */ @@ -191,8 +191,8 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0])); memcpy(&pkt->irqs, irq_arr, irq_arr_size); - pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << + CPUCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, total_pkt_size, 0, &result); @@ -207,19 +207,19 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, int hl_fw_test_cpu_queue(struct hl_device *hdev) { - struct armcp_packet test_pkt = {}; + struct cpucp_packet test_pkt = {}; long result; int rc; - test_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST << - ARMCP_PKT_CTL_OPCODE_SHIFT); - test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); + test_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST << + CPUCP_PKT_CTL_OPCODE_SHIFT); + test_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt, sizeof(test_pkt), 0, &result); if (!rc) { - if (result != ARMCP_PACKET_FENCE_VAL) + if (result != CPUCP_PACKET_FENCE_VAL) dev_err(hdev->dev, "CPU queue test failed (0x%08lX)\n", result); } else { @@ -251,61 +251,61 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, int hl_fw_send_heartbeat(struct hl_device *hdev) { - struct armcp_packet hb_pkt = {}; + struct cpucp_packet hb_pkt = {}; long result; int rc; - hb_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST << - ARMCP_PKT_CTL_OPCODE_SHIFT); - hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); + hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST << + CPUCP_PKT_CTL_OPCODE_SHIFT); + hb_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, sizeof(hb_pkt), 0, &result); - if ((rc) || (result != ARMCP_PACKET_FENCE_VAL)) + if ((rc) || (result != CPUCP_PACKET_FENCE_VAL)) rc = -EIO; return rc; } -int hl_fw_armcp_info_get(struct hl_device *hdev) +int hl_fw_cpucp_info_get(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; - struct armcp_packet pkt = {}; - void *armcp_info_cpu_addr; - dma_addr_t armcp_info_dma_addr; + struct cpucp_packet pkt = {}; + void *cpucp_info_cpu_addr; + dma_addr_t cpucp_info_dma_addr; long result; int rc; - armcp_info_cpu_addr = + cpucp_info_cpu_addr = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, - sizeof(struct armcp_info), - &armcp_info_dma_addr); - if (!armcp_info_cpu_addr) { + sizeof(struct cpucp_info), + &cpucp_info_dma_addr); + if (!cpucp_info_cpu_addr) { dev_err(hdev->dev, "Failed to allocate DMA memory for ArmCP info packet\n"); return -ENOMEM; } - memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info)); + memset(cpucp_info_cpu_addr, 0, sizeof(struct cpucp_info)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.addr = cpu_to_le64(armcp_info_dma_addr); - pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info)); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_INFO_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.addr = cpu_to_le64(cpucp_info_dma_addr); + pkt.data_max_size = cpu_to_le32(sizeof(struct cpucp_info)); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_ARMCP_INFO_TIMEOUT_USEC, &result); + HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { dev_err(hdev->dev, "Failed to handle ArmCP info pkt, error %d\n", rc); goto out; } - memcpy(&prop->armcp_info, armcp_info_cpu_addr, - sizeof(prop->armcp_info)); + memcpy(&prop->cpucp_info, cpucp_info_cpu_addr, + sizeof(prop->cpucp_info)); - rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors); + rc = hl_build_hwmon_channel_info(hdev, prop->cpucp_info.sensors); if (rc) { dev_err(hdev->dev, "Failed to build hwmon channel info, error %d\n", rc); @@ -315,14 +315,14 @@ int hl_fw_armcp_info_get(struct hl_device *hdev) out: hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, - sizeof(struct armcp_info), armcp_info_cpu_addr); + sizeof(struct cpucp_info), cpucp_info_cpu_addr); return rc; } int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) { - struct armcp_packet pkt = {}; + struct cpucp_packet pkt = {}; void *eeprom_info_cpu_addr; dma_addr_t eeprom_info_dma_addr; long result; @@ -339,13 +339,13 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) memset(eeprom_info_cpu_addr, 0, max_size); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_EEPROM_DATA_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.addr = cpu_to_le64(eeprom_info_dma_addr); pkt.data_max_size = cpu_to_le32(max_size); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_ARMCP_EEPROM_TIMEOUT_USEC, &result); + HL_CPUCP_EEPROM_TIMEOUT_USEC, &result); if (rc) { dev_err(hdev->dev, @@ -363,20 +363,20 @@ out: return rc; } -int hl_fw_armcp_pci_counters_get(struct hl_device *hdev, +int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, struct hl_info_pci_counters *counters) { - struct armcp_packet pkt = {}; + struct cpucp_packet pkt = {}; long result; int rc; - pkt.ctl = cpu_to_le32(ARMCP_PACKET_PCIE_THROUGHPUT_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); /* Fetch PCI rx counter */ - pkt.index = cpu_to_le32(armcp_pcie_throughput_rx); + pkt.index = cpu_to_le32(cpucp_pcie_throughput_rx); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_ARMCP_INFO_TIMEOUT_USEC, &result); + HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { dev_err(hdev->dev, "Failed to handle ArmCP PCI info pkt, error %d\n", rc); @@ -385,9 +385,9 @@ int hl_fw_armcp_pci_counters_get(struct hl_device *hdev, counters->rx_throughput = result; /* Fetch PCI tx counter */ - pkt.index = cpu_to_le32(armcp_pcie_throughput_tx); + pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_ARMCP_INFO_TIMEOUT_USEC, &result); + HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { dev_err(hdev->dev, "Failed to handle ArmCP PCI info pkt, error %d\n", rc); @@ -396,11 +396,11 @@ int hl_fw_armcp_pci_counters_get(struct hl_device *hdev, counters->tx_throughput = result; /* Fetch PCI replay counter */ - pkt.ctl = cpu_to_le32(ARMCP_PACKET_PCIE_REPLAY_CNT_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_ARMCP_INFO_TIMEOUT_USEC, &result); + HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { dev_err(hdev->dev, "Failed to handle ArmCP PCI info pkt, error %d\n", rc); @@ -411,21 +411,20 @@ int hl_fw_armcp_pci_counters_get(struct hl_device *hdev, return rc; } -int hl_fw_armcp_total_energy_get(struct hl_device *hdev, - u64 *total_energy) +int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy) { - struct armcp_packet pkt = {}; + struct cpucp_packet pkt = {}; long result; int rc; - pkt.ctl = cpu_to_le32(ARMCP_PACKET_TOTAL_ENERGY_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_TOTAL_ENERGY_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_ARMCP_INFO_TIMEOUT_USEC, &result); + HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { dev_err(hdev->dev, - "Failed to handle ArmCP total energy pkt, error %d\n", + "Failed to handle CpuCP total energy pkt, error %d\n", rc); return rc; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 6577a73e3227..6912f88a4b01 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -8,7 +8,7 @@ #ifndef HABANALABSP_H_ #define HABANALABSP_H_ -#include "../include/common/armcp_if.h" +#include "../include/common/cpucp_if.h" #include "../include/common/qman_if.h" #include @@ -34,8 +34,8 @@ #define HL_PLL_LOW_JOB_FREQ_USEC 5000000 /* 5 s */ -#define HL_ARMCP_INFO_TIMEOUT_USEC 10000000 /* 10s */ -#define HL_ARMCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */ +#define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */ +#define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */ #define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */ @@ -250,7 +250,7 @@ struct hl_mmu_properties { /** * struct asic_fixed_properties - ASIC specific immutable properties. * @hw_queues_props: H/W queues properties. - * @armcp_info: received various information from ArmCP regarding the H/W, e.g. + * @cpucp_info: received various information from CPU-CP regarding the H/W, e.g. * available sensors. * @uboot_ver: F/W U-boot version. * @preboot_ver: F/W Preboot version. @@ -301,7 +301,7 @@ struct hl_mmu_properties { */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; - struct armcp_info armcp_info; + struct cpucp_info cpucp_info; char uboot_ver[VERSION_MAX_LEN]; char preboot_ver[VERSION_MAX_LEN]; struct hl_mmu_properties dmmu; @@ -1588,7 +1588,7 @@ struct hl_device { u64 clock_gating_mask; atomic_t in_reset; enum hl_pll_frequency curr_pll_profile; - enum armcp_card_types card_type; + enum cpucp_card_types card_type; int cs_active_cnt; u32 major; u32 high_pll; @@ -1776,7 +1776,7 @@ int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq); uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms); int hl_build_hwmon_channel_info(struct hl_device *hdev, - struct armcp_sensor *sensors_arr); + struct cpucp_sensor *sensors_arr); int hl_sysfs_init(struct hl_device *hdev); void hl_sysfs_fini(struct hl_device *hdev); @@ -1848,11 +1848,11 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr); int hl_fw_send_heartbeat(struct hl_device *hdev); -int hl_fw_armcp_info_get(struct hl_device *hdev); +int hl_fw_cpucp_info_get(struct hl_device *hdev); int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size); -int hl_fw_armcp_pci_counters_get(struct hl_device *hdev, +int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, struct hl_info_pci_counters *counters); -int hl_fw_armcp_total_energy_get(struct hl_device *hdev, +int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy); int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 18ee14b4b0e1..07317ea49129 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -65,14 +65,14 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) hw_ip.dram_enabled = 1; hw_ip.num_of_events = prop->num_of_events; - memcpy(hw_ip.armcp_version, prop->armcp_info.armcp_version, + memcpy(hw_ip.cpucp_version, prop->cpucp_info.cpucp_version, min(VERSION_MAX_LEN, HL_INFO_VERSION_MAX_LEN)); - memcpy(hw_ip.card_name, prop->armcp_info.card_name, + memcpy(hw_ip.card_name, prop->cpucp_info.card_name, min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN)); - hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version); - hw_ip.module_id = le32_to_cpu(prop->armcp_info.card_location); + hw_ip.cpld_version = le32_to_cpu(prop->cpucp_info.cpld_version); + hw_ip.module_id = le32_to_cpu(prop->cpucp_info.card_location); hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr; hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf; @@ -288,7 +288,7 @@ static int pci_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) if ((!max_size) || (!out)) return -EINVAL; - rc = hl_fw_armcp_pci_counters_get(hdev, &pci_counters); + rc = hl_fw_cpucp_pci_counters_get(hdev, &pci_counters); if (rc) return rc; @@ -369,7 +369,7 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv, if ((!max_size) || (!out)) return -EINVAL; - rc = hl_fw_armcp_total_energy_get(hdev, + rc = hl_fw_cpucp_total_energy_get(hdev, &total_energy.total_energy_consumption); if (rc) return rc; diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c index b997336fa75f..2ac29cb2fe61 100644 --- a/drivers/misc/habanalabs/common/hwmon.c +++ b/drivers/misc/habanalabs/common/hwmon.c @@ -13,7 +13,7 @@ #define HWMON_NR_SENSOR_TYPES (hwmon_pwm + 1) int hl_build_hwmon_channel_info(struct hl_device *hdev, - struct armcp_sensor *sensors_arr) + struct cpucp_sensor *sensors_arr) { u32 counts[HWMON_NR_SENSOR_TYPES] = {0}; u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL}; @@ -24,7 +24,7 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, enum hwmon_sensor_types type; int rc, i, j; - for (i = 0 ; i < ARMCP_MAX_SENSORS ; i++) { + for (i = 0 ; i < CPUCP_MAX_SENSORS ; i++) { type = le32_to_cpu(sensors_arr[i].type); if ((type == 0) && (sensors_arr[i].flags == 0)) @@ -311,13 +311,13 @@ static const struct hwmon_ops hl_hwmon_ops = { int hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long *value) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEMPERATURE_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); @@ -337,13 +337,13 @@ int hl_get_temperature(struct hl_device *hdev, int hl_set_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long value) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEMPERATURE_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); pkt.value = __cpu_to_le64(value); @@ -362,13 +362,13 @@ int hl_set_temperature(struct hl_device *hdev, int hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long *value) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_VOLTAGE_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); @@ -388,13 +388,13 @@ int hl_get_voltage(struct hl_device *hdev, int hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr, long *value) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_CURRENT_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); @@ -414,13 +414,13 @@ int hl_get_current(struct hl_device *hdev, int hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr, long *value) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_FAN_SPEED_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_FAN_SPEED_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); @@ -440,13 +440,13 @@ int hl_get_fan_speed(struct hl_device *hdev, int hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long *value) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_PWM_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); @@ -466,13 +466,13 @@ int hl_get_pwm_info(struct hl_device *hdev, void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long value) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_PWM_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); pkt.value = cpu_to_le64(value); @@ -489,13 +489,13 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_VOLTAGE_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); pkt.value = __cpu_to_le64(value); @@ -514,13 +514,13 @@ int hl_set_voltage(struct hl_device *hdev, int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_CURRENT_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); pkt.value = __cpu_to_le64(value); @@ -549,7 +549,7 @@ int hl_hwmon_init(struct hl_device *hdev) hdev->hl_chip_info->ops = &hl_hwmon_ops; hdev->hwmon_dev = hwmon_device_register_with_info(dev, - prop->armcp_info.card_name, hdev, + prop->cpucp_info.card_name, hdev, hdev->hl_chip_info, NULL); if (IS_ERR(hdev->hwmon_dev)) { rc = PTR_ERR(hdev->hwmon_dev); diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c index c8db717023f5..d20e40a53d70 100644 --- a/drivers/misc/habanalabs/common/irq.c +++ b/drivers/misc/habanalabs/common/irq.c @@ -11,7 +11,7 @@ /** * struct hl_eqe_work - This structure is used to schedule work of EQ - * entry and armcp_reset event + * entry and cpucp_reset event * * @eq_work: workqueue object to run when EQ entry is received * @hdev: pointer to device structure diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c index 5ae484cc84cd..3ceae87016b1 100644 --- a/drivers/misc/habanalabs/common/sysfs.c +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -11,18 +11,18 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) { - struct armcp_packet pkt; + struct cpucp_packet pkt; long result; int rc; memset(&pkt, 0, sizeof(pkt)); if (curr) - pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_CURR_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); else - pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.pll_index = cpu_to_le32(pll_index); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), @@ -40,13 +40,13 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.pll_index = cpu_to_le32(pll_index); pkt.value = cpu_to_le64(freq); @@ -61,14 +61,14 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) u64 hl_get_max_power(struct hl_device *hdev) { - struct armcp_packet pkt; + struct cpucp_packet pkt; long result; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result); @@ -83,13 +83,13 @@ u64 hl_get_max_power(struct hl_device *hdev) void hl_set_max_power(struct hl_device *hdev) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.value = cpu_to_le64(hdev->max_power); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), @@ -112,7 +112,7 @@ static ssize_t armcp_kernel_ver_show(struct device *dev, { struct hl_device *hdev = dev_get_drvdata(dev); - return sprintf(buf, "%s", hdev->asic_prop.armcp_info.kernel_version); + return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.kernel_version); } static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr, @@ -120,7 +120,7 @@ static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr, { struct hl_device *hdev = dev_get_drvdata(dev); - return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.armcp_version); + return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version); } static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr, @@ -129,7 +129,23 @@ static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr, struct hl_device *hdev = dev_get_drvdata(dev); return sprintf(buf, "0x%08x\n", - hdev->asic_prop.armcp_info.cpld_version); + hdev->asic_prop.cpucp_info.cpld_version); +} + +static ssize_t cpucp_kernel_ver_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.kernel_version); +} + +static ssize_t cpucp_ver_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version); } static ssize_t infineon_ver_show(struct device *dev, @@ -138,7 +154,7 @@ static ssize_t infineon_ver_show(struct device *dev, struct hl_device *hdev = dev_get_drvdata(dev); return sprintf(buf, "0x%04x\n", - hdev->asic_prop.armcp_info.infineon_version); + hdev->asic_prop.cpucp_info.infineon_version); } static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr, @@ -146,7 +162,7 @@ static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr, { struct hl_device *hdev = dev_get_drvdata(dev); - return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.fuse_version); + return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.fuse_version); } static ssize_t thermal_ver_show(struct device *dev, @@ -154,7 +170,7 @@ static ssize_t thermal_ver_show(struct device *dev, { struct hl_device *hdev = dev_get_drvdata(dev); - return sprintf(buf, "%s", hdev->asic_prop.armcp_info.thermal_version); + return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.thermal_version); } static ssize_t preboot_btl_ver_show(struct device *dev, @@ -356,6 +372,8 @@ out: static DEVICE_ATTR_RO(armcp_kernel_ver); static DEVICE_ATTR_RO(armcp_ver); static DEVICE_ATTR_RO(cpld_ver); +static DEVICE_ATTR_RO(cpucp_kernel_ver); +static DEVICE_ATTR_RO(cpucp_ver); static DEVICE_ATTR_RO(device_type); static DEVICE_ATTR_RO(fuse_ver); static DEVICE_ATTR_WO(hard_reset); @@ -380,6 +398,8 @@ static struct attribute *hl_dev_attrs[] = { &dev_attr_armcp_kernel_ver.attr, &dev_attr_armcp_ver.attr, &dev_attr_cpld_ver.attr, + &dev_attr_cpucp_kernel_ver.attr, + &dev_attr_cpucp_ver.attr, &dev_attr_device_type.attr, &dev_attr_fuse_ver.attr, &dev_attr_hard_reset.attr, diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 4a4327d9cbbf..076a7697f85d 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -359,7 +359,7 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id); static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev); -static int gaudi_armcp_info_get(struct hl_device *hdev); +static int gaudi_cpucp_info_get(struct hl_device *hdev); static void gaudi_disable_clock_gating(struct hl_device *hdev); static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid); @@ -465,7 +465,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; - strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME, + strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); prop->max_pending_cs = GAUDI_MAX_PENDING_CS; @@ -786,13 +786,13 @@ static int gaudi_late_init(struct hl_device *hdev) struct gaudi_device *gaudi = hdev->asic_specific; int rc; - rc = gaudi->armcp_info_get(hdev); + rc = gaudi->cpucp_info_get(hdev); if (rc) { - dev_err(hdev->dev, "Failed to get armcp info\n"); + dev_err(hdev->dev, "Failed to get cpucp info\n"); return rc; } - rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS); + rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS); if (rc) { dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); return rc; @@ -817,7 +817,7 @@ static int gaudi_late_init(struct hl_device *hdev) return 0; disable_pci_access: - hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); + hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS); return rc; } @@ -987,7 +987,7 @@ static int gaudi_sw_init(struct hl_device *hdev) } } - gaudi->armcp_info_get = gaudi_armcp_info_get; + gaudi->cpucp_info_get = gaudi_cpucp_info_get; gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ; @@ -3078,7 +3078,7 @@ static int gaudi_suspend(struct hl_device *hdev) { int rc; - rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); + rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS); if (rc) dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); @@ -6053,7 +6053,7 @@ static int gaudi_send_heartbeat(struct hl_device *hdev) return hl_fw_send_heartbeat(hdev); } -static int gaudi_armcp_info_get(struct hl_device *hdev) +static int gaudi_cpucp_info_get(struct hl_device *hdev) { struct gaudi_device *gaudi = hdev->asic_specific; struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -6062,19 +6062,19 @@ static int gaudi_armcp_info_get(struct hl_device *hdev) if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) return 0; - rc = hl_fw_armcp_info_get(hdev); + rc = hl_fw_cpucp_info_get(hdev); if (rc) return rc; - if (!strlen(prop->armcp_info.card_name)) - strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME, + if (!strlen(prop->cpucp_info.card_name)) + strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); - hdev->card_type = le32_to_cpu(hdev->asic_prop.armcp_info.card_type); + hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); - if (hdev->card_type == armcp_card_type_pci) + if (hdev->card_type == cpucp_card_type_pci) prop->max_power_default = MAX_POWER_DEFAULT_PCI; - else if (hdev->card_type == armcp_card_type_pmc) + else if (hdev->card_type == cpucp_card_type_pmc) prop->max_power_default = MAX_POWER_DEFAULT_PMC; hdev->max_power = prop->max_power_default; diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index 1368f6298c80..b86eb98b145c 100644 --- a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -216,7 +216,7 @@ struct gaudi_internal_qman_info { /** * struct gaudi_device - ASIC specific manage structure. - * @armcp_info_get: get information on device from ArmCP + * @cpucp_info_get: get information on device from CPU-CP * @hw_queues_lock: protects the H/W queues from concurrent access. * @clk_gate_mutex: protects code areas that require clock gating to be disabled * temporarily @@ -239,7 +239,7 @@ struct gaudi_internal_qman_info { * 8-bit value so use u8. */ struct gaudi_device { - int (*armcp_info_get)(struct hl_device *hdev); + int (*cpucp_info_get)(struct hl_device *hdev); /* TODO: remove hw_queues_lock after moving to scheduler code */ spinlock_t hw_queues_lock; diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 5fb3565c80c5..c41f2917863b 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -449,7 +449,7 @@ int goya_get_fixed_properties(struct hl_device *hdev) prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; - strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME, + strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); prop->max_pending_cs = GOYA_MAX_PENDING_CS; @@ -727,9 +727,9 @@ int goya_late_init(struct hl_device *hdev) if (rc) return rc; - rc = goya_armcp_info_get(hdev); + rc = goya_cpucp_info_get(hdev); if (rc) { - dev_err(hdev->dev, "Failed to get armcp info %d\n", rc); + dev_err(hdev->dev, "Failed to get cpucp info %d\n", rc); return rc; } @@ -739,7 +739,7 @@ int goya_late_init(struct hl_device *hdev) */ WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size)); - rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS); + rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS); if (rc) { dev_err(hdev->dev, "Failed to enable PCI access from CPU %d\n", rc); @@ -2648,7 +2648,7 @@ int goya_suspend(struct hl_device *hdev) { int rc; - rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); + rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS); if (rc) dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); @@ -4500,14 +4500,14 @@ static void goya_print_irq_info(struct hl_device *hdev, u16 event_type, static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, size_t irq_arr_size) { - struct armcp_unmask_irq_arr_packet *pkt; + struct cpucp_unmask_irq_arr_packet *pkt; size_t total_pkt_size; long result; int rc; int irq_num_entries, irq_arr_index; __le32 *goya_irq_arr; - total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) + + total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) + irq_arr_size; /* data should be aligned to 8 bytes in order to ArmCP to copy it */ @@ -4534,8 +4534,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, goya_irq_arr[irq_arr_index] = cpu_to_le32(irq_arr[irq_arr_index]); - pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << + CPUCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, total_pkt_size, 0, &result); @@ -4560,14 +4560,14 @@ static int goya_soft_reset_late_init(struct hl_device *hdev) static int goya_unmask_irq(struct hl_device *hdev, u16 event_type) { - struct armcp_packet pkt; + struct cpucp_packet pkt; long result; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.value = cpu_to_le64(event_type); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), @@ -5103,7 +5103,7 @@ int goya_send_heartbeat(struct hl_device *hdev) return hl_fw_send_heartbeat(hdev); } -int goya_armcp_info_get(struct hl_device *hdev) +int goya_cpucp_info_get(struct hl_device *hdev) { struct goya_device *goya = hdev->asic_specific; struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -5113,11 +5113,11 @@ int goya_armcp_info_get(struct hl_device *hdev) if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) return 0; - rc = hl_fw_armcp_info_get(hdev); + rc = hl_fw_cpucp_info_get(hdev); if (rc) return rc; - dram_size = le64_to_cpu(prop->armcp_info.dram_size); + dram_size = le64_to_cpu(prop->cpucp_info.dram_size); if (dram_size) { if ((!is_power_of_2(dram_size)) || (dram_size < DRAM_PHYS_DEFAULT_SIZE)) { @@ -5131,8 +5131,8 @@ int goya_armcp_info_get(struct hl_device *hdev) prop->dram_end_address = prop->dram_base_address + dram_size; } - if (!strlen(prop->armcp_info.card_name)) - strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME, + if (!strlen(prop->cpucp_info.card_name)) + strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); return 0; diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index bb7474ee9784..09b4006d4dc3 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -207,7 +207,7 @@ void goya_set_max_power(struct hl_device *hdev, u64 value); void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq); void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_attr_grp); -int goya_armcp_info_get(struct hl_device *hdev); +int goya_cpucp_info_get(struct hl_device *hdev); int goya_debug_coresight(struct hl_device *hdev, void *data); void goya_halt_coresight(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/include/common/armcp_if.h b/drivers/misc/habanalabs/include/common/armcp_if.h deleted file mode 100644 index 4d78898524e9..000000000000 --- a/drivers/misc/habanalabs/include/common/armcp_if.h +++ /dev/null @@ -1,418 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright 2016-2020 HabanaLabs, Ltd. - * All Rights Reserved. - * - */ - -#ifndef ARMCP_IF_H -#define ARMCP_IF_H - -#include - -/* - * EVENT QUEUE - */ - -struct hl_eq_header { - __le32 reserved; - __le32 ctl; -}; - -struct hl_eq_ecc_data { - __le64 ecc_address; - __le64 ecc_syndrom; - __u8 memory_wrapper_idx; - __u8 pad[7]; -}; - -struct hl_eq_entry { - struct hl_eq_header hdr; - union { - struct hl_eq_ecc_data ecc_data; - __le64 data[7]; - }; -}; - -#define HL_EQ_ENTRY_SIZE sizeof(struct hl_eq_entry) - -#define EQ_CTL_READY_SHIFT 31 -#define EQ_CTL_READY_MASK 0x80000000 - -#define EQ_CTL_EVENT_TYPE_SHIFT 16 -#define EQ_CTL_EVENT_TYPE_MASK 0x03FF0000 - -enum pq_init_status { - PQ_INIT_STATUS_NA = 0, - PQ_INIT_STATUS_READY_FOR_CP, - PQ_INIT_STATUS_READY_FOR_HOST, - PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI -}; - -/* - * ArmCP Primary Queue Packets - * - * During normal operation, the host's kernel driver needs to send various - * messages to ArmCP, usually either to SET some value into a H/W periphery or - * to GET the current value of some H/W periphery. For example, SET the - * frequency of MME/TPC and GET the value of the thermal sensor. - * - * These messages can be initiated either by the User application or by the - * host's driver itself, e.g. power management code. In either case, the - * communication from the host's driver to ArmCP will *always* be in - * synchronous mode, meaning that the host will send a single message and poll - * until the message was acknowledged and the results are ready (if results are - * needed). - * - * This means that only a single message can be sent at a time and the host's - * driver must wait for its result before sending the next message. Having said - * that, because these are control messages which are sent in a relatively low - * frequency, this limitation seems acceptable. It's important to note that - * in case of multiple devices, messages to different devices *can* be sent - * at the same time. - * - * The message, inputs/outputs (if relevant) and fence object will be located - * on the device DDR at an address that will be determined by the host's driver. - * During device initialization phase, the host will pass to ArmCP that address. - * Most of the message types will contain inputs/outputs inside the message - * itself. The common part of each message will contain the opcode of the - * message (its type) and a field representing a fence object. - * - * When the host's driver wishes to send a message to ArmCP, it will write the - * message contents to the device DDR, clear the fence object and then write the - * value 484 to the mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR register to issue - * the 484 interrupt-id to the ARM core. - * - * Upon receiving the 484 interrupt-id, ArmCP will read the message from the - * DDR. In case the message is a SET operation, ArmCP will first perform the - * operation and then write to the fence object on the device DDR. In case the - * message is a GET operation, ArmCP will first fill the results section on the - * device DDR and then write to the fence object. If an error occurred, ArmCP - * will fill the rc field with the right error code. - * - * In the meantime, the host's driver will poll on the fence object. Once the - * host sees that the fence object is signaled, it will read the results from - * the device DDR (if relevant) and resume the code execution in the host's - * driver. - * - * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8 - * so the value being put by the host's driver matches the value read by ArmCP - * - * Non-QMAN packets should be limited to values 1 through (2^8 - 1) - * - * Detailed description: - * - * ARMCP_PACKET_DISABLE_PCI_ACCESS - - * After receiving this packet the embedded CPU must NOT issue PCI - * transactions (read/write) towards the Host CPU. This also include - * sending MSI-X interrupts. - * This packet is usually sent before the device is moved to D3Hot state. - * - * ARMCP_PACKET_ENABLE_PCI_ACCESS - - * After receiving this packet the embedded CPU is allowed to issue PCI - * transactions towards the Host CPU, including sending MSI-X interrupts. - * This packet is usually send after the device is moved to D0 state. - * - * ARMCP_PACKET_TEMPERATURE_GET - - * Fetch the current temperature / Max / Max Hyst / Critical / - * Critical Hyst of a specified thermal sensor. The packet's - * arguments specify the desired sensor and the field to get. - * - * ARMCP_PACKET_VOLTAGE_GET - - * Fetch the voltage / Max / Min of a specified sensor. The packet's - * arguments specify the sensor and type. - * - * ARMCP_PACKET_CURRENT_GET - - * Fetch the current / Max / Min of a specified sensor. The packet's - * arguments specify the sensor and type. - * - * ARMCP_PACKET_FAN_SPEED_GET - - * Fetch the speed / Max / Min of a specified fan. The packet's - * arguments specify the sensor and type. - * - * ARMCP_PACKET_PWM_GET - - * Fetch the pwm value / mode of a specified pwm. The packet's - * arguments specify the sensor and type. - * - * ARMCP_PACKET_PWM_SET - - * Set the pwm value / mode of a specified pwm. The packet's - * arguments specify the sensor, type and value. - * - * ARMCP_PACKET_FREQUENCY_SET - - * Set the frequency of a specified PLL. The packet's arguments specify - * the PLL and the desired frequency. The actual frequency in the device - * might differ from the requested frequency. - * - * ARMCP_PACKET_FREQUENCY_GET - - * Fetch the frequency of a specified PLL. The packet's arguments specify - * the PLL. - * - * ARMCP_PACKET_LED_SET - - * Set the state of a specified led. The packet's arguments - * specify the led and the desired state. - * - * ARMCP_PACKET_I2C_WR - - * Write 32-bit value to I2C device. The packet's arguments specify the - * I2C bus, address and value. - * - * ARMCP_PACKET_I2C_RD - - * Read 32-bit value from I2C device. The packet's arguments specify the - * I2C bus and address. - * - * ARMCP_PACKET_INFO_GET - - * Fetch information from the device as specified in the packet's - * structure. The host's driver passes the max size it allows the ArmCP to - * write to the structure, to prevent data corruption in case of - * mismatched driver/FW versions. - * - * ARMCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed - * - * ARMCP_PACKET_UNMASK_RAZWI_IRQ - - * Unmask the given IRQ. The IRQ number is specified in the value field. - * The packet is sent after receiving an interrupt and printing its - * relevant information. - * - * ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY - - * Unmask the given IRQs. The IRQs numbers are specified in an array right - * after the armcp_packet structure, where its first element is the array - * length. The packet is sent after a soft reset was done in order to - * handle any interrupts that were sent during the reset process. - * - * ARMCP_PACKET_TEST - - * Test packet for ArmCP connectivity. The CPU will put the fence value - * in the result field. - * - * ARMCP_PACKET_FREQUENCY_CURR_GET - - * Fetch the current frequency of a specified PLL. The packet's arguments - * specify the PLL. - * - * ARMCP_PACKET_MAX_POWER_GET - - * Fetch the maximal power of the device. - * - * ARMCP_PACKET_MAX_POWER_SET - - * Set the maximal power of the device. The packet's arguments specify - * the power. - * - * ARMCP_PACKET_EEPROM_DATA_GET - - * Get EEPROM data from the ArmCP kernel. The buffer is specified in the - * addr field. The CPU will put the returned data size in the result - * field. In addition, the host's driver passes the max size it allows the - * ArmCP to write to the structure, to prevent data corruption in case of - * mismatched driver/FW versions. - * - * ARMCP_PACKET_TEMPERATURE_SET - - * Set the value of the offset property of a specified thermal sensor. - * The packet's arguments specify the desired sensor and the field to - * set. - * - * ARMCP_PACKET_VOLTAGE_SET - - * Trigger the reset_history property of a specified voltage sensor. - * The packet's arguments specify the desired sensor and the field to - * set. - * - * ARMCP_PACKET_CURRENT_SET - - * Trigger the reset_history property of a specified current sensor. - * The packet's arguments specify the desired sensor and the field to - * set. - */ - -enum armcp_packet_id { - ARMCP_PACKET_DISABLE_PCI_ACCESS = 1, /* internal */ - ARMCP_PACKET_ENABLE_PCI_ACCESS, /* internal */ - ARMCP_PACKET_TEMPERATURE_GET, /* sysfs */ - ARMCP_PACKET_VOLTAGE_GET, /* sysfs */ - ARMCP_PACKET_CURRENT_GET, /* sysfs */ - ARMCP_PACKET_FAN_SPEED_GET, /* sysfs */ - ARMCP_PACKET_PWM_GET, /* sysfs */ - ARMCP_PACKET_PWM_SET, /* sysfs */ - ARMCP_PACKET_FREQUENCY_SET, /* sysfs */ - ARMCP_PACKET_FREQUENCY_GET, /* sysfs */ - ARMCP_PACKET_LED_SET, /* debugfs */ - ARMCP_PACKET_I2C_WR, /* debugfs */ - ARMCP_PACKET_I2C_RD, /* debugfs */ - ARMCP_PACKET_INFO_GET, /* IOCTL */ - ARMCP_PACKET_FLASH_PROGRAM_REMOVED, - ARMCP_PACKET_UNMASK_RAZWI_IRQ, /* internal */ - ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY, /* internal */ - ARMCP_PACKET_TEST, /* internal */ - ARMCP_PACKET_FREQUENCY_CURR_GET, /* sysfs */ - ARMCP_PACKET_MAX_POWER_GET, /* sysfs */ - ARMCP_PACKET_MAX_POWER_SET, /* sysfs */ - ARMCP_PACKET_EEPROM_DATA_GET, /* sysfs */ - ARMCP_RESERVED, - ARMCP_PACKET_TEMPERATURE_SET, /* sysfs */ - ARMCP_PACKET_VOLTAGE_SET, /* sysfs */ - ARMCP_PACKET_CURRENT_SET, /* sysfs */ - ARMCP_PACKET_PCIE_THROUGHPUT_GET, /* internal */ - ARMCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */ - ARMCP_PACKET_TOTAL_ENERGY_GET, /* internal */ -}; - -#define ARMCP_PACKET_FENCE_VAL 0xFE8CE7A5 - -#define ARMCP_PKT_CTL_RC_SHIFT 12 -#define ARMCP_PKT_CTL_RC_MASK 0x0000F000 - -#define ARMCP_PKT_CTL_OPCODE_SHIFT 16 -#define ARMCP_PKT_CTL_OPCODE_MASK 0x1FFF0000 - -struct armcp_packet { - union { - __le64 value; /* For SET packets */ - __le64 result; /* For GET packets */ - __le64 addr; /* For PQ */ - }; - - __le32 ctl; - - __le32 fence; /* Signal to host that message is completed */ - - union { - struct {/* For temperature/current/voltage/fan/pwm get/set */ - __le16 sensor_index; - __le16 type; - }; - - struct { /* For I2C read/write */ - __u8 i2c_bus; - __u8 i2c_addr; - __u8 i2c_reg; - __u8 pad; /* unused */ - }; - - /* For any general request */ - __le32 index; - - /* For frequency get/set */ - __le32 pll_index; - - /* For led set */ - __le32 led_index; - - /* For get Armcp info/EEPROM data */ - __le32 data_max_size; - }; - - __le32 reserved; -}; - -struct armcp_unmask_irq_arr_packet { - struct armcp_packet armcp_pkt; - __le32 length; - __le32 irqs[0]; -}; - -enum armcp_packet_rc { - armcp_packet_success, - armcp_packet_invalid, - armcp_packet_fault -}; - -/* - * armcp_temp_type should adhere to hwmon_temp_attributes - * defined in Linux kernel hwmon.h file - */ -enum armcp_temp_type { - armcp_temp_input, - armcp_temp_max = 6, - armcp_temp_max_hyst, - armcp_temp_crit, - armcp_temp_crit_hyst, - armcp_temp_offset = 19, - armcp_temp_highest = 22, - armcp_temp_reset_history = 23 -}; - -enum armcp_in_attributes { - armcp_in_input, - armcp_in_min, - armcp_in_max, - armcp_in_highest = 7, - armcp_in_reset_history -}; - -enum armcp_curr_attributes { - armcp_curr_input, - armcp_curr_min, - armcp_curr_max, - armcp_curr_highest = 7, - armcp_curr_reset_history -}; - -enum armcp_fan_attributes { - armcp_fan_input, - armcp_fan_min = 2, - armcp_fan_max -}; - -enum armcp_pwm_attributes { - armcp_pwm_input, - armcp_pwm_enable -}; - -enum armcp_pcie_throughput_attributes { - armcp_pcie_throughput_tx, - armcp_pcie_throughput_rx -}; - -/* Event Queue Packets */ - -struct eq_generic_event { - __le64 data[7]; -}; - -/* - * ArmCP info - */ - -#define CARD_NAME_MAX_LEN 16 -#define VERSION_MAX_LEN 128 -#define ARMCP_MAX_SENSORS 128 - -struct armcp_sensor { - __le32 type; - __le32 flags; -}; - -/** - * struct armcp_card_types - ASIC card type. - * @armcp_card_type_pci: PCI card. - * @armcp_card_type_pmc: PCI Mezzanine Card. - */ -enum armcp_card_types { - armcp_card_type_pci, - armcp_card_type_pmc -}; - -/** - * struct armcp_info - Info from ArmCP that is necessary to the host's driver - * @sensors: available sensors description. - * @kernel_version: ArmCP linux kernel version. - * @reserved: reserved field. - * @card_type: card configuration type. - * @card_location: in a server, each card has different connections topology - * depending on its location (relevant for PMC card type) - * @cpld_version: CPLD programmed F/W version. - * @infineon_version: Infineon main DC-DC version. - * @fuse_version: silicon production FUSE information. - * @thermal_version: thermald S/W version. - * @armcp_version: ArmCP S/W version. - * @dram_size: available DRAM size. - * @card_name: card name that will be displayed in HWMON subsystem on the host - */ -struct armcp_info { - struct armcp_sensor sensors[ARMCP_MAX_SENSORS]; - __u8 kernel_version[VERSION_MAX_LEN]; - __le32 reserved; - __le32 card_type; - __le32 card_location; - __le32 cpld_version; - __le32 infineon_version; - __u8 fuse_version[VERSION_MAX_LEN]; - __u8 thermal_version[VERSION_MAX_LEN]; - __u8 armcp_version[VERSION_MAX_LEN]; - __le64 dram_size; - char card_name[CARD_NAME_MAX_LEN]; -}; - -#endif /* ARMCP_IF_H */ diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h new file mode 100644 index 000000000000..1e8480e978e2 --- /dev/null +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -0,0 +1,417 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2020 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef CPUCP_IF_H +#define CPUCP_IF_H + +#include + +/* + * EVENT QUEUE + */ + +struct hl_eq_header { + __le32 reserved; + __le32 ctl; +}; + +struct hl_eq_ecc_data { + __le64 ecc_address; + __le64 ecc_syndrom; + __u8 memory_wrapper_idx; + __u8 pad[7]; +}; + +struct hl_eq_entry { + struct hl_eq_header hdr; + union { + struct hl_eq_ecc_data ecc_data; + __le64 data[7]; + }; +}; + +#define HL_EQ_ENTRY_SIZE sizeof(struct hl_eq_entry) + +#define EQ_CTL_READY_SHIFT 31 +#define EQ_CTL_READY_MASK 0x80000000 + +#define EQ_CTL_EVENT_TYPE_SHIFT 16 +#define EQ_CTL_EVENT_TYPE_MASK 0x03FF0000 + +enum pq_init_status { + PQ_INIT_STATUS_NA = 0, + PQ_INIT_STATUS_READY_FOR_CP, + PQ_INIT_STATUS_READY_FOR_HOST, + PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI +}; + +/* + * CpuCP Primary Queue Packets + * + * During normal operation, the host's kernel driver needs to send various + * messages to CpuCP, usually either to SET some value into a H/W periphery or + * to GET the current value of some H/W periphery. For example, SET the + * frequency of MME/TPC and GET the value of the thermal sensor. + * + * These messages can be initiated either by the User application or by the + * host's driver itself, e.g. power management code. In either case, the + * communication from the host's driver to CpuCP will *always* be in + * synchronous mode, meaning that the host will send a single message and poll + * until the message was acknowledged and the results are ready (if results are + * needed). + * + * This means that only a single message can be sent at a time and the host's + * driver must wait for its result before sending the next message. Having said + * that, because these are control messages which are sent in a relatively low + * frequency, this limitation seems acceptable. It's important to note that + * in case of multiple devices, messages to different devices *can* be sent + * at the same time. + * + * The message, inputs/outputs (if relevant) and fence object will be located + * on the device DDR at an address that will be determined by the host's driver. + * During device initialization phase, the host will pass to CpuCP that address. + * Most of the message types will contain inputs/outputs inside the message + * itself. The common part of each message will contain the opcode of the + * message (its type) and a field representing a fence object. + * + * When the host's driver wishes to send a message to CPU CP, it will write the + * message contents to the device DDR, clear the fence object and then write to + * the PSOC_ARC1_AUX_SW_INTR, to issue interrupt 121 to ARC Management CPU. + * + * Upon receiving the interrupt (#121), CpuCP will read the message from the + * DDR. In case the message is a SET operation, CpuCP will first perform the + * operation and then write to the fence object on the device DDR. In case the + * message is a GET operation, CpuCP will first fill the results section on the + * device DDR and then write to the fence object. If an error occurred, CpuCP + * will fill the rc field with the right error code. + * + * In the meantime, the host's driver will poll on the fence object. Once the + * host sees that the fence object is signaled, it will read the results from + * the device DDR (if relevant) and resume the code execution in the host's + * driver. + * + * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8 + * so the value being put by the host's driver matches the value read by CpuCP + * + * Non-QMAN packets should be limited to values 1 through (2^8 - 1) + * + * Detailed description: + * + * CPUCP_PACKET_DISABLE_PCI_ACCESS - + * After receiving this packet the embedded CPU must NOT issue PCI + * transactions (read/write) towards the Host CPU. This also include + * sending MSI-X interrupts. + * This packet is usually sent before the device is moved to D3Hot state. + * + * CPUCP_PACKET_ENABLE_PCI_ACCESS - + * After receiving this packet the embedded CPU is allowed to issue PCI + * transactions towards the Host CPU, including sending MSI-X interrupts. + * This packet is usually send after the device is moved to D0 state. + * + * CPUCP_PACKET_TEMPERATURE_GET - + * Fetch the current temperature / Max / Max Hyst / Critical / + * Critical Hyst of a specified thermal sensor. The packet's + * arguments specify the desired sensor and the field to get. + * + * CPUCP_PACKET_VOLTAGE_GET - + * Fetch the voltage / Max / Min of a specified sensor. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_CURRENT_GET - + * Fetch the current / Max / Min of a specified sensor. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_FAN_SPEED_GET - + * Fetch the speed / Max / Min of a specified fan. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_PWM_GET - + * Fetch the pwm value / mode of a specified pwm. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_PWM_SET - + * Set the pwm value / mode of a specified pwm. The packet's + * arguments specify the sensor, type and value. + * + * CPUCP_PACKET_FREQUENCY_SET - + * Set the frequency of a specified PLL. The packet's arguments specify + * the PLL and the desired frequency. The actual frequency in the device + * might differ from the requested frequency. + * + * CPUCP_PACKET_FREQUENCY_GET - + * Fetch the frequency of a specified PLL. The packet's arguments specify + * the PLL. + * + * CPUCP_PACKET_LED_SET - + * Set the state of a specified led. The packet's arguments + * specify the led and the desired state. + * + * CPUCP_PACKET_I2C_WR - + * Write 32-bit value to I2C device. The packet's arguments specify the + * I2C bus, address and value. + * + * CPUCP_PACKET_I2C_RD - + * Read 32-bit value from I2C device. The packet's arguments specify the + * I2C bus and address. + * + * CPUCP_PACKET_INFO_GET - + * Fetch information from the device as specified in the packet's + * structure. The host's driver passes the max size it allows the CpuCP to + * write to the structure, to prevent data corruption in case of + * mismatched driver/FW versions. + * + * CPUCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed + * + * CPUCP_PACKET_UNMASK_RAZWI_IRQ - + * Unmask the given IRQ. The IRQ number is specified in the value field. + * The packet is sent after receiving an interrupt and printing its + * relevant information. + * + * CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY - + * Unmask the given IRQs. The IRQs numbers are specified in an array right + * after the cpucp_packet structure, where its first element is the array + * length. The packet is sent after a soft reset was done in order to + * handle any interrupts that were sent during the reset process. + * + * CPUCP_PACKET_TEST - + * Test packet for CpuCP connectivity. The CPU will put the fence value + * in the result field. + * + * CPUCP_PACKET_FREQUENCY_CURR_GET - + * Fetch the current frequency of a specified PLL. The packet's arguments + * specify the PLL. + * + * CPUCP_PACKET_MAX_POWER_GET - + * Fetch the maximal power of the device. + * + * CPUCP_PACKET_MAX_POWER_SET - + * Set the maximal power of the device. The packet's arguments specify + * the power. + * + * CPUCP_PACKET_EEPROM_DATA_GET - + * Get EEPROM data from the CpuCP kernel. The buffer is specified in the + * addr field. The CPU will put the returned data size in the result + * field. In addition, the host's driver passes the max size it allows the + * CpuCP to write to the structure, to prevent data corruption in case of + * mismatched driver/FW versions. + * + * CPUCP_PACKET_TEMPERATURE_SET - + * Set the value of the offset property of a specified thermal sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + * + * CPUCP_PACKET_VOLTAGE_SET - + * Trigger the reset_history property of a specified voltage sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + * + * CPUCP_PACKET_CURRENT_SET - + * Trigger the reset_history property of a specified current sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + */ + +enum cpucp_packet_id { + CPUCP_PACKET_DISABLE_PCI_ACCESS = 1, /* internal */ + CPUCP_PACKET_ENABLE_PCI_ACCESS, /* internal */ + CPUCP_PACKET_TEMPERATURE_GET, /* sysfs */ + CPUCP_PACKET_VOLTAGE_GET, /* sysfs */ + CPUCP_PACKET_CURRENT_GET, /* sysfs */ + CPUCP_PACKET_FAN_SPEED_GET, /* sysfs */ + CPUCP_PACKET_PWM_GET, /* sysfs */ + CPUCP_PACKET_PWM_SET, /* sysfs */ + CPUCP_PACKET_FREQUENCY_SET, /* sysfs */ + CPUCP_PACKET_FREQUENCY_GET, /* sysfs */ + CPUCP_PACKET_LED_SET, /* debugfs */ + CPUCP_PACKET_I2C_WR, /* debugfs */ + CPUCP_PACKET_I2C_RD, /* debugfs */ + CPUCP_PACKET_INFO_GET, /* IOCTL */ + CPUCP_PACKET_FLASH_PROGRAM_REMOVED, + CPUCP_PACKET_UNMASK_RAZWI_IRQ, /* internal */ + CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY, /* internal */ + CPUCP_PACKET_TEST, /* internal */ + CPUCP_PACKET_FREQUENCY_CURR_GET, /* sysfs */ + CPUCP_PACKET_MAX_POWER_GET, /* sysfs */ + CPUCP_PACKET_MAX_POWER_SET, /* sysfs */ + CPUCP_PACKET_EEPROM_DATA_GET, /* sysfs */ + CPUCP_RESERVED, + CPUCP_PACKET_TEMPERATURE_SET, /* sysfs */ + CPUCP_PACKET_VOLTAGE_SET, /* sysfs */ + CPUCP_PACKET_CURRENT_SET, /* sysfs */ + CPUCP_PACKET_PCIE_THROUGHPUT_GET, /* internal */ + CPUCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */ + CPUCP_PACKET_TOTAL_ENERGY_GET, /* internal */ +}; + +#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 + +#define CPUCP_PKT_CTL_RC_SHIFT 12 +#define CPUCP_PKT_CTL_RC_MASK 0x0000F000 + +#define CPUCP_PKT_CTL_OPCODE_SHIFT 16 +#define CPUCP_PKT_CTL_OPCODE_MASK 0x1FFF0000 + +struct cpucp_packet { + union { + __le64 value; /* For SET packets */ + __le64 result; /* For GET packets */ + __le64 addr; /* For PQ */ + }; + + __le32 ctl; + + __le32 fence; /* Signal to host that message is completed */ + + union { + struct {/* For temperature/current/voltage/fan/pwm get/set */ + __le16 sensor_index; + __le16 type; + }; + + struct { /* For I2C read/write */ + __u8 i2c_bus; + __u8 i2c_addr; + __u8 i2c_reg; + __u8 pad; /* unused */ + }; + + /* For any general request */ + __le32 index; + + /* For frequency get/set */ + __le32 pll_index; + + /* For led set */ + __le32 led_index; + + /* For get CpuCP info/EEPROM data */ + __le32 data_max_size; + }; + + __le32 reserved; +}; + +struct cpucp_unmask_irq_arr_packet { + struct cpucp_packet cpucp_pkt; + __le32 length; + __le32 irqs[0]; +}; + +enum cpucp_packet_rc { + cpucp_packet_success, + cpucp_packet_invalid, + cpucp_packet_fault +}; + +/* + * cpucp_temp_type should adhere to hwmon_temp_attributes + * defined in Linux kernel hwmon.h file + */ +enum cpucp_temp_type { + cpucp_temp_input, + cpucp_temp_max = 6, + cpucp_temp_max_hyst, + cpucp_temp_crit, + cpucp_temp_crit_hyst, + cpucp_temp_offset = 19, + cpucp_temp_highest = 22, + cpucp_temp_reset_history = 23 +}; + +enum cpucp_in_attributes { + cpucp_in_input, + cpucp_in_min, + cpucp_in_max, + cpucp_in_highest = 7, + cpucp_in_reset_history +}; + +enum cpucp_curr_attributes { + cpucp_curr_input, + cpucp_curr_min, + cpucp_curr_max, + cpucp_curr_highest = 7, + cpucp_curr_reset_history +}; + +enum cpucp_fan_attributes { + cpucp_fan_input, + cpucp_fan_min = 2, + cpucp_fan_max +}; + +enum cpucp_pwm_attributes { + cpucp_pwm_input, + cpucp_pwm_enable +}; + +enum cpucp_pcie_throughput_attributes { + cpucp_pcie_throughput_tx, + cpucp_pcie_throughput_rx +}; + +/* Event Queue Packets */ + +struct eq_generic_event { + __le64 data[7]; +}; + +/* + * CpuCP info + */ + +#define CARD_NAME_MAX_LEN 16 +#define VERSION_MAX_LEN 128 +#define CPUCP_MAX_SENSORS 128 + +struct cpucp_sensor { + __le32 type; + __le32 flags; +}; + +/** + * struct cpucp_card_types - ASIC card type. + * @cpucp_card_type_pci: PCI card. + * @cpucp_card_type_pmc: PCI Mezzanine Card. + */ +enum cpucp_card_types { + cpucp_card_type_pci, + cpucp_card_type_pmc +}; + +/** + * struct cpucp_info - Info from CpuCP that is necessary to the host's driver + * @sensors: available sensors description. + * @kernel_version: CpuCP linux kernel version. + * @reserved: reserved field. + * @card_type: card configuration type. + * @card_location: in a server, each card has different connections topology + * depending on its location (relevant for PMC card type) + * @cpld_version: CPLD programmed F/W version. + * @infineon_version: Infineon main DC-DC version. + * @fuse_version: silicon production FUSE information. + * @thermal_version: thermald S/W version. + * @cpucp_version: CpuCP S/W version. + * @dram_size: available DRAM size. + * @card_name: card name that will be displayed in HWMON subsystem on the host + */ +struct cpucp_info { + struct cpucp_sensor sensors[CPUCP_MAX_SENSORS]; + __u8 kernel_version[VERSION_MAX_LEN]; + __le32 reserved; + __le32 card_type; + __le32 card_location; + __le32 cpld_version; + __le32 infineon_version; + __u8 fuse_version[VERSION_MAX_LEN]; + __u8 thermal_version[VERSION_MAX_LEN]; + __u8 cpucp_version[VERSION_MAX_LEN]; + __le64 dram_size; + char card_name[CARD_NAME_MAX_LEN]; +}; + +#endif /* CPUCP_IF_H */ diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 6803991726e8..a2dcad29340f 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -297,7 +297,7 @@ struct hl_info_hw_ip_info { __u32 device_id; /* PCI Device ID */ __u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */ __u32 reserved[2]; - __u32 armcp_cpld_version; + __u32 cpld_version; __u32 psoc_pci_pll_nr; __u32 psoc_pci_pll_nf; __u32 psoc_pci_pll_od; @@ -305,7 +305,7 @@ struct hl_info_hw_ip_info { __u8 tpc_enabled_mask; __u8 dram_enabled; __u8 pad[2]; - __u8 armcp_version[HL_INFO_VERSION_MAX_LEN]; + __u8 cpucp_version[HL_INFO_VERSION_MAX_LEN]; __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN]; }; -- cgit v1.2.3 From 975ab7b32b90c97046ddbdd53798391b7d8a6a1e Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 1 Sep 2020 11:22:05 +0300 Subject: habanalabs: count dropped CS because max CS in-flight There is a case where the user reaches the maximum number of CS in-flight. In that case, the driver rejects the new CS of the user with EAGAIN. Count that event so the user can query the driver later to see if it happened. Reviewed-by: Tomer Tayar Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 5 ++++- include/uapi/misc/habanalabs.h | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index a811a9fdf13b..470bffbe9bdc 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -252,6 +252,8 @@ static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx) ctx->cs_counters.parsing_drop_cnt; hdev->aggregated_cs_counters.queue_full_drop_cnt += ctx->cs_counters.queue_full_drop_cnt; + hdev->aggregated_cs_counters.max_cs_in_flight_drop_cnt += + ctx->cs_counters.max_cs_in_flight_drop_cnt; } static void cs_do_release(struct kref *ref) @@ -431,8 +433,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, (hdev->asic_prop.max_pending_cs - 1)]; if (other && !completion_done(&other->completion)) { - dev_dbg(hdev->dev, + dev_dbg_ratelimited(hdev->dev, "Rejecting CS because of too many in-flights CS\n"); + ctx->cs_counters.max_cs_in_flight_drop_cnt++; rc = -EAGAIN; goto free_fence; } diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index a2dcad29340f..69fb44d35292 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -401,12 +401,14 @@ struct hl_info_sync_manager { * @parsing_drop_cnt: dropped due to error in packet parsing * @queue_full_drop_cnt: dropped due to queue full * @device_in_reset_drop_cnt: dropped due to device in reset + * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight */ struct hl_cs_counters { __u64 out_of_mem_drop_cnt; __u64 parsing_drop_cnt; __u64 queue_full_drop_cnt; __u64 device_in_reset_drop_cnt; + __u64 max_cs_in_flight_drop_cnt; }; struct hl_info_cs_counters { -- cgit v1.2.3 From 681a22f55f1506023da06ebf660a4a252b35bc93 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 7 Sep 2020 18:08:51 +0300 Subject: habanalabs: allow to wait on CS without sleep The user sometimes wants to check if a CS has completed to clean resources. In that case, the user doesn't want to sleep but just to check if the CS has finished and continue with his code. Add a new definition to the API of the wait on CS. The new definition says that if the timeout is 0, the driver won't sleep at all but return immediately after checking if the CS has finished. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 7 +++++-- include/uapi/misc/habanalabs.h | 3 +++ 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 470bffbe9bdc..b2b974ecc431 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -1180,8 +1180,11 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev, "Can't wait on CS %llu because current CS is at seq %llu\n", seq, ctx->cs_sequence); } else if (fence) { - rc = wait_for_completion_interruptible_timeout( - &fence->completion, timeout); + if (!timeout_us) + rc = completion_done(&fence->completion); + else + rc = wait_for_completion_interruptible_timeout( + &fence->completion, timeout); if (fence->error == -ETIMEDOUT) rc = -ETIMEDOUT; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 69fb44d35292..d449f8a31ce6 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -914,6 +914,9 @@ struct hl_debug_args { * inside the kernel until the CS has finished or until the user-requested * timeout has expired. * + * If the timeout value is 0, the driver won't sleep at all. It will check + * the status of the CS and return immediately + * * The return value of the IOCTL is a standard Linux error code. The possible * values are: * -- cgit v1.2.3 From ef6a0f6caa4a5dbfbb42b642e23fb06182798d30 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Thu, 9 Jul 2020 16:17:48 +0300 Subject: habanalabs: Add an option to map CB to device MMU There are cases in which the device should access the host memory of a CB through the device MMU, and thus this memory should be mapped. The patch adds a flag to the CB IOCTL, in which a user can ask the driver to perform the mapping when creating a CB. The mapping is allowed only if a dedicated VA range was allocated for the specific ASIC. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_buffer.c | 196 +++++++++++++++++++++++- drivers/misc/habanalabs/common/context.c | 12 +- drivers/misc/habanalabs/common/habanalabs.h | 20 ++- drivers/misc/habanalabs/gaudi/gaudi.c | 4 +- drivers/misc/habanalabs/goya/goya.c | 4 +- include/uapi/misc/habanalabs.h | 12 +- 6 files changed, 237 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index 0cb556fb4a8b..901e213daf40 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -13,6 +13,131 @@ #include #include +static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_vm_va_block *va_block, *tmp; + dma_addr_t bus_addr; + u64 virt_addr; + u32 page_size = prop->pmmu.page_size; + s32 offset; + int rc; + + if (!hdev->supports_cb_mapping) { + dev_err_ratelimited(hdev->dev, + "Cannot map CB because no VA range is allocated for CB mapping\n"); + return -EINVAL; + } + + if (!hdev->mmu_enable) { + dev_err_ratelimited(hdev->dev, + "Cannot map CB because MMU is disabled\n"); + return -EINVAL; + } + + INIT_LIST_HEAD(&cb->va_block_list); + + for (bus_addr = cb->bus_address; + bus_addr < cb->bus_address + cb->size; + bus_addr += page_size) { + + virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size); + if (!virt_addr) { + dev_err(hdev->dev, + "Failed to allocate device virtual address for CB\n"); + rc = -ENOMEM; + goto err_va_pool_free; + } + + va_block = kzalloc(sizeof(*va_block), GFP_KERNEL); + if (!va_block) { + rc = -ENOMEM; + gen_pool_free(ctx->cb_va_pool, virt_addr, page_size); + goto err_va_pool_free; + } + + va_block->start = virt_addr; + va_block->end = virt_addr + page_size; + va_block->size = page_size; + list_add_tail(&va_block->node, &cb->va_block_list); + } + + mutex_lock(&ctx->mmu_lock); + + bus_addr = cb->bus_address; + offset = 0; + list_for_each_entry(va_block, &cb->va_block_list, node) { + rc = hl_mmu_map(ctx, va_block->start, bus_addr, va_block->size, + list_is_last(&va_block->node, + &cb->va_block_list)); + if (rc) { + dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", + va_block->start); + goto err_va_umap; + } + + bus_addr += va_block->size; + offset += va_block->size; + } + + hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR); + + mutex_unlock(&ctx->mmu_lock); + + cb->is_mmu_mapped = true; + + return 0; + +err_va_umap: + list_for_each_entry(va_block, &cb->va_block_list, node) { + if (offset <= 0) + break; + hl_mmu_unmap(ctx, va_block->start, va_block->size, + offset <= va_block->size); + offset -= va_block->size; + } + + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR); + + mutex_unlock(&ctx->mmu_lock); + +err_va_pool_free: + list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) { + gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size); + list_del(&va_block->node); + kfree(va_block); + } + + return rc; +} + +static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb) +{ + struct hl_device *hdev = ctx->hdev; + struct hl_vm_va_block *va_block, *tmp; + + mutex_lock(&ctx->mmu_lock); + + list_for_each_entry(va_block, &cb->va_block_list, node) + if (hl_mmu_unmap(ctx, va_block->start, va_block->size, + list_is_last(&va_block->node, + &cb->va_block_list))) + dev_warn_ratelimited(hdev->dev, + "Failed to unmap CB's va 0x%llx\n", + va_block->start); + + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR); + + mutex_unlock(&ctx->mmu_lock); + + list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) { + gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size); + list_del(&va_block->node); + kfree(va_block); + } +} + static void cb_fini(struct hl_device *hdev, struct hl_cb *cb) { if (cb->is_internal) @@ -47,6 +172,9 @@ static void cb_release(struct kref *ref) hl_debugfs_remove_cb(cb); + if (cb->is_mmu_mapped) + cb_unmap_mem(cb->ctx, cb); + hl_ctx_put(cb->ctx); cb_do_release(hdev, cb); @@ -110,7 +238,7 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, struct hl_ctx *ctx, u32 cb_size, bool internal_cb, - u64 *handle) + bool map_cb, u64 *handle) { struct hl_cb *cb; bool alloc_new_cb = true; @@ -169,13 +297,26 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, cb->ctx = ctx; hl_ctx_get(hdev, cb->ctx); + if (map_cb) { + if (ctx_id == HL_KERNEL_ASID_ID) { + dev_err(hdev->dev, + "CB mapping is not supported for kernel context\n"); + rc = -EINVAL; + goto release_cb; + } + + rc = cb_map_mem(ctx, cb); + if (rc) + goto release_cb; + } + spin_lock(&mgr->cb_lock); rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC); spin_unlock(&mgr->cb_lock); if (rc < 0) { dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n"); - goto release_cb; + goto unmap_mem; } cb->id = (u64) rc; @@ -194,6 +335,9 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, return 0; +unmap_mem: + if (cb->is_mmu_mapped) + cb_unmap_mem(cb->ctx, cb); release_cb: hl_ctx_put(cb->ctx); cb_do_release(hdev, cb); @@ -256,7 +400,9 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) rc = -EINVAL; } else { rc = hl_cb_create(hdev, &hpriv->cb_mgr, hpriv->ctx, - args->in.cb_size, false, &handle); + args->in.cb_size, false, + !!(args->in.flags & HL_CB_FLAGS_MAP), + &handle); } memset(args, 0, sizeof(*args)); @@ -442,7 +588,7 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, int rc; rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, cb_size, - internal_cb, &cb_handle); + internal_cb, false, &cb_handle); if (rc) { dev_err(hdev->dev, "Failed to allocate CB for the kernel driver %d\n", rc); @@ -498,3 +644,45 @@ int hl_cb_pool_fini(struct hl_device *hdev) return 0; } + +int hl_cb_va_pool_init(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + int rc; + + if (!hdev->supports_cb_mapping) + return 0; + + ctx->cb_va_pool = gen_pool_create(__ffs(prop->pmmu.page_size), -1); + if (!ctx->cb_va_pool) { + dev_err(hdev->dev, + "Failed to create VA gen pool for CB mapping\n"); + return -ENOMEM; + } + + rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr, + prop->cb_va_end_addr - prop->cb_va_start_addr, -1); + if (rc) { + dev_err(hdev->dev, + "Failed to add memory to VA gen pool for CB mapping\n"); + goto err_pool_destroy; + } + + return 0; + +err_pool_destroy: + gen_pool_destroy(ctx->cb_va_pool); + + return rc; +} + +void hl_cb_va_pool_fini(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + + if (!hdev->supports_cb_mapping) + return; + + gen_pool_destroy(ctx->cb_va_pool); +} diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index b168a9fce817..df8171a2226c 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -37,6 +37,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx) if ((hdev->in_debug) && (hdev->compute_ctx == ctx)) hl_device_set_debug_mode(hdev, false); + hl_cb_va_pool_fini(ctx); hl_vm_ctx_fini(ctx); hl_asid_free(hdev, ctx->asid); } else { @@ -155,15 +156,24 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) goto err_asid_free; } + rc = hl_cb_va_pool_init(ctx); + if (rc) { + dev_err(hdev->dev, + "Failed to init VA pool for mapped CB\n"); + goto err_vm_ctx_fini; + } + rc = hdev->asic_funcs->ctx_init(ctx); if (rc) { dev_err(hdev->dev, "ctx_init failed\n"); - goto err_vm_ctx_fini; + goto err_cb_va_pool_fini; } } return 0; +err_cb_va_pool_fini: + hl_cb_va_pool_fini(ctx); err_vm_ctx_fini: hl_vm_ctx_fini(ctx); err_asid_free: diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 88c68b664ef6..eaa9bf3f82a3 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -291,6 +291,10 @@ struct hl_mmu_properties { * @pcie_aux_dbi_reg_addr: Address of the PCIE_AUX DBI register. * @mmu_pgt_addr: base physical address in DRAM of MMU page tables. * @mmu_dram_default_page_addr: DRAM default page physical address. + * @cb_va_start_addr: virtual start address of command buffers which are mapped + * to the device's MMU. + * @cb_va_end_addr: virtual end address of command buffers which are mapped to + * the device's MMU. * @mmu_pgt_size: MMU page tables total size. * @mmu_pte_size: PTE size in MMU page tables. * @mmu_hop_table_size: MMU hop table size. @@ -339,6 +343,8 @@ struct asic_fixed_properties { u64 pcie_aux_dbi_reg_addr; u64 mmu_pgt_addr; u64 mmu_dram_default_page_addr; + u64 cb_va_start_addr; + u64 cb_va_end_addr; u32 mmu_pgt_size; u32 mmu_pte_size; u32 mmu_hop_table_size; @@ -421,6 +427,8 @@ struct hl_cb_mgr { * @lock: spinlock to protect mmap/cs flows. * @debugfs_list: node in debugfs list of command buffers. * @pool_list: node in pool list of command buffers. + * @va_block_list: list of virtual addresses blocks of the CB if it is mapped to + * the device's MMU. * @id: the CB's ID. * @kernel_address: Holds the CB's kernel virtual address. * @bus_address: Holds the CB's DMA address. @@ -430,6 +438,7 @@ struct hl_cb_mgr { * @mmap: true if the CB is currently mmaped to user. * @is_pool: true if CB was acquired from the pool, false otherwise. * @is_internal: internaly allocated + * @is_mmu_mapped: true if the CB is mapped to the device's MMU. */ struct hl_cb { struct kref refcount; @@ -438,6 +447,7 @@ struct hl_cb { spinlock_t lock; struct list_head debugfs_list; struct list_head pool_list; + struct list_head va_block_list; u64 id; u64 kernel_address; dma_addr_t bus_address; @@ -447,6 +457,7 @@ struct hl_cb { u8 mmap; u8 is_pool; u8 is_internal; + u8 is_mmu_mapped; }; @@ -843,6 +854,8 @@ struct hl_va_range { * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the * MMU hash or walking the PGT requires talking this lock. * @debugfs_list: node in debugfs list of contexts. + * @cb_va_pool: device VA pool for command buffers which are mapped to the + * device's MMU. * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed * to user so user could inquire about CS. It is used as * index to cs_pending array. @@ -874,6 +887,7 @@ struct hl_ctx { struct mutex mmu_lock; struct list_head debugfs_list; struct hl_cs_counters cs_counters; + struct gen_pool *cb_va_pool; u64 cs_sequence; u64 *dram_default_hops; spinlock_t cs_lock; @@ -1574,6 +1588,7 @@ struct hl_mmu_funcs { * @sync_stream_queue_idx: helper index for sync stream queues initialization. * @supports_coresight: is CoreSight supported. * @supports_soft_reset: is soft reset supported. + * @supports_cb_mapping: is mapping a CB to the device's MMU supported. */ struct hl_device { struct pci_dev *pdev; @@ -1673,6 +1688,7 @@ struct hl_device { u8 sync_stream_queue_idx; u8 supports_coresight; u8 supports_soft_reset; + u8 supports_cb_mapping; /* Parameters for bring-up */ u8 mmu_enable; @@ -1840,7 +1856,7 @@ void hl_hwmon_fini(struct hl_device *hdev); int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, struct hl_ctx *ctx, u32 cb_size, bool internal_cb, - u64 *handle); + bool map_cb, u64 *handle); int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle); int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma); struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr, @@ -1852,6 +1868,8 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, bool internal_cb); int hl_cb_pool_init(struct hl_device *hdev); int hl_cb_pool_fini(struct hl_device *hdev); +int hl_cb_va_pool_init(struct hl_ctx *ctx); +void hl_cb_va_pool_fini(struct hl_ctx *ctx); void hl_cs_rollback_all(struct hl_device *hdev); struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index b51cc6c1d541..6f7f6ad7a358 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -4115,7 +4115,7 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev, sizeof(struct packet_msg_prot) * 2; rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, - parser->patched_cb_size, false, + parser->patched_cb_size, false, false, &patched_cb_handle); if (rc) { @@ -4189,7 +4189,7 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, goto free_userptr; rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, - parser->patched_cb_size, false, + parser->patched_cb_size, false, false, &patched_cb_handle); if (rc) { dev_err(hdev->dev, diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 6c81a4b148de..5cddd46a8fb8 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -3811,7 +3811,7 @@ static int goya_parse_cb_mmu(struct hl_device *hdev, sizeof(struct packet_msg_prot) * 2; rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, - parser->patched_cb_size, false, + parser->patched_cb_size, false, false, &patched_cb_handle); if (rc) { @@ -3885,7 +3885,7 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev, goto free_userptr; rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, - parser->patched_cb_size, false, + parser->patched_cb_size, false, false, &patched_cb_handle); if (rc) { dev_err(hdev->dev, diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index d449f8a31ce6..9705b8adb60c 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -462,6 +462,9 @@ struct hl_info_args { /* 2MB minus 32 bytes for 2xMSG_PROT */ #define HL_MAX_CB_SIZE (0x200000 - 32) +/* Indicates whether the command buffer should be mapped to the device's MMU */ +#define HL_CB_FLAGS_MAP 0x1 + struct hl_cb_in { /* Handle of CB or 0 if we want to create one */ __u64 cb_handle; @@ -473,7 +476,8 @@ struct hl_cb_in { __u32 cb_size; /* Context ID - Currently not in use */ __u32 ctx_id; - __u32 pad; + /* HL_CB_FLAGS_* */ + __u32 flags; }; struct hl_cb_out { @@ -856,6 +860,12 @@ struct hl_debug_args { * When creating a new CB, the IOCTL returns a handle of it, and the user-space * process needs to use that handle to mmap the buffer so it can access them. * + * In some instances, the device must access the command buffer through the + * device's MMU, and thus its memory should be mapped. In these cases, user can + * indicate the driver that such a mapping is required. + * The resulting device virtual address will be used internally by the driver, + * and won't be returned to user. + * */ #define HL_IOCTL_CB \ _IOWR('H', 0x02, union hl_cb_args) -- cgit v1.2.3