Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c       31
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device.c        56
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c   7
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c       93
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_priv.h          11
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process.c        4
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_svm.c           40
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.c       5
8 files changed, 170 insertions, 77 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 828a9ceef1e7..0f0719528bcc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -521,15 +521,10 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
}
- minfo.cu_mask.ptr = kzalloc(cu_mask_size, GFP_KERNEL);
- if (!minfo.cu_mask.ptr)
- return -ENOMEM;
-
- retval = copy_from_user(minfo.cu_mask.ptr, cu_mask_ptr, cu_mask_size);
- if (retval) {
+ minfo.cu_mask.ptr = memdup_user(cu_mask_ptr, cu_mask_size);
+ if (IS_ERR(minfo.cu_mask.ptr)) {
pr_debug("Could not copy CU mask from userspace");
- retval = -EFAULT;
- goto out;
+ return PTR_ERR(minfo.cu_mask.ptr);
}
mutex_lock(&p->mutex);
@@ -538,7 +533,6 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
mutex_unlock(&p->mutex);
-out:
kfree(minfo.cu_mask.ptr);
return retval;
}
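
The memdup_user() helper collapses the kzalloc()-plus-copy_from_user() pattern into one call and reports failure through ERR_PTR(), which is why the out: label can be dropped. A minimal sketch of the pattern, with hypothetical names (not from this driver):

#include <linux/err.h>      /* IS_ERR, PTR_ERR */
#include <linux/slab.h>     /* kfree */
#include <linux/string.h>   /* memdup_user */
#include <linux/types.h>

static int example_copy_mask(const void __user *uptr, size_t size)
{
	u32 *buf;

	/* Allocate and copy in one step; returns ERR_PTR(-ENOMEM) or
	 * ERR_PTR(-EFAULT) on failure, never NULL.
	 */
	buf = memdup_user(uptr, size);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	/* ... use buf ... */

	kfree(buf);
	return 0;
}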
@@ -1070,7 +1064,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
svm_range_list_lock_and_flush_work(&p->svms, current->mm);
mutex_lock(&p->svms.lock);
mmap_write_unlock(current->mm);
- if (interval_tree_iter_first(&p->svms.objects,
+
+ /* Skip the SVM range check for the special case that allocates
+ * VRAM without a VA; in that case va_addr is 0 and not a valid
+ * address.
+ */
+ if (!(!args->va_addr && (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) &&
+ interval_tree_iter_first(&p->svms.objects,
args->va_addr >> PAGE_SHIFT,
(args->va_addr + args->size - 1) >> PAGE_SHIFT)) {
pr_err("Address: 0x%llx already allocated by SVM\n",
@@ -2566,8 +2565,8 @@ static int criu_restore(struct file *filep,
pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",
args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);
- if (!args->bos || !args->devices || !args->priv_data || !args->priv_data_size ||
- !args->num_devices || !args->num_bos)
+ if ((args->num_bos > 0 && !args->bos) || !args->devices || !args->priv_data ||
+ !args->priv_data_size || !args->num_devices)
return -EINVAL;
mutex_lock(&p->mutex);
@@ -3252,8 +3251,10 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
int retcode = -EINVAL;
bool ptrace_attached = false;
- if (nr >= AMDKFD_CORE_IOCTL_COUNT)
+ if (nr >= AMDKFD_CORE_IOCTL_COUNT) {
+ retcode = -ENOTTY;
goto err_i1;
+ }
if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
u32 amdkfd_size;
@@ -3266,8 +3267,10 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
asize = amdkfd_size;
cmd = ioctl->cmd;
- } else
+ } else {
+ retcode = -ENOTTY;
goto err_i1;
+ }
dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
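
Returning -ENOTTY rather than the previous -EINVAL for an out-of-range command number follows the kernel's ioctl convention: -ENOTTY means "this driver does not implement that ioctl", while -EINVAL is reserved for a recognized command with bad arguments. A minimal sketch of the convention, with a hypothetical handler:

#include <linux/errno.h>
#include <linux/fs.h>

static long example_ioctl(struct file *filp, unsigned int cmd,
			  unsigned long arg)
{
	switch (cmd) {
	/* ... recognized commands validate args and may return -EINVAL ... */
	default:
		return -ENOTTY;	/* unknown command number */
	}
}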
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 7e749f9b6d69..e9cfb80bd436 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -495,6 +495,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
mutex_init(&kfd->doorbell_mutex);
ida_init(&kfd->doorbell_ida);
+ atomic_set(&kfd->kfd_processes_count, 0);
return kfd;
}
@@ -1133,7 +1134,15 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
}
for (i = 0; i < kfd->num_nodes; i++) {
- node = kfd->nodes[i];
+ /* Guard against a race with another thread running between
+ * kfd_cleanup_nodes() and kfree(kfd), where kfd->nodes[i] has
+ * already been set to NULL.
+ */
+ node = kfd->nodes[i];
+ if (!node)
+ return;
+
spin_lock_irqsave(&node->interrupt_lock, flags);
if (node->interrupts_active
@@ -1485,6 +1494,15 @@ int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd)
mutex_lock(&kfd_processes_mutex);
+ /* kfd_processes_count is per kfd_dev; if any process still holds
+ * device data on this device, return -EBUSY without further checks.
+ */
+ if (atomic_read(&kfd->kfd_processes_count)) {
+ pr_debug("process_wq_release not finished\n");
+ r = -EBUSY;
+ goto out;
+ }
+
if (hash_empty(kfd_processes_table) && !kfd_is_locked(kfd))
goto out;
@@ -1550,6 +1568,25 @@ int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id)
return ret;
}
+int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd)
+{
+ struct kfd_node *node;
+ int i, r;
+
+ if (!kfd->init_complete)
+ return 0;
+
+ for (i = 0; i < kfd->num_nodes; i++) {
+ node = kfd->nodes[i];
+ r = node->dqm->ops.unhalt(node->dqm);
+ if (r) {
+ dev_err(kfd_device, "Error in starting scheduler\n");
+ return r;
+ }
+ }
+ return 0;
+}
+
int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id)
{
struct kfd_node *node;
@@ -1567,6 +1604,23 @@ int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id)
return node->dqm->ops.halt(node->dqm);
}
+int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd)
+{
+ struct kfd_node *node;
+ int i, r;
+
+ if (!kfd->init_complete)
+ return 0;
+
+ for (i = 0; i < kfd->num_nodes; i++) {
+ node = kfd->nodes[i];
+ r = node->dqm->ops.halt(node->dqm);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
{
struct kfd_node *node;
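
The new *_all_nodes() variants walk every KFD node of the device instead of taking a node_id, halting or unhalting each node's device queue manager. A sketch of how a caller might pair them around a device-wide operation; the flow below is hypothetical, not taken from amdgpu:

/* Hypothetical caller: pause all schedulers, do work, then resume. */
static int example_pause_sched_and_work(struct kfd_dev *kfd)
{
	int r;

	r = kgd2kfd_stop_sched_all_nodes(kfd);
	if (r)
		return r;

	/* ... device-wide work while every DQM is halted ... */

	return kgd2kfd_start_sched_all_nodes(kfd);
}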
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 2b0a830f5b29..fb3129883a4c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -46,11 +46,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
int retval;
union PM4_MES_TYPE_3_HEADER nop;
- if (WARN_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ))
- return false;
-
- pr_debug("Initializing queue type %d size %d\n", KFD_QUEUE_TYPE_HIQ,
- queue_size);
+ pr_debug("Initializing queue type %d size %d\n", type, queue_size);
memset(&prop, 0, sizeof(prop));
memset(&nop, 0, sizeof(nop));
@@ -69,6 +65,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
break;
default:
+ WARN(1, "Invalid queue type %d\n", type);
dev_err(dev->adev->dev, "Invalid queue type %d\n", type);
return false;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 79251f22b702..59a5a3fea65d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -39,22 +39,22 @@
#endif
#define dev_fmt(fmt) "kfd_migrate: " fmt
-static uint64_t
-svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
+static u64
+svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, u64 addr)
{
return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
}
static int
-svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
- dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
+svm_migrate_gart_map(struct amdgpu_ring *ring, u64 npages,
+ dma_addr_t *addr, u64 *gart_addr, u64 flags)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_job *job;
unsigned int num_dw, num_bytes;
struct dma_fence *fence;
- uint64_t src_addr, dst_addr;
- uint64_t pte_flags;
+ u64 src_addr, dst_addr;
+ u64 pte_flags;
void *cpu_addr;
int r;
@@ -68,7 +68,8 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4 + num_bytes,
AMDGPU_IB_POOL_DELAYED,
- &job);
+ &job,
+ AMDGPU_KERNEL_JOB_ID_KFD_GART_MAP);
if (r)
return r;
@@ -122,15 +123,15 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
static int
svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
- uint64_t *vram, uint64_t npages,
+ u64 *vram, u64 npages,
enum MIGRATION_COPY_DIR direction,
struct dma_fence **mfence)
{
- const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
+ const u64 GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- uint64_t gart_s, gart_d;
+ u64 gart_s, gart_d;
struct dma_fence *next;
- uint64_t size;
+ u64 size;
int r;
mutex_lock(&adev->mman.gtt_window_lock);
@@ -260,39 +261,39 @@ static void svm_migrate_put_sys_page(unsigned long addr)
put_page(page);
}
-static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
+static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
{
- unsigned long upages = 0;
+ unsigned long mpages = 0;
unsigned long i;
for (i = 0; i < migrate->npages; i++) {
- if (migrate->src[i] & MIGRATE_PFN_VALID &&
- !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
- upages++;
+ if (migrate->dst[i] & MIGRATE_PFN_VALID &&
+ migrate->src[i] & MIGRATE_PFN_MIGRATE)
+ mpages++;
}
- return upages;
+ return mpages;
}
static int
svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
- dma_addr_t *scratch, uint64_t ttm_res_offset)
+ dma_addr_t *scratch, u64 ttm_res_offset)
{
- uint64_t npages = migrate->npages;
+ u64 npages = migrate->npages;
struct amdgpu_device *adev = node->adev;
struct device *dev = adev->dev;
struct amdgpu_res_cursor cursor;
- uint64_t mpages = 0;
+ u64 mpages = 0;
dma_addr_t *src;
- uint64_t *dst;
- uint64_t i, j;
+ u64 *dst;
+ u64 i, j;
int r;
pr_debug("svms 0x%p [0x%lx 0x%lx 0x%llx]\n", prange->svms, prange->start,
prange->last, ttm_res_offset);
src = scratch;
- dst = (uint64_t *)(scratch + npages);
+ dst = (u64 *)(scratch + npages);
amdgpu_res_first(prange->ttm_res, ttm_res_offset,
npages << PAGE_SHIFT, &cursor);
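
Counting successes instead of failures checks both sides of the migrate_vma arrays: a page actually moved only if the source entry was eligible to migrate (MIGRATE_PFN_MIGRATE) and a destination page was installed (MIGRATE_PFN_VALID). A standalone sketch of the same test, using the migrate_vma API from <linux/migrate.h>:

#include <linux/migrate.h>

/* How many entries of a completed migrate_vma actually moved. */
static unsigned long example_count_migrated(struct migrate_vma *migrate)
{
	unsigned long i, mpages = 0;

	for (i = 0; i < migrate->npages; i++) {
		if ((migrate->src[i] & MIGRATE_PFN_MIGRATE) &&
		    (migrate->dst[i] & MIGRATE_PFN_VALID))
			mpages++;
	}
	return mpages;
}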
@@ -385,11 +386,11 @@ out_free_vram_pages:
static long
svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
- struct vm_area_struct *vma, uint64_t start,
- uint64_t end, uint32_t trigger, uint64_t ttm_res_offset)
+ struct vm_area_struct *vma, u64 start,
+ u64 end, uint32_t trigger, u64 ttm_res_offset)
{
struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
- uint64_t npages = (end - start) >> PAGE_SHIFT;
+ u64 npages = (end - start) >> PAGE_SHIFT;
struct amdgpu_device *adev = node->adev;
struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
@@ -408,7 +409,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
buf = kvcalloc(npages,
- 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
+ 2 * sizeof(*migrate.src) + sizeof(u64) + sizeof(dma_addr_t),
GFP_KERNEL);
if (!buf)
goto out;
@@ -447,9 +448,9 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
- mpages = cpages - svm_migrate_unsuccessful_pages(&migrate);
- pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
- mpages, cpages, migrate.npages);
+ mpages = svm_migrate_successful_pages(&migrate);
+ pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n",
+ mpages, cpages, migrate.npages);
svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages);
@@ -490,7 +491,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
{
unsigned long addr, start, end;
struct vm_area_struct *vma;
- uint64_t ttm_res_offset;
+ u64 ttm_res_offset;
struct kfd_node *node;
unsigned long mpages = 0;
long r = 0;
@@ -580,14 +581,14 @@ static void svm_migrate_page_free(struct page *page)
static int
svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
- dma_addr_t *scratch, uint64_t npages)
+ dma_addr_t *scratch, u64 npages)
{
struct device *dev = adev->dev;
- uint64_t *src;
+ u64 *src;
dma_addr_t *dst;
struct page *dpage;
- uint64_t i = 0, j;
- uint64_t addr;
+ u64 i = 0, j;
+ u64 addr;
int r = 0;
pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
@@ -595,7 +596,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
addr = migrate->start;
- src = (uint64_t *)(scratch + npages);
+ src = (u64 *)(scratch + npages);
dst = scratch;
for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) {
@@ -683,12 +684,11 @@ out_oom:
*/
static long
svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
- struct vm_area_struct *vma, uint64_t start, uint64_t end,
+ struct vm_area_struct *vma, u64 start, u64 end,
uint32_t trigger, struct page *fault_page)
{
struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
- uint64_t npages = (end - start) >> PAGE_SHIFT;
- unsigned long upages = npages;
+ u64 npages = (end - start) >> PAGE_SHIFT;
unsigned long cpages = 0;
unsigned long mpages = 0;
struct amdgpu_device *adev = node->adev;
@@ -710,7 +710,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
buf = kvcalloc(npages,
- 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
+ 2 * sizeof(*migrate.src) + sizeof(u64) + sizeof(dma_addr_t),
GFP_KERNEL);
if (!buf)
goto out;
@@ -736,7 +736,6 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
if (!cpages) {
pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
prange->start, prange->last);
- upages = svm_migrate_unsuccessful_pages(&migrate);
goto out_free;
}
if (cpages != npages)
@@ -749,9 +748,9 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
scratch, npages);
migrate_vma_pages(&migrate);
- upages = svm_migrate_unsuccessful_pages(&migrate);
- pr_debug("unsuccessful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
- upages, cpages, migrate.npages);
+ mpages = svm_migrate_successful_pages(&migrate);
+ pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n",
+ mpages, cpages, migrate.npages);
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
@@ -764,8 +763,7 @@ out_free:
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
node->id, 0, trigger, r);
out:
- if (!r && cpages) {
- mpages = cpages - upages;
+ if (!r && mpages) {
pdd = svm_range_get_pdd_by_node(prange, node);
if (pdd)
WRITE_ONCE(pdd->page_out, pdd->page_out + mpages);
@@ -848,6 +846,9 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
}
if (r >= 0) {
+ WARN_ONCE(prange->vram_pages < mpages,
+ "Recorded vram pages (0x%llx) should not be less than migration pages (0x%lx).",
+ prange->vram_pages, mpages);
prange->vram_pages -= mpages;
/* prange does not have vram page set its actual_loc to system
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 67694bcd9464..70ef051511bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -111,7 +111,14 @@
#define KFD_KERNEL_QUEUE_SIZE 2048
-#define KFD_UNMAP_LATENCY_MS (4000)
+/* KFD_UNMAP_LATENCY_MS is the timeout the CP waits for SDMA preemption. One
+ * XCC can be associated with two SDMA engines. queue_preemption_timeout_ms is
+ * the time the driver waits for the CP to return the UNMAP_QUEUE fence, so:
+ *   queue_preemption_timeout_ms = sdma_preemption_time * 2 + CP workload
+ * The formula below reserves 10% of the total timeout for the CP workload.
+ */
+#define KFD_UNMAP_LATENCY_MS \
+ ((queue_preemption_timeout_ms - queue_preemption_timeout_ms / 10) >> 1)
#define KFD_MAX_SDMA_QUEUES 128
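
As a worked example, assuming the amdgpu default queue_preemption_timeout_ms of 9000: the formula reserves 900 ms (10%) for the CP workload and splits the remaining 8100 ms across the two SDMA engines, so KFD_UNMAP_LATENCY_MS = (9000 - 9000 / 10) >> 1 = 4050 ms, and 2 * 4050 + 900 = 9000 ms total.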
@@ -375,6 +382,8 @@ struct kfd_dev {
/* for dynamic partitioning */
int kfd_dev_lock;
+
+ atomic_t kfd_processes_count;
};
enum kfd_mempool {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 5be28c6c4f6a..ddfe30c13e9d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1088,6 +1088,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
pdd->runtime_inuse = false;
}
+ atomic_dec(&pdd->dev->kfd->kfd_processes_count);
+
kfree(pdd);
p->pdds[i] = NULL;
}
@@ -1649,6 +1651,8 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
/* Init idr used for memory handle translation */
idr_init(&pdd->alloc_idr);
+ atomic_inc(&dev->kfd->kfd_processes_count);
+
return pdd;
}
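
Together with the check in kgd2kfd_check_and_lock_kfd(), kfd_processes_count follows the lifetime of the per-process device data: incremented when a kfd_process_device is created, decremented when it is destroyed, and consulted before the device may be locked for repartitioning. A condensed sketch of the pattern, simplified from the three sites above:

/* Create path (kfd_create_process_device_data) */
atomic_inc(&dev->kfd->kfd_processes_count);

/* Destroy path (kfd_process_destroy_pdds, from deferred release work) */
atomic_dec(&pdd->dev->kfd->kfd_processes_count);

/* Gate (kgd2kfd_check_and_lock_kfd): refuse while teardown is pending */
if (atomic_read(&kfd->kfd_processes_count))
	return -EBUSY;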
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index a0f22ea6d15a..273f42e3afdd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1189,7 +1189,7 @@ svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b)
}
static uint64_t
-svm_range_get_pte_flags(struct kfd_node *node,
+svm_range_get_pte_flags(struct kfd_node *node, struct amdgpu_vm *vm,
struct svm_range *prange, int domain)
{
struct kfd_node *bo_node;
@@ -1292,10 +1292,6 @@ svm_range_get_pte_flags(struct kfd_node *node,
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
}
- mapping_flags |= AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE;
-
- if (flags & KFD_IOCTL_SVM_FLAG_GPU_RO)
- mapping_flags &= ~AMDGPU_VM_PAGE_WRITEABLE;
if (flags & KFD_IOCTL_SVM_FLAG_GPU_EXEC)
mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
@@ -1305,7 +1301,10 @@ svm_range_get_pte_flags(struct kfd_node *node,
if (gc_ip_version >= IP_VERSION(12, 0, 0))
pte_flags |= AMDGPU_PTE_IS_PTE;
- pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags);
+ amdgpu_gmc_get_vm_pte(node->adev, vm, NULL, mapping_flags, &pte_flags);
+ pte_flags |= AMDGPU_PTE_READABLE;
+ if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
+ pte_flags |= AMDGPU_PTE_WRITEABLE;
return pte_flags;
}
@@ -1412,7 +1411,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
last_start, prange->start + i, last_domain ? "GPU" : "CPU");
- pte_flags = svm_range_get_pte_flags(pdd->dev, prange, last_domain);
+ pte_flags = svm_range_get_pte_flags(pdd->dev, vm, prange, last_domain);
if (readonly)
pte_flags &= ~AMDGPU_PTE_WRITEABLE;
@@ -1714,9 +1713,32 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
next = min(vma->vm_end, end);
npages = (next - addr) >> PAGE_SHIFT;
+ /* HMM requires at least READ permission. If provided with PROT_NONE,
+ * unmap the memory. If it is not already mapped, this is a no-op.
+ * If PROT_WRITE is provided without READ, warn first, then unmap.
+ */
+ if (!(vma->vm_flags & VM_READ)) {
+ unsigned long e, s;
+
+ svm_range_lock(prange);
+ if (vma->vm_flags & VM_WRITE)
+ pr_debug("VM_WRITE without VM_READ is not supported");
+ s = max(start, prange->start);
+ e = min(end, prange->last);
+ if (e >= s)
+ r = svm_range_unmap_from_gpus(prange, s, e,
+ KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU);
+ svm_range_unlock(prange);
+ /* If unmap returns non-zero, we'll bail on the next for loop
+ * iteration, so just leave r and continue
+ */
+ addr = next;
+ continue;
+ }
+
WRITE_ONCE(p->svms.faulting_task, current);
r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
- readonly, owner, NULL,
+ readonly, owner,
&hmm_range);
WRITE_ONCE(p->svms.faulting_task, NULL);
if (r)
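
hmm_range_fault() cannot fault in pages the CPU itself is not allowed to read, so a VMA without VM_READ (for example a PROT_NONE mapping) must be unmapped from the GPUs rather than validated. A minimal sketch of the permission test, assuming only the standard vm_flags from <linux/mm.h>:

#include <linux/mm.h>

/* True if HMM can be asked to fault pages from this vma at all. */
static bool example_vma_hmm_faultable(struct vm_area_struct *vma)
{
	/* PROT_NONE clears VM_READ/VM_WRITE/VM_EXEC; HMM needs at
	 * least read permission to populate pages.
	 */
	return !!(vma->vm_flags & VM_READ);
}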
@@ -4239,7 +4261,7 @@ svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
r = svm_range_get_attr(p, mm, start, size, nattrs, attrs);
break;
default:
- r = EINVAL;
+ r = -EINVAL;
break;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 4ec73f33535e..5c98746eb72d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -530,6 +530,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
dev->gpu->kfd->sdma_fw_version);
sysfs_show_64bit_prop(buffer, offs, "unique_id",
+ dev->gpu->xcp ?
+ dev->gpu->xcp->unique_id :
dev->gpu->adev->unique_id);
sysfs_show_32bit_prop(buffer, offs, "num_xcc",
NUM_XCC(dev->gpu->xcc_mask));
@@ -1587,7 +1589,8 @@ static int kfd_dev_create_p2p_links(void)
break;
if (!dev->gpu || !dev->gpu->adev ||
(dev->gpu->kfd->hive_id &&
- dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id))
+ dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id &&
+ amdgpu_xgmi_get_is_sharing_enabled(dev->gpu->adev, new_dev->gpu->adev)))
goto next;
/* check if node(s) is/are peer accessible in one direction or bi-direction */