/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
#include <linux/mman.h>
#include <linux/file.h>

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"

/*
 * Hash table of struct kfd_process (field kfd_processes),
 * unique/indexed by mm_struct*.
 */
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_SRCU(kfd_processes_srcu);

/* For process termination handling */
static struct workqueue_struct *kfd_process_wq;

/* Ordered, single-threaded workqueue for restoring evicted
 * processes. Restoring multiple processes concurrently under memory
 * pressure can lead to processes blocking each other from validating
 * their BOs and result in a live-lock situation where processes
 * remain evicted indefinitely.
 */
static struct workqueue_struct *kfd_restore_wq;

static struct kfd_process *find_process(const struct task_struct *thread);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread,
					struct file *filep);

static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);


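/* Create the workqueues used for process teardown and for restoring
 * evicted processes. Safe to call more than once; existing workqueues
 * are reused.
 */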
int kfd_process_create_wq(void)
{
	if (!kfd_process_wq)
		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
	if (!kfd_restore_wq)
		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);

	if (!kfd_process_wq || !kfd_restore_wq) {
		kfd_process_destroy_wq();
		return -ENOMEM;
	}

	return 0;
}

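/* Destroy the workqueues created by kfd_process_create_wq. Pending work
 * is flushed by destroy_workqueue() before the queues are freed.
 */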
void kfd_process_destroy_wq(void)
{
	if (kfd_process_wq) {
		destroy_workqueue(kfd_process_wq);
		kfd_process_wq = NULL;
	}
	if (kfd_restore_wq) {
		destroy_workqueue(kfd_restore_wq);
		kfd_restore_wq = NULL;
	}
}

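/* kfd_process_free_gpuvm - Unmap a GPUVM allocation from the device and
 * free the underlying BO. Counterpart of kfd_process_alloc_gpuvm.
 */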
static void kfd_process_free_gpuvm(struct kgd_mem *mem,
			struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;

	dev->kfd2kgd->unmap_memory_to_gpu(dev->kgd, mem, pdd->vm);
	dev->kfd2kgd->free_memory_of_gpu(dev->kgd, mem);
}

/* kfd_process_alloc_gpuvm - Allocate GPUVM memory for the KFD process
 *	This function should only be called right after the process
 *	is created, while kfd_processes_mutex is still being held
 *	to avoid concurrency. Because of that exclusiveness, we do
 *	not need to take p->mutex.
 */
static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
				   uint64_t gpu_va, uint32_t size,
				   uint32_t flags, void **kptr)
{
	struct kfd_dev *kdev = pdd->dev;
	struct kgd_mem *mem = NULL;
	int handle;
	int err;

	err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
						 pdd->vm, &mem, NULL, flags);
	if (err)
		goto err_alloc_mem;

	err = kdev->kfd2kgd->map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
	if (err)
		goto err_map_mem;

	err = kdev->kfd2kgd->sync_memory(kdev->kgd, mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Create an obj handle so kfd_process_device_remove_obj_handle
	 * will take care of the bo removal when the process finishes.
	 * We do not need to take p->mutex, because the process is just
	 * created and the ioctls have not had the chance to run.
	 */
	handle = kfd_process_device_create_obj_handle(pdd, mem);

	if (handle < 0) {
		err = handle;
		goto free_gpuvm;
	}

	if (kptr) {
		err = kdev->kfd2kgd->map_gtt_bo_to_kernel(kdev->kgd,
				(struct kgd_mem *)mem, kptr, NULL);
		if (err) {
			pr_debug("Map GTT BO to kernel failed\n");
			goto free_obj_handle;
		}
	}

	return err;

free_obj_handle:
	kfd_process_device_remove_obj_handle(pdd, handle);
free_gpuvm:
sync_memory_failed:
	kfd_process_free_gpuvm(mem, pdd);
	return err;

err_map_mem:
	kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem);
err_alloc_mem:
	*kptr = NULL;
	return err;
}

/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
 *	process for IB usage. The reserved memory is used by KFD to
 *	submit IBs to AMDGPU from kernel space. If the memory is
 *	reserved successfully, ib_kaddr will hold the CPU/kernel
 *	address. Check ib_kaddr before accessing the memory.
 */
static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
{
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
			 ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
			 ALLOC_MEM_FLAGS_WRITABLE |
			 ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (qpd->ib_kaddr || !qpd->ib_base)
		return 0;

	/* ib_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
				      &kaddr);
	if (ret)
		return ret;

	qpd->ib_kaddr = kaddr;

	return 0;
}

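/* kfd_create_process - Look up or create the kfd_process for the calling
 * thread's mm. @filep is the file pointer of the opened KFD device node,
 * used later for mapping the CWSR trap handler on APUs.
 */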
struct kfd_process *kfd_create_process(struct file *filep)
{
	struct kfd_process *process;
	struct task_struct *thread = current;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	/*
	 * Take the kfd_processes_mutex before starting process creation
	 * so that two threads of the same process cannot create two
	 * kfd_process structures.
	 */
	mutex_lock(&kfd_processes_mutex);

	/* A prior open of /dev/kfd could have already created the process. */
	process = find_process(thread);
	if (process)
		pr_debug("Process already found\n");
	else
		process = create_process(thread, filep);

	mutex_unlock(&kfd_processes_mutex);

	return process;
}

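/* kfd_get_process - Return the existing kfd_process of @thread without
 * creating one or taking an extra reference. Returns ERR_PTR(-EINVAL)
 * for kernel threads, NULL if no kfd_process exists yet.
 */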
struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	process = find_process(thread);

	return process;
}

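/* Look up a kfd_process by its mm_struct. The caller must hold the
 * kfd_processes_srcu read lock (or kfd_processes_mutex) to keep the
 * entry from disappearing.
 */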
static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *process;

	hash_for_each_possible_rcu(kfd_processes_table, process,
					kfd_processes, (uintptr_t)mm)
		if (process->mm == mm)
			return process;

	return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
	struct kfd_process *p;
	int idx;

	idx = srcu_read_lock(&kfd_processes_srcu);
	p = find_process_by_mm(thread->mm);
	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

void kfd_unref_process(struct kfd_process *p)
{
	kref_put(&p->ref, kfd_process_ref_release);
}

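/* Free all buffer objects tracked in the per-device allocation IDR,
 * unmapping each one from every GPU VM of the process first.
 */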
static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
{
	struct kfd_process *p = pdd->process;
	void *mem;
	int id;

	/*
	 * Remove all handles from the idr and release the
	 * corresponding local memory objects.
	 */
	idr_for_each_entry(&pdd->alloc_idr, mem, id) {
		struct kfd_process_device *peer_pdd;

		list_for_each_entry(peer_pdd, &p->per_device_data,
				    per_device_list) {
			if (!peer_pdd->vm)
				continue;
			peer_pdd->dev->kfd2kgd->unmap_memory_to_gpu(
				peer_pdd->dev->kgd, mem, peer_pdd->vm);
		}

		pdd->dev->kfd2kgd->free_memory_of_gpu(pdd->dev->kgd, mem);
		kfd_process_device_remove_obj_handle(pdd, id);
	}
}

static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
{
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		kfd_process_device_free_bos(pdd);
}

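/* Tear down all per-device data of a process: release the DRM file or
 * destroy the process VM, free the CWSR pages allocated for APUs and
 * the doorbell bitmap, and free each pdd.
 */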
static void kfd_process_destroy_pdds(struct kfd_process *p)
{
	struct kfd_process_device *pdd, *temp;

	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
				 per_device_list) {
		pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
				pdd->dev->id, p->pasid);

		if (pdd->drm_file)
			fput(pdd->drm_file);
		else if (pdd->vm)
			pdd->dev->kfd2kgd->destroy_process_vm(
				pdd->dev->kgd, pdd->vm);

		list_del(&pdd->per_device_list);

		if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
				get_order(KFD_CWSR_TBA_TMA_SIZE));

		kfree(pdd->qpd.doorbell_bitmap);
		idr_destroy(&pdd->alloc_idr);

		kfree(pdd);
	}
}

/* No process locking is needed in this function, because the process
 * is not findable any more. We must assume that no other thread is
 * using it any more, otherwise we couldn't safely free the process
 * structure in the end.
 */
static void kfd_process_wq_release(struct work_struct *work)
{
	struct kfd_process *p = container_of(work, struct kfd_process,
					     release_work);

	kfd_iommu_unbind_process(p);

	kfd_process_free_outstanding_kfd_bos(p);

	kfd_process_destroy_pdds(p);
	dma_fence_put(p->ef);

	kfd_event_free_process(p);

	kfd_pasid_free(p->pasid);
	kfd_free_process_doorbells(p);

	mutex_destroy(&p->mutex);

	put_task_struct(p->lead_thread);

	kfree(p);
}

static void kfd_process_ref_release(struct kref *ref)
{
	struct kfd_process *p = container_of(ref, struct kfd_process, ref);

	INIT_WORK(&p->release_work, kfd_process_wq_release);
	queue_work(kfd_process_wq, &p->release_work);
}

static void kfd_process_destroy_delayed(struct rcu_head *rcu)
{
	struct kfd_process *p = container_of(rcu, struct kfd_process, rcu);

	kfd_unref_process(p);
}

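/* MMU notifier release callback, called when the process address space
 * goes away. Removes the process from the hash table, cancels eviction
 * and restore work, tears down debug state and queues, and drops the
 * initial process reference after an RCU grace period.
 */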
static void kfd_process_notifier_release(struct mmu_notifier *mn,
					struct mm_struct *mm)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd = NULL;

	/*
	 * The kfd_process structure cannot be freed because the
	 * mmu_notifier srcu is read-locked.
	 */
	p = container_of(mn, struct kfd_process, mmu_notifier);
	if (WARN_ON(p->mm != mm))
		return;

	mutex_lock(&kfd_processes_mutex);
	hash_del_rcu(&p->kfd_processes);
	mutex_unlock(&kfd_processes_mutex);
	synchronize_srcu(&kfd_processes_srcu);

	cancel_delayed_work_sync(&p->eviction_work);
	cancel_delayed_work_sync(&p->restore_work);

	mutex_lock(&p->mutex);

	/* Iterate over all process device data structures. If a pdd is
	 * in debug mode, force unregistration first so that the queues
	 * can be destroyed afterwards.
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;

		mutex_lock(kfd_get_dbgmgr_mutex());
		if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
			if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
				kfd_dbgmgr_destroy(dev->dbgmgr);
				dev->dbgmgr = NULL;
			}
		}
		mutex_unlock(kfd_get_dbgmgr_mutex());
	}

	kfd_process_dequeue_from_all_devices(p);
	pqm_uninit(&p->pqm);

	/* Indicate to other users that MM is no longer valid */
	p->mm = NULL;

	mutex_unlock(&p->mutex);

	mmu_notifier_unregister_no_release(&p->mmu_notifier, mm);
	mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
	.release = kfd_process_notifier_release,
};

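/* Map the CWSR trap handler into the user address space of an APU
 * process (devices without a dedicated cwsr_base) and copy the trap
 * handler ISA into the mapping.
 */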
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
{
	unsigned long  offset;
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;
		struct qcm_process_device *qpd = &pdd->qpd;

		if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
			continue;

		offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
			<< PAGE_SHIFT;
		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
			MAP_SHARED, offset);

		if (IS_ERR_VALUE(qpd->tba_addr)) {
			int err = qpd->tba_addr;

			pr_err("Failure to set tba address. error %d.\n", err);
			qpd->tba_addr = 0;
			qpd->cwsr_kaddr = NULL;
			return err;
		}

		memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

		qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
		pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
			qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
	}

	return 0;
}

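/* Allocate the CWSR trap handler buffer in the GPUVM of a dGPU process
 * device (cwsr_base is only set for dGPUs), map it to the kernel and
 * copy the trap handler ISA into it.
 */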
static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
		ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
		return 0;

	/* cwsr_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
				      KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
	if (ret)
		return ret;

	qpd->cwsr_kaddr = kaddr;
	qpd->tba_addr = qpd->cwsr_base;

	memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

	qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
	pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
		 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);

	return 0;
}

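/* create_process - Allocate and initialize a new kfd_process for @thread.
 * Called with kfd_processes_mutex held. The new process is added to
 * kfd_processes_table and takes a reference on its lead thread.
 */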
static struct kfd_process *create_process(const struct task_struct *thread,
					struct file *filep)
{
	struct kfd_process *process;
	int err = -ENOMEM;

	process = kzalloc(sizeof(*process), GFP_KERNEL);

	if (!process)
		goto err_alloc_process;

	process->pasid = kfd_pasid_alloc();
	if (process->pasid == 0)
		goto err_alloc_pasid;

	if (kfd_alloc_process_doorbells(process) < 0)
		goto err_alloc_doorbells;

	kref_init(&process->ref);

	mutex_init(&process->mutex);

	process->mm = thread->mm;

	/* register notifier */
	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
	err = mmu_notifier_register(&process->mmu_notifier, process->mm);
	if (err)
		goto err_mmu_notifier;

	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
			(uintptr_t)process->mm);

	process->lead_thread = thread->group_leader;
	get_task_struct(process->lead_thread);

	INIT_LIST_HEAD(&process->per_device_data);

	kfd_event_init_process(process);

	err = pqm_init(&process->pqm, process);
	if (err != 0)
		goto err_process_pqm_init;

	/* init process apertures */
	process->is_32bit_user_mode = in_compat_syscall();
	err = kfd_init_apertures(process);
	if (err != 0)
		goto err_init_apertures;

	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
	process->last_restore_timestamp = get_jiffies_64();

	err = kfd_process_init_cwsr_apu(process, filep);
	if (err)
		goto err_init_cwsr;

	return process;

err_init_cwsr:
	kfd_process_free_outstanding_kfd_bos(process);
	kfd_process_destroy_pdds(process);
err_init_apertures:
	pqm_uninit(&process->pqm);
err_process_pqm_init:
	hash_del_rcu(&process->kfd_processes);
	synchronize_rcu();
	mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
err_mmu_notifier:
	mutex_destroy(&process->mutex);
	kfd_free_process_doorbells(process);
err_alloc_doorbells:
	kfd_pasid_free(process->pasid);
err_alloc_pasid:
	kfree(process);
err_alloc_process:
	return ERR_PTR(err);
}

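/* Allocate the per-process doorbell bitmap for SOC15 ASICs and mark the
 * doorbells reserved by the KGD as already in use.
 */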
static int init_doorbell_bitmap(struct qcm_process_device *qpd,
			struct kfd_dev *dev)
{
	unsigned int i;

	if (!KFD_IS_SOC15(dev->device_info->asic_family))
		return 0;

	qpd->doorbell_bitmap =
		kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
				     BITS_PER_BYTE), GFP_KERNEL);
	if (!qpd->doorbell_bitmap)
		return -ENOMEM;

	/* Mask out any reserved doorbells */
	for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++)
		if ((dev->shared_resources.reserved_doorbell_mask & i) ==
		    dev->shared_resources.reserved_doorbell_val) {
			set_bit(i, qpd->doorbell_bitmap);
			pr_debug("reserved doorbell 0x%03x\n", i);
		}

	return 0;
}

struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if (pdd->dev == dev)
			return pdd;

	return NULL;
}

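/* Allocate and initialize the per-device data (pdd) of @p for @dev and
 * link it into the process's per_device_data list.
 */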
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
	if (!pdd)
		return NULL;

	if (init_doorbell_bitmap(&pdd->qpd, dev)) {
		pr_err("Failed to init doorbell for process\n");
		kfree(pdd);
		return NULL;
	}

	pdd->dev = dev;
	INIT_LIST_HEAD(&pdd->qpd.queues_list);
	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
	pdd->qpd.dqm = dev->dqm;
	pdd->qpd.pqm = &p->pqm;
	pdd->qpd.evicted = 0;
	pdd->process = p;
	pdd->bound = PDD_UNBOUND;
	pdd->already_dequeued = false;
	list_add(&pdd->per_device_list, &p->per_device_data);

	/* Init idr used for memory handle translation */
	idr_init(&pdd->alloc_idr);

	return pdd;
}

/**
 * kfd_process_device_init_vm - Initialize a VM for a process-device
 *
 * @pdd: The process-device
 * @drm_file: Optional pointer to a DRM file descriptor
 *
 * If @drm_file is specified, it will be used to acquire the VM from
 * that file descriptor. If successful, the @pdd takes ownership of
 * the file descriptor.
 *
 * If @drm_file is NULL, a new VM is created.
 *
 * Returns 0 on success, -errno on failure.
 */
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
			       struct file *drm_file)
{
	struct kfd_process *p;
	struct kfd_dev *dev;
	int ret;

	if (pdd->vm)
		return drm_file ? -EBUSY : 0;

	p = pdd->process;
	dev = pdd->dev;

	if (drm_file)
		ret = dev->kfd2kgd->acquire_process_vm(
			dev->kgd, drm_file,
			&pdd->vm, &p->kgd_process_info, &p->ef);
	else
		ret = dev->kfd2kgd->create_process_vm(
			dev->kgd, &pdd->vm, &p->kgd_process_info, &p->ef);
	if (ret) {
		pr_err("Failed to create process VM object\n");
		return ret;
	}

	ret = kfd_process_device_reserve_ib_mem(pdd);
	if (ret)
		goto err_reserve_ib_mem;
	ret = kfd_process_device_init_cwsr_dgpu(pdd);
	if (ret)
		goto err_init_cwsr;

	pdd->drm_file = drm_file;

	return 0;

err_init_cwsr:
err_reserve_ib_mem:
	kfd_process_device_free_bos(pdd);
	if (!drm_file)
		dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm);
	pdd->vm = NULL;

	return ret;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int err;

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return ERR_PTR(-ENOMEM);
	}

	err = kfd_iommu_bind_process_to_device(pdd);
	if (err)
		return ERR_PTR(err);

	err = kfd_process_device_init_vm(pdd, NULL);
	if (err)
		return ERR_PTR(err);

	return pdd;
}

struct kfd_process_device *kfd_get_first_process_device_data(
						struct kfd_process *p)
{
	return list_first_entry(&p->per_device_data,
				struct kfd_process_device,
				per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(
						struct kfd_process *p,
						struct kfd_process_device *pdd)
{
	if (list_is_last(&pdd->per_device_list, &p->per_device_data))
		return NULL;
	return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
	return !(list_empty(&p->per_device_data));
}

/* Create specific handle mapped to mem from process local memory idr
 * Assumes that the process lock is held.
 */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
					void *mem)
{
	return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
}

/* Translate specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
					int handle)
{
	if (handle < 0)
		return NULL;

	return idr_find(&pdd->alloc_idr, handle);
}

/* Remove specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
					int handle)
{
	if (handle >= 0)
		idr_remove(&pdd->alloc_idr, handle);
}
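
/* Typical use of the obj handle helpers above (illustrative sketch only,
 * with the process lock held):
 *
 *	handle = kfd_process_device_create_obj_handle(pdd, mem);
 *	if (handle >= 0) {
 *		mem = kfd_process_device_translate_handle(pdd, handle);
 *		...
 *		kfd_process_device_remove_obj_handle(pdd, handle);
 *	}
 */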

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
	struct kfd_process *p, *ret_p = NULL;
	unsigned int temp;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (p->pasid == pasid) {
			kref_get(&p->ref);
			ret_p = p;
			break;
		}
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return ret_p;
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *p;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	p = find_process_by_mm(mm);
	if (p)
		kref_get(&p->ref);

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

/* kfd_process_evict_queues - Evict all user queues of a process
 *
 * Eviction is reference-counted per process-device. This means multiple
 * evictions from different sources can be nested safely.
 */
int kfd_process_evict_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r = 0;
	unsigned int n_evicted = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
							    &pdd->qpd);
		if (r) {
			pr_err("Failed to evict process queues\n");
			goto fail;
		}
		n_evicted++;
	}

	return r;

fail:
	/* To keep state consistent, roll back partial eviction by
	 * restoring queues
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		if (n_evicted == 0)
			break;
		if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd))
			pr_err("Failed to restore queues\n");

		n_evicted--;
	}

	return r;
}

/* kfd_process_restore_queues - Restore all user queues of a process */
int kfd_process_restore_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r, ret = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd);
		if (r) {
			pr_err("Failed to restore process queues\n");
			if (!ret)
				ret = r;
		}
	}

	return ret;
}

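/* Delayed work that evicts all user queues of a process, signals the
 * eviction fence and schedules the restore work after
 * PROCESS_RESTORE_TIME_MS.
 */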
static void evict_process_worker(struct work_struct *work)
{
	int ret;
	struct kfd_process *p;
	struct delayed_work *dwork;

	dwork = to_delayed_work(work);

	/* Process termination cancels this work item. So during the
	 * lifetime of this work, kfd_process p will be valid.
	 */
	p = container_of(dwork, struct kfd_process, eviction_work);
	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
		  "Eviction fence mismatch\n");

	/* A narrow window of overlap between the restore and evict work
	 * items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
	 * unreserves the KFD BOs, the process can be evicted again, but
	 * restore still has a few more steps to finish. So wait for any
	 * previous restore work to complete.
	 */
	flush_delayed_work(&p->restore_work);

	pr_debug("Started evicting pasid %d\n", p->pasid);
	ret = kfd_process_evict_queues(p);
	if (!ret) {
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
		queue_delayed_work(kfd_restore_wq, &p->restore_work,
				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));

		pr_debug("Finished evicting pasid %d\n", p->pasid);
	} else
		pr_err("Failed to evict queues of pasid %d\n", p->pasid);
}

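/* Delayed work that restores the BOs and user queues of an evicted
 * process. Scheduled by evict_process_worker after a grace period and
 * by kfd_resume_all_processes; reschedules itself if restore fails.
 */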
static void restore_process_worker(struct work_struct *work)
{
	struct delayed_work *dwork;
	struct kfd_process *p;
	struct kfd_process_device *pdd;
	int ret = 0;

	dwork = to_delayed_work(work);

	/* Process termination cancels this work item. So during the
	 * lifetime of this work, kfd_process p will be valid.
	 */
	p = container_of(dwork, struct kfd_process, restore_work);

	/* Call restore_process_bos on the first KGD device. This function
	 * takes care of restoring the whole process including other devices.
	 * Restore can fail if not enough memory is available. If so,
	 * reschedule again.
	 */
	pdd = list_first_entry(&p->per_device_data,
			       struct kfd_process_device,
			       per_device_list);

	pr_debug("Started restoring pasid %d\n", p->pasid);

	/* Set last_restore_timestamp before attempting the restore.
	 * Otherwise it would have to be set by KGD (restore_process_bos)
	 * before KFD BOs are unreserved. If not, the process could be
	 * evicted again before the timestamp is set.
	 * If restore fails, the timestamp will be set again in the next
	 * attempt. This would mean that the minimum GPU quantum would be
	 * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
	 * functions).
	 */

	p->last_restore_timestamp = get_jiffies_64();
	ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info,
						     &p->ef);
	if (ret) {
		pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",
			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
		WARN(!ret, "reschedule restore work failed\n");
		return;
	}

	ret = kfd_process_restore_queues(p);
	if (!ret)
		pr_debug("Finished restoring pasid %d\n", p->pasid);
	else
		pr_err("Failed to restore queues of pasid %d\n", p->pasid);
}

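/* Evict the queues of all known processes and signal their eviction
 * fences. Called when the device is suspended.
 */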
void kfd_suspend_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		cancel_delayed_work_sync(&p->eviction_work);
		cancel_delayed_work_sync(&p->restore_work);

		if (kfd_process_evict_queues(p))
			pr_err("Failed to suspend process %d\n", p->pasid);
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
}

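/* Schedule restore work for all known processes. Called when the device
 * is resumed; counterpart of kfd_suspend_all_processes.
 */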
int kfd_resume_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
			pr_err("Restore process %d failed during resume\n",
			       p->pasid);
			ret = -EFAULT;
		}
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
	return ret;
}

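/* kfd_reserved_mem_mmap - Back the CWSR reserved-memory mapping of an
 * APU process with zeroed pages and map them into the user VMA.
 */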
int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
			  struct vm_area_struct *vma)
{
	struct kfd_process_device *pdd;
	struct qcm_process_device *qpd;

	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
		pr_err("Incorrect CWSR mapping size.\n");
		return -EINVAL;
	}

	pdd = kfd_get_process_device_data(dev, process);
	if (!pdd)
		return -EINVAL;
	qpd = &pdd->qpd;

	qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					get_order(KFD_CWSR_TBA_TMA_SIZE));
	if (!qpd->cwsr_kaddr) {
		pr_err("Error allocating per process CWSR buffer.\n");
		return -ENOMEM;
	}

	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
		| VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
	/* Mapping pages to user process */
	return remap_pfn_range(vma, vma->vm_start,
			       PFN_DOWN(__pa(qpd->cwsr_kaddr)),
			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}

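/* Flush the GPU TLBs for the process-device: by VMID when the hardware
 * scheduler is disabled, otherwise by PASID.
 */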
void kfd_flush_tlb(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;
	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		/* Nothing to flush until a VMID is assigned, which
		 * only happens when the first queue is created.
		 */
		if (pdd->qpd.vmid)
			f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
	} else {
		f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
	}
}

#if defined(CONFIG_DEBUG_FS)

int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
{
	struct kfd_process *p;
	unsigned int temp;
	int r = 0;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		seq_printf(m, "Process %d PASID %d:\n",
			   p->lead_thread->tgid, p->pasid);

		mutex_lock(&p->mutex);
		r = pqm_debugfs_mqds(m, &p->pqm);
		mutex_unlock(&p->mutex);

		if (r)
			break;
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return r;
}

#endif