1 #ifndef _LINUX_MMU_NOTIFIER_H 2 #define _LINUX_MMU_NOTIFIER_H 3 4 #include <linux/list.h> 5 #include <linux/spinlock.h> 6 #include <linux/mm_types.h> 7 #include <linux/srcu.h> 8 9 struct mmu_notifier; 10 struct mmu_notifier_ops; 11 12 #ifdef CONFIG_MMU_NOTIFIER 13 14 /* 15 * The mmu notifier_mm structure is allocated and installed in 16 * mm->mmu_notifier_mm inside the mm_take_all_locks() protected 17 * critical section and it's released only when mm_count reaches zero 18 * in mmdrop(). 19 */ 20 struct mmu_notifier_mm { 21 /* all mmu notifiers registerd in this mm are queued in this list */ 22 struct hlist_head list; 23 /* to serialize the list modifications and hlist_unhashed */ 24 spinlock_t lock; 25 }; 26 27 struct mmu_notifier_ops { 28 /* 29 * Called either by mmu_notifier_unregister or when the mm is 30 * being destroyed by exit_mmap, always before all pages are 31 * freed. This can run concurrently with other mmu notifier 32 * methods (the ones invoked outside the mm context) and it 33 * should tear down all secondary mmu mappings and freeze the 34 * secondary mmu. If this method isn't implemented you've to 35 * be sure that nothing could possibly write to the pages 36 * through the secondary mmu by the time the last thread with 37 * tsk->mm == mm exits. 38 * 39 * As side note: the pages freed after ->release returns could 40 * be immediately reallocated by the gart at an alias physical 41 * address with a different cache model, so if ->release isn't 42 * implemented because all _software_ driven memory accesses 43 * through the secondary mmu are terminated by the time the 44 * last thread of this mm quits, you've also to be sure that 45 * speculative _hardware_ operations can't allocate dirty 46 * cachelines in the cpu that could not be snooped and made 47 * coherent with the other read and write operations happening 48 * through the gart alias address, so leading to memory 49 * corruption. 50 */ 51 void (*release)(struct mmu_notifier *mn, 52 struct mm_struct *mm); 53 54 /* 55 * clear_flush_young is called after the VM is 56 * test-and-clearing the young/accessed bitflag in the 57 * pte. This way the VM will provide proper aging to the 58 * accesses to the page through the secondary MMUs and not 59 * only to the ones through the Linux pte. 60 * Start-end is necessary in case the secondary MMU is mapping the page 61 * at a smaller granularity than the primary MMU. 62 */ 63 int (*clear_flush_young)(struct mmu_notifier *mn, 64 struct mm_struct *mm, 65 unsigned long start, 66 unsigned long end); 67 68 /* 69 * clear_young is a lightweight version of clear_flush_young. Like the 70 * latter, it is supposed to test-and-clear the young/accessed bitflag 71 * in the secondary pte, but it may omit flushing the secondary tlb. 72 */ 73 int (*clear_young)(struct mmu_notifier *mn, 74 struct mm_struct *mm, 75 unsigned long start, 76 unsigned long end); 77 78 /* 79 * test_young is called to check the young/accessed bitflag in 80 * the secondary pte. This is used to know if the page is 81 * frequently used without actually clearing the flag or tearing 82 * down the secondary mapping on the page. 83 */ 84 int (*test_young)(struct mmu_notifier *mn, 85 struct mm_struct *mm, 86 unsigned long address); 87 88 /* 89 * change_pte is called in cases that pte mapping to page is changed: 90 * for example, when ksm remaps pte to point to a new shared page. 91 */ 92 void (*change_pte)(struct mmu_notifier *mn, 93 struct mm_struct *mm, 94 unsigned long address, 95 pte_t pte); 96 97 /* 98 * Before this is invoked any secondary MMU is still ok to 99 * read/write to the page previously pointed to by the Linux 100 * pte because the page hasn't been freed yet and it won't be 101 * freed until this returns. If required set_page_dirty has to 102 * be called internally to this method. 103 */ 104 void (*invalidate_page)(struct mmu_notifier *mn, 105 struct mm_struct *mm, 106 unsigned long address); 107 108 /* 109 * invalidate_range_start() and invalidate_range_end() must be 110 * paired and are called only when the mmap_sem and/or the 111 * locks protecting the reverse maps are held. If the subsystem 112 * can't guarantee that no additional references are taken to 113 * the pages in the range, it has to implement the 114 * invalidate_range() notifier to remove any references taken 115 * after invalidate_range_start(). 116 * 117 * Invalidation of multiple concurrent ranges may be 118 * optionally permitted by the driver. Either way the 119 * establishment of sptes is forbidden in the range passed to 120 * invalidate_range_begin/end for the whole duration of the 121 * invalidate_range_begin/end critical section. 122 * 123 * invalidate_range_start() is called when all pages in the 124 * range are still mapped and have at least a refcount of one. 125 * 126 * invalidate_range_end() is called when all pages in the 127 * range have been unmapped and the pages have been freed by 128 * the VM. 129 * 130 * The VM will remove the page table entries and potentially 131 * the page between invalidate_range_start() and 132 * invalidate_range_end(). If the page must not be freed 133 * because of pending I/O or other circumstances then the 134 * invalidate_range_start() callback (or the initial mapping 135 * by the driver) must make sure that the refcount is kept 136 * elevated. 137 * 138 * If the driver increases the refcount when the pages are 139 * initially mapped into an address space then either 140 * invalidate_range_start() or invalidate_range_end() may 141 * decrease the refcount. If the refcount is decreased on 142 * invalidate_range_start() then the VM can free pages as page 143 * table entries are removed. If the refcount is only 144 * droppped on invalidate_range_end() then the driver itself 145 * will drop the last refcount but it must take care to flush 146 * any secondary tlb before doing the final free on the 147 * page. Pages will no longer be referenced by the linux 148 * address space but may still be referenced by sptes until 149 * the last refcount is dropped. 150 */ 151 void (*invalidate_range_start)(struct mmu_notifier *mn, 152 struct mm_struct *mm, 153 unsigned long start, unsigned long end); 154 void (*invalidate_range_end)(struct mmu_notifier *mn, 155 struct mm_struct *mm, 156 unsigned long start, unsigned long end); 157 158 /* 159 * invalidate_range() is either called between 160 * invalidate_range_start() and invalidate_range_end() when the 161 * VM has to free pages that where unmapped, but before the 162 * pages are actually freed, or outside of _start()/_end() when 163 * a (remote) TLB is necessary. 164 * 165 * If invalidate_range() is used to manage a non-CPU TLB with 166 * shared page-tables, it not necessary to implement the 167 * invalidate_range_start()/end() notifiers, as 168 * invalidate_range() alread catches the points in time when an 169 * external TLB range needs to be flushed. 170 * 171 * The invalidate_range() function is called under the ptl 172 * spin-lock and not allowed to sleep. 173 * 174 * Note that this function might be called with just a sub-range 175 * of what was passed to invalidate_range_start()/end(), if 176 * called between those functions. 177 */ 178 void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm, 179 unsigned long start, unsigned long end); 180 }; 181 182 /* 183 * The notifier chains are protected by mmap_sem and/or the reverse map 184 * semaphores. Notifier chains are only changed when all reverse maps and 185 * the mmap_sem locks are taken. 186 * 187 * Therefore notifier chains can only be traversed when either 188 * 189 * 1. mmap_sem is held. 190 * 2. One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem). 191 * 3. No other concurrent thread can access the list (release) 192 */ 193 struct mmu_notifier { 194 struct hlist_node hlist; 195 const struct mmu_notifier_ops *ops; 196 }; 197 198 static inline int mm_has_notifiers(struct mm_struct *mm) 199 { 200 return unlikely(mm->mmu_notifier_mm); 201 } 202 203 extern int mmu_notifier_register(struct mmu_notifier *mn, 204 struct mm_struct *mm); 205 extern int __mmu_notifier_register(struct mmu_notifier *mn, 206 struct mm_struct *mm); 207 extern void mmu_notifier_unregister(struct mmu_notifier *mn, 208 struct mm_struct *mm); 209 extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, 210 struct mm_struct *mm); 211 extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); 212 extern void __mmu_notifier_release(struct mm_struct *mm); 213 extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, 214 unsigned long start, 215 unsigned long end); 216 extern int __mmu_notifier_clear_young(struct mm_struct *mm, 217 unsigned long start, 218 unsigned long end); 219 extern int __mmu_notifier_test_young(struct mm_struct *mm, 220 unsigned long address); 221 extern void __mmu_notifier_change_pte(struct mm_struct *mm, 222 unsigned long address, pte_t pte); 223 extern void __mmu_notifier_invalidate_page(struct mm_struct *mm, 224 unsigned long address); 225 extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, 226 unsigned long start, unsigned long end); 227 extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, 228 unsigned long start, unsigned long end); 229 extern void __mmu_notifier_invalidate_range(struct mm_struct *mm, 230 unsigned long start, unsigned long end); 231 232 static inline void mmu_notifier_release(struct mm_struct *mm) 233 { 234 if (mm_has_notifiers(mm)) 235 __mmu_notifier_release(mm); 236 } 237 238 static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, 239 unsigned long start, 240 unsigned long end) 241 { 242 if (mm_has_notifiers(mm)) 243 return __mmu_notifier_clear_flush_young(mm, start, end); 244 return 0; 245 } 246 247 static inline int mmu_notifier_clear_young(struct mm_struct *mm, 248 unsigned long start, 249 unsigned long end) 250 { 251 if (mm_has_notifiers(mm)) 252 return __mmu_notifier_clear_young(mm, start, end); 253 return 0; 254 } 255 256 static inline int mmu_notifier_test_young(struct mm_struct *mm, 257 unsigned long address) 258 { 259 if (mm_has_notifiers(mm)) 260 return __mmu_notifier_test_young(mm, address); 261 return 0; 262 } 263 264 static inline void mmu_notifier_change_pte(struct mm_struct *mm, 265 unsigned long address, pte_t pte) 266 { 267 if (mm_has_notifiers(mm)) 268 __mmu_notifier_change_pte(mm, address, pte); 269 } 270 271 static inline void mmu_notifier_invalidate_page(struct mm_struct *mm, 272 unsigned long address) 273 { 274 if (mm_has_notifiers(mm)) 275 __mmu_notifier_invalidate_page(mm, address); 276 } 277 278 static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm, 279 unsigned long start, unsigned long end) 280 { 281 if (mm_has_notifiers(mm)) 282 __mmu_notifier_invalidate_range_start(mm, start, end); 283 } 284 285 static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, 286 unsigned long start, unsigned long end) 287 { 288 if (mm_has_notifiers(mm)) 289 __mmu_notifier_invalidate_range_end(mm, start, end); 290 } 291 292 static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, 293 unsigned long start, unsigned long end) 294 { 295 if (mm_has_notifiers(mm)) 296 __mmu_notifier_invalidate_range(mm, start, end); 297 } 298 299 static inline void mmu_notifier_mm_init(struct mm_struct *mm) 300 { 301 mm->mmu_notifier_mm = NULL; 302 } 303 304 static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) 305 { 306 if (mm_has_notifiers(mm)) 307 __mmu_notifier_mm_destroy(mm); 308 } 309 310 #define ptep_clear_flush_young_notify(__vma, __address, __ptep) \ 311 ({ \ 312 int __young; \ 313 struct vm_area_struct *___vma = __vma; \ 314 unsigned long ___address = __address; \ 315 __young = ptep_clear_flush_young(___vma, ___address, __ptep); \ 316 __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ 317 ___address, \ 318 ___address + \ 319 PAGE_SIZE); \ 320 __young; \ 321 }) 322 323 #define pmdp_clear_flush_young_notify(__vma, __address, __pmdp) \ 324 ({ \ 325 int __young; \ 326 struct vm_area_struct *___vma = __vma; \ 327 unsigned long ___address = __address; \ 328 __young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \ 329 __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ 330 ___address, \ 331 ___address + \ 332 PMD_SIZE); \ 333 __young; \ 334 }) 335 336 #define ptep_clear_young_notify(__vma, __address, __ptep) \ 337 ({ \ 338 int __young; \ 339 struct vm_area_struct *___vma = __vma; \ 340 unsigned long ___address = __address; \ 341 __young = ptep_test_and_clear_young(___vma, ___address, __ptep);\ 342 __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address, \ 343 ___address + PAGE_SIZE); \ 344 __young; \ 345 }) 346 347 #define pmdp_clear_young_notify(__vma, __address, __pmdp) \ 348 ({ \ 349 int __young; \ 350 struct vm_area_struct *___vma = __vma; \ 351 unsigned long ___address = __address; \ 352 __young = pmdp_test_and_clear_young(___vma, ___address, __pmdp);\ 353 __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address, \ 354 ___address + PMD_SIZE); \ 355 __young; \ 356 }) 357 358 #define ptep_clear_flush_notify(__vma, __address, __ptep) \ 359 ({ \ 360 unsigned long ___addr = __address & PAGE_MASK; \ 361 struct mm_struct *___mm = (__vma)->vm_mm; \ 362 pte_t ___pte; \ 363 \ 364 ___pte = ptep_clear_flush(__vma, __address, __ptep); \ 365 mmu_notifier_invalidate_range(___mm, ___addr, \ 366 ___addr + PAGE_SIZE); \ 367 \ 368 ___pte; \ 369 }) 370 371 #define pmdp_huge_clear_flush_notify(__vma, __haddr, __pmd) \ 372 ({ \ 373 unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ 374 struct mm_struct *___mm = (__vma)->vm_mm; \ 375 pmd_t ___pmd; \ 376 \ 377 ___pmd = pmdp_huge_clear_flush(__vma, __haddr, __pmd); \ 378 mmu_notifier_invalidate_range(___mm, ___haddr, \ 379 ___haddr + HPAGE_PMD_SIZE); \ 380 \ 381 ___pmd; \ 382 }) 383 384 #define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd) \ 385 ({ \ 386 unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ 387 pmd_t ___pmd; \ 388 \ 389 ___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd); \ 390 mmu_notifier_invalidate_range(__mm, ___haddr, \ 391 ___haddr + HPAGE_PMD_SIZE); \ 392 \ 393 ___pmd; \ 394 }) 395 396 /* 397 * set_pte_at_notify() sets the pte _after_ running the notifier. 398 * This is safe to start by updating the secondary MMUs, because the primary MMU 399 * pte invalidate must have already happened with a ptep_clear_flush() before 400 * set_pte_at_notify() has been invoked. Updating the secondary MMUs first is 401 * required when we change both the protection of the mapping from read-only to 402 * read-write and the pfn (like during copy on write page faults). Otherwise the 403 * old page would remain mapped readonly in the secondary MMUs after the new 404 * page is already writable by some CPU through the primary MMU. 405 */ 406 #define set_pte_at_notify(__mm, __address, __ptep, __pte) \ 407 ({ \ 408 struct mm_struct *___mm = __mm; \ 409 unsigned long ___address = __address; \ 410 pte_t ___pte = __pte; \ 411 \ 412 mmu_notifier_change_pte(___mm, ___address, ___pte); \ 413 set_pte_at(___mm, ___address, __ptep, ___pte); \ 414 }) 415 416 extern void mmu_notifier_call_srcu(struct rcu_head *rcu, 417 void (*func)(struct rcu_head *rcu)); 418 extern void mmu_notifier_synchronize(void); 419 420 #else /* CONFIG_MMU_NOTIFIER */ 421 422 static inline void mmu_notifier_release(struct mm_struct *mm) 423 { 424 } 425 426 static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, 427 unsigned long start, 428 unsigned long end) 429 { 430 return 0; 431 } 432 433 static inline int mmu_notifier_test_young(struct mm_struct *mm, 434 unsigned long address) 435 { 436 return 0; 437 } 438 439 static inline void mmu_notifier_change_pte(struct mm_struct *mm, 440 unsigned long address, pte_t pte) 441 { 442 } 443 444 static inline void mmu_notifier_invalidate_page(struct mm_struct *mm, 445 unsigned long address) 446 { 447 } 448 449 static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm, 450 unsigned long start, unsigned long end) 451 { 452 } 453 454 static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, 455 unsigned long start, unsigned long end) 456 { 457 } 458 459 static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, 460 unsigned long start, unsigned long end) 461 { 462 } 463 464 static inline void mmu_notifier_mm_init(struct mm_struct *mm) 465 { 466 } 467 468 static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) 469 { 470 } 471 472 #define ptep_clear_flush_young_notify ptep_clear_flush_young 473 #define pmdp_clear_flush_young_notify pmdp_clear_flush_young 474 #define ptep_clear_young_notify ptep_test_and_clear_young 475 #define pmdp_clear_young_notify pmdp_test_and_clear_young 476 #define ptep_clear_flush_notify ptep_clear_flush 477 #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush 478 #define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear 479 #define set_pte_at_notify set_pte_at 480 481 #endif /* CONFIG_MMU_NOTIFIER */ 482 483 #endif /* _LINUX_MMU_NOTIFIER_H */ 484