1 /* 2 * Copyright (C) 2015 Red Hat. All rights reserved. 3 * 4 * This file is released under the GPL. 5 */ 6 7 #include "dm-cache-background-tracker.h" 8 #include "dm-cache-policy-internal.h" 9 #include "dm-cache-policy.h" 10 #include "dm.h" 11 12 #include <linux/hash.h> 13 #include <linux/jiffies.h> 14 #include <linux/module.h> 15 #include <linux/mutex.h> 16 #include <linux/vmalloc.h> 17 #include <linux/math64.h> 18 19 #define DM_MSG_PREFIX "cache-policy-smq" 20 21 /*----------------------------------------------------------------*/ 22 23 /* 24 * Safe division functions that return zero on divide by zero. 25 */ 26 static unsigned safe_div(unsigned n, unsigned d) 27 { 28 return d ? n / d : 0u; 29 } 30 31 static unsigned safe_mod(unsigned n, unsigned d) 32 { 33 return d ? n % d : 0u; 34 } 35 36 /*----------------------------------------------------------------*/ 37 38 struct entry { 39 unsigned hash_next:28; 40 unsigned prev:28; 41 unsigned next:28; 42 unsigned level:6; 43 bool dirty:1; 44 bool allocated:1; 45 bool sentinel:1; 46 bool pending_work:1; 47 48 dm_oblock_t oblock; 49 }; 50 51 /*----------------------------------------------------------------*/ 52 53 #define INDEXER_NULL ((1u << 28u) - 1u) 54 55 /* 56 * An entry_space manages a set of entries that we use for the queues. 57 * The clean and dirty queues share entries, so this object is separate 58 * from the queue itself. 59 */ 60 struct entry_space { 61 struct entry *begin; 62 struct entry *end; 63 }; 64 65 static int space_init(struct entry_space *es, unsigned nr_entries) 66 { 67 if (!nr_entries) { 68 es->begin = es->end = NULL; 69 return 0; 70 } 71 72 es->begin = vzalloc(sizeof(struct entry) * nr_entries); 73 if (!es->begin) 74 return -ENOMEM; 75 76 es->end = es->begin + nr_entries; 77 return 0; 78 } 79 80 static void space_exit(struct entry_space *es) 81 { 82 vfree(es->begin); 83 } 84 85 static struct entry *__get_entry(struct entry_space *es, unsigned block) 86 { 87 struct entry *e; 88 89 e = es->begin + block; 90 BUG_ON(e >= es->end); 91 92 return e; 93 } 94 95 static unsigned to_index(struct entry_space *es, struct entry *e) 96 { 97 BUG_ON(e < es->begin || e >= es->end); 98 return e - es->begin; 99 } 100 101 static struct entry *to_entry(struct entry_space *es, unsigned block) 102 { 103 if (block == INDEXER_NULL) 104 return NULL; 105 106 return __get_entry(es, block); 107 } 108 109 /*----------------------------------------------------------------*/ 110 111 struct ilist { 112 unsigned nr_elts; /* excluding sentinel entries */ 113 unsigned head, tail; 114 }; 115 116 static void l_init(struct ilist *l) 117 { 118 l->nr_elts = 0; 119 l->head = l->tail = INDEXER_NULL; 120 } 121 122 static struct entry *l_head(struct entry_space *es, struct ilist *l) 123 { 124 return to_entry(es, l->head); 125 } 126 127 static struct entry *l_tail(struct entry_space *es, struct ilist *l) 128 { 129 return to_entry(es, l->tail); 130 } 131 132 static struct entry *l_next(struct entry_space *es, struct entry *e) 133 { 134 return to_entry(es, e->next); 135 } 136 137 static struct entry *l_prev(struct entry_space *es, struct entry *e) 138 { 139 return to_entry(es, e->prev); 140 } 141 142 static bool l_empty(struct ilist *l) 143 { 144 return l->head == INDEXER_NULL; 145 } 146 147 static void l_add_head(struct entry_space *es, struct ilist *l, struct entry *e) 148 { 149 struct entry *head = l_head(es, l); 150 151 e->next = l->head; 152 e->prev = INDEXER_NULL; 153 154 if (head) 155 head->prev = l->head = to_index(es, e); 156 else 157 l->head = l->tail = to_index(es, e); 158 159 if (!e->sentinel) 160 l->nr_elts++; 161 } 162 163 static void l_add_tail(struct entry_space *es, struct ilist *l, struct entry *e) 164 { 165 struct entry *tail = l_tail(es, l); 166 167 e->next = INDEXER_NULL; 168 e->prev = l->tail; 169 170 if (tail) 171 tail->next = l->tail = to_index(es, e); 172 else 173 l->head = l->tail = to_index(es, e); 174 175 if (!e->sentinel) 176 l->nr_elts++; 177 } 178 179 static void l_add_before(struct entry_space *es, struct ilist *l, 180 struct entry *old, struct entry *e) 181 { 182 struct entry *prev = l_prev(es, old); 183 184 if (!prev) 185 l_add_head(es, l, e); 186 187 else { 188 e->prev = old->prev; 189 e->next = to_index(es, old); 190 prev->next = old->prev = to_index(es, e); 191 192 if (!e->sentinel) 193 l->nr_elts++; 194 } 195 } 196 197 static void l_del(struct entry_space *es, struct ilist *l, struct entry *e) 198 { 199 struct entry *prev = l_prev(es, e); 200 struct entry *next = l_next(es, e); 201 202 if (prev) 203 prev->next = e->next; 204 else 205 l->head = e->next; 206 207 if (next) 208 next->prev = e->prev; 209 else 210 l->tail = e->prev; 211 212 if (!e->sentinel) 213 l->nr_elts--; 214 } 215 216 static struct entry *l_pop_tail(struct entry_space *es, struct ilist *l) 217 { 218 struct entry *e; 219 220 for (e = l_tail(es, l); e; e = l_prev(es, e)) 221 if (!e->sentinel) { 222 l_del(es, l, e); 223 return e; 224 } 225 226 return NULL; 227 } 228 229 /*----------------------------------------------------------------*/ 230 231 /* 232 * The stochastic-multi-queue is a set of lru lists stacked into levels. 233 * Entries are moved up levels when they are used, which loosely orders the 234 * most accessed entries in the top levels and least in the bottom. This 235 * structure is *much* better than a single lru list. 236 */ 237 #define MAX_LEVELS 64u 238 239 struct queue { 240 struct entry_space *es; 241 242 unsigned nr_elts; 243 unsigned nr_levels; 244 struct ilist qs[MAX_LEVELS]; 245 246 /* 247 * We maintain a count of the number of entries we would like in each 248 * level. 249 */ 250 unsigned last_target_nr_elts; 251 unsigned nr_top_levels; 252 unsigned nr_in_top_levels; 253 unsigned target_count[MAX_LEVELS]; 254 }; 255 256 static void q_init(struct queue *q, struct entry_space *es, unsigned nr_levels) 257 { 258 unsigned i; 259 260 q->es = es; 261 q->nr_elts = 0; 262 q->nr_levels = nr_levels; 263 264 for (i = 0; i < q->nr_levels; i++) { 265 l_init(q->qs + i); 266 q->target_count[i] = 0u; 267 } 268 269 q->last_target_nr_elts = 0u; 270 q->nr_top_levels = 0u; 271 q->nr_in_top_levels = 0u; 272 } 273 274 static unsigned q_size(struct queue *q) 275 { 276 return q->nr_elts; 277 } 278 279 /* 280 * Insert an entry to the back of the given level. 281 */ 282 static void q_push(struct queue *q, struct entry *e) 283 { 284 BUG_ON(e->pending_work); 285 286 if (!e->sentinel) 287 q->nr_elts++; 288 289 l_add_tail(q->es, q->qs + e->level, e); 290 } 291 292 static void q_push_front(struct queue *q, struct entry *e) 293 { 294 BUG_ON(e->pending_work); 295 296 if (!e->sentinel) 297 q->nr_elts++; 298 299 l_add_head(q->es, q->qs + e->level, e); 300 } 301 302 static void q_push_before(struct queue *q, struct entry *old, struct entry *e) 303 { 304 BUG_ON(e->pending_work); 305 306 if (!e->sentinel) 307 q->nr_elts++; 308 309 l_add_before(q->es, q->qs + e->level, old, e); 310 } 311 312 static void q_del(struct queue *q, struct entry *e) 313 { 314 l_del(q->es, q->qs + e->level, e); 315 if (!e->sentinel) 316 q->nr_elts--; 317 } 318 319 /* 320 * Return the oldest entry of the lowest populated level. 321 */ 322 static struct entry *q_peek(struct queue *q, unsigned max_level, bool can_cross_sentinel) 323 { 324 unsigned level; 325 struct entry *e; 326 327 max_level = min(max_level, q->nr_levels); 328 329 for (level = 0; level < max_level; level++) 330 for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e)) { 331 if (e->sentinel) { 332 if (can_cross_sentinel) 333 continue; 334 else 335 break; 336 } 337 338 return e; 339 } 340 341 return NULL; 342 } 343 344 static struct entry *q_pop(struct queue *q) 345 { 346 struct entry *e = q_peek(q, q->nr_levels, true); 347 348 if (e) 349 q_del(q, e); 350 351 return e; 352 } 353 354 /* 355 * This function assumes there is a non-sentinel entry to pop. It's only 356 * used by redistribute, so we know this is true. It also doesn't adjust 357 * the q->nr_elts count. 358 */ 359 static struct entry *__redist_pop_from(struct queue *q, unsigned level) 360 { 361 struct entry *e; 362 363 for (; level < q->nr_levels; level++) 364 for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e)) 365 if (!e->sentinel) { 366 l_del(q->es, q->qs + e->level, e); 367 return e; 368 } 369 370 return NULL; 371 } 372 373 static void q_set_targets_subrange_(struct queue *q, unsigned nr_elts, unsigned lbegin, unsigned lend) 374 { 375 unsigned level, nr_levels, entries_per_level, remainder; 376 377 BUG_ON(lbegin > lend); 378 BUG_ON(lend > q->nr_levels); 379 nr_levels = lend - lbegin; 380 entries_per_level = safe_div(nr_elts, nr_levels); 381 remainder = safe_mod(nr_elts, nr_levels); 382 383 for (level = lbegin; level < lend; level++) 384 q->target_count[level] = 385 (level < (lbegin + remainder)) ? entries_per_level + 1u : entries_per_level; 386 } 387 388 /* 389 * Typically we have fewer elements in the top few levels which allows us 390 * to adjust the promote threshold nicely. 391 */ 392 static void q_set_targets(struct queue *q) 393 { 394 if (q->last_target_nr_elts == q->nr_elts) 395 return; 396 397 q->last_target_nr_elts = q->nr_elts; 398 399 if (q->nr_top_levels > q->nr_levels) 400 q_set_targets_subrange_(q, q->nr_elts, 0, q->nr_levels); 401 402 else { 403 q_set_targets_subrange_(q, q->nr_in_top_levels, 404 q->nr_levels - q->nr_top_levels, q->nr_levels); 405 406 if (q->nr_in_top_levels < q->nr_elts) 407 q_set_targets_subrange_(q, q->nr_elts - q->nr_in_top_levels, 408 0, q->nr_levels - q->nr_top_levels); 409 else 410 q_set_targets_subrange_(q, 0, 0, q->nr_levels - q->nr_top_levels); 411 } 412 } 413 414 static void q_redistribute(struct queue *q) 415 { 416 unsigned target, level; 417 struct ilist *l, *l_above; 418 struct entry *e; 419 420 q_set_targets(q); 421 422 for (level = 0u; level < q->nr_levels - 1u; level++) { 423 l = q->qs + level; 424 target = q->target_count[level]; 425 426 /* 427 * Pull down some entries from the level above. 428 */ 429 while (l->nr_elts < target) { 430 e = __redist_pop_from(q, level + 1u); 431 if (!e) { 432 /* bug in nr_elts */ 433 break; 434 } 435 436 e->level = level; 437 l_add_tail(q->es, l, e); 438 } 439 440 /* 441 * Push some entries up. 442 */ 443 l_above = q->qs + level + 1u; 444 while (l->nr_elts > target) { 445 e = l_pop_tail(q->es, l); 446 447 if (!e) 448 /* bug in nr_elts */ 449 break; 450 451 e->level = level + 1u; 452 l_add_tail(q->es, l_above, e); 453 } 454 } 455 } 456 457 static void q_requeue(struct queue *q, struct entry *e, unsigned extra_levels, 458 struct entry *s1, struct entry *s2) 459 { 460 struct entry *de; 461 unsigned sentinels_passed = 0; 462 unsigned new_level = min(q->nr_levels - 1u, e->level + extra_levels); 463 464 /* try and find an entry to swap with */ 465 if (extra_levels && (e->level < q->nr_levels - 1u)) { 466 for (de = l_head(q->es, q->qs + new_level); de && de->sentinel; de = l_next(q->es, de)) 467 sentinels_passed++; 468 469 if (de) { 470 q_del(q, de); 471 de->level = e->level; 472 if (s1) { 473 switch (sentinels_passed) { 474 case 0: 475 q_push_before(q, s1, de); 476 break; 477 478 case 1: 479 q_push_before(q, s2, de); 480 break; 481 482 default: 483 q_push(q, de); 484 } 485 } else 486 q_push(q, de); 487 } 488 } 489 490 q_del(q, e); 491 e->level = new_level; 492 q_push(q, e); 493 } 494 495 /*----------------------------------------------------------------*/ 496 497 #define FP_SHIFT 8 498 #define SIXTEENTH (1u << (FP_SHIFT - 4u)) 499 #define EIGHTH (1u << (FP_SHIFT - 3u)) 500 501 struct stats { 502 unsigned hit_threshold; 503 unsigned hits; 504 unsigned misses; 505 }; 506 507 enum performance { 508 Q_POOR, 509 Q_FAIR, 510 Q_WELL 511 }; 512 513 static void stats_init(struct stats *s, unsigned nr_levels) 514 { 515 s->hit_threshold = (nr_levels * 3u) / 4u; 516 s->hits = 0u; 517 s->misses = 0u; 518 } 519 520 static void stats_reset(struct stats *s) 521 { 522 s->hits = s->misses = 0u; 523 } 524 525 static void stats_level_accessed(struct stats *s, unsigned level) 526 { 527 if (level >= s->hit_threshold) 528 s->hits++; 529 else 530 s->misses++; 531 } 532 533 static void stats_miss(struct stats *s) 534 { 535 s->misses++; 536 } 537 538 /* 539 * There are times when we don't have any confidence in the hotspot queue. 540 * Such as when a fresh cache is created and the blocks have been spread 541 * out across the levels, or if an io load changes. We detect this by 542 * seeing how often a lookup is in the top levels of the hotspot queue. 543 */ 544 static enum performance stats_assess(struct stats *s) 545 { 546 unsigned confidence = safe_div(s->hits << FP_SHIFT, s->hits + s->misses); 547 548 if (confidence < SIXTEENTH) 549 return Q_POOR; 550 551 else if (confidence < EIGHTH) 552 return Q_FAIR; 553 554 else 555 return Q_WELL; 556 } 557 558 /*----------------------------------------------------------------*/ 559 560 struct smq_hash_table { 561 struct entry_space *es; 562 unsigned long long hash_bits; 563 unsigned *buckets; 564 }; 565 566 /* 567 * All cache entries are stored in a chained hash table. To save space we 568 * use indexing again, and only store indexes to the next entry. 569 */ 570 static int h_init(struct smq_hash_table *ht, struct entry_space *es, unsigned nr_entries) 571 { 572 unsigned i, nr_buckets; 573 574 ht->es = es; 575 nr_buckets = roundup_pow_of_two(max(nr_entries / 4u, 16u)); 576 ht->hash_bits = __ffs(nr_buckets); 577 578 ht->buckets = vmalloc(sizeof(*ht->buckets) * nr_buckets); 579 if (!ht->buckets) 580 return -ENOMEM; 581 582 for (i = 0; i < nr_buckets; i++) 583 ht->buckets[i] = INDEXER_NULL; 584 585 return 0; 586 } 587 588 static void h_exit(struct smq_hash_table *ht) 589 { 590 vfree(ht->buckets); 591 } 592 593 static struct entry *h_head(struct smq_hash_table *ht, unsigned bucket) 594 { 595 return to_entry(ht->es, ht->buckets[bucket]); 596 } 597 598 static struct entry *h_next(struct smq_hash_table *ht, struct entry *e) 599 { 600 return to_entry(ht->es, e->hash_next); 601 } 602 603 static void __h_insert(struct smq_hash_table *ht, unsigned bucket, struct entry *e) 604 { 605 e->hash_next = ht->buckets[bucket]; 606 ht->buckets[bucket] = to_index(ht->es, e); 607 } 608 609 static void h_insert(struct smq_hash_table *ht, struct entry *e) 610 { 611 unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits); 612 __h_insert(ht, h, e); 613 } 614 615 static struct entry *__h_lookup(struct smq_hash_table *ht, unsigned h, dm_oblock_t oblock, 616 struct entry **prev) 617 { 618 struct entry *e; 619 620 *prev = NULL; 621 for (e = h_head(ht, h); e; e = h_next(ht, e)) { 622 if (e->oblock == oblock) 623 return e; 624 625 *prev = e; 626 } 627 628 return NULL; 629 } 630 631 static void __h_unlink(struct smq_hash_table *ht, unsigned h, 632 struct entry *e, struct entry *prev) 633 { 634 if (prev) 635 prev->hash_next = e->hash_next; 636 else 637 ht->buckets[h] = e->hash_next; 638 } 639 640 /* 641 * Also moves each entry to the front of the bucket. 642 */ 643 static struct entry *h_lookup(struct smq_hash_table *ht, dm_oblock_t oblock) 644 { 645 struct entry *e, *prev; 646 unsigned h = hash_64(from_oblock(oblock), ht->hash_bits); 647 648 e = __h_lookup(ht, h, oblock, &prev); 649 if (e && prev) { 650 /* 651 * Move to the front because this entry is likely 652 * to be hit again. 653 */ 654 __h_unlink(ht, h, e, prev); 655 __h_insert(ht, h, e); 656 } 657 658 return e; 659 } 660 661 static void h_remove(struct smq_hash_table *ht, struct entry *e) 662 { 663 unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits); 664 struct entry *prev; 665 666 /* 667 * The down side of using a singly linked list is we have to 668 * iterate the bucket to remove an item. 669 */ 670 e = __h_lookup(ht, h, e->oblock, &prev); 671 if (e) 672 __h_unlink(ht, h, e, prev); 673 } 674 675 /*----------------------------------------------------------------*/ 676 677 struct entry_alloc { 678 struct entry_space *es; 679 unsigned begin; 680 681 unsigned nr_allocated; 682 struct ilist free; 683 }; 684 685 static void init_allocator(struct entry_alloc *ea, struct entry_space *es, 686 unsigned begin, unsigned end) 687 { 688 unsigned i; 689 690 ea->es = es; 691 ea->nr_allocated = 0u; 692 ea->begin = begin; 693 694 l_init(&ea->free); 695 for (i = begin; i != end; i++) 696 l_add_tail(ea->es, &ea->free, __get_entry(ea->es, i)); 697 } 698 699 static void init_entry(struct entry *e) 700 { 701 /* 702 * We can't memset because that would clear the hotspot and 703 * sentinel bits which remain constant. 704 */ 705 e->hash_next = INDEXER_NULL; 706 e->next = INDEXER_NULL; 707 e->prev = INDEXER_NULL; 708 e->level = 0u; 709 e->dirty = true; /* FIXME: audit */ 710 e->allocated = true; 711 e->sentinel = false; 712 e->pending_work = false; 713 } 714 715 static struct entry *alloc_entry(struct entry_alloc *ea) 716 { 717 struct entry *e; 718 719 if (l_empty(&ea->free)) 720 return NULL; 721 722 e = l_pop_tail(ea->es, &ea->free); 723 init_entry(e); 724 ea->nr_allocated++; 725 726 return e; 727 } 728 729 /* 730 * This assumes the cblock hasn't already been allocated. 731 */ 732 static struct entry *alloc_particular_entry(struct entry_alloc *ea, unsigned i) 733 { 734 struct entry *e = __get_entry(ea->es, ea->begin + i); 735 736 BUG_ON(e->allocated); 737 738 l_del(ea->es, &ea->free, e); 739 init_entry(e); 740 ea->nr_allocated++; 741 742 return e; 743 } 744 745 static void free_entry(struct entry_alloc *ea, struct entry *e) 746 { 747 BUG_ON(!ea->nr_allocated); 748 BUG_ON(!e->allocated); 749 750 ea->nr_allocated--; 751 e->allocated = false; 752 l_add_tail(ea->es, &ea->free, e); 753 } 754 755 static bool allocator_empty(struct entry_alloc *ea) 756 { 757 return l_empty(&ea->free); 758 } 759 760 static unsigned get_index(struct entry_alloc *ea, struct entry *e) 761 { 762 return to_index(ea->es, e) - ea->begin; 763 } 764 765 static struct entry *get_entry(struct entry_alloc *ea, unsigned index) 766 { 767 return __get_entry(ea->es, ea->begin + index); 768 } 769 770 /*----------------------------------------------------------------*/ 771 772 #define NR_HOTSPOT_LEVELS 64u 773 #define NR_CACHE_LEVELS 64u 774 775 #define WRITEBACK_PERIOD (10ul * HZ) 776 #define DEMOTE_PERIOD (60ul * HZ) 777 778 #define HOTSPOT_UPDATE_PERIOD (HZ) 779 #define CACHE_UPDATE_PERIOD (60ul * HZ) 780 781 struct smq_policy { 782 struct dm_cache_policy policy; 783 784 /* protects everything */ 785 spinlock_t lock; 786 dm_cblock_t cache_size; 787 sector_t cache_block_size; 788 789 sector_t hotspot_block_size; 790 unsigned nr_hotspot_blocks; 791 unsigned cache_blocks_per_hotspot_block; 792 unsigned hotspot_level_jump; 793 794 struct entry_space es; 795 struct entry_alloc writeback_sentinel_alloc; 796 struct entry_alloc demote_sentinel_alloc; 797 struct entry_alloc hotspot_alloc; 798 struct entry_alloc cache_alloc; 799 800 unsigned long *hotspot_hit_bits; 801 unsigned long *cache_hit_bits; 802 803 /* 804 * We maintain three queues of entries. The cache proper, 805 * consisting of a clean and dirty queue, containing the currently 806 * active mappings. The hotspot queue uses a larger block size to 807 * track blocks that are being hit frequently and potential 808 * candidates for promotion to the cache. 809 */ 810 struct queue hotspot; 811 struct queue clean; 812 struct queue dirty; 813 814 struct stats hotspot_stats; 815 struct stats cache_stats; 816 817 /* 818 * Keeps track of time, incremented by the core. We use this to 819 * avoid attributing multiple hits within the same tick. 820 */ 821 unsigned tick; 822 823 /* 824 * The hash tables allows us to quickly find an entry by origin 825 * block. 826 */ 827 struct smq_hash_table table; 828 struct smq_hash_table hotspot_table; 829 830 bool current_writeback_sentinels; 831 unsigned long next_writeback_period; 832 833 bool current_demote_sentinels; 834 unsigned long next_demote_period; 835 836 unsigned write_promote_level; 837 unsigned read_promote_level; 838 839 unsigned long next_hotspot_period; 840 unsigned long next_cache_period; 841 842 struct background_tracker *bg_work; 843 844 bool migrations_allowed; 845 }; 846 847 /*----------------------------------------------------------------*/ 848 849 static struct entry *get_sentinel(struct entry_alloc *ea, unsigned level, bool which) 850 { 851 return get_entry(ea, which ? level : NR_CACHE_LEVELS + level); 852 } 853 854 static struct entry *writeback_sentinel(struct smq_policy *mq, unsigned level) 855 { 856 return get_sentinel(&mq->writeback_sentinel_alloc, level, mq->current_writeback_sentinels); 857 } 858 859 static struct entry *demote_sentinel(struct smq_policy *mq, unsigned level) 860 { 861 return get_sentinel(&mq->demote_sentinel_alloc, level, mq->current_demote_sentinels); 862 } 863 864 static void __update_writeback_sentinels(struct smq_policy *mq) 865 { 866 unsigned level; 867 struct queue *q = &mq->dirty; 868 struct entry *sentinel; 869 870 for (level = 0; level < q->nr_levels; level++) { 871 sentinel = writeback_sentinel(mq, level); 872 q_del(q, sentinel); 873 q_push(q, sentinel); 874 } 875 } 876 877 static void __update_demote_sentinels(struct smq_policy *mq) 878 { 879 unsigned level; 880 struct queue *q = &mq->clean; 881 struct entry *sentinel; 882 883 for (level = 0; level < q->nr_levels; level++) { 884 sentinel = demote_sentinel(mq, level); 885 q_del(q, sentinel); 886 q_push(q, sentinel); 887 } 888 } 889 890 static void update_sentinels(struct smq_policy *mq) 891 { 892 if (time_after(jiffies, mq->next_writeback_period)) { 893 mq->next_writeback_period = jiffies + WRITEBACK_PERIOD; 894 mq->current_writeback_sentinels = !mq->current_writeback_sentinels; 895 __update_writeback_sentinels(mq); 896 } 897 898 if (time_after(jiffies, mq->next_demote_period)) { 899 mq->next_demote_period = jiffies + DEMOTE_PERIOD; 900 mq->current_demote_sentinels = !mq->current_demote_sentinels; 901 __update_demote_sentinels(mq); 902 } 903 } 904 905 static void __sentinels_init(struct smq_policy *mq) 906 { 907 unsigned level; 908 struct entry *sentinel; 909 910 for (level = 0; level < NR_CACHE_LEVELS; level++) { 911 sentinel = writeback_sentinel(mq, level); 912 sentinel->level = level; 913 q_push(&mq->dirty, sentinel); 914 915 sentinel = demote_sentinel(mq, level); 916 sentinel->level = level; 917 q_push(&mq->clean, sentinel); 918 } 919 } 920 921 static void sentinels_init(struct smq_policy *mq) 922 { 923 mq->next_writeback_period = jiffies + WRITEBACK_PERIOD; 924 mq->next_demote_period = jiffies + DEMOTE_PERIOD; 925 926 mq->current_writeback_sentinels = false; 927 mq->current_demote_sentinels = false; 928 __sentinels_init(mq); 929 930 mq->current_writeback_sentinels = !mq->current_writeback_sentinels; 931 mq->current_demote_sentinels = !mq->current_demote_sentinels; 932 __sentinels_init(mq); 933 } 934 935 /*----------------------------------------------------------------*/ 936 937 static void del_queue(struct smq_policy *mq, struct entry *e) 938 { 939 q_del(e->dirty ? &mq->dirty : &mq->clean, e); 940 } 941 942 static void push_queue(struct smq_policy *mq, struct entry *e) 943 { 944 if (e->dirty) 945 q_push(&mq->dirty, e); 946 else 947 q_push(&mq->clean, e); 948 } 949 950 // !h, !q, a -> h, q, a 951 static void push(struct smq_policy *mq, struct entry *e) 952 { 953 h_insert(&mq->table, e); 954 if (!e->pending_work) 955 push_queue(mq, e); 956 } 957 958 static void push_queue_front(struct smq_policy *mq, struct entry *e) 959 { 960 if (e->dirty) 961 q_push_front(&mq->dirty, e); 962 else 963 q_push_front(&mq->clean, e); 964 } 965 966 static void push_front(struct smq_policy *mq, struct entry *e) 967 { 968 h_insert(&mq->table, e); 969 if (!e->pending_work) 970 push_queue_front(mq, e); 971 } 972 973 static dm_cblock_t infer_cblock(struct smq_policy *mq, struct entry *e) 974 { 975 return to_cblock(get_index(&mq->cache_alloc, e)); 976 } 977 978 static void requeue(struct smq_policy *mq, struct entry *e) 979 { 980 /* 981 * Pending work has temporarily been taken out of the queues. 982 */ 983 if (e->pending_work) 984 return; 985 986 if (!test_and_set_bit(from_cblock(infer_cblock(mq, e)), mq->cache_hit_bits)) { 987 if (!e->dirty) { 988 q_requeue(&mq->clean, e, 1u, NULL, NULL); 989 return; 990 } 991 992 q_requeue(&mq->dirty, e, 1u, 993 get_sentinel(&mq->writeback_sentinel_alloc, e->level, !mq->current_writeback_sentinels), 994 get_sentinel(&mq->writeback_sentinel_alloc, e->level, mq->current_writeback_sentinels)); 995 } 996 } 997 998 static unsigned default_promote_level(struct smq_policy *mq) 999 { 1000 /* 1001 * The promote level depends on the current performance of the 1002 * cache. 1003 * 1004 * If the cache is performing badly, then we can't afford 1005 * to promote much without causing performance to drop below that 1006 * of the origin device. 1007 * 1008 * If the cache is performing well, then we don't need to promote 1009 * much. If it isn't broken, don't fix it. 1010 * 1011 * If the cache is middling then we promote more. 1012 * 1013 * This scheme reminds me of a graph of entropy vs probability of a 1014 * binary variable. 1015 */ 1016 static unsigned table[] = {1, 1, 1, 2, 4, 6, 7, 8, 7, 6, 4, 4, 3, 3, 2, 2, 1}; 1017 1018 unsigned hits = mq->cache_stats.hits; 1019 unsigned misses = mq->cache_stats.misses; 1020 unsigned index = safe_div(hits << 4u, hits + misses); 1021 return table[index]; 1022 } 1023 1024 static void update_promote_levels(struct smq_policy *mq) 1025 { 1026 /* 1027 * If there are unused cache entries then we want to be really 1028 * eager to promote. 1029 */ 1030 unsigned threshold_level = allocator_empty(&mq->cache_alloc) ? 1031 default_promote_level(mq) : (NR_HOTSPOT_LEVELS / 2u); 1032 1033 threshold_level = max(threshold_level, NR_HOTSPOT_LEVELS); 1034 1035 /* 1036 * If the hotspot queue is performing badly then we have little 1037 * confidence that we know which blocks to promote. So we cut down 1038 * the amount of promotions. 1039 */ 1040 switch (stats_assess(&mq->hotspot_stats)) { 1041 case Q_POOR: 1042 threshold_level /= 4u; 1043 break; 1044 1045 case Q_FAIR: 1046 threshold_level /= 2u; 1047 break; 1048 1049 case Q_WELL: 1050 break; 1051 } 1052 1053 mq->read_promote_level = NR_HOTSPOT_LEVELS - threshold_level; 1054 mq->write_promote_level = (NR_HOTSPOT_LEVELS - threshold_level); 1055 } 1056 1057 /* 1058 * If the hotspot queue is performing badly, then we try and move entries 1059 * around more quickly. 1060 */ 1061 static void update_level_jump(struct smq_policy *mq) 1062 { 1063 switch (stats_assess(&mq->hotspot_stats)) { 1064 case Q_POOR: 1065 mq->hotspot_level_jump = 4u; 1066 break; 1067 1068 case Q_FAIR: 1069 mq->hotspot_level_jump = 2u; 1070 break; 1071 1072 case Q_WELL: 1073 mq->hotspot_level_jump = 1u; 1074 break; 1075 } 1076 } 1077 1078 static void end_hotspot_period(struct smq_policy *mq) 1079 { 1080 clear_bitset(mq->hotspot_hit_bits, mq->nr_hotspot_blocks); 1081 update_promote_levels(mq); 1082 1083 if (time_after(jiffies, mq->next_hotspot_period)) { 1084 update_level_jump(mq); 1085 q_redistribute(&mq->hotspot); 1086 stats_reset(&mq->hotspot_stats); 1087 mq->next_hotspot_period = jiffies + HOTSPOT_UPDATE_PERIOD; 1088 } 1089 } 1090 1091 static void end_cache_period(struct smq_policy *mq) 1092 { 1093 if (time_after(jiffies, mq->next_cache_period)) { 1094 clear_bitset(mq->cache_hit_bits, from_cblock(mq->cache_size)); 1095 1096 q_redistribute(&mq->dirty); 1097 q_redistribute(&mq->clean); 1098 stats_reset(&mq->cache_stats); 1099 1100 mq->next_cache_period = jiffies + CACHE_UPDATE_PERIOD; 1101 } 1102 } 1103 1104 /*----------------------------------------------------------------*/ 1105 1106 /* 1107 * Targets are given as a percentage. 1108 */ 1109 #define CLEAN_TARGET 25u 1110 #define FREE_TARGET 25u 1111 1112 static unsigned percent_to_target(struct smq_policy *mq, unsigned p) 1113 { 1114 return from_cblock(mq->cache_size) * p / 100u; 1115 } 1116 1117 static bool clean_target_met(struct smq_policy *mq, bool idle) 1118 { 1119 /* 1120 * Cache entries may not be populated. So we cannot rely on the 1121 * size of the clean queue. 1122 */ 1123 unsigned nr_clean; 1124 1125 if (idle) { 1126 /* 1127 * We'd like to clean everything. 1128 */ 1129 return q_size(&mq->dirty) == 0u; 1130 } 1131 1132 nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty); 1133 return (nr_clean + btracker_nr_writebacks_queued(mq->bg_work)) >= 1134 percent_to_target(mq, CLEAN_TARGET); 1135 } 1136 1137 static bool free_target_met(struct smq_policy *mq, bool idle) 1138 { 1139 unsigned nr_free; 1140 1141 if (!idle) 1142 return true; 1143 1144 nr_free = from_cblock(mq->cache_size) - mq->cache_alloc.nr_allocated; 1145 return (nr_free + btracker_nr_demotions_queued(mq->bg_work)) >= 1146 percent_to_target(mq, FREE_TARGET); 1147 } 1148 1149 /*----------------------------------------------------------------*/ 1150 1151 static void mark_pending(struct smq_policy *mq, struct entry *e) 1152 { 1153 BUG_ON(e->sentinel); 1154 BUG_ON(!e->allocated); 1155 BUG_ON(e->pending_work); 1156 e->pending_work = true; 1157 } 1158 1159 static void clear_pending(struct smq_policy *mq, struct entry *e) 1160 { 1161 BUG_ON(!e->pending_work); 1162 e->pending_work = false; 1163 } 1164 1165 static void queue_writeback(struct smq_policy *mq) 1166 { 1167 int r; 1168 struct policy_work work; 1169 struct entry *e; 1170 1171 e = q_peek(&mq->dirty, mq->dirty.nr_levels, !mq->migrations_allowed); 1172 if (e) { 1173 mark_pending(mq, e); 1174 q_del(&mq->dirty, e); 1175 1176 work.op = POLICY_WRITEBACK; 1177 work.oblock = e->oblock; 1178 work.cblock = infer_cblock(mq, e); 1179 1180 r = btracker_queue(mq->bg_work, &work, NULL); 1181 WARN_ON_ONCE(r); // FIXME: finish, I think we have to get rid of this race. 1182 } 1183 } 1184 1185 static void queue_demotion(struct smq_policy *mq) 1186 { 1187 struct policy_work work; 1188 struct entry *e; 1189 1190 if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed))) 1191 return; 1192 1193 e = q_peek(&mq->clean, mq->clean.nr_levels, true); 1194 if (!e) { 1195 if (!clean_target_met(mq, false)) 1196 queue_writeback(mq); 1197 return; 1198 } 1199 1200 mark_pending(mq, e); 1201 q_del(&mq->clean, e); 1202 1203 work.op = POLICY_DEMOTE; 1204 work.oblock = e->oblock; 1205 work.cblock = infer_cblock(mq, e); 1206 btracker_queue(mq->bg_work, &work, NULL); 1207 } 1208 1209 static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock, 1210 struct policy_work **workp) 1211 { 1212 struct entry *e; 1213 struct policy_work work; 1214 1215 if (!mq->migrations_allowed) 1216 return; 1217 1218 if (allocator_empty(&mq->cache_alloc)) { 1219 /* 1220 * We always claim to be 'idle' to ensure some demotions happen 1221 * with continuous loads. 1222 */ 1223 if (!free_target_met(mq, true)) 1224 queue_demotion(mq); 1225 return; 1226 } 1227 1228 if (btracker_promotion_already_present(mq->bg_work, oblock)) 1229 return; 1230 1231 /* 1232 * We allocate the entry now to reserve the cblock. If the 1233 * background work is aborted we must remember to free it. 1234 */ 1235 e = alloc_entry(&mq->cache_alloc); 1236 BUG_ON(!e); 1237 e->pending_work = true; 1238 work.op = POLICY_PROMOTE; 1239 work.oblock = oblock; 1240 work.cblock = infer_cblock(mq, e); 1241 btracker_queue(mq->bg_work, &work, workp); 1242 } 1243 1244 /*----------------------------------------------------------------*/ 1245 1246 enum promote_result { 1247 PROMOTE_NOT, 1248 PROMOTE_TEMPORARY, 1249 PROMOTE_PERMANENT 1250 }; 1251 1252 /* 1253 * Converts a boolean into a promote result. 1254 */ 1255 static enum promote_result maybe_promote(bool promote) 1256 { 1257 return promote ? PROMOTE_PERMANENT : PROMOTE_NOT; 1258 } 1259 1260 static enum promote_result should_promote(struct smq_policy *mq, struct entry *hs_e, 1261 int data_dir, bool fast_promote) 1262 { 1263 if (data_dir == WRITE) { 1264 if (!allocator_empty(&mq->cache_alloc) && fast_promote) 1265 return PROMOTE_TEMPORARY; 1266 1267 return maybe_promote(hs_e->level >= mq->write_promote_level); 1268 } else 1269 return maybe_promote(hs_e->level >= mq->read_promote_level); 1270 } 1271 1272 static dm_oblock_t to_hblock(struct smq_policy *mq, dm_oblock_t b) 1273 { 1274 sector_t r = from_oblock(b); 1275 (void) sector_div(r, mq->cache_blocks_per_hotspot_block); 1276 return to_oblock(r); 1277 } 1278 1279 static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b) 1280 { 1281 unsigned hi; 1282 dm_oblock_t hb = to_hblock(mq, b); 1283 struct entry *e = h_lookup(&mq->hotspot_table, hb); 1284 1285 if (e) { 1286 stats_level_accessed(&mq->hotspot_stats, e->level); 1287 1288 hi = get_index(&mq->hotspot_alloc, e); 1289 q_requeue(&mq->hotspot, e, 1290 test_and_set_bit(hi, mq->hotspot_hit_bits) ? 1291 0u : mq->hotspot_level_jump, 1292 NULL, NULL); 1293 1294 } else { 1295 stats_miss(&mq->hotspot_stats); 1296 1297 e = alloc_entry(&mq->hotspot_alloc); 1298 if (!e) { 1299 e = q_pop(&mq->hotspot); 1300 if (e) { 1301 h_remove(&mq->hotspot_table, e); 1302 hi = get_index(&mq->hotspot_alloc, e); 1303 clear_bit(hi, mq->hotspot_hit_bits); 1304 } 1305 1306 } 1307 1308 if (e) { 1309 e->oblock = hb; 1310 q_push(&mq->hotspot, e); 1311 h_insert(&mq->hotspot_table, e); 1312 } 1313 } 1314 1315 return e; 1316 } 1317 1318 /*----------------------------------------------------------------*/ 1319 1320 /* 1321 * Public interface, via the policy struct. See dm-cache-policy.h for a 1322 * description of these. 1323 */ 1324 1325 static struct smq_policy *to_smq_policy(struct dm_cache_policy *p) 1326 { 1327 return container_of(p, struct smq_policy, policy); 1328 } 1329 1330 static void smq_destroy(struct dm_cache_policy *p) 1331 { 1332 struct smq_policy *mq = to_smq_policy(p); 1333 1334 btracker_destroy(mq->bg_work); 1335 h_exit(&mq->hotspot_table); 1336 h_exit(&mq->table); 1337 free_bitset(mq->hotspot_hit_bits); 1338 free_bitset(mq->cache_hit_bits); 1339 space_exit(&mq->es); 1340 kfree(mq); 1341 } 1342 1343 /*----------------------------------------------------------------*/ 1344 1345 static int __lookup(struct smq_policy *mq, dm_oblock_t oblock, dm_cblock_t *cblock, 1346 int data_dir, bool fast_copy, 1347 struct policy_work **work, bool *background_work) 1348 { 1349 struct entry *e, *hs_e; 1350 enum promote_result pr; 1351 1352 *background_work = false; 1353 1354 e = h_lookup(&mq->table, oblock); 1355 if (e) { 1356 stats_level_accessed(&mq->cache_stats, e->level); 1357 1358 requeue(mq, e); 1359 *cblock = infer_cblock(mq, e); 1360 return 0; 1361 1362 } else { 1363 stats_miss(&mq->cache_stats); 1364 1365 /* 1366 * The hotspot queue only gets updated with misses. 1367 */ 1368 hs_e = update_hotspot_queue(mq, oblock); 1369 1370 pr = should_promote(mq, hs_e, data_dir, fast_copy); 1371 if (pr != PROMOTE_NOT) { 1372 queue_promotion(mq, oblock, work); 1373 *background_work = true; 1374 } 1375 1376 return -ENOENT; 1377 } 1378 } 1379 1380 static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock, 1381 int data_dir, bool fast_copy, 1382 bool *background_work) 1383 { 1384 int r; 1385 unsigned long flags; 1386 struct smq_policy *mq = to_smq_policy(p); 1387 1388 spin_lock_irqsave(&mq->lock, flags); 1389 r = __lookup(mq, oblock, cblock, 1390 data_dir, fast_copy, 1391 NULL, background_work); 1392 spin_unlock_irqrestore(&mq->lock, flags); 1393 1394 return r; 1395 } 1396 1397 static int smq_lookup_with_work(struct dm_cache_policy *p, 1398 dm_oblock_t oblock, dm_cblock_t *cblock, 1399 int data_dir, bool fast_copy, 1400 struct policy_work **work) 1401 { 1402 int r; 1403 bool background_queued; 1404 unsigned long flags; 1405 struct smq_policy *mq = to_smq_policy(p); 1406 1407 spin_lock_irqsave(&mq->lock, flags); 1408 r = __lookup(mq, oblock, cblock, data_dir, fast_copy, work, &background_queued); 1409 spin_unlock_irqrestore(&mq->lock, flags); 1410 1411 return r; 1412 } 1413 1414 static int smq_get_background_work(struct dm_cache_policy *p, bool idle, 1415 struct policy_work **result) 1416 { 1417 int r; 1418 unsigned long flags; 1419 struct smq_policy *mq = to_smq_policy(p); 1420 1421 spin_lock_irqsave(&mq->lock, flags); 1422 r = btracker_issue(mq->bg_work, result); 1423 if (r == -ENODATA) { 1424 /* find some writeback work to do */ 1425 if (mq->migrations_allowed && !free_target_met(mq, idle)) 1426 queue_demotion(mq); 1427 1428 else if (!clean_target_met(mq, idle)) 1429 queue_writeback(mq); 1430 1431 r = btracker_issue(mq->bg_work, result); 1432 } 1433 spin_unlock_irqrestore(&mq->lock, flags); 1434 1435 return r; 1436 } 1437 1438 /* 1439 * We need to clear any pending work flags that have been set, and in the 1440 * case of promotion free the entry for the destination cblock. 1441 */ 1442 static void __complete_background_work(struct smq_policy *mq, 1443 struct policy_work *work, 1444 bool success) 1445 { 1446 struct entry *e = get_entry(&mq->cache_alloc, 1447 from_cblock(work->cblock)); 1448 1449 switch (work->op) { 1450 case POLICY_PROMOTE: 1451 // !h, !q, a 1452 clear_pending(mq, e); 1453 if (success) { 1454 e->oblock = work->oblock; 1455 push(mq, e); 1456 // h, q, a 1457 } else { 1458 free_entry(&mq->cache_alloc, e); 1459 // !h, !q, !a 1460 } 1461 break; 1462 1463 case POLICY_DEMOTE: 1464 // h, !q, a 1465 if (success) { 1466 h_remove(&mq->table, e); 1467 free_entry(&mq->cache_alloc, e); 1468 // !h, !q, !a 1469 } else { 1470 clear_pending(mq, e); 1471 push_queue(mq, e); 1472 // h, q, a 1473 } 1474 break; 1475 1476 case POLICY_WRITEBACK: 1477 // h, !q, a 1478 clear_pending(mq, e); 1479 push_queue(mq, e); 1480 // h, q, a 1481 break; 1482 } 1483 1484 btracker_complete(mq->bg_work, work); 1485 } 1486 1487 static void smq_complete_background_work(struct dm_cache_policy *p, 1488 struct policy_work *work, 1489 bool success) 1490 { 1491 unsigned long flags; 1492 struct smq_policy *mq = to_smq_policy(p); 1493 1494 spin_lock_irqsave(&mq->lock, flags); 1495 __complete_background_work(mq, work, success); 1496 spin_unlock_irqrestore(&mq->lock, flags); 1497 } 1498 1499 // in_hash(oblock) -> in_hash(oblock) 1500 static void __smq_set_clear_dirty(struct smq_policy *mq, dm_cblock_t cblock, bool set) 1501 { 1502 struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock)); 1503 1504 if (e->pending_work) 1505 e->dirty = set; 1506 else { 1507 del_queue(mq, e); 1508 e->dirty = set; 1509 push_queue(mq, e); 1510 } 1511 } 1512 1513 static void smq_set_dirty(struct dm_cache_policy *p, dm_cblock_t cblock) 1514 { 1515 unsigned long flags; 1516 struct smq_policy *mq = to_smq_policy(p); 1517 1518 spin_lock_irqsave(&mq->lock, flags); 1519 __smq_set_clear_dirty(mq, cblock, true); 1520 spin_unlock_irqrestore(&mq->lock, flags); 1521 } 1522 1523 static void smq_clear_dirty(struct dm_cache_policy *p, dm_cblock_t cblock) 1524 { 1525 struct smq_policy *mq = to_smq_policy(p); 1526 unsigned long flags; 1527 1528 spin_lock_irqsave(&mq->lock, flags); 1529 __smq_set_clear_dirty(mq, cblock, false); 1530 spin_unlock_irqrestore(&mq->lock, flags); 1531 } 1532 1533 static unsigned random_level(dm_cblock_t cblock) 1534 { 1535 return hash_32(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1); 1536 } 1537 1538 static int smq_load_mapping(struct dm_cache_policy *p, 1539 dm_oblock_t oblock, dm_cblock_t cblock, 1540 bool dirty, uint32_t hint, bool hint_valid) 1541 { 1542 struct smq_policy *mq = to_smq_policy(p); 1543 struct entry *e; 1544 1545 e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock)); 1546 e->oblock = oblock; 1547 e->dirty = dirty; 1548 e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : random_level(cblock); 1549 e->pending_work = false; 1550 1551 /* 1552 * When we load mappings we push ahead of both sentinels in order to 1553 * allow demotions and cleaning to occur immediately. 1554 */ 1555 push_front(mq, e); 1556 1557 return 0; 1558 } 1559 1560 static int smq_invalidate_mapping(struct dm_cache_policy *p, dm_cblock_t cblock) 1561 { 1562 struct smq_policy *mq = to_smq_policy(p); 1563 struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock)); 1564 1565 if (!e->allocated) 1566 return -ENODATA; 1567 1568 // FIXME: what if this block has pending background work? 1569 del_queue(mq, e); 1570 h_remove(&mq->table, e); 1571 free_entry(&mq->cache_alloc, e); 1572 return 0; 1573 } 1574 1575 static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock) 1576 { 1577 struct smq_policy *mq = to_smq_policy(p); 1578 struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock)); 1579 1580 if (!e->allocated) 1581 return 0; 1582 1583 return e->level; 1584 } 1585 1586 static dm_cblock_t smq_residency(struct dm_cache_policy *p) 1587 { 1588 dm_cblock_t r; 1589 unsigned long flags; 1590 struct smq_policy *mq = to_smq_policy(p); 1591 1592 spin_lock_irqsave(&mq->lock, flags); 1593 r = to_cblock(mq->cache_alloc.nr_allocated); 1594 spin_unlock_irqrestore(&mq->lock, flags); 1595 1596 return r; 1597 } 1598 1599 static void smq_tick(struct dm_cache_policy *p, bool can_block) 1600 { 1601 struct smq_policy *mq = to_smq_policy(p); 1602 unsigned long flags; 1603 1604 spin_lock_irqsave(&mq->lock, flags); 1605 mq->tick++; 1606 update_sentinels(mq); 1607 end_hotspot_period(mq); 1608 end_cache_period(mq); 1609 spin_unlock_irqrestore(&mq->lock, flags); 1610 } 1611 1612 static void smq_allow_migrations(struct dm_cache_policy *p, bool allow) 1613 { 1614 struct smq_policy *mq = to_smq_policy(p); 1615 mq->migrations_allowed = allow; 1616 } 1617 1618 /* 1619 * smq has no config values, but the old mq policy did. To avoid breaking 1620 * software we continue to accept these configurables for the mq policy, 1621 * but they have no effect. 1622 */ 1623 static int mq_set_config_value(struct dm_cache_policy *p, 1624 const char *key, const char *value) 1625 { 1626 unsigned long tmp; 1627 1628 if (kstrtoul(value, 10, &tmp)) 1629 return -EINVAL; 1630 1631 if (!strcasecmp(key, "random_threshold") || 1632 !strcasecmp(key, "sequential_threshold") || 1633 !strcasecmp(key, "discard_promote_adjustment") || 1634 !strcasecmp(key, "read_promote_adjustment") || 1635 !strcasecmp(key, "write_promote_adjustment")) { 1636 DMWARN("tunable '%s' no longer has any effect, mq policy is now an alias for smq", key); 1637 return 0; 1638 } 1639 1640 return -EINVAL; 1641 } 1642 1643 static int mq_emit_config_values(struct dm_cache_policy *p, char *result, 1644 unsigned maxlen, ssize_t *sz_ptr) 1645 { 1646 ssize_t sz = *sz_ptr; 1647 1648 DMEMIT("10 random_threshold 0 " 1649 "sequential_threshold 0 " 1650 "discard_promote_adjustment 0 " 1651 "read_promote_adjustment 0 " 1652 "write_promote_adjustment 0 "); 1653 1654 *sz_ptr = sz; 1655 return 0; 1656 } 1657 1658 /* Init the policy plugin interface function pointers. */ 1659 static void init_policy_functions(struct smq_policy *mq, bool mimic_mq) 1660 { 1661 mq->policy.destroy = smq_destroy; 1662 mq->policy.lookup = smq_lookup; 1663 mq->policy.lookup_with_work = smq_lookup_with_work; 1664 mq->policy.get_background_work = smq_get_background_work; 1665 mq->policy.complete_background_work = smq_complete_background_work; 1666 mq->policy.set_dirty = smq_set_dirty; 1667 mq->policy.clear_dirty = smq_clear_dirty; 1668 mq->policy.load_mapping = smq_load_mapping; 1669 mq->policy.invalidate_mapping = smq_invalidate_mapping; 1670 mq->policy.get_hint = smq_get_hint; 1671 mq->policy.residency = smq_residency; 1672 mq->policy.tick = smq_tick; 1673 mq->policy.allow_migrations = smq_allow_migrations; 1674 1675 if (mimic_mq) { 1676 mq->policy.set_config_value = mq_set_config_value; 1677 mq->policy.emit_config_values = mq_emit_config_values; 1678 } 1679 } 1680 1681 static bool too_many_hotspot_blocks(sector_t origin_size, 1682 sector_t hotspot_block_size, 1683 unsigned nr_hotspot_blocks) 1684 { 1685 return (hotspot_block_size * nr_hotspot_blocks) > origin_size; 1686 } 1687 1688 static void calc_hotspot_params(sector_t origin_size, 1689 sector_t cache_block_size, 1690 unsigned nr_cache_blocks, 1691 sector_t *hotspot_block_size, 1692 unsigned *nr_hotspot_blocks) 1693 { 1694 *hotspot_block_size = cache_block_size * 16u; 1695 *nr_hotspot_blocks = max(nr_cache_blocks / 4u, 1024u); 1696 1697 while ((*hotspot_block_size > cache_block_size) && 1698 too_many_hotspot_blocks(origin_size, *hotspot_block_size, *nr_hotspot_blocks)) 1699 *hotspot_block_size /= 2u; 1700 } 1701 1702 static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size, 1703 sector_t origin_size, 1704 sector_t cache_block_size, 1705 bool mimic_mq, 1706 bool migrations_allowed) 1707 { 1708 unsigned i; 1709 unsigned nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS; 1710 unsigned total_sentinels = 2u * nr_sentinels_per_queue; 1711 struct smq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL); 1712 1713 if (!mq) 1714 return NULL; 1715 1716 init_policy_functions(mq, mimic_mq); 1717 mq->cache_size = cache_size; 1718 mq->cache_block_size = cache_block_size; 1719 1720 calc_hotspot_params(origin_size, cache_block_size, from_cblock(cache_size), 1721 &mq->hotspot_block_size, &mq->nr_hotspot_blocks); 1722 1723 mq->cache_blocks_per_hotspot_block = div64_u64(mq->hotspot_block_size, mq->cache_block_size); 1724 mq->hotspot_level_jump = 1u; 1725 if (space_init(&mq->es, total_sentinels + mq->nr_hotspot_blocks + from_cblock(cache_size))) { 1726 DMERR("couldn't initialize entry space"); 1727 goto bad_pool_init; 1728 } 1729 1730 init_allocator(&mq->writeback_sentinel_alloc, &mq->es, 0, nr_sentinels_per_queue); 1731 for (i = 0; i < nr_sentinels_per_queue; i++) 1732 get_entry(&mq->writeback_sentinel_alloc, i)->sentinel = true; 1733 1734 init_allocator(&mq->demote_sentinel_alloc, &mq->es, nr_sentinels_per_queue, total_sentinels); 1735 for (i = 0; i < nr_sentinels_per_queue; i++) 1736 get_entry(&mq->demote_sentinel_alloc, i)->sentinel = true; 1737 1738 init_allocator(&mq->hotspot_alloc, &mq->es, total_sentinels, 1739 total_sentinels + mq->nr_hotspot_blocks); 1740 1741 init_allocator(&mq->cache_alloc, &mq->es, 1742 total_sentinels + mq->nr_hotspot_blocks, 1743 total_sentinels + mq->nr_hotspot_blocks + from_cblock(cache_size)); 1744 1745 mq->hotspot_hit_bits = alloc_bitset(mq->nr_hotspot_blocks); 1746 if (!mq->hotspot_hit_bits) { 1747 DMERR("couldn't allocate hotspot hit bitset"); 1748 goto bad_hotspot_hit_bits; 1749 } 1750 clear_bitset(mq->hotspot_hit_bits, mq->nr_hotspot_blocks); 1751 1752 if (from_cblock(cache_size)) { 1753 mq->cache_hit_bits = alloc_bitset(from_cblock(cache_size)); 1754 if (!mq->cache_hit_bits) { 1755 DMERR("couldn't allocate cache hit bitset"); 1756 goto bad_cache_hit_bits; 1757 } 1758 clear_bitset(mq->cache_hit_bits, from_cblock(mq->cache_size)); 1759 } else 1760 mq->cache_hit_bits = NULL; 1761 1762 mq->tick = 0; 1763 spin_lock_init(&mq->lock); 1764 1765 q_init(&mq->hotspot, &mq->es, NR_HOTSPOT_LEVELS); 1766 mq->hotspot.nr_top_levels = 8; 1767 mq->hotspot.nr_in_top_levels = min(mq->nr_hotspot_blocks / NR_HOTSPOT_LEVELS, 1768 from_cblock(mq->cache_size) / mq->cache_blocks_per_hotspot_block); 1769 1770 q_init(&mq->clean, &mq->es, NR_CACHE_LEVELS); 1771 q_init(&mq->dirty, &mq->es, NR_CACHE_LEVELS); 1772 1773 stats_init(&mq->hotspot_stats, NR_HOTSPOT_LEVELS); 1774 stats_init(&mq->cache_stats, NR_CACHE_LEVELS); 1775 1776 if (h_init(&mq->table, &mq->es, from_cblock(cache_size))) 1777 goto bad_alloc_table; 1778 1779 if (h_init(&mq->hotspot_table, &mq->es, mq->nr_hotspot_blocks)) 1780 goto bad_alloc_hotspot_table; 1781 1782 sentinels_init(mq); 1783 mq->write_promote_level = mq->read_promote_level = NR_HOTSPOT_LEVELS; 1784 1785 mq->next_hotspot_period = jiffies; 1786 mq->next_cache_period = jiffies; 1787 1788 mq->bg_work = btracker_create(10240); /* FIXME: hard coded value */ 1789 if (!mq->bg_work) 1790 goto bad_btracker; 1791 1792 mq->migrations_allowed = migrations_allowed; 1793 1794 return &mq->policy; 1795 1796 bad_btracker: 1797 h_exit(&mq->hotspot_table); 1798 bad_alloc_hotspot_table: 1799 h_exit(&mq->table); 1800 bad_alloc_table: 1801 free_bitset(mq->cache_hit_bits); 1802 bad_cache_hit_bits: 1803 free_bitset(mq->hotspot_hit_bits); 1804 bad_hotspot_hit_bits: 1805 space_exit(&mq->es); 1806 bad_pool_init: 1807 kfree(mq); 1808 1809 return NULL; 1810 } 1811 1812 static struct dm_cache_policy *smq_create(dm_cblock_t cache_size, 1813 sector_t origin_size, 1814 sector_t cache_block_size) 1815 { 1816 return __smq_create(cache_size, origin_size, cache_block_size, false, true); 1817 } 1818 1819 static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, 1820 sector_t origin_size, 1821 sector_t cache_block_size) 1822 { 1823 return __smq_create(cache_size, origin_size, cache_block_size, true, true); 1824 } 1825 1826 static struct dm_cache_policy *cleaner_create(dm_cblock_t cache_size, 1827 sector_t origin_size, 1828 sector_t cache_block_size) 1829 { 1830 return __smq_create(cache_size, origin_size, cache_block_size, false, false); 1831 } 1832 1833 /*----------------------------------------------------------------*/ 1834 1835 static struct dm_cache_policy_type smq_policy_type = { 1836 .name = "smq", 1837 .version = {2, 0, 0}, 1838 .hint_size = 4, 1839 .owner = THIS_MODULE, 1840 .create = smq_create 1841 }; 1842 1843 static struct dm_cache_policy_type mq_policy_type = { 1844 .name = "mq", 1845 .version = {2, 0, 0}, 1846 .hint_size = 4, 1847 .owner = THIS_MODULE, 1848 .create = mq_create, 1849 }; 1850 1851 static struct dm_cache_policy_type cleaner_policy_type = { 1852 .name = "cleaner", 1853 .version = {2, 0, 0}, 1854 .hint_size = 4, 1855 .owner = THIS_MODULE, 1856 .create = cleaner_create, 1857 }; 1858 1859 static struct dm_cache_policy_type default_policy_type = { 1860 .name = "default", 1861 .version = {2, 0, 0}, 1862 .hint_size = 4, 1863 .owner = THIS_MODULE, 1864 .create = smq_create, 1865 .real = &smq_policy_type 1866 }; 1867 1868 static int __init smq_init(void) 1869 { 1870 int r; 1871 1872 r = dm_cache_policy_register(&smq_policy_type); 1873 if (r) { 1874 DMERR("register failed %d", r); 1875 return -ENOMEM; 1876 } 1877 1878 r = dm_cache_policy_register(&mq_policy_type); 1879 if (r) { 1880 DMERR("register failed (as mq) %d", r); 1881 goto out_mq; 1882 } 1883 1884 r = dm_cache_policy_register(&cleaner_policy_type); 1885 if (r) { 1886 DMERR("register failed (as cleaner) %d", r); 1887 goto out_cleaner; 1888 } 1889 1890 r = dm_cache_policy_register(&default_policy_type); 1891 if (r) { 1892 DMERR("register failed (as default) %d", r); 1893 goto out_default; 1894 } 1895 1896 return 0; 1897 1898 out_default: 1899 dm_cache_policy_unregister(&cleaner_policy_type); 1900 out_cleaner: 1901 dm_cache_policy_unregister(&mq_policy_type); 1902 out_mq: 1903 dm_cache_policy_unregister(&smq_policy_type); 1904 1905 return -ENOMEM; 1906 } 1907 1908 static void __exit smq_exit(void) 1909 { 1910 dm_cache_policy_unregister(&cleaner_policy_type); 1911 dm_cache_policy_unregister(&smq_policy_type); 1912 dm_cache_policy_unregister(&mq_policy_type); 1913 dm_cache_policy_unregister(&default_policy_type); 1914 } 1915 1916 module_init(smq_init); 1917 module_exit(smq_exit); 1918 1919 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); 1920 MODULE_LICENSE("GPL"); 1921 MODULE_DESCRIPTION("smq cache policy"); 1922 1923 MODULE_ALIAS("dm-cache-default"); 1924 MODULE_ALIAS("dm-cache-mq"); 1925 MODULE_ALIAS("dm-cache-cleaner"); 1926