xref: /linux/drivers/md/dm-cache-metadata.c (revision ab520be8cd5d56867fc95cfbc34b90880faf1f9d)
1 /*
2  * Copyright (C) 2012 Red Hat, Inc.
3  *
4  * This file is released under the GPL.
5  */
6 
7 #include "dm-cache-metadata.h"
8 
9 #include "persistent-data/dm-array.h"
10 #include "persistent-data/dm-bitset.h"
11 #include "persistent-data/dm-space-map.h"
12 #include "persistent-data/dm-space-map-disk.h"
13 #include "persistent-data/dm-transaction-manager.h"
14 
15 #include <linux/device-mapper.h>
16 
17 /*----------------------------------------------------------------*/
18 
19 #define DM_MSG_PREFIX   "cache metadata"
20 
/*
 * Value stored in, and checked against, the superblock 'magic' field.
 * NOTE: the leading 0 makes this an octal literal in C.
 */
21 #define CACHE_SUPERBLOCK_MAGIC 06142003
/* Block number used whenever the superblock is locked. */
22 #define CACHE_SUPERBLOCK_LOCATION 0
23 
24 /*
25  * Defines the range of metadata versions that this module can handle.
26  */
27 #define MIN_CACHE_VERSION 1
28 #define MAX_CACHE_VERSION 1
29 
/* Passed as the third argument of dm_block_manager_create(). */
30 #define CACHE_METADATA_CACHE_SIZE 64
31 
32 /*
33  *  3 for btree insert +
34  *  2 for btree lookup used within space map
35  */
36 #define CACHE_MAX_CONCURRENT_LOCKS 5
/* Size in bytes of the buffers holding a copy of the metadata space map root. */
37 #define SPACE_MAP_ROOT_SIZE 128
38 
/*
 * Bit numbers (not masks) within the superblock 'flags' word; they are
 * used with set_bit()/clear_bit()/test_bit().
 */
enum superblock_flag_bits {
	/* Used to spot crashes that would invalidate the dirty bitset. */
	CLEAN_SHUTDOWN = 0,
	/* The metadata must be checked using the tools. */
	NEEDS_CHECK = 1,
};
45 
46 /*
47  * Each mapping from cache block -> origin block carries a set of flags.
48  */
enum mapping_bits {
	/*
	 * The mapping is valid.  Mappings are held in an array, so a
	 * non-existent mapping is an entry with this flag cleared.
	 */
	M_VALID = (1 << 0),

	/*
	 * The copy of the data on the cache differs from the copy on the
	 * origin.
	 */
	M_DIRTY = (1 << 1)
};
61 
/*
 * On-disk superblock layout.  Multi-byte fields are little-endian and the
 * structure is packed; __commit_transaction() asserts at build time that
 * it does not exceed 512 bytes.
 */
62 struct cache_disk_superblock {
	/* Checksum of the rest of the block, i.e. from 'flags' onwards. */
63 	__le32 csum;
	/* Bits are numbered by enum superblock_flag_bits. */
64 	__le32 flags;
	/* Location of this block; set before write and verified on read. */
65 	__le64 blocknr;
66 
67 	__u8 uuid[16];
	/* Must equal CACHE_SUPERBLOCK_MAGIC. */
68 	__le64 magic;
	/* Must lie within [MIN_CACHE_VERSION, MAX_CACHE_VERSION]. */
69 	__le32 version;
70 
71 	__u8 policy_name[CACHE_POLICY_NAME_SIZE];
72 	__le32 policy_hint_size;
73 
74 	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
	/* Root of the cblock -> (oblock, flags) mapping array. */
75 	__le64 mapping_root;
76 	__le64 hint_root;
77 
	/* Root of the discard bitset and the geometry it was built with. */
78 	__le64 discard_root;
79 	__le64 discard_block_size;
80 	__le64 discard_nr_blocks;
81 
82 	__le32 data_block_size;
83 	__le32 metadata_block_size;
84 	__le32 cache_blocks;
85 
	/* Feature flags; see __check_incompat_features() for how they gate access. */
86 	__le32 compat_flags;
87 	__le32 compat_ro_flags;
88 	__le32 incompat_flags;
89 
	/* Saved copy of the struct dm_cache_statistics counters. */
90 	__le32 read_hits;
91 	__le32 read_misses;
92 	__le32 write_hits;
93 	__le32 write_misses;
94 
95 	__le32 policy_version[CACHE_POLICY_VERSION_SIZE];
96 } __packed;
97 
/*
 * In-core state for one metadata device.
 */
98 struct dm_cache_metadata {
	/* Objects are ref counted and linked on the file-level 'table' list. */
99 	atomic_t ref_count;
100 	struct list_head list;
101 
	/* The metadata device; also the key used by lookup(). */
102 	struct block_device *bdev;
103 	struct dm_block_manager *bm;
104 	struct dm_space_map *metadata_sm;
105 	struct dm_transaction_manager *tm;
106 
	/* Array descriptors for the mappings and hints, and the discard bitset. */
107 	struct dm_array_info info;
108 	struct dm_array_info hint_info;
109 	struct dm_disk_bitset discard_info;
110 
	/* Taken via cmd_read_lock() / cmd_write_lock(). */
111 	struct rw_semaphore root_lock;
	/* In-core copy of the superblock flags. */
112 	unsigned long flags;
	/* In-core copies of the roots stored in the superblock. */
113 	dm_block_t root;
114 	dm_block_t hint_root;
115 	dm_block_t discard_root;
116 
117 	sector_t discard_block_size;
118 	dm_dblock_t discard_nr_blocks;
119 
120 	sector_t data_block_size;
121 	dm_cblock_t cache_blocks;
	/* Set by the mutating operations; cleared by read_superblock_fields(). */
122 	bool changed:1;
	/* Whether CLEAN_SHUTDOWN was set in the superblock when it was opened. */
123 	bool clean_when_opened:1;
124 
125 	char policy_name[CACHE_POLICY_NAME_SIZE];
126 	unsigned policy_version[CACHE_POLICY_VERSION_SIZE];
127 	size_t policy_hint_size;
128 	struct dm_cache_statistics stats;
129 
130 	/*
131 	 * Reading the space map root can fail, so we read it into this
132 	 * buffer before the superblock is locked and updated.
133 	 */
134 	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
135 
136 	/*
137 	 * Set if a transaction has to be aborted but the attempt to roll
138 	 * back to the previous (good) transaction failed.  The only
139 	 * metadata operation permissible in this state is the closing of
140 	 * the device.
141 	 */
142 	bool fail_io:1;
143 
144 	/*
145 	 * These structures are used when loading metadata.  They're too
146 	 * big to put on the stack.
147 	 */
148 	struct dm_array_cursor mapping_cursor;
149 	struct dm_array_cursor hint_cursor;
150 };
151 
152 /*-------------------------------------------------------------------
153  * superblock validator
154  *-----------------------------------------------------------------*/
155 
156 #define SUPERBLOCK_CSUM_XOR 9031977
157 
158 static void sb_prepare_for_write(struct dm_block_validator *v,
159 				 struct dm_block *b,
160 				 size_t sb_block_size)
161 {
162 	struct cache_disk_superblock *disk_super = dm_block_data(b);
163 
164 	disk_super->blocknr = cpu_to_le64(dm_block_location(b));
165 	disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
166 						      sb_block_size - sizeof(__le32),
167 						      SUPERBLOCK_CSUM_XOR));
168 }
169 
170 static int check_metadata_version(struct cache_disk_superblock *disk_super)
171 {
172 	uint32_t metadata_version = le32_to_cpu(disk_super->version);
173 	if (metadata_version < MIN_CACHE_VERSION || metadata_version > MAX_CACHE_VERSION) {
174 		DMERR("Cache metadata version %u found, but only versions between %u and %u supported.",
175 		      metadata_version, MIN_CACHE_VERSION, MAX_CACHE_VERSION);
176 		return -EINVAL;
177 	}
178 
179 	return 0;
180 }
181 
182 static int sb_check(struct dm_block_validator *v,
183 		    struct dm_block *b,
184 		    size_t sb_block_size)
185 {
186 	struct cache_disk_superblock *disk_super = dm_block_data(b);
187 	__le32 csum_le;
188 
189 	if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) {
190 		DMERR("sb_check failed: blocknr %llu: wanted %llu",
191 		      le64_to_cpu(disk_super->blocknr),
192 		      (unsigned long long)dm_block_location(b));
193 		return -ENOTBLK;
194 	}
195 
196 	if (le64_to_cpu(disk_super->magic) != CACHE_SUPERBLOCK_MAGIC) {
197 		DMERR("sb_check failed: magic %llu: wanted %llu",
198 		      le64_to_cpu(disk_super->magic),
199 		      (unsigned long long)CACHE_SUPERBLOCK_MAGIC);
200 		return -EILSEQ;
201 	}
202 
203 	csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
204 					     sb_block_size - sizeof(__le32),
205 					     SUPERBLOCK_CSUM_XOR));
206 	if (csum_le != disk_super->csum) {
207 		DMERR("sb_check failed: csum %u: wanted %u",
208 		      le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum));
209 		return -EILSEQ;
210 	}
211 
212 	return check_metadata_version(disk_super);
213 }
214 
215 static struct dm_block_validator sb_validator = {
216 	.name = "superblock",
217 	.prepare_for_write = sb_prepare_for_write,
218 	.check = sb_check
219 };
220 
221 /*----------------------------------------------------------------*/
222 
223 static int superblock_read_lock(struct dm_cache_metadata *cmd,
224 				struct dm_block **sblock)
225 {
226 	return dm_bm_read_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
227 			       &sb_validator, sblock);
228 }
229 
230 static int superblock_lock_zero(struct dm_cache_metadata *cmd,
231 				struct dm_block **sblock)
232 {
233 	return dm_bm_write_lock_zero(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
234 				     &sb_validator, sblock);
235 }
236 
237 static int superblock_lock(struct dm_cache_metadata *cmd,
238 			   struct dm_block **sblock)
239 {
240 	return dm_bm_write_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
241 				&sb_validator, sblock);
242 }
243 
244 /*----------------------------------------------------------------*/
245 
246 static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *result)
247 {
248 	int r;
249 	unsigned i;
250 	struct dm_block *b;
251 	__le64 *data_le, zero = cpu_to_le64(0);
252 	unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64);
253 
254 	/*
255 	 * We can't use a validator here - it may be all zeroes.
256 	 */
257 	r = dm_bm_read_lock(bm, CACHE_SUPERBLOCK_LOCATION, NULL, &b);
258 	if (r)
259 		return r;
260 
261 	data_le = dm_block_data(b);
262 	*result = true;
263 	for (i = 0; i < sb_block_size; i++) {
264 		if (data_le[i] != zero) {
265 			*result = false;
266 			break;
267 		}
268 	}
269 
270 	dm_bm_unlock(b);
271 
272 	return 0;
273 }
274 
275 static void __setup_mapping_info(struct dm_cache_metadata *cmd)
276 {
277 	struct dm_btree_value_type vt;
278 
279 	vt.context = NULL;
280 	vt.size = sizeof(__le64);
281 	vt.inc = NULL;
282 	vt.dec = NULL;
283 	vt.equal = NULL;
284 	dm_array_info_init(&cmd->info, cmd->tm, &vt);
285 
286 	if (cmd->policy_hint_size) {
287 		vt.size = sizeof(__le32);
288 		dm_array_info_init(&cmd->hint_info, cmd->tm, &vt);
289 	}
290 }
291 
292 static int __save_sm_root(struct dm_cache_metadata *cmd)
293 {
294 	int r;
295 	size_t metadata_len;
296 
297 	r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
298 	if (r < 0)
299 		return r;
300 
301 	return dm_sm_copy_root(cmd->metadata_sm, &cmd->metadata_space_map_root,
302 			       metadata_len);
303 }
304 
305 static void __copy_sm_root(struct dm_cache_metadata *cmd,
306 			   struct cache_disk_superblock *disk_super)
307 {
308 	memcpy(&disk_super->metadata_space_map_root,
309 	       &cmd->metadata_space_map_root,
310 	       sizeof(cmd->metadata_space_map_root));
311 }
312 
313 static int __write_initial_superblock(struct dm_cache_metadata *cmd)
314 {
315 	int r;
316 	struct dm_block *sblock;
317 	struct cache_disk_superblock *disk_super;
318 	sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT;
319 
320 	/* FIXME: see if we can lose the max sectors limit */
321 	if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS)
322 		bdev_size = DM_CACHE_METADATA_MAX_SECTORS;
323 
324 	r = dm_tm_pre_commit(cmd->tm);
325 	if (r < 0)
326 		return r;
327 
328 	/*
329 	 * dm_sm_copy_root() can fail.  So we need to do it before we start
330 	 * updating the superblock.
331 	 */
332 	r = __save_sm_root(cmd);
333 	if (r)
334 		return r;
335 
336 	r = superblock_lock_zero(cmd, &sblock);
337 	if (r)
338 		return r;
339 
340 	disk_super = dm_block_data(sblock);
341 	disk_super->flags = 0;
342 	memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
343 	disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
344 	disk_super->version = cpu_to_le32(MAX_CACHE_VERSION);
345 	memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
346 	memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
347 	disk_super->policy_hint_size = 0;
348 
349 	__copy_sm_root(cmd, disk_super);
350 
351 	disk_super->mapping_root = cpu_to_le64(cmd->root);
352 	disk_super->hint_root = cpu_to_le64(cmd->hint_root);
353 	disk_super->discard_root = cpu_to_le64(cmd->discard_root);
354 	disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
355 	disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
356 	disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE);
357 	disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
358 	disk_super->cache_blocks = cpu_to_le32(0);
359 
360 	disk_super->read_hits = cpu_to_le32(0);
361 	disk_super->read_misses = cpu_to_le32(0);
362 	disk_super->write_hits = cpu_to_le32(0);
363 	disk_super->write_misses = cpu_to_le32(0);
364 
365 	return dm_tm_commit(cmd->tm, sblock);
366 }
367 
368 static int __format_metadata(struct dm_cache_metadata *cmd)
369 {
370 	int r;
371 
372 	r = dm_tm_create_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
373 				 &cmd->tm, &cmd->metadata_sm);
374 	if (r < 0) {
375 		DMERR("tm_create_with_sm failed");
376 		return r;
377 	}
378 
379 	__setup_mapping_info(cmd);
380 
381 	r = dm_array_empty(&cmd->info, &cmd->root);
382 	if (r < 0)
383 		goto bad;
384 
385 	dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
386 	r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root);
387 	if (r < 0)
388 		goto bad;
389 
390 	cmd->discard_block_size = 0;
391 	cmd->discard_nr_blocks = 0;
392 
393 	r = __write_initial_superblock(cmd);
394 	if (r)
395 		goto bad;
396 
397 	cmd->clean_when_opened = true;
398 	return 0;
399 
400 bad:
401 	dm_tm_destroy(cmd->tm);
402 	dm_sm_destroy(cmd->metadata_sm);
403 
404 	return r;
405 }
406 
407 static int __check_incompat_features(struct cache_disk_superblock *disk_super,
408 				     struct dm_cache_metadata *cmd)
409 {
410 	uint32_t features;
411 
412 	features = le32_to_cpu(disk_super->incompat_flags) & ~DM_CACHE_FEATURE_INCOMPAT_SUPP;
413 	if (features) {
414 		DMERR("could not access metadata due to unsupported optional features (%lx).",
415 		      (unsigned long)features);
416 		return -EINVAL;
417 	}
418 
419 	/*
420 	 * Check for read-only metadata to skip the following RDWR checks.
421 	 */
422 	if (get_disk_ro(cmd->bdev->bd_disk))
423 		return 0;
424 
425 	features = le32_to_cpu(disk_super->compat_ro_flags) & ~DM_CACHE_FEATURE_COMPAT_RO_SUPP;
426 	if (features) {
427 		DMERR("could not access metadata RDWR due to unsupported optional features (%lx).",
428 		      (unsigned long)features);
429 		return -EINVAL;
430 	}
431 
432 	return 0;
433 }
434 
435 static int __open_metadata(struct dm_cache_metadata *cmd)
436 {
437 	int r;
438 	struct dm_block *sblock;
439 	struct cache_disk_superblock *disk_super;
440 	unsigned long sb_flags;
441 
442 	r = superblock_read_lock(cmd, &sblock);
443 	if (r < 0) {
444 		DMERR("couldn't read lock superblock");
445 		return r;
446 	}
447 
448 	disk_super = dm_block_data(sblock);
449 
450 	/* Verify the data block size hasn't changed */
451 	if (le32_to_cpu(disk_super->data_block_size) != cmd->data_block_size) {
452 		DMERR("changing the data block size (from %u to %llu) is not supported",
453 		      le32_to_cpu(disk_super->data_block_size),
454 		      (unsigned long long)cmd->data_block_size);
455 		r = -EINVAL;
456 		goto bad;
457 	}
458 
459 	r = __check_incompat_features(disk_super, cmd);
460 	if (r < 0)
461 		goto bad;
462 
463 	r = dm_tm_open_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
464 			       disk_super->metadata_space_map_root,
465 			       sizeof(disk_super->metadata_space_map_root),
466 			       &cmd->tm, &cmd->metadata_sm);
467 	if (r < 0) {
468 		DMERR("tm_open_with_sm failed");
469 		goto bad;
470 	}
471 
472 	__setup_mapping_info(cmd);
473 	dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
474 	sb_flags = le32_to_cpu(disk_super->flags);
475 	cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags);
476 	dm_bm_unlock(sblock);
477 
478 	return 0;
479 
480 bad:
481 	dm_bm_unlock(sblock);
482 	return r;
483 }
484 
485 static int __open_or_format_metadata(struct dm_cache_metadata *cmd,
486 				     bool format_device)
487 {
488 	int r;
489 	bool unformatted = false;
490 
491 	r = __superblock_all_zeroes(cmd->bm, &unformatted);
492 	if (r)
493 		return r;
494 
495 	if (unformatted)
496 		return format_device ? __format_metadata(cmd) : -EPERM;
497 
498 	return __open_metadata(cmd);
499 }
500 
501 static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
502 					    bool may_format_device)
503 {
504 	int r;
505 	cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
506 					  CACHE_METADATA_CACHE_SIZE,
507 					  CACHE_MAX_CONCURRENT_LOCKS);
508 	if (IS_ERR(cmd->bm)) {
509 		DMERR("could not create block manager");
510 		return PTR_ERR(cmd->bm);
511 	}
512 
513 	r = __open_or_format_metadata(cmd, may_format_device);
514 	if (r)
515 		dm_block_manager_destroy(cmd->bm);
516 
517 	return r;
518 }
519 
520 static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd)
521 {
522 	dm_sm_destroy(cmd->metadata_sm);
523 	dm_tm_destroy(cmd->tm);
524 	dm_block_manager_destroy(cmd->bm);
525 }
526 
527 typedef unsigned long (*flags_mutator)(unsigned long);
528 
529 static void update_flags(struct cache_disk_superblock *disk_super,
530 			 flags_mutator mutator)
531 {
532 	uint32_t sb_flags = mutator(le32_to_cpu(disk_super->flags));
533 	disk_super->flags = cpu_to_le32(sb_flags);
534 }
535 
536 static unsigned long set_clean_shutdown(unsigned long flags)
537 {
538 	set_bit(CLEAN_SHUTDOWN, &flags);
539 	return flags;
540 }
541 
542 static unsigned long clear_clean_shutdown(unsigned long flags)
543 {
544 	clear_bit(CLEAN_SHUTDOWN, &flags);
545 	return flags;
546 }
547 
548 static void read_superblock_fields(struct dm_cache_metadata *cmd,
549 				   struct cache_disk_superblock *disk_super)
550 {
551 	cmd->flags = le32_to_cpu(disk_super->flags);
552 	cmd->root = le64_to_cpu(disk_super->mapping_root);
553 	cmd->hint_root = le64_to_cpu(disk_super->hint_root);
554 	cmd->discard_root = le64_to_cpu(disk_super->discard_root);
555 	cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size);
556 	cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks));
557 	cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
558 	cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
559 	strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
560 	cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]);
561 	cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]);
562 	cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]);
563 	cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size);
564 
565 	cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits);
566 	cmd->stats.read_misses = le32_to_cpu(disk_super->read_misses);
567 	cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits);
568 	cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses);
569 
570 	cmd->changed = false;
571 }
572 
573 /*
574  * The mutator updates the superblock flags.
575  */
576 static int __begin_transaction_flags(struct dm_cache_metadata *cmd,
577 				     flags_mutator mutator)
578 {
579 	int r;
580 	struct cache_disk_superblock *disk_super;
581 	struct dm_block *sblock;
582 
583 	r = superblock_lock(cmd, &sblock);
584 	if (r)
585 		return r;
586 
587 	disk_super = dm_block_data(sblock);
588 	update_flags(disk_super, mutator);
589 	read_superblock_fields(cmd, disk_super);
590 	dm_bm_unlock(sblock);
591 
592 	return dm_bm_flush(cmd->bm);
593 }
594 
/*
 * Start a transaction by reloading the in-core fields from the
 * superblock.
 */
static int __begin_transaction(struct dm_cache_metadata *cmd)
{
	struct dm_block *sblock;
	int r;

	/*
	 * The superblock is re-read every time; this shouldn't really be
	 * necessary.
	 */
	r = superblock_read_lock(cmd, &sblock);
	if (r)
		return r;

	read_superblock_fields(cmd, dm_block_data(sblock));
	dm_bm_unlock(sblock);

	return 0;
}
615 
616 static int __commit_transaction(struct dm_cache_metadata *cmd,
617 				flags_mutator mutator)
618 {
619 	int r;
620 	struct cache_disk_superblock *disk_super;
621 	struct dm_block *sblock;
622 
623 	/*
624 	 * We need to know if the cache_disk_superblock exceeds a 512-byte sector.
625 	 */
626 	BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512);
627 
628 	r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root,
629 			    &cmd->discard_root);
630 	if (r)
631 		return r;
632 
633 	r = dm_tm_pre_commit(cmd->tm);
634 	if (r < 0)
635 		return r;
636 
637 	r = __save_sm_root(cmd);
638 	if (r)
639 		return r;
640 
641 	r = superblock_lock(cmd, &sblock);
642 	if (r)
643 		return r;
644 
645 	disk_super = dm_block_data(sblock);
646 
647 	disk_super->flags = cpu_to_le32(cmd->flags);
648 	if (mutator)
649 		update_flags(disk_super, mutator);
650 
651 	disk_super->mapping_root = cpu_to_le64(cmd->root);
652 	disk_super->hint_root = cpu_to_le64(cmd->hint_root);
653 	disk_super->discard_root = cpu_to_le64(cmd->discard_root);
654 	disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
655 	disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
656 	disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
657 	strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
658 	disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
659 	disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]);
660 	disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]);
661 
662 	disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits);
663 	disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses);
664 	disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits);
665 	disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses);
666 	__copy_sm_root(cmd, disk_super);
667 
668 	return dm_tm_commit(cmd->tm, sblock);
669 }
670 
671 /*----------------------------------------------------------------*/
672 
673 /*
674  * The mappings are held in a dm-array that has 64-bit values stored in
675  * little-endian format.  The index is the cblock, the high 48 bits of the
676  * value are the oblock and the low 16 bits are the flags.
677  */
678 #define FLAGS_MASK ((1 << 16) - 1)
679 
680 static __le64 pack_value(dm_oblock_t block, unsigned flags)
681 {
682 	uint64_t value = from_oblock(block);
683 	value <<= 16;
684 	value = value | (flags & FLAGS_MASK);
685 	return cpu_to_le64(value);
686 }
687 
688 static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags)
689 {
690 	uint64_t value = le64_to_cpu(value_le);
691 	uint64_t b = value >> 16;
692 	*block = to_oblock(b);
693 	*flags = value & FLAGS_MASK;
694 }
695 
696 /*----------------------------------------------------------------*/
697 
698 static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
699 					       sector_t data_block_size,
700 					       bool may_format_device,
701 					       size_t policy_hint_size)
702 {
703 	int r;
704 	struct dm_cache_metadata *cmd;
705 
706 	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
707 	if (!cmd) {
708 		DMERR("could not allocate metadata struct");
709 		return ERR_PTR(-ENOMEM);
710 	}
711 
712 	atomic_set(&cmd->ref_count, 1);
713 	init_rwsem(&cmd->root_lock);
714 	cmd->bdev = bdev;
715 	cmd->data_block_size = data_block_size;
716 	cmd->cache_blocks = 0;
717 	cmd->policy_hint_size = policy_hint_size;
718 	cmd->changed = true;
719 	cmd->fail_io = false;
720 
721 	r = __create_persistent_data_objects(cmd, may_format_device);
722 	if (r) {
723 		kfree(cmd);
724 		return ERR_PTR(r);
725 	}
726 
727 	r = __begin_transaction_flags(cmd, clear_clean_shutdown);
728 	if (r < 0) {
729 		dm_cache_metadata_close(cmd);
730 		return ERR_PTR(r);
731 	}
732 
733 	return cmd;
734 }
735 
736 /*
737  * We keep a little list of ref counted metadata objects to prevent two
738  * different target instances creating separate bufio instances.  This is
739  * an issue if a table is reloaded before the suspend.
740  */
741 static DEFINE_MUTEX(table_lock);
742 static LIST_HEAD(table);
743 
744 static struct dm_cache_metadata *lookup(struct block_device *bdev)
745 {
746 	struct dm_cache_metadata *cmd;
747 
748 	list_for_each_entry(cmd, &table, list)
749 		if (cmd->bdev == bdev) {
750 			atomic_inc(&cmd->ref_count);
751 			return cmd;
752 		}
753 
754 	return NULL;
755 }
756 
757 static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
758 						sector_t data_block_size,
759 						bool may_format_device,
760 						size_t policy_hint_size)
761 {
762 	struct dm_cache_metadata *cmd, *cmd2;
763 
764 	mutex_lock(&table_lock);
765 	cmd = lookup(bdev);
766 	mutex_unlock(&table_lock);
767 
768 	if (cmd)
769 		return cmd;
770 
771 	cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size);
772 	if (!IS_ERR(cmd)) {
773 		mutex_lock(&table_lock);
774 		cmd2 = lookup(bdev);
775 		if (cmd2) {
776 			mutex_unlock(&table_lock);
777 			__destroy_persistent_data_objects(cmd);
778 			kfree(cmd);
779 			return cmd2;
780 		}
781 		list_add(&cmd->list, &table);
782 		mutex_unlock(&table_lock);
783 	}
784 
785 	return cmd;
786 }
787 
788 static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size)
789 {
790 	if (cmd->data_block_size != data_block_size) {
791 		DMERR("data_block_size (%llu) different from that in metadata (%llu)",
792 		      (unsigned long long) data_block_size,
793 		      (unsigned long long) cmd->data_block_size);
794 		return false;
795 	}
796 
797 	return true;
798 }
799 
800 struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
801 						 sector_t data_block_size,
802 						 bool may_format_device,
803 						 size_t policy_hint_size)
804 {
805 	struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size,
806 						       may_format_device, policy_hint_size);
807 
808 	if (!IS_ERR(cmd) && !same_params(cmd, data_block_size)) {
809 		dm_cache_metadata_close(cmd);
810 		return ERR_PTR(-EINVAL);
811 	}
812 
813 	return cmd;
814 }
815 
816 void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
817 {
818 	if (atomic_dec_and_test(&cmd->ref_count)) {
819 		mutex_lock(&table_lock);
820 		list_del(&cmd->list);
821 		mutex_unlock(&table_lock);
822 
823 		if (!cmd->fail_io)
824 			__destroy_persistent_data_objects(cmd);
825 		kfree(cmd);
826 	}
827 }
828 
829 /*
830  * Checks that the given cache block is either unmapped or clean.
831  */
832 static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b,
833 				   bool *result)
834 {
835 	int r;
836 	__le64 value;
837 	dm_oblock_t ob;
838 	unsigned flags;
839 
840 	r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value);
841 	if (r) {
842 		DMERR("block_unmapped_or_clean failed");
843 		return r;
844 	}
845 
846 	unpack_value(value, &ob, &flags);
847 	*result = !((flags & M_VALID) && (flags & M_DIRTY));
848 
849 	return 0;
850 }
851 
852 static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
853 					dm_cblock_t begin, dm_cblock_t end,
854 					bool *result)
855 {
856 	int r;
857 	*result = true;
858 
859 	while (begin != end) {
860 		r = block_unmapped_or_clean(cmd, begin, result);
861 		if (r)
862 			return r;
863 
864 		if (!*result) {
865 			DMERR("cache block %llu is dirty",
866 			      (unsigned long long) from_cblock(begin));
867 			return 0;
868 		}
869 
870 		begin = to_cblock(from_cblock(begin) + 1);
871 	}
872 
873 	return 0;
874 }
875 
876 static bool cmd_write_lock(struct dm_cache_metadata *cmd)
877 {
878 	down_write(&cmd->root_lock);
879 	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
880 		up_write(&cmd->root_lock);
881 		return false;
882 	}
883 	return true;
884 }
885 
886 #define WRITE_LOCK(cmd)				\
887 	do {					\
888 		if (!cmd_write_lock((cmd)))	\
889 			return -EINVAL;		\
890 	} while(0)
891 
892 #define WRITE_LOCK_VOID(cmd)			\
893 	do {					\
894 		if (!cmd_write_lock((cmd)))	\
895 			return;			\
896 	} while(0)
897 
898 #define WRITE_UNLOCK(cmd) \
899 	up_write(&(cmd)->root_lock)
900 
901 static bool cmd_read_lock(struct dm_cache_metadata *cmd)
902 {
903 	down_read(&cmd->root_lock);
904 	if (cmd->fail_io) {
905 		up_read(&cmd->root_lock);
906 		return false;
907 	}
908 	return true;
909 }
910 
911 #define READ_LOCK(cmd)				\
912 	do {					\
913 		if (!cmd_read_lock((cmd)))	\
914 			return -EINVAL;		\
915 	} while(0)
916 
917 #define READ_LOCK_VOID(cmd)			\
918 	do {					\
919 		if (!cmd_read_lock((cmd)))	\
920 			return;			\
921 	} while(0)
922 
923 #define READ_UNLOCK(cmd) \
924 	up_read(&(cmd)->root_lock)
925 
926 int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
927 {
928 	int r;
929 	bool clean;
930 	__le64 null_mapping = pack_value(0, 0);
931 
932 	WRITE_LOCK(cmd);
933 	__dm_bless_for_disk(&null_mapping);
934 
935 	if (from_cblock(new_cache_size) < from_cblock(cmd->cache_blocks)) {
936 		r = blocks_are_unmapped_or_clean(cmd, new_cache_size, cmd->cache_blocks, &clean);
937 		if (r) {
938 			__dm_unbless_for_disk(&null_mapping);
939 			goto out;
940 		}
941 
942 		if (!clean) {
943 			DMERR("unable to shrink cache due to dirty blocks");
944 			r = -EINVAL;
945 			__dm_unbless_for_disk(&null_mapping);
946 			goto out;
947 		}
948 	}
949 
950 	r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks),
951 			    from_cblock(new_cache_size),
952 			    &null_mapping, &cmd->root);
953 	if (!r)
954 		cmd->cache_blocks = new_cache_size;
955 	cmd->changed = true;
956 
957 out:
958 	WRITE_UNLOCK(cmd);
959 
960 	return r;
961 }
962 
963 int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
964 				   sector_t discard_block_size,
965 				   dm_dblock_t new_nr_entries)
966 {
967 	int r;
968 
969 	WRITE_LOCK(cmd);
970 	r = dm_bitset_resize(&cmd->discard_info,
971 			     cmd->discard_root,
972 			     from_dblock(cmd->discard_nr_blocks),
973 			     from_dblock(new_nr_entries),
974 			     false, &cmd->discard_root);
975 	if (!r) {
976 		cmd->discard_block_size = discard_block_size;
977 		cmd->discard_nr_blocks = new_nr_entries;
978 	}
979 
980 	cmd->changed = true;
981 	WRITE_UNLOCK(cmd);
982 
983 	return r;
984 }
985 
986 static int __set_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
987 {
988 	return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root,
989 				 from_dblock(b), &cmd->discard_root);
990 }
991 
992 static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
993 {
994 	return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root,
995 				   from_dblock(b), &cmd->discard_root);
996 }
997 
998 static int __is_discarded(struct dm_cache_metadata *cmd, dm_dblock_t b,
999 			  bool *is_discarded)
1000 {
1001 	return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root,
1002 				  from_dblock(b), &cmd->discard_root,
1003 				  is_discarded);
1004 }
1005 
1006 static int __discard(struct dm_cache_metadata *cmd,
1007 		     dm_dblock_t dblock, bool discard)
1008 {
1009 	int r;
1010 
1011 	r = (discard ? __set_discard : __clear_discard)(cmd, dblock);
1012 	if (r)
1013 		return r;
1014 
1015 	cmd->changed = true;
1016 	return 0;
1017 }
1018 
1019 int dm_cache_set_discard(struct dm_cache_metadata *cmd,
1020 			 dm_dblock_t dblock, bool discard)
1021 {
1022 	int r;
1023 
1024 	WRITE_LOCK(cmd);
1025 	r = __discard(cmd, dblock, discard);
1026 	WRITE_UNLOCK(cmd);
1027 
1028 	return r;
1029 }
1030 
1031 static int __load_discards(struct dm_cache_metadata *cmd,
1032 			   load_discard_fn fn, void *context)
1033 {
1034 	int r = 0;
1035 	dm_block_t b;
1036 	bool discard;
1037 
1038 	for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
1039 		dm_dblock_t dblock = to_dblock(b);
1040 
1041 		if (cmd->clean_when_opened) {
1042 			r = __is_discarded(cmd, dblock, &discard);
1043 			if (r)
1044 				return r;
1045 		} else
1046 			discard = false;
1047 
1048 		r = fn(context, cmd->discard_block_size, dblock, discard);
1049 		if (r)
1050 			break;
1051 	}
1052 
1053 	return r;
1054 }
1055 
1056 int dm_cache_load_discards(struct dm_cache_metadata *cmd,
1057 			   load_discard_fn fn, void *context)
1058 {
1059 	int r;
1060 
1061 	READ_LOCK(cmd);
1062 	r = __load_discards(cmd, fn, context);
1063 	READ_UNLOCK(cmd);
1064 
1065 	return r;
1066 }
1067 
1068 int dm_cache_size(struct dm_cache_metadata *cmd, dm_cblock_t *result)
1069 {
1070 	READ_LOCK(cmd);
1071 	*result = cmd->cache_blocks;
1072 	READ_UNLOCK(cmd);
1073 
1074 	return 0;
1075 }
1076 
1077 static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
1078 {
1079 	int r;
1080 	__le64 value = pack_value(0, 0);
1081 
1082 	__dm_bless_for_disk(&value);
1083 	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1084 			       &value, &cmd->root);
1085 	if (r)
1086 		return r;
1087 
1088 	cmd->changed = true;
1089 	return 0;
1090 }
1091 
1092 int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
1093 {
1094 	int r;
1095 
1096 	WRITE_LOCK(cmd);
1097 	r = __remove(cmd, cblock);
1098 	WRITE_UNLOCK(cmd);
1099 
1100 	return r;
1101 }
1102 
1103 static int __insert(struct dm_cache_metadata *cmd,
1104 		    dm_cblock_t cblock, dm_oblock_t oblock)
1105 {
1106 	int r;
1107 	__le64 value = pack_value(oblock, M_VALID);
1108 	__dm_bless_for_disk(&value);
1109 
1110 	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1111 			       &value, &cmd->root);
1112 	if (r)
1113 		return r;
1114 
1115 	cmd->changed = true;
1116 	return 0;
1117 }
1118 
1119 int dm_cache_insert_mapping(struct dm_cache_metadata *cmd,
1120 			    dm_cblock_t cblock, dm_oblock_t oblock)
1121 {
1122 	int r;
1123 
1124 	WRITE_LOCK(cmd);
1125 	r = __insert(cmd, cblock, oblock);
1126 	WRITE_UNLOCK(cmd);
1127 
1128 	return r;
1129 }
1130 
/*
 * Bundles a load_mapping_fn callback and its opaque context pointer with
 * the metadata object and two boolean switches.
 *
 * NOTE(review): none of the functions visible around this definition
 * reference struct thunk; check for remaining users elsewhere in the
 * file before relying on (or extending) it.
 */
struct thunk {
	load_mapping_fn fn;
	void *context;

	struct dm_cache_metadata *cmd;
	bool respect_dirty_flags;
	bool hints_valid;
};
1139 
1140 static bool policy_unchanged(struct dm_cache_metadata *cmd,
1141 			     struct dm_cache_policy *policy)
1142 {
1143 	const char *policy_name = dm_cache_policy_get_name(policy);
1144 	const unsigned *policy_version = dm_cache_policy_get_version(policy);
1145 	size_t policy_hint_size = dm_cache_policy_get_hint_size(policy);
1146 
1147 	/*
1148 	 * Ensure policy names match.
1149 	 */
1150 	if (strncmp(cmd->policy_name, policy_name, sizeof(cmd->policy_name)))
1151 		return false;
1152 
1153 	/*
1154 	 * Ensure policy major versions match.
1155 	 */
1156 	if (cmd->policy_version[0] != policy_version[0])
1157 		return false;
1158 
1159 	/*
1160 	 * Ensure policy hint sizes match.
1161 	 */
1162 	if (cmd->policy_hint_size != policy_hint_size)
1163 		return false;
1164 
1165 	return true;
1166 }
1167 
1168 static bool hints_array_initialized(struct dm_cache_metadata *cmd)
1169 {
1170 	return cmd->hint_root && cmd->policy_hint_size;
1171 }
1172 
1173 static bool hints_array_available(struct dm_cache_metadata *cmd,
1174 				  struct dm_cache_policy *policy)
1175 {
1176 	return cmd->clean_when_opened && policy_unchanged(cmd, policy) &&
1177 		hints_array_initialized(cmd);
1178 }
1179 
1180 static int __load_mapping(struct dm_cache_metadata *cmd,
1181 			  uint64_t cb, bool hints_valid,
1182 			  struct dm_array_cursor *mapping_cursor,
1183 			  struct dm_array_cursor *hint_cursor,
1184 			  load_mapping_fn fn, void *context)
1185 {
1186 	int r = 0;
1187 
1188 	__le64 mapping;
1189 	__le32 hint = 0;
1190 
1191 	__le64 *mapping_value_le;
1192 	__le32 *hint_value_le;
1193 
1194 	dm_oblock_t oblock;
1195 	unsigned flags;
1196 
1197 	dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le);
1198 	memcpy(&mapping, mapping_value_le, sizeof(mapping));
1199 	unpack_value(mapping, &oblock, &flags);
1200 
1201 	if (flags & M_VALID) {
1202 		if (hints_valid) {
1203 			dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le);
1204 			memcpy(&hint, hint_value_le, sizeof(hint));
1205 		}
1206 
1207 		r = fn(context, oblock, to_cblock(cb), flags & M_DIRTY,
1208 		       le32_to_cpu(hint), hints_valid);
1209 		if (r)
1210 			DMERR("policy couldn't load cblock");
1211 	}
1212 
1213 	return r;
1214 }
1215 
1216 static int __load_mappings(struct dm_cache_metadata *cmd,
1217 			   struct dm_cache_policy *policy,
1218 			   load_mapping_fn fn, void *context)
1219 {
1220 	int r;
1221 	uint64_t cb;
1222 
1223 	bool hints_valid = hints_array_available(cmd, policy);
1224 
1225 	if (from_cblock(cmd->cache_blocks) == 0)
1226 		/* Nothing to do */
1227 		return 0;
1228 
1229 	r = dm_array_cursor_begin(&cmd->info, cmd->root, &cmd->mapping_cursor);
1230 	if (r)
1231 		return r;
1232 
1233 	if (hints_valid) {
1234 		r = dm_array_cursor_begin(&cmd->hint_info, cmd->hint_root, &cmd->hint_cursor);
1235 		if (r) {
1236 			dm_array_cursor_end(&cmd->mapping_cursor);
1237 			return r;
1238 		}
1239 	}
1240 
1241 	for (cb = 0; ; cb++) {
1242 		r = __load_mapping(cmd, cb, hints_valid,
1243 				   &cmd->mapping_cursor, &cmd->hint_cursor,
1244 				   fn, context);
1245 		if (r)
1246 			goto out;
1247 
1248 		/*
1249 		 * We need to break out before we move the cursors.
1250 		 */
1251 		if (cb >= (from_cblock(cmd->cache_blocks) - 1))
1252 			break;
1253 
1254 		r = dm_array_cursor_next(&cmd->mapping_cursor);
1255 		if (r) {
1256 			DMERR("dm_array_cursor_next for mapping failed");
1257 			goto out;
1258 		}
1259 
1260 		if (hints_valid) {
1261 			r = dm_array_cursor_next(&cmd->hint_cursor);
1262 			if (r) {
1263 				DMERR("dm_array_cursor_next for hint failed");
1264 				goto out;
1265 			}
1266 		}
1267 	}
1268 out:
1269 	dm_array_cursor_end(&cmd->mapping_cursor);
1270 	if (hints_valid)
1271 		dm_array_cursor_end(&cmd->hint_cursor);
1272 
1273 	return r;
1274 }
1275 
1276 int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
1277 			   struct dm_cache_policy *policy,
1278 			   load_mapping_fn fn, void *context)
1279 {
1280 	int r;
1281 
1282 	READ_LOCK(cmd);
1283 	r = __load_mappings(cmd, policy, fn, context);
1284 	READ_UNLOCK(cmd);
1285 
1286 	return r;
1287 }
1288 
1289 static int __dump_mapping(void *context, uint64_t cblock, void *leaf)
1290 {
1291 	int r = 0;
1292 	__le64 value;
1293 	dm_oblock_t oblock;
1294 	unsigned flags;
1295 
1296 	memcpy(&value, leaf, sizeof(value));
1297 	unpack_value(value, &oblock, &flags);
1298 
1299 	return r;
1300 }
1301 
1302 static int __dump_mappings(struct dm_cache_metadata *cmd)
1303 {
1304 	return dm_array_walk(&cmd->info, cmd->root, __dump_mapping, NULL);
1305 }
1306 
/*
 * Walks all mappings between READ_LOCK_VOID() and READ_UNLOCK().  The
 * function returns void, so the result of the walk is discarded.
 */
void dm_cache_dump(struct dm_cache_metadata *cmd)
{
	READ_LOCK_VOID(cmd);
	(void) __dump_mappings(cmd);
	READ_UNLOCK(cmd);
}
1313 
1314 int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd)
1315 {
1316 	int r;
1317 
1318 	READ_LOCK(cmd);
1319 	r = cmd->changed;
1320 	READ_UNLOCK(cmd);
1321 
1322 	return r;
1323 }
1324 
1325 static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty)
1326 {
1327 	int r;
1328 	unsigned flags;
1329 	dm_oblock_t oblock;
1330 	__le64 value;
1331 
1332 	r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(cblock), &value);
1333 	if (r)
1334 		return r;
1335 
1336 	unpack_value(value, &oblock, &flags);
1337 
1338 	if (((flags & M_DIRTY) && dirty) || (!(flags & M_DIRTY) && !dirty))
1339 		/* nothing to be done */
1340 		return 0;
1341 
1342 	value = pack_value(oblock, (flags & ~M_DIRTY) | (dirty ? M_DIRTY : 0));
1343 	__dm_bless_for_disk(&value);
1344 
1345 	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1346 			       &value, &cmd->root);
1347 	if (r)
1348 		return r;
1349 
1350 	cmd->changed = true;
1351 	return 0;
1352 
1353 }
1354 
1355 int dm_cache_set_dirty(struct dm_cache_metadata *cmd,
1356 		       dm_cblock_t cblock, bool dirty)
1357 {
1358 	int r;
1359 
1360 	WRITE_LOCK(cmd);
1361 	r = __dirty(cmd, cblock, dirty);
1362 	WRITE_UNLOCK(cmd);
1363 
1364 	return r;
1365 }
1366 
1367 void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd,
1368 				 struct dm_cache_statistics *stats)
1369 {
1370 	READ_LOCK_VOID(cmd);
1371 	*stats = cmd->stats;
1372 	READ_UNLOCK(cmd);
1373 }
1374 
1375 void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd,
1376 				 struct dm_cache_statistics *stats)
1377 {
1378 	WRITE_LOCK_VOID(cmd);
1379 	cmd->stats = *stats;
1380 	WRITE_UNLOCK(cmd);
1381 }
1382 
1383 int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown)
1384 {
1385 	int r;
1386 	flags_mutator mutator = (clean_shutdown ? set_clean_shutdown :
1387 				 clear_clean_shutdown);
1388 
1389 	WRITE_LOCK(cmd);
1390 	r = __commit_transaction(cmd, mutator);
1391 	if (r)
1392 		goto out;
1393 
1394 	r = __begin_transaction(cmd);
1395 
1396 out:
1397 	WRITE_UNLOCK(cmd);
1398 	return r;
1399 }
1400 
1401 int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd,
1402 					   dm_block_t *result)
1403 {
1404 	int r = -EINVAL;
1405 
1406 	READ_LOCK(cmd);
1407 	r = dm_sm_get_nr_free(cmd->metadata_sm, result);
1408 	READ_UNLOCK(cmd);
1409 
1410 	return r;
1411 }
1412 
1413 int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd,
1414 				   dm_block_t *result)
1415 {
1416 	int r = -EINVAL;
1417 
1418 	READ_LOCK(cmd);
1419 	r = dm_sm_get_nr_blocks(cmd->metadata_sm, result);
1420 	READ_UNLOCK(cmd);
1421 
1422 	return r;
1423 }
1424 
1425 /*----------------------------------------------------------------*/
1426 
1427 static int get_hint(uint32_t index, void *value_le, void *context)
1428 {
1429 	uint32_t value;
1430 	struct dm_cache_policy *policy = context;
1431 
1432 	value = policy_get_hint(policy, to_cblock(index));
1433 	*((__le32 *) value_le) = cpu_to_le32(value);
1434 
1435 	return 0;
1436 }
1437 
1438 /*
1439  * It's quicker to always delete the hint array, and recreate with
1440  * dm_array_new().
1441  */
1442 static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
1443 {
1444 	int r;
1445 	size_t hint_size;
1446 	const char *policy_name = dm_cache_policy_get_name(policy);
1447 	const unsigned *policy_version = dm_cache_policy_get_version(policy);
1448 
1449 	if (!policy_name[0] ||
1450 	    (strlen(policy_name) > sizeof(cmd->policy_name) - 1))
1451 		return -EINVAL;
1452 
1453 	strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
1454 	memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
1455 
1456 	hint_size = dm_cache_policy_get_hint_size(policy);
1457 	if (!hint_size)
1458 		return 0; /* short-circuit hints initialization */
1459 	cmd->policy_hint_size = hint_size;
1460 
1461 	if (cmd->hint_root) {
1462 		r = dm_array_del(&cmd->hint_info, cmd->hint_root);
1463 		if (r)
1464 			return r;
1465 	}
1466 
1467 	return dm_array_new(&cmd->hint_info, &cmd->hint_root,
1468 			    from_cblock(cmd->cache_blocks),
1469 			    get_hint, policy);
1470 }
1471 
/*
 * Locked wrapper around write_hints(): the policy identity and hints are
 * written between WRITE_LOCK() and WRITE_UNLOCK(), and the result of
 * write_hints() is returned.
 */
int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
{
	int err;

	WRITE_LOCK(cmd);
	err = write_hints(cmd, policy);
	WRITE_UNLOCK(cmd);

	return err;
}
1482 
1483 int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result)
1484 {
1485 	int r;
1486 
1487 	READ_LOCK(cmd);
1488 	r = blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result);
1489 	READ_UNLOCK(cmd);
1490 
1491 	return r;
1492 }
1493 
1494 void dm_cache_metadata_set_read_only(struct dm_cache_metadata *cmd)
1495 {
1496 	WRITE_LOCK_VOID(cmd);
1497 	dm_bm_set_read_only(cmd->bm);
1498 	WRITE_UNLOCK(cmd);
1499 }
1500 
1501 void dm_cache_metadata_set_read_write(struct dm_cache_metadata *cmd)
1502 {
1503 	WRITE_LOCK_VOID(cmd);
1504 	dm_bm_set_read_write(cmd->bm);
1505 	WRITE_UNLOCK(cmd);
1506 }
1507 
1508 int dm_cache_metadata_set_needs_check(struct dm_cache_metadata *cmd)
1509 {
1510 	int r;
1511 	struct dm_block *sblock;
1512 	struct cache_disk_superblock *disk_super;
1513 
1514 	WRITE_LOCK(cmd);
1515 	set_bit(NEEDS_CHECK, &cmd->flags);
1516 
1517 	r = superblock_lock(cmd, &sblock);
1518 	if (r) {
1519 		DMERR("couldn't read superblock");
1520 		goto out;
1521 	}
1522 
1523 	disk_super = dm_block_data(sblock);
1524 	disk_super->flags = cpu_to_le32(cmd->flags);
1525 
1526 	dm_bm_unlock(sblock);
1527 
1528 out:
1529 	WRITE_UNLOCK(cmd);
1530 	return r;
1531 }
1532 
1533 int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result)
1534 {
1535 	READ_LOCK(cmd);
1536 	*result = !!test_bit(NEEDS_CHECK, &cmd->flags);
1537 	READ_UNLOCK(cmd);
1538 
1539 	return 0;
1540 }
1541 
1542 int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
1543 {
1544 	int r;
1545 
1546 	WRITE_LOCK(cmd);
1547 	__destroy_persistent_data_objects(cmd);
1548 	r = __create_persistent_data_objects(cmd, false);
1549 	if (r)
1550 		cmd->fail_io = true;
1551 	WRITE_UNLOCK(cmd);
1552 
1553 	return r;
1554 }
1555