1#!/usr/bin/ksh -p
2
3#
4# This file and its contents are supplied under the terms of the
5# Common Development and Distribution License ("CDDL"), version 1.0.
6# You may only use this file in accordance with the terms of version
7# 1.0 of the CDDL.
8#
9# A full copy of the text of the CDDL should have accompanied this
10# source.  A copy of the CDDL is also available via the Internet at
11# http://www.illumos.org/license/CDDL.
12#
13
14#
15# Copyright (c) 2016 by Delphix. All rights reserved.
16#
17
18. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
19
20#
21# DESCRIPTION:
22#	It should be possible to rewind a pool beyond a device replacement.
23#
24# STRATEGY:
25#	1. Create a pool.
26#	2. Generate files and remember their md5sum.
27#	3. Sync a few times and note last synced txg.
28#	4. Take a snapshot to make sure old blocks are not overwritten.
29#	5. Initiate device replacement and export the pool. Special care must
30#	   be taken so that resilvering doesn't complete before the export.
31#	6. Test 1: Rewind pool to noted txg and then verify data checksums.
32#	   Import it read-only so that we do not overwrite blocks in later txgs.
33#	7. Re-import pool at latest txg and let the replacement finish.
#	8. Export the pool and remove the new device - we shouldn't need it.
35#	9. Test 2: Rewind pool to noted txg and then verify data checksums.
36#
37# STRATEGY TO SLOW DOWN RESILVERING:
#	1. Reduce zfs_txg_timeout, which controls how long we can resilver
#	   during each sync.
40#	2. Add data to pool
41#	3. Re-import the pool so that data isn't cached
42#	4. Use zinject to slow down device I/O
43#	5. Trigger the resilvering
44#	6. Use spa freeze to stop writing to the pool.
45#	7. Clear zinject events (needed to export the pool)
46#	8. Export the pool
47#
48# DISCLAIMER:
49#	This test can fail since nothing guarantees that old MOS blocks aren't
50#	overwritten. Snapshots protect datasets and data files but not the MOS.
51#	sync_some_data_a_few_times interleaves file data and MOS data for a few
52#	txgs, thus increasing the odds that some txgs will have their MOS data
53#	left untouched.
54#
55
56verify_runnable "global"
57
58ZFS_TXG_TIMEOUT=""
59
#
# Cleanup handler registered with log_onexit.
#
# Restores zfs_txg_timeout to the value saved in ZFS_TXG_TIMEOUT (only when a
# value has actually been saved), removes the backup device directory and
# then runs the shared cleanup routine.
#
function custom_cleanup
{
	# Revert zfs_txg_timeout to its saved value. ZFS_TXG_TIMEOUT is empty
	# until the original value has been recorded; in that case there is
	# nothing to restore and set_zfs_txg_timeout must not be called
	# without an argument.
	if [[ -n $ZFS_TXG_TIMEOUT ]]; then
		log_must set_zfs_txg_timeout $ZFS_TXG_TIMEOUT
	fi
	log_must rm -rf $BACKUP_DEVICE_DIR
	cleanup
}
68
69log_onexit custom_cleanup
70
#
# Run the "rewind beyond a device replacement" scenario for one pool layout.
#
# Arguments:
#	$1 (poolcreate)     - vdev specification passed to 'zpool create'; it
#	                      is also the configuration expected after each
#	                      rewind.
#	$2 (replacevdev)    - device that gets replaced.
#	$3 (replaceby)      - device that replaces $2.
#	$4 (poolfinalstate) - configuration waited for once the replacement
#	                      is allowed to finish.
#	$5 (zinjectdevices) - whitespace-separated list of devices that get a
#	                      zinject delay before the replacement starts.
#
# $poolcreate and $zinjectdevices are expanded unquoted on purpose so that
# they split into separate words.
#
function test_replace_vdev
{
	typeset poolcreate="$1"
	typeset replacevdev="$2"
	typeset replaceby="$3"
	typeset poolfinalstate="$4"
	typeset zinjectdevices="$5"

	log_note "$0: pool '$poolcreate', replace $replacevdev by $replaceby."

	log_must zpool create $TESTPOOL1 $poolcreate

	# generate data and checksum it
	log_must generate_data $TESTPOOL1 $MD5FILE

	# add more data so that resilver takes longer
	log_must write_some_data $TESTPOOL1

	# Syncing a few times while writing new data increases the odds that
	# MOS metadata for some of the txgs will survive.
	log_must sync_some_data_a_few_times $TESTPOOL1
	# Remember the last synced txg: it is the rewind target (-T) of both
	# imports below.
	typeset txg
	txg=$(get_last_txg_synced $TESTPOOL1)
	log_must zfs snapshot -r $TESTPOOL1@snap1

	# This should not free original data.
	log_must overwrite_data $TESTPOOL1 ""

	# Steps to ensure resilvering happens very slowly.
	# Export and re-import first so that the data isn't cached.
	log_must zpool export $TESTPOOL1
	log_must zpool import -d $DEVICE_DIR $TESTPOOL1
	typeset device
	for device in $zinjectdevices ; do
		# Slow down I/O on this device; zinject's output is discarded.
		log_must zinject -d $device -D 200:1 $TESTPOOL1 > /dev/null
	done
	log_must zpool replace $TESTPOOL1 $replacevdev $replaceby

	# We must disable zinject in order to export the pool, so we freeze
	# it first to prevent writing out subsequent resilvering progress.
	log_must zpool freeze $TESTPOOL1
	# Confirm pool is still replacing
	log_must pool_is_replacing $TESTPOOL1
	log_must zinject -c all > /dev/null
	log_must zpool export $TESTPOOL1

	############################################################
	# Test 1: rewind while device is resilvering.
	# Import read only to avoid overwriting more recent blocks.
	############################################################
	log_must zpool import -d $DEVICE_DIR -o readonly=on -T $txg $TESTPOOL1
	# After the rewind the pool must show its original layout again.
	log_must check_pool_config $TESTPOOL1 "$poolcreate"

	log_must verify_data_md5sums $MD5FILE

	log_must zpool export $TESTPOOL1

	# Import pool at latest txg to finish the resilvering
	log_must zpool import -d $DEVICE_DIR $TESTPOOL1
	log_must overwrite_data $TESTPOOL1 ""
	log_must wait_for_pool_config $TESTPOOL1 "$poolfinalstate"
	log_must zpool export $TESTPOOL1

	# Move out the new device
	# (it is restored into $DEVICE_DIR during cleanup below, so this
	# presumably takes it out of the directory searched by the import).
	log_must mv $replaceby $BACKUP_DEVICE_DIR/

	############################################################
	# Test 2: rewind after device has been replaced.
	# Import read-write since we won't need the pool anymore.
	############################################################
	log_must zpool import -d $DEVICE_DIR -T $txg $TESTPOOL1
	log_must check_pool_config $TESTPOOL1 "$poolcreate"

	log_must verify_data_md5sums $MD5FILE

	# Cleanup
	log_must zpool destroy $TESTPOOL1
	# Restore the device we moved out
	log_must mv "$BACKUP_DEVICE_DIR/$(basename $replaceby)" $DEVICE_DIR/
	# Fast way to clear vdev labels
	log_must zpool create -f $TESTPOOL2 $VDEV0 $VDEV1 $VDEV2 $VDEV3 $VDEV4
	log_must zpool destroy $TESTPOOL2

	# Blank line in the log to separate consecutive scenarios.
	log_note ""
}
155
156log_must mkdir $BACKUP_DEVICE_DIR
157# Make the devices bigger to reduce chances of overwriting MOS metadata.
158increase_device_sizes $(( FILE_SIZE * 4 ))
159
160# We set zfs_txg_timeout to 1 to reduce resilvering time at each sync.
161ZFS_TXG_TIMEOUT=$(get_zfs_txg_timeout)
162set_zfs_txg_timeout 1
163
164test_replace_vdev "$VDEV0 $VDEV1" \
165    "$VDEV1" "$VDEV2" \
166    "$VDEV0 $VDEV2" \
167    "$VDEV0 $VDEV1"
168
169test_replace_vdev "mirror $VDEV0 $VDEV1" \
170	"$VDEV1" "$VDEV2" \
171	"mirror $VDEV0 $VDEV2" \
172	"$VDEV0 $VDEV1"
173
174test_replace_vdev "raidz $VDEV0 $VDEV1 $VDEV2" \
175	"$VDEV1" "$VDEV3" \
176	"raidz $VDEV0 $VDEV3 $VDEV2" \
177	"$VDEV0 $VDEV1 $VDEV2"
178
179set_zfs_txg_timeout $ZFS_TXG_TIMEOUT
180
181log_pass "zpool import rewind after device replacement passed."
182