xref: /illumos-gate/usr/src/lib/libzfs/common/libzfs_import.c (revision c94be9439c4f0773ef60e2cec21d548359cfea20)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
 * Copyright 2015 RackTop Systems.
 * Copyright 2017 Nexenta Systems, Inc.
 */

/*
 * Pool import support functions.
 *
 * To import a pool, we rely on reading the configuration information from the
 * ZFS label of each device.  If we successfully read the label, then we
 * organize the configuration information in the following hierarchy:
 *
 *	pool guid -> toplevel vdev guid -> label txg
 *
 * Duplicate entries matching this same tuple will be discarded.  Once we have
 * examined every device, we pick the best label txg config for each toplevel
 * vdev.  We then arrange these toplevel vdevs into a complete pool config, and
 * update any paths that have changed.  Finally, we attempt to import the pool
 * using our derived config, and record the results.
 */
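
/*
 * Illustrative sketch (the struct and helper below are hypothetical, not
 * part of this file): picking the "best label txg" config amounts to
 * keeping, for each (pool guid, toplevel vdev guid) pair, whichever label
 * config recorded the highest txg.
 *
 *	typedef struct label_entry {
 *		uint64_t le_pool_guid;
 *		uint64_t le_vdev_guid;
 *		uint64_t le_txg;
 *		nvlist_t *le_config;
 *	} label_entry_t;
 *
 *	static boolean_t
 *	label_is_better(const label_entry_t *cur, const label_entry_t *cand)
 *	{
 *		if (cur == NULL)
 *			return (B_TRUE);
 *		return (cand->le_txg > cur->le_txg);
 *	}
 */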

#include <ctype.h>
#include <devid.h>
#include <dirent.h>
#include <errno.h>
#include <libintl.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/vtoc.h>
#include <sys/dktp/fdisk.h>
#include <sys/efi_partition.h>
#include <thread_pool.h>

#include <sys/vdev_impl.h>
#include <libzutil.h>

#include "libzfs.h"
#include "libzfs_impl.h"

/*
 * Determine whether the named pool is active and carries the given GUID.
 * The answer is returned in *isactive; the return value is 0 on success
 * and -1 on error.
 */
static int
pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
    boolean_t *isactive)
{
	zpool_handle_t *zhp;
	uint64_t theguid;

	if (zpool_open_silent(hdl, name, &zhp) != 0)
		return (-1);

	if (zhp == NULL) {
		*isactive = B_FALSE;
		return (0);
	}

	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
	    &theguid) == 0);

	zpool_close(zhp);

	*isactive = (theguid == guid);
	return (0);
}

/*
 * Ask the kernel to do a dry-run import of the given config and return the
 * refreshed config it derives.  Returns NULL on failure.
 */
static nvlist_t *
refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
{
	nvlist_t *nvl;
	zfs_cmd_t zc = {"\0"};
	int err, dstbuf_size;

	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0)
		return (NULL);

	/*
	 * The refreshed config can be considerably larger than the input
	 * config, so start with a generously sized destination buffer.
	 */
	dstbuf_size = MAX(CONFIG_BUF_MINSIZE, zc.zc_nvlist_conf_size * 4);

	if (zcmd_alloc_dst_nvlist(hdl, &zc, dstbuf_size) != 0) {
		zcmd_free_nvlists(&zc);
		return (NULL);
	}

	/*
	 * If the buffer is still too small, the ioctl fails with ENOMEM;
	 * grow the destination buffer and retry until the result fits.
	 */
	while ((err = zfs_ioctl(hdl, ZFS_IOC_POOL_TRYIMPORT,
	    &zc)) != 0 && errno == ENOMEM) {
		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
			zcmd_free_nvlists(&zc);
			return (NULL);
		}
	}

	if (err) {
		zcmd_free_nvlists(&zc);
		return (NULL);
	}

	if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) {
		zcmd_free_nvlists(&zc);
		return (NULL);
	}

	zcmd_free_nvlists(&zc);
	return (nvl);
}

/*
 * Thin adapters that expose the functions above through the generic
 * pool_config_ops_t callback table (see libzutil.h) used by the shared
 * import code.
 */
static nvlist_t *
refresh_config_libzfs(void *handle, nvlist_t *tryconfig)
{
	return (refresh_config((libzfs_handle_t *)handle, tryconfig));
}

static int
pool_active_libzfs(void *handle, const char *name, uint64_t guid,
    boolean_t *isactive)
{
	return (pool_active((libzfs_handle_t *)handle, name, guid, isactive));
}

const pool_config_ops_t libzfs_config_ops = {
	.pco_refresh_config = refresh_config_libzfs,
	.pco_pool_active = pool_active_libzfs,
};
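
/*
 * Sketch (illustrative only; the caller below is hypothetical): code that
 * receives the opaque handle plus this ops vector can probe pool state
 * without depending on libzfs types directly.
 *
 *	static int
 *	check_active(void *hdl, const pool_config_ops_t *ops,
 *	    const char *name, uint64_t guid)
 *	{
 *		boolean_t active = B_FALSE;
 *
 *		if (ops->pco_pool_active(hdl, name, guid, &active) != 0)
 *			return (-1);
 *		return (active ? 1 : 0);
 *	}
 */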

/*
 * Return the offset of the given label (l = 0 .. VDEV_LABELS - 1).  The
 * first two labels sit at the front of the device, the last two at the
 * end, just below the aligned device size.
 */
static uint64_t
label_offset(uint64_t size, int l)
{
	ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
	return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
	    0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
}
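
/*
 * Worked example (assuming the usual constants VDEV_LABELS == 4 and
 * sizeof (vdev_label_t) == 256K): for an aligned device size of 1G the
 * four labels fall at
 *
 *	l = 0:	0
 *	l = 1:	256K
 *	l = 2:	1G - 512K
 *	l = 3:	1G - 256K
 *
 * i.e. two copies at each end of the device, so labels survive damage to
 * either end.
 */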

/*
 * Given a file descriptor, clear (zero) the label information.
 */
int
zpool_clear_label(int fd)
{
	struct stat64 statbuf;
	int l;
	vdev_label_t *label;
	uint64_t size;
	int labels_cleared = 0;

	if (fstat64(fd, &statbuf) == -1)
		return (0);

	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);

	if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL)
		return (-1);

	for (l = 0; l < VDEV_LABELS; l++) {
		uint64_t state, guid;
		nvlist_t *config;

		if (pread64(fd, label, sizeof (vdev_label_t),
		    label_offset(size, l)) != sizeof (vdev_label_t)) {
			continue;
		}

		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
		    sizeof (label->vl_vdev_phys.vp_nvlist), &config, 0) != 0) {
			continue;
		}

		/* Skip labels which do not have a valid guid. */
		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
		    &guid) != 0 || guid == 0) {
			nvlist_free(config);
			continue;
		}

		/* Skip labels which are not in a known valid state. */
		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(config);
			continue;
		}

		nvlist_free(config);

		/*
		 * A valid label was found; overwrite this label's nvlist
		 * and uberblocks with zeros on disk.  This is done to prevent
		 * system utilities, like blkid, from incorrectly detecting a
		 * partial label.  The leading pad space is left untouched.
		 */
		memset(label, 0, sizeof (vdev_label_t));
		size_t label_size = sizeof (vdev_label_t) - (2 * VDEV_PAD_SIZE);

		if (pwrite64(fd, label, label_size, label_offset(size, l) +
		    (2 * VDEV_PAD_SIZE)) == label_size) {
			labels_cleared++;
		}
	}

	free(label);

	if (labels_cleared == 0)
		return (-1);

	return (0);
}
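
/*
 * Usage sketch (illustrative only; error handling is trimmed and the
 * device path is hypothetical): zero the ZFS labels on a device that is
 * about to be repurposed.
 *
 *	int fd = open("/dev/rdsk/c0t0d0s0", O_RDWR);
 *	if (fd >= 0) {
 *		if (zpool_clear_label(fd) != 0)
 *			(void) fprintf(stderr, "no labels cleared\n");
 *		(void) close(fd);
 *	}
 */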

/*
 * Determine whether the given vdev guid appears anywhere in the supplied
 * vdev tree, searching all children recursively.
 */
boolean_t
find_guid(nvlist_t *nv, uint64_t guid)
{
	uint64_t tmp;
	nvlist_t **child;
	uint_t c, children;

	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0);
	if (tmp == guid)
		return (B_TRUE);

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++)
			if (find_guid(child[c], guid))
				return (B_TRUE);
	}

	return (B_FALSE);
}

typedef struct aux_cbdata {
	const char	*cb_type;
	uint64_t	cb_guid;
	zpool_handle_t	*cb_zhp;
} aux_cbdata_t;

/*
 * zpool_iter() callback: look for an auxiliary (spare or l2cache) vdev
 * with the given guid in this pool.  On a match, keep the pool handle in
 * cb_zhp and return 1 to stop the iteration; otherwise close the handle
 * and keep looking.
 */
static int
find_aux(zpool_handle_t *zhp, void *data)
{
	aux_cbdata_t *cbp = data;
	nvlist_t **list;
	uint_t i, count;
	uint64_t guid;
	nvlist_t *nvroot;

	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);

	if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type,
	    &list, &count) == 0) {
		for (i = 0; i < count; i++) {
			verify(nvlist_lookup_uint64(list[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);
			if (guid == cbp->cb_guid) {
				cbp->cb_zhp = zhp;
				return (1);
			}
		}
	}

	zpool_close(zhp);
	return (0);
}

/*
 * Determine whether the device behind the given file descriptor is in use
 * by a pool.  If so, *inuse is set to B_TRUE, the pool state is returned
 * in *state, and the pool name in *namestr.  The name string is allocated
 * and must be freed by the caller.
 */
int
zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
    boolean_t *inuse)
{
	nvlist_t *config;
	char *name;
	boolean_t ret;
	uint64_t guid, vdev_guid;
	zpool_handle_t *zhp;
	nvlist_t *pool_config;
	uint64_t stateval, isspare;
	aux_cbdata_t cb = { 0 };
	boolean_t isactive;

	*inuse = B_FALSE;

	if (zpool_read_label(fd, &config, NULL) != 0 && errno == ENOMEM) {
		(void) no_memory(hdl);
		return (-1);
	}

	if (config == NULL)
		return (0);

	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    &stateval) == 0);
	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
	    &vdev_guid) == 0);

	if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);
	}

	switch (stateval) {
	case POOL_STATE_EXPORTED:
		/*
		 * A pool with an exported state may in fact be imported
		 * read-only, so check the in-core state to see if it's
		 * active and imported read-only.  If it is, set
		 * its state to active.
		 */
		if (pool_active(hdl, name, guid, &isactive) == 0 && isactive &&
		    (zhp = zpool_open_canfail(hdl, name)) != NULL) {
			if (zpool_get_prop_int(zhp, ZPOOL_PROP_READONLY, NULL))
				stateval = POOL_STATE_ACTIVE;

			/*
			 * All we needed the zpool handle for is the
			 * readonly prop check.
			 */
			zpool_close(zhp);
		}

		ret = B_TRUE;
		break;

	case POOL_STATE_ACTIVE:
		/*
		 * For an active pool, we have to determine if it's really part
		 * of a currently active pool (in which case the pool will exist
		 * and the guid will be the same), or whether it's part of an
		 * active pool that was disconnected without being explicitly
		 * exported.
		 */
		if (pool_active(hdl, name, guid, &isactive) != 0) {
			nvlist_free(config);
			return (-1);
		}

		if (isactive) {
			/*
			 * Because the device may have been removed while
			 * offlined, we only report it as active if the vdev is
			 * still present in the config.  Otherwise, pretend like
			 * it's not in use.
			 */
			if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
			    (pool_config = zpool_get_config(zhp, NULL))
			    != NULL) {
				nvlist_t *nvroot;

				verify(nvlist_lookup_nvlist(pool_config,
				    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
				ret = find_guid(nvroot, vdev_guid);
			} else {
				ret = B_FALSE;
			}

			/*
			 * If this is an active spare within another pool, we
			 * treat it like an unused hot spare.  This allows the
			 * user to create a pool with a hot spare that is
			 * currently in use within another pool.  Since we
			 * return B_TRUE, libdiskmgt will continue to prevent
			 * generic consumers from using the device.
			 */
			if (ret && nvlist_lookup_uint64(config,
			    ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
				stateval = POOL_STATE_SPARE;

			if (zhp != NULL)
				zpool_close(zhp);
		} else {
			stateval = POOL_STATE_POTENTIALLY_ACTIVE;
			ret = B_TRUE;
		}
		break;

	case POOL_STATE_SPARE:
		/*
		 * For a hot spare, it can be either definitively in use, or
		 * potentially active.  To determine if it's in use, we iterate
		 * over all pools in the system and search for one with a spare
		 * with a matching guid.
		 *
		 * Due to the shared nature of spares, we don't actually report
		 * the potentially active case as in use.  This means the user
		 * can freely create pools on the hot spares of exported pools,
		 * but to do otherwise makes the resulting code complicated, and
		 * we end up having to deal with this case anyway.
		 */
		cb.cb_zhp = NULL;
		cb.cb_guid = vdev_guid;
		cb.cb_type = ZPOOL_CONFIG_SPARES;
		if (zpool_iter(hdl, find_aux, &cb) == 1) {
			name = (char *)zpool_get_name(cb.cb_zhp);
			ret = B_TRUE;
		} else {
			ret = B_FALSE;
		}
		break;

	case POOL_STATE_L2CACHE:
		/*
		 * Check if any pool is currently using this l2cache device.
		 */
		cb.cb_zhp = NULL;
		cb.cb_guid = vdev_guid;
		cb.cb_type = ZPOOL_CONFIG_L2CACHE;
		if (zpool_iter(hdl, find_aux, &cb) == 1) {
			name = (char *)zpool_get_name(cb.cb_zhp);
			ret = B_TRUE;
		} else {
			ret = B_FALSE;
		}
		break;

	default:
		ret = B_FALSE;
	}

	if (ret) {
		if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
			if (cb.cb_zhp)
				zpool_close(cb.cb_zhp);
			nvlist_free(config);
			return (-1);
		}
		*state = (pool_state_t)stateval;
	}

	if (cb.cb_zhp)
		zpool_close(cb.cb_zhp);

	nvlist_free(config);
	*inuse = ret;
	return (0);
}
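
/*
 * Usage sketch (illustrative only; the handle setup is abbreviated and the
 * device path is hypothetical): ask whether a device carries a live pool
 * before reusing it.
 *
 *	libzfs_handle_t *hdl = libzfs_init();
 *	pool_state_t state;
 *	char *name = NULL;
 *	boolean_t inuse = B_FALSE;
 *	int fd = open("/dev/rdsk/c0t0d0s0", O_RDONLY);
 *
 *	if (fd >= 0 && zpool_in_use(hdl, fd, &state, &name, &inuse) == 0 &&
 *	    inuse) {
 *		(void) printf("device is part of pool '%s' (state %d)\n",
 *		    name, (int)state);
 *		free(name);
 *	}
 */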
468