xref: /illumos-gate/usr/src/uts/common/os/sunmdi.c (revision d5ebc4938a50bb2fb1914062e396761dc9161a51)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2014 Nexenta Systems Inc. All rights reserved.
24  * Copyright (c) 2018, Joyent, Inc.
25  * Copyright 2023 Oxide Computer Company
26  */
27 
28 /*
29  * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a
30  * more detailed discussion of the overall mpxio architecture.
31  *
32  * Default locking order:
33  *
34  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci::vh_phci_mutex))
35  * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci::vh_client_mutex))
36  * _NOTE(LOCK_ORDER(mdi_vhci::vh_phci_mutex, mdi_phci::ph_mutex))
37  * _NOTE(LOCK_ORDER(mdi_vhci::vh_client_mutex, mdi_client::ct_mutex))
38  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
39  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
40  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
41  */
42 
43 #include <sys/note.h>
44 #include <sys/types.h>
45 #include <sys/varargs.h>
46 #include <sys/param.h>
47 #include <sys/errno.h>
48 #include <sys/uio.h>
49 #include <sys/buf.h>
50 #include <sys/modctl.h>
51 #include <sys/open.h>
52 #include <sys/kmem.h>
53 #include <sys/poll.h>
54 #include <sys/conf.h>
55 #include <sys/bootconf.h>
56 #include <sys/cmn_err.h>
57 #include <sys/stat.h>
58 #include <sys/ddi.h>
59 #include <sys/sunddi.h>
60 #include <sys/ddipropdefs.h>
61 #include <sys/sunndi.h>
62 #include <sys/ndi_impldefs.h>
63 #include <sys/promif.h>
64 #include <sys/sunmdi.h>
65 #include <sys/mdi_impldefs.h>
66 #include <sys/taskq.h>
67 #include <sys/epm.h>
68 #include <sys/sunpm.h>
69 #include <sys/modhash.h>
70 #include <sys/disp.h>
71 #include <sys/autoconf.h>
72 #include <sys/sysmacros.h>
73 
74 #ifdef	DEBUG
75 #include <sys/debug.h>
76 int	mdi_debug = 1;
77 int	mdi_debug_logonly = 0;
78 #define	MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel))	i_mdi_log pargs
79 #define	MDI_WARN	CE_WARN, __func__
80 #define	MDI_NOTE	CE_NOTE, __func__
81 #define	MDI_CONT	CE_CONT, __func__
82 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
83 #else	/* !DEBUG */
84 #define	MDI_DEBUG(dbglevel, pargs)
85 #endif	/* DEBUG */
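/*
 * For illustration, the pargs argument to MDI_DEBUG is a parenthesized
 * tuple handed straight to i_mdi_log(); the MDI_WARN/MDI_NOTE/MDI_CONT
 * helpers supply its first two members (the cmn_err level and __func__).
 * A typical call site in this file looks like:
 *
 *	MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
 */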
86 int	mdi_debug_consoleonly = 0;
87 int	mdi_delay = 3;
88 
89 extern pri_t	minclsyspri;
90 extern int	modrootloaded;
91 
92 /*
93  * Global mutex:
94  * Protects vHCI list and structure members.
95  */
96 kmutex_t	mdi_mutex;
97 
98 /*
99  * Registered vHCI class driver lists
100  */
101 int		mdi_vhci_count;
102 mdi_vhci_t	*mdi_vhci_head;
103 mdi_vhci_t	*mdi_vhci_tail;
104 
105 /*
106  * Client Hash Table size
107  */
108 static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
109 
110 /*
111  * taskq interface definitions
112  */
113 #define	MDI_TASKQ_N_THREADS	8
114 #define	MDI_TASKQ_PRI		minclsyspri
115 #define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
116 #define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)
117 
118 taskq_t				*mdi_taskq;
119 static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
120 
121 #define	TICKS_PER_SECOND	(drv_usectohz(1000000))
122 
123 /*
124  * The data should be "quiet" for this interval (in seconds) before the
125  * vhci cached data is flushed to the disk.
126  */
127 static int mdi_vhcache_flush_delay = 10;
128 
129 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
130 static int mdi_vhcache_flush_daemon_idle_time = 60;
131 
132 /*
133  * MDI falls back to discovery of all paths when a bus_config_one fails.
134  * The following parameters can be used to tune this operation.
135  *
136  * mdi_path_discovery_boot
137  *	Number of times path discovery will be attempted during early boot.
138  *	There is probably no reason to ever set this value greater than one.
139  *
140  * mdi_path_discovery_postboot
141  *	Number of times path discovery will be attempted after early boot.
142  *	Set it to a minimum of two to allow for discovery of iscsi paths which
143  *	may happen very late during booting.
144  *
145  * mdi_path_discovery_interval
146  *	Minimum number of seconds MDI will wait between successive discovery
147  *	of all paths. Set it to -1 to disable discovery of all paths.
148  */
149 static int mdi_path_discovery_boot = 1;
150 static int mdi_path_discovery_postboot = 2;
151 static int mdi_path_discovery_interval = 10;
152 
153 /*
154  * number of seconds the asynchronous configuration thread will sleep idle
155  * before exiting.
156  */
157 static int mdi_async_config_idle_time = 600;
158 
159 static int mdi_bus_config_cache_hash_size = 256;
160 
161 /* turns off multithreaded configuration for certain operations */
162 static int mdi_mtc_off = 0;
163 
164 /*
165  * The "path" to a pathinfo node is identical to the /devices path to a
166  * devinfo node had the device been enumerated under a pHCI instead of
167  * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
168  * This association persists across create/delete of the pathinfo nodes,
169  * but not across reboot.
170  */
171 static uint_t		mdi_pathmap_instance = 1;	/* 0 -> any path */
172 static int		mdi_pathmap_hash_size = 256;
173 static kmutex_t		mdi_pathmap_mutex;
174 static mod_hash_t	*mdi_pathmap_bypath;		/* "path"->instance */
175 static mod_hash_t	*mdi_pathmap_byinstance;	/* instance->"path" */
176 static mod_hash_t	*mdi_pathmap_sbyinstance;	/* inst->shortpath */
177 
178 /*
179  * MDI component property name/value string definitions
180  */
181 const char		*mdi_component_prop = "mpxio-component";
182 const char		*mdi_component_prop_vhci = "vhci";
183 const char		*mdi_component_prop_phci = "phci";
184 const char		*mdi_component_prop_client = "client";
185 
186 /*
187  * MDI client global unique identifier property name
188  */
189 const char		*mdi_client_guid_prop = "client-guid";
190 
191 /*
192  * MDI client load balancing property name/value string definitions
193  */
194 const char		*mdi_load_balance = "load-balance";
195 const char		*mdi_load_balance_none = "none";
196 const char		*mdi_load_balance_rr = "round-robin";
197 const char		*mdi_load_balance_lba = "logical-block";
198 
199 /*
200  * Obsolete vHCI class definition; to be removed after Leadville update
201  */
202 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
203 
204 static char vhci_greeting[] =
205 	"\tThere already exists one vHCI driver for class %s\n"
206 	"\tOnly one vHCI driver for each class is allowed\n";
207 
208 /*
209  * Static function prototypes
210  */
211 static int		i_mdi_phci_offline(dev_info_t *, uint_t);
212 static int		i_mdi_client_offline(dev_info_t *, uint_t);
213 static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
214 static void		i_mdi_phci_post_detach(dev_info_t *,
215 			    ddi_detach_cmd_t, int);
216 static int		i_mdi_client_pre_detach(dev_info_t *,
217 			    ddi_detach_cmd_t);
218 static void		i_mdi_client_post_detach(dev_info_t *,
219 			    ddi_detach_cmd_t, int);
220 static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
221 static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
222 static int		i_mdi_lba_lb(mdi_client_t *ct,
223 			    mdi_pathinfo_t **ret_pip, struct buf *buf);
224 static void		i_mdi_pm_hold_client(mdi_client_t *, int);
225 static void		i_mdi_pm_rele_client(mdi_client_t *, int);
226 static void		i_mdi_pm_reset_client(mdi_client_t *);
227 static int		i_mdi_power_all_phci(mdi_client_t *);
228 static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);
229 
230 
231 /*
232  * Internal mdi_pathinfo node functions
233  */
234 static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
235 
236 static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
237 static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
238 static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
239 static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
240 static void		i_mdi_phci_unlock(mdi_phci_t *);
241 static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
242 static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
243 static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
244 static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
245 			    mdi_client_t *);
246 static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
247 static void		i_mdi_client_remove_path(mdi_client_t *,
248 			    mdi_pathinfo_t *);
249 
250 static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
251 			    mdi_pathinfo_state_t, int);
252 static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
253 static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
254 			    char **, int);
255 static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
256 static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
257 static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
258 static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
259 static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
260 static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
261 static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
262 static void		i_mdi_client_update_state(mdi_client_t *);
263 static int		i_mdi_client_compute_state(mdi_client_t *,
264 			    mdi_phci_t *);
265 static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
266 static void		i_mdi_client_unlock(mdi_client_t *);
267 static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
268 static mdi_client_t	*i_devi_get_client(dev_info_t *);
269 /*
270  * NOTE: this will be removed once the NWS files are changed to use the new
271  * mdi_{enable,disable}_path interfaces
272  */
273 static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
274 				int, int);
275 static mdi_pathinfo_t	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
276 				mdi_vhci_t *vh, int flags, int op);
277 /*
278  * Failover related function prototypes
279  */
280 static int		i_mdi_failover(void *);
281 
282 /*
283  * misc internal functions
284  */
285 static int		i_mdi_get_hash_key(char *);
286 static int		i_map_nvlist_error_to_mdi(int);
287 static void		i_mdi_report_path_state(mdi_client_t *,
288 			    mdi_pathinfo_t *);
289 
290 static void		setup_vhci_cache(mdi_vhci_t *);
291 static int		destroy_vhci_cache(mdi_vhci_t *);
292 static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
293 static boolean_t	stop_vhcache_flush_thread(void *, int);
294 static void		free_string_array(char **, int);
295 static void		free_vhcache_phci(mdi_vhcache_phci_t *);
296 static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
297 static void		free_vhcache_client(mdi_vhcache_client_t *);
298 static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
299 static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
300 static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
301 static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
302 static void		vhcache_pi_add(mdi_vhci_config_t *,
303 			    struct mdi_pathinfo *);
304 static void		vhcache_pi_remove(mdi_vhci_config_t *,
305 			    struct mdi_pathinfo *);
306 static void		free_phclient_path_list(mdi_phys_path_t *);
307 static void		sort_vhcache_paths(mdi_vhcache_client_t *);
308 static int		flush_vhcache(mdi_vhci_config_t *, int);
309 static void		vhcache_dirty(mdi_vhci_config_t *);
310 static void		free_async_client_config(mdi_async_client_config_t *);
311 static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
312 static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
313 static nvlist_t		*read_on_disk_vhci_cache(char *);
314 extern int		fread_nvlist(char *, nvlist_t **);
315 extern int		fwrite_nvlist(char *, nvlist_t *);
316 
317 /* called once when first vhci registers with mdi */
318 static void
319 i_mdi_init()
320 {
321 	static int initialized = 0;
322 
323 	if (initialized)
324 		return;
325 	initialized = 1;
326 
327 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
328 
329 	/* Create our taskq resources */
330 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
331 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
332 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
333 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
334 
335 	/* Allocate ['path_instance' <-> "path"] maps */
336 	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
337 	mdi_pathmap_bypath = mod_hash_create_strhash(
338 	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
339 	    mod_hash_null_valdtor);
340 	mdi_pathmap_byinstance = mod_hash_create_idhash(
341 	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
342 	    mod_hash_null_valdtor);
343 	mdi_pathmap_sbyinstance = mod_hash_create_idhash(
344 	    "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
345 	    mod_hash_null_valdtor);
346 }
347 
348 /*
349  * mdi_get_component_type():
350  *		Return mpxio component type
351  * Return Values:
352  *		MDI_COMPONENT_NONE
353  *		MDI_COMPONENT_VHCI
354  *		MDI_COMPONENT_PHCI
355  *		MDI_COMPONENT_CLIENT
356  * XXX This doesn't work under multi-level MPxIO and should be
357  *	removed when clients migrate to the mdi_component_is_*() interfaces.
358  */
359 int
360 mdi_get_component_type(dev_info_t *dip)
361 {
362 	return (DEVI(dip)->devi_mdi_component);
363 }
364 
365 /*
366  * mdi_vhci_register():
367  *		Register a vHCI module with the mpxio framework
368  *		mdi_vhci_register() is called by vHCI drivers to register the
369  *		'class_driver' vHCI driver and its MDI entrypoints with the
370  *		mpxio framework.  The vHCI driver must call this interface as
371  *		part of its attach(9e) handler.
372  *		Competing threads may call mdi_vhci_register() concurrently as
373  *		the vHCI drivers are loaded and attached as a result of pHCI
374  *		driver instance registration (mdi_phci_register()) with the
375  *		framework.
376  * Return Values:
377  *		MDI_SUCCESS
378  *		MDI_FAILURE
379  */
380 /*ARGSUSED*/
381 int
382 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
383     int flags)
384 {
385 	mdi_vhci_t		*vh = NULL;
386 
387 	/* Registrant can't be older */
388 	ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);
389 
390 #ifdef DEBUG
391 	/*
392 	 * IB nexus driver is loaded only when IB hardware is present.
393 	 * To be able to do this, the loading and attaching of the IB nexus
394 	 * driver must be driven (especially when IB hardware is dynamically
395 	 * plugged in) while an IB HCA driver (pHCI) is being attached.
396 	 * Unfortunately this runs into the limitations of devfs, as there
397 	 * seems to be no clean way to drive configuration of one devfs
398 	 * subtree from another devfs subtree. Hence, do not ASSERT
399 	 * for IB.
400 	 */
401 	if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
402 		ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
403 #endif
404 
405 	i_mdi_init();
406 
407 	mutex_enter(&mdi_mutex);
408 	/*
409 	 * Scan for already registered vhci
410 	 */
411 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
412 		if (strcmp(vh->vh_class, class) == 0) {
413 			/*
414 			 * vHCI has already been created.  Check for valid
415 			 * vHCI ops registration.  We only support one vHCI
416 			 * module per class
417 			 */
418 			if (vh->vh_ops != NULL) {
419 				mutex_exit(&mdi_mutex);
420 				cmn_err(CE_NOTE, vhci_greeting, class);
421 				return (MDI_FAILURE);
422 			}
423 			break;
424 		}
425 	}
426 
427 	/*
428 	 * if not yet created, create the vHCI component
429 	 */
430 	if (vh == NULL) {
431 		struct client_hash	*hash = NULL;
432 		char			*load_balance;
433 
434 		/*
435 		 * Allocate and initialize the mdi extensions
436 		 */
437 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
438 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
439 		    KM_SLEEP);
440 		vh->vh_client_table = hash;
441 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
442 		(void) strcpy(vh->vh_class, class);
443 		vh->vh_lb = LOAD_BALANCE_RR;
444 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
445 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
446 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
447 				vh->vh_lb = LOAD_BALANCE_NONE;
448 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
449 				    == 0) {
450 				vh->vh_lb = LOAD_BALANCE_LBA;
451 			}
452 			ddi_prop_free(load_balance);
453 		}
454 
455 		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
456 		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
457 
458 		/*
459 		 * Store the vHCI ops vectors
460 		 */
461 		vh->vh_dip = vdip;
462 		vh->vh_ops = vops;
463 
464 		setup_vhci_cache(vh);
465 
466 		if (mdi_vhci_head == NULL) {
467 			mdi_vhci_head = vh;
468 		}
469 		if (mdi_vhci_tail) {
470 			mdi_vhci_tail->vh_next = vh;
471 		}
472 		mdi_vhci_tail = vh;
473 		mdi_vhci_count++;
474 	}
475 
476 	/*
477 	 * Claim the devfs node as a vhci component
478 	 */
479 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
480 
481 	/*
482 	 * Initialize our back reference from dev_info node
483 	 */
484 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
485 	mutex_exit(&mdi_mutex);
486 	return (MDI_SUCCESS);
487 }
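/*
 * For illustration only: a minimal sketch of a hypothetical vHCI driver
 * ("xvhci") registering from its attach(9e) handler.  The driver name and
 * failover callback are placeholders; only vo_revision and vo_failover are
 * shown, the remaining vHCI ops entry points are omitted from this sketch.
 */
#if 0
static int	xvhci_failover(dev_info_t *, dev_info_t *, int);

static mdi_vhci_ops_t xvhci_opinfo = {
	.vo_revision	= MDI_VHCI_OPS_REV,
	.vo_failover	= xvhci_failover
	/* remaining vHCI entry points omitted from this sketch */
};

static int
xvhci_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	/* Only one vHCI per class may register; a duplicate fails here. */
	if (mdi_vhci_register(MDI_HCI_CLASS_SCSI, dip, &xvhci_opinfo, 0) !=
	    MDI_SUCCESS)
		return (DDI_FAILURE);

	return (DDI_SUCCESS);
}
#endif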
488 
489 /*
490  * mdi_vhci_unregister():
491  *		Unregister a vHCI module from mpxio framework
492  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
493  *		of a vhci to unregister it from the framework.
494  * Return Values:
495  *		MDI_SUCCESS
496  *		MDI_FAILURE
497  */
498 /*ARGSUSED*/
499 int
500 mdi_vhci_unregister(dev_info_t *vdip, int flags)
501 {
502 	mdi_vhci_t	*found, *vh, *prev = NULL;
503 
504 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
505 
506 	/*
507 	 * Check for invalid VHCI
508 	 */
509 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
510 		return (MDI_FAILURE);
511 
512 	/*
513 	 * Scan the list of registered vHCIs for a match
514 	 */
515 	mutex_enter(&mdi_mutex);
516 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
517 		if (found == vh)
518 			break;
519 		prev = found;
520 	}
521 
522 	if (found == NULL) {
523 		mutex_exit(&mdi_mutex);
524 		return (MDI_FAILURE);
525 	}
526 
527 	/*
528 	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
529 	 * should have been unregistered before a vHCI can be
530 	 * unregistered.
531 	 */
532 	MDI_VHCI_PHCI_LOCK(vh);
533 	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
534 		MDI_VHCI_PHCI_UNLOCK(vh);
535 		mutex_exit(&mdi_mutex);
536 		return (MDI_FAILURE);
537 	}
538 	MDI_VHCI_PHCI_UNLOCK(vh);
539 
540 	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
541 		mutex_exit(&mdi_mutex);
542 		return (MDI_FAILURE);
543 	}
544 
545 	/*
546 	 * Remove the vHCI from the global list
547 	 */
548 	if (vh == mdi_vhci_head) {
549 		mdi_vhci_head = vh->vh_next;
550 	} else {
551 		prev->vh_next = vh->vh_next;
552 	}
553 	if (vh == mdi_vhci_tail) {
554 		mdi_vhci_tail = prev;
555 	}
556 	mdi_vhci_count--;
557 	mutex_exit(&mdi_mutex);
558 
559 	vh->vh_ops = NULL;
560 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
561 	DEVI(vdip)->devi_mdi_xhci = NULL;
562 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
563 	kmem_free(vh->vh_client_table,
564 	    mdi_client_table_size * sizeof (struct client_hash));
565 	mutex_destroy(&vh->vh_phci_mutex);
566 	mutex_destroy(&vh->vh_client_mutex);
567 
568 	kmem_free(vh, sizeof (mdi_vhci_t));
569 	return (MDI_SUCCESS);
570 }
571 
572 /*
573  * i_mdi_vhci_class2vhci():
574  *		Look for a matching vHCI module given a vHCI class name
575  * Return Values:
576  *		Handle to a vHCI component
577  *		NULL
578  */
579 static mdi_vhci_t *
580 i_mdi_vhci_class2vhci(char *class)
581 {
582 	mdi_vhci_t	*vh = NULL;
583 
584 	ASSERT(!MUTEX_HELD(&mdi_mutex));
585 
586 	mutex_enter(&mdi_mutex);
587 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
588 		if (strcmp(vh->vh_class, class) == 0) {
589 			break;
590 		}
591 	}
592 	mutex_exit(&mdi_mutex);
593 	return (vh);
594 }
595 
596 /*
597  * i_devi_get_vhci():
598  *		Utility function to get the handle to a vHCI component
599  * Return Values:
600  *		Handle to a vHCI component
601  *		NULL
602  */
603 mdi_vhci_t *
604 i_devi_get_vhci(dev_info_t *vdip)
605 {
606 	mdi_vhci_t	*vh = NULL;
607 	if (MDI_VHCI(vdip)) {
608 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
609 	}
610 	return (vh);
611 }
612 
613 /*
614  * mdi_phci_register():
615  *		Register a pHCI module with mpxio framework
616  *		mdi_phci_register() is called by pHCI drivers to register with
617  *		the mpxio framework and a specific 'class_driver' vHCI.  The
618  *		pHCI driver must call this interface as part of its attach(9e)
619  *		handler.
620  * Return Values:
621  *		MDI_SUCCESS
622  *		MDI_FAILURE
623  */
624 /*ARGSUSED*/
625 int
626 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
627 {
628 	mdi_phci_t		*ph;
629 	mdi_vhci_t		*vh;
630 	char			*data;
631 
632 	/*
633 	 * Some subsystems, like fcp, perform pHCI registration from a
634 	 * different thread than the one doing the pHCI attach(9E) - the
635 	 * driver attach code is waiting for this other thread to complete.
636 	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
637 	 * (indicating that some thread has done an ndi_devi_enter of parent)
638 	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
639 	 */
640 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
641 
642 	/*
643 	 * Check for mpxio-disable property. Enable mpxio if the property is
644 	 * missing or not set to "yes".
645 	 * If the property is set to "yes" then emit a brief message.
646 	 */
647 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
648 	    &data) == DDI_SUCCESS)) {
649 		if (strcmp(data, "yes") == 0) {
650 			MDI_DEBUG(1, (MDI_CONT, pdip,
651 			    "?multipath capabilities disabled via %s.conf.",
652 			    ddi_driver_name(pdip)));
653 			ddi_prop_free(data);
654 			return (MDI_FAILURE);
655 		}
656 		ddi_prop_free(data);
657 	}
658 
659 	/*
660 	 * Search for a matching vHCI
661 	 */
662 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
663 	if (vh == NULL) {
664 		return (MDI_FAILURE);
665 	}
666 
667 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
668 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
669 	ph->ph_dip = pdip;
670 	ph->ph_vhci = vh;
671 	ph->ph_next = NULL;
672 	ph->ph_unstable = 0;
673 	ph->ph_vprivate = 0;
674 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
675 
676 	MDI_PHCI_LOCK(ph);
677 	MDI_PHCI_SET_POWER_UP(ph);
678 	MDI_PHCI_UNLOCK(ph);
679 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
680 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
681 
682 	vhcache_phci_add(vh->vh_config, ph);
683 
684 	MDI_VHCI_PHCI_LOCK(vh);
685 	if (vh->vh_phci_head == NULL) {
686 		vh->vh_phci_head = ph;
687 	}
688 	if (vh->vh_phci_tail) {
689 		vh->vh_phci_tail->ph_next = ph;
690 	}
691 	vh->vh_phci_tail = ph;
692 	vh->vh_phci_count++;
693 	MDI_VHCI_PHCI_UNLOCK(vh);
694 
695 	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
696 	return (MDI_SUCCESS);
697 }
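/*
 * For illustration, a hypothetical pHCI driver ("xphci") registering with
 * the SCSI vHCI class from attach(9e), and undoing the registration if a
 * later attach step fails.  xphci_hw_init() is a placeholder for the
 * driver's own hardware setup.
 */
#if 0
static int
xphci_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	if (mdi_phci_register(MDI_HCI_CLASS_SCSI, dip, 0) != MDI_SUCCESS)
		return (DDI_FAILURE);

	if (xphci_hw_init(dip) != DDI_SUCCESS) {	/* hypothetical */
		(void) mdi_phci_unregister(dip, 0);
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}
#endif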
698 
699 /*
700  * mdi_phci_unregister():
701  *		Unregister a pHCI module from mpxio framework
702  *		mdi_phci_unregister() is called by the pHCI drivers from their
703  *		detach(9E) handler to unregister their instances from the
704  *		framework.
705  * Return Values:
706  *		MDI_SUCCESS
707  *		MDI_FAILURE
708  */
709 /*ARGSUSED*/
710 int
711 mdi_phci_unregister(dev_info_t *pdip, int flags)
712 {
713 	mdi_vhci_t		*vh;
714 	mdi_phci_t		*ph;
715 	mdi_phci_t		*tmp;
716 	mdi_phci_t		*prev = NULL;
717 	mdi_pathinfo_t		*pip;
718 
719 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
720 
721 	ph = i_devi_get_phci(pdip);
722 	if (ph == NULL) {
723 		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
724 		return (MDI_FAILURE);
725 	}
726 
727 	vh = ph->ph_vhci;
728 	ASSERT(vh != NULL);
729 	if (vh == NULL) {
730 		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
731 		return (MDI_FAILURE);
732 	}
733 
734 	MDI_VHCI_PHCI_LOCK(vh);
735 	tmp = vh->vh_phci_head;
736 	while (tmp) {
737 		if (tmp == ph) {
738 			break;
739 		}
740 		prev = tmp;
741 		tmp = tmp->ph_next;
742 	}
743 
744 	if (ph == vh->vh_phci_head) {
745 		vh->vh_phci_head = ph->ph_next;
746 	} else {
747 		prev->ph_next = ph->ph_next;
748 	}
749 
750 	if (ph == vh->vh_phci_tail) {
751 		vh->vh_phci_tail = prev;
752 	}
753 
754 	vh->vh_phci_count--;
755 	MDI_VHCI_PHCI_UNLOCK(vh);
756 
757 	/* Walk remaining pathinfo nodes and disassociate them from pHCI */
758 	MDI_PHCI_LOCK(ph);
759 	for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
760 	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
761 		MDI_PI(pip)->pi_phci = NULL;
762 	MDI_PHCI_UNLOCK(ph);
763 
764 	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
765 	    ESC_DDI_INITIATOR_UNREGISTER);
766 	vhcache_phci_remove(vh->vh_config, ph);
767 	cv_destroy(&ph->ph_unstable_cv);
768 	mutex_destroy(&ph->ph_mutex);
769 	kmem_free(ph, sizeof (mdi_phci_t));
770 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
771 	DEVI(pdip)->devi_mdi_xhci = NULL;
772 	return (MDI_SUCCESS);
773 }
774 
775 /*
776  * i_devi_get_phci():
777  *		Utility function to return the phci extensions.
778  */
779 static mdi_phci_t *
780 i_devi_get_phci(dev_info_t *pdip)
781 {
782 	mdi_phci_t	*ph = NULL;
783 
784 	if (MDI_PHCI(pdip)) {
785 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
786 	}
787 	return (ph);
788 }
789 
790 /*
791  * Single thread mdi entry into devinfo node for modifying its children.
792  * If necessary we perform an ndi_devi_enter of the vHCI before doing
793  * an ndi_devi_enter of 'dip'.  If we enter the vHCI, we set *enteredvp
794  * to true, otherwise it is unconditionally set to false.
795  */
796 void
797 mdi_devi_enter(dev_info_t *phci_dip, boolean_t *enteredvp)
798 {
799 	dev_info_t	*vdip;
800 
801 	/* Verify calling context */
802 	ASSERT3P(enteredvp, !=, NULL);
803 	ASSERT(MDI_PHCI(phci_dip));
804 	vdip = mdi_devi_get_vdip(phci_dip);
805 	ASSERT3P(vdip, !=, NULL);	/* A pHCI always has a vHCI */
806 
807 	/*
808 	 * If pHCI is detaching then the framework has already entered the
809 	 * vHCI on a thread that went down the code path leading to
810 	 * detach_node().  This framework enter of the vHCI during pHCI
811 	 * detach is done to avoid deadlock with vHCI power management
812 	 * operations which enter the vHCI and then enter down the path
813 	 * to the pHCI.  If pHCI is detaching then we piggyback this call's
814 	 * enter of the vHCI on the framework's vHCI enter that has already
815 	 * occurred - this is OK because we know that the framework thread
816 	 * doing detach is waiting for our completion.
817 	 *
818 	 * We should check DEVI_IS_DETACHING under an enter of the parent to
819 	 * avoid a race with detach, but we can't because the framework has
820 	 * already entered the parent, so we have this complexity instead.
821 	 */
822 	*enteredvp = B_FALSE;
823 	for (;;) {
824 		if (panicstr != NULL)
825 			return;
826 
827 		if (ndi_devi_tryenter(vdip)) {
828 			*enteredvp = B_TRUE;
829 			if (DEVI_IS_DETACHING(phci_dip)) {
830 				ndi_devi_exit(vdip);
831 				*enteredvp = B_FALSE;
832 			}
833 			break;
834 		} else if (DEVI_IS_DETACHING(phci_dip)) {
835 			*enteredvp = B_FALSE;
836 			break;
837 		} else if (servicing_interrupt()) {
838 			/*
839 			 * Don't delay an interrupt (and ensure adaptive
840 			 * mutex inversion support).
841 			 */
842 			ndi_devi_enter(vdip);
843 			*enteredvp = B_TRUE;
844 			break;
845 		} else {
846 			delay_random(mdi_delay);
847 		}
848 	}
849 
850 	ndi_devi_enter(phci_dip);
851 }
852 
853 /*
854  * Attempt to mdi_devi_enter.
855  */
856 int
857 mdi_devi_tryenter(dev_info_t *phci_dip, boolean_t *enteredvp)
858 {
859 	dev_info_t	*vdip;
860 
861 	/* Verify calling context */
862 	ASSERT(MDI_PHCI(phci_dip));
863 	vdip = mdi_devi_get_vdip(phci_dip);
864 	ASSERT3P(vdip, !=, NULL);	/* A pHCI always has a vHCI */
865 
866 	*enteredvp = B_FALSE;
867 	if (ndi_devi_tryenter(vdip)) {
868 		if (ndi_devi_tryenter(phci_dip)) {
869 			*enteredvp = B_TRUE;
870 			return (1);	/* locked */
871 		}
872 		ndi_devi_exit(vdip);
873 	}
874 	return (0);			/* busy */
875 }
876 
877 /*
878  * Release mdi_devi_enter or successful mdi_devi_tryenter.
879  */
880 void
881 mdi_devi_exit(dev_info_t *phci_dip, boolean_t enteredv)
882 {
883 	dev_info_t	*vdip;
884 
885 	/* Verify calling context */
886 	ASSERT(MDI_PHCI(phci_dip));
887 	vdip = mdi_devi_get_vdip(phci_dip);
888 	ASSERT3P(vdip, !=, NULL);	/* A pHCI always has a vHCI */
889 
890 	ndi_devi_exit(phci_dip);
891 	if (enteredv)
892 		ndi_devi_exit(vdip);
893 }
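/*
 * For illustration, the enter/exit pair brackets changes to a pHCI's
 * mdi_pathinfo children; the boolean set by mdi_devi_enter() must be passed
 * back unmodified to mdi_devi_exit().  A hypothetical caller:
 */
#if 0
static void
xphci_reconfigure_paths(dev_info_t *pdip)
{
	boolean_t	entered_vhci;

	mdi_devi_enter(pdip, &entered_vhci);
	/* ... allocate, online or offline mdi_pathinfo nodes here ... */
	mdi_devi_exit(pdip, entered_vhci);
}
#endif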
894 
895 /*
896  * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
897  * around a pHCI driver's calls to mdi_pi_online/offline, after holding
898  * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
899  * with vHCI power management code during path online/offline.  Each
900  * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
901  * occur within the scope of an active mdi_devi_enter that establishes the
902  * circular value.
903  */
904 void
905 mdi_devi_exit_phci(dev_info_t *phci_dip)
906 {
907 	/* Verify calling context */
908 	ASSERT(MDI_PHCI(phci_dip));
909 
910 	/* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
911 	ndi_hold_devi(phci_dip);
912 
913 	ndi_devi_exit(phci_dip);
914 }
915 
916 void
917 mdi_devi_enter_phci(dev_info_t *phci_dip)
918 {
919 	/* Verify calling context */
920 	ASSERT(MDI_PHCI(phci_dip));
921 
922 	ndi_devi_enter(phci_dip);
923 
924 	/* Drop hold from mdi_devi_exit_phci. */
925 	ndi_rele_devi(phci_dip);
926 }
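/*
 * For illustration, mdi_devi_exit_phci()/mdi_devi_enter_phci() are only
 * meaningful inside an active mdi_devi_enter() scope, with the pathinfo
 * node held across the window where the pHCI enter is dropped.  A
 * hypothetical path-online fragment (pdip, pip and entered_vhci belong to
 * the caller):
 */
#if 0
	mdi_devi_enter(pdip, &entered_vhci);
	mdi_hold_path(pip);
	mdi_devi_exit_phci(pdip);	/* drop pHCI enter, keep devi hold */
	(void) mdi_pi_online(pip, 0);
	mdi_devi_enter_phci(pdip);	/* re-enter pHCI, drop devi hold */
	mdi_rele_path(pip);
	mdi_devi_exit(pdip, entered_vhci);
#endif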
927 
928 /*
929  * mdi_devi_get_vdip():
930  *		given a pHCI dip return vHCI dip
931  * Returns:
932  *		the vHCI dip if it exists
933  *		else NULL
934  */
935 dev_info_t *
936 mdi_devi_get_vdip(dev_info_t *pdip)
937 {
938 	mdi_phci_t	*ph;
939 
940 	ph = i_devi_get_phci(pdip);
941 	if (ph && ph->ph_vhci)
942 		return (ph->ph_vhci->vh_dip);
943 	return (NULL);
944 }
945 
946 /*
947  * mdi_devi_pdip_entered():
948  *		Return 1 if we are vHCI and have done an ndi_devi_enter
949  *		of a pHCI
950  */
951 int
952 mdi_devi_pdip_entered(dev_info_t *vdip)
953 {
954 	mdi_vhci_t	*vh;
955 	mdi_phci_t	*ph;
956 
957 	vh = i_devi_get_vhci(vdip);
958 	if (vh == NULL)
959 		return (0);
960 
961 	MDI_VHCI_PHCI_LOCK(vh);
962 	ph = vh->vh_phci_head;
963 	while (ph) {
964 		if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
965 			MDI_VHCI_PHCI_UNLOCK(vh);
966 			return (1);
967 		}
968 		ph = ph->ph_next;
969 	}
970 	MDI_VHCI_PHCI_UNLOCK(vh);
971 	return (0);
972 }
973 
974 /*
975  * mdi_phci_path2devinfo():
976  *		Utility function to search for a valid phci device given
977  *		the devfs pathname.
978  */
979 dev_info_t *
980 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
981 {
982 	char		*temp_pathname;
983 	mdi_vhci_t	*vh;
984 	mdi_phci_t	*ph;
985 	dev_info_t	*pdip = NULL;
986 
987 	vh = i_devi_get_vhci(vdip);
988 	ASSERT(vh != NULL);
989 
990 	if (vh == NULL) {
991 		/*
992 		 * Invalid vHCI component, return failure
993 		 */
994 		return (NULL);
995 	}
996 
997 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
998 	MDI_VHCI_PHCI_LOCK(vh);
999 	ph = vh->vh_phci_head;
1000 	while (ph != NULL) {
1001 		pdip = ph->ph_dip;
1002 		ASSERT(pdip != NULL);
1003 		*temp_pathname = '\0';
1004 		(void) ddi_pathname(pdip, temp_pathname);
1005 		if (strcmp(temp_pathname, pathname) == 0) {
1006 			break;
1007 		}
1008 		ph = ph->ph_next;
1009 	}
1010 	if (ph == NULL) {
1011 		pdip = NULL;
1012 	}
1013 	MDI_VHCI_PHCI_UNLOCK(vh);
1014 	kmem_free(temp_pathname, MAXPATHLEN);
1015 	return (pdip);
1016 }
1017 
1018 /*
1019  * mdi_phci_get_path_count():
1020  *		get number of path information nodes associated with a given
1021  *		pHCI device.
1022  */
1023 int
1024 mdi_phci_get_path_count(dev_info_t *pdip)
1025 {
1026 	mdi_phci_t	*ph;
1027 	int		count = 0;
1028 
1029 	ph = i_devi_get_phci(pdip);
1030 	if (ph != NULL) {
1031 		count = ph->ph_path_count;
1032 	}
1033 	return (count);
1034 }
1035 
1036 /*
1037  * i_mdi_phci_lock():
1038  *		Lock a pHCI device
1039  * Return Values:
1040  *		None
1041  * Note:
1042  *		The default locking order is:
1043  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
1044  *		But there are a number of situations where locks need to be
1045  *		grabbed in reverse order.  This routine implements a try-and-lock
1046  *		mechanism depending on the requested parameter option.
1047  */
1048 static void
1049 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
1050 {
1051 	if (pip) {
1052 		/* Reverse locking is requested. */
1053 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
1054 			if (servicing_interrupt()) {
1055 				MDI_PI_HOLD(pip);
1056 				MDI_PI_UNLOCK(pip);
1057 				MDI_PHCI_LOCK(ph);
1058 				MDI_PI_LOCK(pip);
1059 				MDI_PI_RELE(pip);
1060 				break;
1061 			} else {
1062 				/*
1063 				 * tryenter failed. Try to grab again
1064 				 * after a small delay
1065 				 */
1066 				MDI_PI_HOLD(pip);
1067 				MDI_PI_UNLOCK(pip);
1068 				delay_random(mdi_delay);
1069 				MDI_PI_LOCK(pip);
1070 				MDI_PI_RELE(pip);
1071 			}
1072 		}
1073 	} else {
1074 		MDI_PHCI_LOCK(ph);
1075 	}
1076 }
1077 
1078 /*
1079  * i_mdi_phci_unlock():
1080  *		Unlock the pHCI component
1081  */
1082 static void
1083 i_mdi_phci_unlock(mdi_phci_t *ph)
1084 {
1085 	MDI_PHCI_UNLOCK(ph);
1086 }
1087 
1088 /*
1089  * i_mdi_devinfo_create():
1090  *		create client device's devinfo node
1091  * Return Values:
1092  *		dev_info
1093  *		NULL
1094  * Notes:
1095  */
1096 static dev_info_t *
1097 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
1098 	char **compatible, int ncompatible)
1099 {
1100 	dev_info_t *cdip = NULL;
1101 
1102 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1103 
1104 	/* Verify for duplicate entry */
1105 	cdip = i_mdi_devinfo_find(vh, name, guid);
1106 	ASSERT(cdip == NULL);
1107 	if (cdip) {
1108 		cmn_err(CE_WARN,
1109 		    "i_mdi_devinfo_create: client %s@%s already exists",
1110 			name ? name : "", guid ? guid : "");
1111 	}
1112 
1113 	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
1114 	if (cdip == NULL)
1115 		goto fail;
1116 
1117 	/*
1118 	 * Create component type and Global unique identifier
1119 	 * properties
1120 	 */
1121 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
1122 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
1123 		goto fail;
1124 	}
1125 
1126 	/* Decorate the node with compatible property */
1127 	if (compatible &&
1128 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
1129 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
1130 		goto fail;
1131 	}
1132 
1133 	return (cdip);
1134 
1135 fail:
1136 	if (cdip) {
1137 		(void) ndi_prop_remove_all(cdip);
1138 		(void) ndi_devi_free(cdip);
1139 	}
1140 	return (NULL);
1141 }
1142 
1143 /*
1144  * i_mdi_devinfo_find():
1145  *		Find a matching devinfo node for given client node name
1146  *		and its guid.
1147  * Return Values:
1148  *		Handle to a dev_info node or NULL
1149  */
1150 static dev_info_t *
1151 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
1152 {
1153 	char			*data;
1154 	dev_info_t		*cdip = NULL;
1155 	dev_info_t		*ndip = NULL;
1156 
1157 	ndi_devi_enter(vh->vh_dip);
1158 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
1159 	while ((cdip = ndip) != NULL) {
1160 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1161 
1162 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
1163 			continue;
1164 		}
1165 
1166 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
1167 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
1168 		    &data) != DDI_PROP_SUCCESS) {
1169 			continue;
1170 		}
1171 
1172 		if (strcmp(data, guid) != 0) {
1173 			ddi_prop_free(data);
1174 			continue;
1175 		}
1176 		ddi_prop_free(data);
1177 		break;
1178 	}
1179 	ndi_devi_exit(vh->vh_dip);
1180 	return (cdip);
1181 }
1182 
1183 /*
1184  * i_mdi_devinfo_remove():
1185  *		Remove a client device node
1186  */
1187 static int
1188 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1189 {
1190 	int	rv = MDI_SUCCESS;
1191 
1192 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1193 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1194 		rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE);
1195 		if (rv != NDI_SUCCESS) {
1196 			MDI_DEBUG(1, (MDI_NOTE, cdip,
1197 			    "!failed: cdip %p", (void *)cdip));
1198 		}
1199 		/*
1200 		 * Convert to MDI error code
1201 		 */
1202 		switch (rv) {
1203 		case NDI_SUCCESS:
1204 			rv = MDI_SUCCESS;
1205 			break;
1206 		case NDI_BUSY:
1207 			rv = MDI_BUSY;
1208 			break;
1209 		default:
1210 			rv = MDI_FAILURE;
1211 			break;
1212 		}
1213 	}
1214 	return (rv);
1215 }
1216 
1217 /*
1218  * i_devi_get_client()
1219  *		Utility function to get mpxio component extensions
1220  */
1221 static mdi_client_t *
1222 i_devi_get_client(dev_info_t *cdip)
1223 {
1224 	mdi_client_t	*ct = NULL;
1225 
1226 	if (MDI_CLIENT(cdip)) {
1227 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1228 	}
1229 	return (ct);
1230 }
1231 
1232 /*
1233  * i_mdi_is_child_present():
1234  *		Search for the presence of client device dev_info node
1235  */
1236 static int
1237 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1238 {
1239 	int		rv = MDI_FAILURE;
1240 	struct dev_info	*dip;
1241 
1242 	ndi_devi_enter(vdip);
1243 	dip = DEVI(vdip)->devi_child;
1244 	while (dip) {
1245 		if (dip == DEVI(cdip)) {
1246 			rv = MDI_SUCCESS;
1247 			break;
1248 		}
1249 		dip = dip->devi_sibling;
1250 	}
1251 	ndi_devi_exit(vdip);
1252 	return (rv);
1253 }
1254 
1255 
1256 /*
1257  * i_mdi_client_lock():
1258  *		Grab client component lock
1259  * Return Values:
1260  *		None
1261  * Note:
1262  *		The default locking order is:
1263  *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1264  *		But there are a number of situations where locks need to be
1265  *		grabbed in reverse order.  This routine implements a try-and-lock
1266  *		mechanism depending on the requested parameter option.
1267  */
1268 static void
1269 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1270 {
1271 	if (pip) {
1272 		/*
1273 		 * Reverse locking is requested.
1274 		 */
1275 		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1276 			if (servicing_interrupt()) {
1277 				MDI_PI_HOLD(pip);
1278 				MDI_PI_UNLOCK(pip);
1279 				MDI_CLIENT_LOCK(ct);
1280 				MDI_PI_LOCK(pip);
1281 				MDI_PI_RELE(pip);
1282 				break;
1283 			} else {
1284 				/*
1285 				 * tryenter failed. Try to grab again
1286 				 * after a small delay
1287 				 */
1288 				MDI_PI_HOLD(pip);
1289 				MDI_PI_UNLOCK(pip);
1290 				delay_random(mdi_delay);
1291 				MDI_PI_LOCK(pip);
1292 				MDI_PI_RELE(pip);
1293 			}
1294 		}
1295 	} else {
1296 		MDI_CLIENT_LOCK(ct);
1297 	}
1298 }
1299 
1300 /*
1301  * i_mdi_client_unlock():
1302  *		Unlock a client component
1303  */
1304 static void
1305 i_mdi_client_unlock(mdi_client_t *ct)
1306 {
1307 	MDI_CLIENT_UNLOCK(ct);
1308 }
1309 
1310 /*
1311  * i_mdi_client_alloc():
1312  *		Allocate and initialize a client structure.  Caller should
1313  *		hold the vhci client lock.
1314  * Return Values:
1315  *		Handle to a client component
1316  */
1317 /*ARGSUSED*/
1318 static mdi_client_t *
1319 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1320 {
1321 	mdi_client_t	*ct;
1322 
1323 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1324 
1325 	/*
1326 	 * Allocate and initialize a component structure.
1327 	 */
1328 	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1329 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1330 	ct->ct_hnext = NULL;
1331 	ct->ct_hprev = NULL;
1332 	ct->ct_dip = NULL;
1333 	ct->ct_vhci = vh;
1334 	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1335 	(void) strcpy(ct->ct_drvname, name);
1336 	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1337 	(void) strcpy(ct->ct_guid, lguid);
1338 	ct->ct_cprivate = NULL;
1339 	ct->ct_vprivate = NULL;
1340 	ct->ct_flags = 0;
1341 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
1342 	MDI_CLIENT_LOCK(ct);
1343 	MDI_CLIENT_SET_OFFLINE(ct);
1344 	MDI_CLIENT_SET_DETACH(ct);
1345 	MDI_CLIENT_SET_POWER_UP(ct);
1346 	MDI_CLIENT_UNLOCK(ct);
1347 	ct->ct_failover_flags = 0;
1348 	ct->ct_failover_status = 0;
1349 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1350 	ct->ct_unstable = 0;
1351 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1352 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1353 	ct->ct_lb = vh->vh_lb;
1354 	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1355 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1356 	ct->ct_path_count = 0;
1357 	ct->ct_path_head = NULL;
1358 	ct->ct_path_tail = NULL;
1359 	ct->ct_path_last = NULL;
1360 
1361 	/*
1362 	 * Add this client component to our client hash queue
1363 	 */
1364 	i_mdi_client_enlist_table(vh, ct);
1365 	return (ct);
1366 }
1367 
1368 /*
1369  * i_mdi_client_enlist_table():
1370  *		Attach the client device to the client hash table. Caller
1371  *		should hold the vhci client lock.
1372  */
1373 static void
1374 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1375 {
1376 	int			index;
1377 	struct client_hash	*head;
1378 
1379 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1380 
1381 	index = i_mdi_get_hash_key(ct->ct_guid);
1382 	head = &vh->vh_client_table[index];
1383 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1384 	head->ct_hash_head = ct;
1385 	head->ct_hash_count++;
1386 	vh->vh_client_count++;
1387 }
1388 
1389 /*
1390  * i_mdi_client_delist_table():
1391  *		Detach the client device from the client hash table.
1392  *		Caller should hold the vhci client lock.
1393  */
1394 static void
1395 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1396 {
1397 	int			index;
1398 	char			*guid;
1399 	struct client_hash	*head;
1400 	mdi_client_t		*next;
1401 	mdi_client_t		*last;
1402 
1403 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1404 
1405 	guid = ct->ct_guid;
1406 	index = i_mdi_get_hash_key(guid);
1407 	head = &vh->vh_client_table[index];
1408 
1409 	last = NULL;
1410 	next = (mdi_client_t *)head->ct_hash_head;
1411 	while (next != NULL) {
1412 		if (next == ct) {
1413 			break;
1414 		}
1415 		last = next;
1416 		next = next->ct_hnext;
1417 	}
1418 
1419 	if (next) {
1420 		head->ct_hash_count--;
1421 		if (last == NULL) {
1422 			head->ct_hash_head = ct->ct_hnext;
1423 		} else {
1424 			last->ct_hnext = ct->ct_hnext;
1425 		}
1426 		ct->ct_hnext = NULL;
1427 		vh->vh_client_count--;
1428 	}
1429 }
1430 
1431 
1432 /*
1433  * i_mdi_client_free():
1434  *		Free a client component
1435  */
1436 static int
1437 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1438 {
1439 	int		rv = MDI_SUCCESS;
1440 	int		flags = ct->ct_flags;
1441 	dev_info_t	*cdip;
1442 	dev_info_t	*vdip;
1443 
1444 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1445 
1446 	vdip = vh->vh_dip;
1447 	cdip = ct->ct_dip;
1448 
1449 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1450 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1451 	DEVI(cdip)->devi_mdi_client = NULL;
1452 
1453 	/*
1454 	 * Clear out back ref. to dev_info_t node
1455 	 */
1456 	ct->ct_dip = NULL;
1457 
1458 	/*
1459 	 * Remove this client from our hash queue
1460 	 */
1461 	i_mdi_client_delist_table(vh, ct);
1462 
1463 	/*
1464 	 * Uninitialize and free the component
1465 	 */
1466 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1467 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1468 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1469 	cv_destroy(&ct->ct_failover_cv);
1470 	cv_destroy(&ct->ct_unstable_cv);
1471 	cv_destroy(&ct->ct_powerchange_cv);
1472 	mutex_destroy(&ct->ct_mutex);
1473 	kmem_free(ct, sizeof (*ct));
1474 
1475 	MDI_VHCI_CLIENT_UNLOCK(vh);
1476 	(void) i_mdi_devinfo_remove(vdip, cdip, flags);
1477 	MDI_VHCI_CLIENT_LOCK(vh);
1478 
1479 	return (rv);
1480 }
1481 
1482 /*
1483  * i_mdi_client_find():
1484  *		Find the client structure corresponding to a given guid.
1485  *		Caller should hold the vhci client lock.
1486  */
1487 static mdi_client_t *
1488 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1489 {
1490 	int			index;
1491 	struct client_hash	*head;
1492 	mdi_client_t		*ct;
1493 
1494 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1495 
1496 	index = i_mdi_get_hash_key(guid);
1497 	head = &vh->vh_client_table[index];
1498 
1499 	ct = head->ct_hash_head;
1500 	while (ct != NULL) {
1501 		if (strcmp(ct->ct_guid, guid) == 0 &&
1502 		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1503 			break;
1504 		}
1505 		ct = ct->ct_hnext;
1506 	}
1507 	return (ct);
1508 }
1509 
1510 /*
1511  * i_mdi_client_update_state():
1512  *		Compute and update client device state
1513  * Notes:
1514  *		A client device can be in any of three possible states:
1515  *
1516  *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1517  *		than one online/standby path. Can tolerate failures.
1518  *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1519  *		no alternate paths available as standby. A failure on the online
1520  *		path would result in loss of access to device data.
1521  *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
1522  *		no paths available to access the device.
1523  */
1524 static void
1525 i_mdi_client_update_state(mdi_client_t *ct)
1526 {
1527 	int state;
1528 
1529 	ASSERT(MDI_CLIENT_LOCKED(ct));
1530 	state = i_mdi_client_compute_state(ct, NULL);
1531 	MDI_CLIENT_SET_STATE(ct, state);
1532 }
1533 
1534 /*
1535  * i_mdi_client_compute_state():
1536  *		Compute client device state
1537  *
1538  *		mdi_phci_t *	Pointer to pHCI structure which should be
1539  *				excluded while computing the new value.  Used by
1540  *				i_mdi_phci_offline() to find the new
1541  *				client state after DR of a pHCI.
1542  */
1543 static int
1544 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1545 {
1546 	int		state;
1547 	int		online_count = 0;
1548 	int		standby_count = 0;
1549 	mdi_pathinfo_t	*pip, *next;
1550 
1551 	ASSERT(MDI_CLIENT_LOCKED(ct));
1552 	pip = ct->ct_path_head;
1553 	while (pip != NULL) {
1554 		MDI_PI_LOCK(pip);
1555 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1556 		if (MDI_PI(pip)->pi_phci == ph) {
1557 			MDI_PI_UNLOCK(pip);
1558 			pip = next;
1559 			continue;
1560 		}
1561 
1562 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1563 				== MDI_PATHINFO_STATE_ONLINE)
1564 			online_count++;
1565 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1566 				== MDI_PATHINFO_STATE_STANDBY)
1567 			standby_count++;
1568 		MDI_PI_UNLOCK(pip);
1569 		pip = next;
1570 	}
1571 
1572 	if (online_count == 0) {
1573 		if (standby_count == 0) {
1574 			state = MDI_CLIENT_STATE_FAILED;
1575 			MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
1576 			    "client state failed: ct = %p", (void *)ct));
1577 		} else if (standby_count == 1) {
1578 			state = MDI_CLIENT_STATE_DEGRADED;
1579 		} else {
1580 			state = MDI_CLIENT_STATE_OPTIMAL;
1581 		}
1582 	} else if (online_count == 1) {
1583 		if (standby_count == 0) {
1584 			state = MDI_CLIENT_STATE_DEGRADED;
1585 		} else {
1586 			state = MDI_CLIENT_STATE_OPTIMAL;
1587 		}
1588 	} else {
1589 		state = MDI_CLIENT_STATE_OPTIMAL;
1590 	}
1591 	return (state);
1592 }
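/*
 * Summarizing the computation above (paths on the excluded pHCI are not
 * counted):
 *
 *	online	standby		resulting client state
 *	------	-------		----------------------
 *	  0	   0		MDI_CLIENT_STATE_FAILED
 *	  0	   1		MDI_CLIENT_STATE_DEGRADED
 *	  0	  >1		MDI_CLIENT_STATE_OPTIMAL
 *	  1	   0		MDI_CLIENT_STATE_DEGRADED
 *	  1	  >0		MDI_CLIENT_STATE_OPTIMAL
 *	 >1	  any		MDI_CLIENT_STATE_OPTIMAL
 */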
1593 
1594 /*
1595  * i_mdi_client2devinfo():
1596  *		Utility function
1597  */
1598 dev_info_t *
1599 i_mdi_client2devinfo(mdi_client_t *ct)
1600 {
1601 	return (ct->ct_dip);
1602 }
1603 
1604 /*
1605  * mdi_client_path2devinfo():
1606  *		Given the parent devinfo and child devfs pathname, search for
1607  *		a valid devfs node handle.
1608  */
1609 dev_info_t *
1610 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1611 {
1612 	dev_info_t	*cdip = NULL;
1613 	dev_info_t	*ndip = NULL;
1614 	char		*temp_pathname;
1615 
1616 	/*
1617 	 * Allocate temp buffer
1618 	 */
1619 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1620 
1621 	/*
1622 	 * Lock parent against changes
1623 	 */
1624 	ndi_devi_enter(vdip);
1625 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1626 	while ((cdip = ndip) != NULL) {
1627 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1628 
1629 		*temp_pathname = '\0';
1630 		(void) ddi_pathname(cdip, temp_pathname);
1631 		if (strcmp(temp_pathname, pathname) == 0) {
1632 			break;
1633 		}
1634 	}
1635 	/*
1636 	 * Release devinfo lock
1637 	 */
1638 	ndi_devi_exit(vdip);
1639 
1640 	/*
1641 	 * Free the temp buffer
1642 	 */
1643 	kmem_free(temp_pathname, MAXPATHLEN);
1644 	return (cdip);
1645 }
1646 
1647 /*
1648  * mdi_client_get_path_count():
1649  *		Utility function to get number of path information nodes
1650  *		associated with a given client device.
1651  */
1652 int
1653 mdi_client_get_path_count(dev_info_t *cdip)
1654 {
1655 	mdi_client_t	*ct;
1656 	int		count = 0;
1657 
1658 	ct = i_devi_get_client(cdip);
1659 	if (ct != NULL) {
1660 		count = ct->ct_path_count;
1661 	}
1662 	return (count);
1663 }
1664 
1665 
1666 /*
1667  * i_mdi_get_hash_key():
1668  *		Create a hash using strings as keys
1669  *
1670  */
1671 static int
1672 i_mdi_get_hash_key(char *str)
1673 {
1674 	uint32_t	g, hash = 0;
1675 	char		*p;
1676 
1677 	for (p = str; *p != '\0'; p++) {
1678 		g = *p;
1679 		hash += g;
1680 	}
1681 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1682 }
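/*
 * For example, a client whose GUID is the string "abc" hashes to
 * ('a' + 'b' + 'c') % (CLIENT_HASH_TABLE_SIZE - 1), i.e. 294 modulo the
 * table size minus one; clients whose GUID bytes sum to the same residue
 * share a hash bucket.
 */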
1683 
1684 /*
1685  * mdi_get_lb_policy():
1686  *		Get current load balancing policy for a given client device
1687  */
1688 client_lb_t
1689 mdi_get_lb_policy(dev_info_t *cdip)
1690 {
1691 	client_lb_t	lb = LOAD_BALANCE_NONE;
1692 	mdi_client_t	*ct;
1693 
1694 	ct = i_devi_get_client(cdip);
1695 	if (ct != NULL) {
1696 		lb = ct->ct_lb;
1697 	}
1698 	return (lb);
1699 }
1700 
1701 /*
1702  * mdi_set_lb_region_size():
1703  *		Set the current region size for logical-block load balancing
1704  */
1705 int
1706 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1707 {
1708 	mdi_client_t	*ct;
1709 	int		rv = MDI_FAILURE;
1710 
1711 	ct = i_devi_get_client(cdip);
1712 	if (ct != NULL && ct->ct_lb_args != NULL) {
1713 		ct->ct_lb_args->region_size = region_size;
1714 		rv = MDI_SUCCESS;
1715 	}
1716 	return (rv);
1717 }
1718 
1719 /*
1720  * mdi_set_lb_policy():
1721  *		Set current load balancing policy for a given client device
1722  */
1723 int
1724 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1725 {
1726 	mdi_client_t	*ct;
1727 	int		rv = MDI_FAILURE;
1728 
1729 	ct = i_devi_get_client(cdip);
1730 	if (ct != NULL) {
1731 		ct->ct_lb = lb;
1732 		rv = MDI_SUCCESS;
1733 	}
1734 	return (rv);
1735 }
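/*
 * For illustration, a hypothetical caller switching a client device to
 * logical-block balancing; the region size is a block-number shift count
 * (see i_mdi_lba_lb() below), so 20 means regions of 2^20 blocks.  "cdip"
 * is the caller's client devinfo node.
 */
#if 0
	if (mdi_set_lb_policy(cdip, LOAD_BALANCE_LBA) == MDI_SUCCESS)
		(void) mdi_set_lb_region_size(cdip, 20);
#endif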
1736 
1737 static void
1738 mdi_failover_cb(void *arg)
1739 {
1740 	(void) i_mdi_failover(arg);
1741 }
1742 
1743 /*
1744  * mdi_failover():
1745  *		failover function called by the vHCI drivers to initiate
1746  *		a failover operation.  This is typically due to non-availability
1747  *		of online paths to route I/O requests.  Failover can be
1748  *		of online paths to route I/O requests.  Failover can also be
1749  *		triggered by a user application.
1750  *		The vHCI driver calls mdi_failover() to initiate a failover
1751  *		operation. mdi_failover() calls back into the vHCI driver's
1752  *		vo_failover() entry point to perform the actual failover
1753  *		operation.  The reason for requiring the vHCI driver to
1754  *		initiate failover by calling mdi_failover(), instead of directly
1755  *		executing vo_failover() itself, is to ensure that the mdi
1756  *		framework can keep track of the client state properly.
1757  *		Additionally, mdi_failover() provides as a convenience the
1758  *		option of performing the failover operation synchronously or
1759  *		asynchronously.
1760  *
1761  *		Upon successful completion of the failover operation, the
1762  *		paths that were previously ONLINE will be in the STANDBY state,
1763  *		and the newly activated paths will be in the ONLINE state.
1764  *
1765  *		The flags modifier determines whether the activation is done
1766  *		synchronously: MDI_FAILOVER_SYNC
1767  * Return Values:
1768  *		MDI_SUCCESS
1769  *		MDI_FAILURE
1770  *		MDI_BUSY
1771  */
1772 /*ARGSUSED*/
1773 int
1774 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1775 {
1776 	int			rv;
1777 	mdi_client_t		*ct;
1778 
1779 	ct = i_devi_get_client(cdip);
1780 	ASSERT(ct != NULL);
1781 	if (ct == NULL) {
1782 		/* cdip is not a valid client device. Nothing more to do. */
1783 		return (MDI_FAILURE);
1784 	}
1785 
1786 	MDI_CLIENT_LOCK(ct);
1787 
1788 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1789 		/* A path to the client is being freed */
1790 		MDI_CLIENT_UNLOCK(ct);
1791 		return (MDI_BUSY);
1792 	}
1793 
1794 
1795 	if (MDI_CLIENT_IS_FAILED(ct)) {
1796 		/*
1797 		 * Client is in failed state. Nothing more to do.
1798 		 */
1799 		MDI_CLIENT_UNLOCK(ct);
1800 		return (MDI_FAILURE);
1801 	}
1802 
1803 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1804 		/*
1805 		 * Failover is already in progress; return BUSY
1806 		 */
1807 		MDI_CLIENT_UNLOCK(ct);
1808 		return (MDI_BUSY);
1809 	}
1810 	/*
1811 	 * Make sure that mdi_pathinfo node state changes are processed.
1812 	 * We do not allow failovers to progress while client path state
1813 	 * changes are in progress
1814 	 */
1815 	if (ct->ct_unstable) {
1816 		if (flags == MDI_FAILOVER_ASYNC) {
1817 			MDI_CLIENT_UNLOCK(ct);
1818 			return (MDI_BUSY);
1819 		} else {
1820 			while (ct->ct_unstable)
1821 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1822 		}
1823 	}
1824 
1825 	/*
1826 	 * Client device is in stable state. Before proceeding, perform sanity
1827 	 * checks again.
1828 	 */
1829 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1830 	    (!i_ddi_devi_attached(cdip))) {
1831 		/*
1832 		 * Client is in failed state. Nothing more to do.
1833 		 */
1834 		MDI_CLIENT_UNLOCK(ct);
1835 		return (MDI_FAILURE);
1836 	}
1837 
1838 	/*
1839 	 * Set the client state as failover in progress.
1840 	 */
1841 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1842 	ct->ct_failover_flags = flags;
1843 	MDI_CLIENT_UNLOCK(ct);
1844 
1845 	if (flags == MDI_FAILOVER_ASYNC) {
1846 		/*
1847 		 * Submit the initiate failover request via CPR safe
1848 		 * taskq threads.
1849 		 */
1850 		(void) taskq_dispatch(mdi_taskq, mdi_failover_cb, ct, KM_SLEEP);
1851 		return (MDI_ACCEPT);
1852 	} else {
1853 		/*
1854 		 * Synchronous failover mode.  Typically invoked from the user
1855 		 * Synchronous failover mode.  Typically invoked from
1856 		 * userland.
1857 		rv = i_mdi_failover(ct);
1858 	}
1859 	return (rv);
1860 }
1861 
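/*
 * Illustrative sketch, not part of the framework: how a hypothetical vHCI
 * driver might request a failover for a client it manages.  The names
 * example_request_failover, example_vdip and example_cdip are placeholders;
 * MDI_FAILOVER_SYNC blocks until vo_failover() completes, while
 * MDI_FAILOVER_ASYNC queues the request on mdi_taskq and returns MDI_ACCEPT.
 */
static int
example_request_failover(dev_info_t *example_vdip, dev_info_t *example_cdip,
    boolean_t sync)
{
	int	rv;

	rv = mdi_failover(example_vdip, example_cdip,
	    sync ? MDI_FAILOVER_SYNC : MDI_FAILOVER_ASYNC);

	if (rv == MDI_SUCCESS || rv == MDI_ACCEPT)
		return (0);			/* completed, or queued */
	return (rv == MDI_BUSY ? EBUSY : EIO);	/* retry later / failed */
}
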
1862 /*
1863  * i_mdi_failover():
1864  *		internal failover function. Invokes the vHCI driver's failover
1865  *		callback function and processes the failover status.
1866  * Return Values:
1867  *		The vHCI driver's vo_failover() return value, or MDI_SUCCESS
1868  *		if no vo_failover() entry point is registered.
1869  * Note: A client device in failover state cannot be detached or freed.
1870  */
1871 static int
1872 i_mdi_failover(void *arg)
1873 {
1874 	int		rv = MDI_SUCCESS;
1875 	mdi_client_t	*ct = (mdi_client_t *)arg;
1876 	mdi_vhci_t	*vh = ct->ct_vhci;
1877 
1878 	ASSERT(!MDI_CLIENT_LOCKED(ct));
1879 
1880 	if (vh->vh_ops->vo_failover != NULL) {
1881 		/*
1882 		 * Call vHCI drivers callback routine
1883 		 */
1884 		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1885 		    ct->ct_failover_flags);
1886 	}
1887 
1888 	MDI_CLIENT_LOCK(ct);
1889 	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1890 
1891 	/*
1892 	 * Save the failover return status
1893 	 */
1894 	ct->ct_failover_status = rv;
1895 
1896 	/*
1897 	 * As a result of failover, client status would have been changed.
1898 	 * Update the client state and wake up anyone waiting on this client
1899 	 * device.
1900 	 */
1901 	i_mdi_client_update_state(ct);
1902 
1903 	cv_broadcast(&ct->ct_failover_cv);
1904 	MDI_CLIENT_UNLOCK(ct);
1905 	return (rv);
1906 }
1907 
1908 /*
1909  * Logical-block load balancing.
1910  * I/Os within the range described by region_size
1911  * go down the same path.  This improves performance
1912  * through cache hits on some RAID devices.
1913  * Search only for online paths (at some point we
1914  * may want to balance across target ports).
1915  * If no paths are found then default to round-robin.
1916  */
1917 static int
1918 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1919 {
1920 	int		path_index = -1;
1921 	int		online_path_count = 0;
1922 	int		online_nonpref_path_count = 0;
1923 	int		region_size = ct->ct_lb_args->region_size;
1924 	mdi_pathinfo_t	*pip;
1925 	mdi_pathinfo_t	*next;
1926 	int		preferred, path_cnt;
1927 
1928 	pip = ct->ct_path_head;
1929 	while (pip) {
1930 		MDI_PI_LOCK(pip);
1931 		if (MDI_PI(pip)->pi_state ==
1932 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1933 			online_path_count++;
1934 		} else if (MDI_PI(pip)->pi_state ==
1935 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1936 			online_nonpref_path_count++;
1937 		}
1938 		next = (mdi_pathinfo_t *)
1939 		    MDI_PI(pip)->pi_client_link;
1940 		MDI_PI_UNLOCK(pip);
1941 		pip = next;
1942 	}
1943 	/* if found any online/preferred then use this type */
1944 	if (online_path_count > 0) {
1945 		path_cnt = online_path_count;
1946 		preferred = 1;
1947 	} else if (online_nonpref_path_count > 0) {
1948 		path_cnt = online_nonpref_path_count;
1949 		preferred = 0;
1950 	} else {
1951 		path_cnt = 0;
1952 	}
1953 	if (path_cnt) {
1954 		path_index = (bp->b_blkno >> region_size) % path_cnt;
1955 		pip = ct->ct_path_head;
1956 		while (pip && path_index != -1) {
1957 			MDI_PI_LOCK(pip);
1958 			if (path_index == 0 &&
1959 			    (MDI_PI(pip)->pi_state ==
1960 			    MDI_PATHINFO_STATE_ONLINE) &&
1961 				MDI_PI(pip)->pi_preferred == preferred) {
1962 				MDI_PI_HOLD(pip);
1963 				MDI_PI_UNLOCK(pip);
1964 				*ret_pip = pip;
1965 				return (MDI_SUCCESS);
1966 			}
1967 			path_index--;
1968 			next = (mdi_pathinfo_t *)
1969 			    MDI_PI(pip)->pi_client_link;
1970 			MDI_PI_UNLOCK(pip);
1971 			pip = next;
1972 		}
1973 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
1974 		    "lba %llx: path %s %p",
1975 		    bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
1976 	}
1977 	return (MDI_FAILURE);
1978 }
1979 
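/*
 * Worked example for the computation in i_mdi_lba_lb() above, with assumed
 * values: region_size is used as a shift count, so region_size == 11 makes
 * each region 2048 blocks long (1 MB of 512-byte blocks).  With three
 * eligible paths, blkno 0 maps to index 0, blkno 2048 to index 1 and blkno
 * 5000 to index 2, so all I/O within one region is routed down the same
 * path.  The helper below is purely illustrative and simply mirrors that
 * arithmetic.
 */
static int
example_lba_region_index(daddr_t blkno, int region_size, int path_cnt)
{
	/* e.g. (5000 >> 11) % 3 == 2 */
	return ((int)((blkno >> region_size) % path_cnt));
}
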
1980 /*
1981  * mdi_select_path():
1982  *		select a path to access a client device.
1983  *
1984  *		mdi_select_path() function is called by the vHCI drivers to
1985  *		select a path to route the I/O request to.  The caller passes
1986  *		the block I/O data transfer structure ("buf") as one of the
1987  *		parameters.  The mpxio framework uses the buf structure
1988  *		contents to maintain per path statistics (total I/O size /
1989  *		count pending).  If more than one online path is available to
1990  *		select from, the framework automatically selects a suitable path
1991  *		for routing I/O request. If a failover operation is active for
1992  *		this client device the call shall be failed with MDI_BUSY error
1993  *		code.
1994  *
1995  *		By default this function returns a suitable path in online
1996  *		state based on the current load balancing policy.  Currently
1997  *		we support LOAD_BALANCE_NONE (the previously selected online
1998  *		path continues to be used as long as it remains usable),
1999  *		LOAD_BALANCE_RR (online paths are selected in a round
2000  *		robin fashion), and LOAD_BALANCE_LBA (online paths are selected
2001  *		based on the logical block).  The load balancing policy is
2002  *		configured through the vHCI driver's configuration file (driver.conf).
2003  *
2004  *		vHCI drivers may override this default behavior by specifying
2005  *		appropriate flags.  The meaning of the third argument depends
2006  *		on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
2007  *		then the argument is the "path instance" of the path to select.
2008  *		If MDI_SELECT_PATH_INSTANCE is not set then the argument is
2009  *		"start_pip". A non NULL "start_pip" is the starting point to
2010  *		walk and find the next appropriate path.  The following values
2011  *		are currently defined: MDI_SELECT_ONLINE_PATH (to select an
2012  *		ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select a
2013  *		STANDBY path).
2014  *
2015  *		The non-standard behavior is used by the scsi_vhci driver
2016  *		whenever it has to use a STANDBY/FAULTED path, e.g. during
2017  *		attach of client devices (to avoid an unnecessary failover
2018  *		when the STANDBY path comes up first) and during failover
2019  *		(to activate a STANDBY path as ONLINE).
2020  *
2021  *		The selected path is returned in a mdi_hold_path() state
2022  *		(pi_ref_cnt). The caller should release the hold by calling
2023  *		mdi_rele_path(); see the illustrative sketch after this function.
2024  *
2025  * Return Values:
2026  *		MDI_SUCCESS	- Completed successfully
2027  *		MDI_BUSY	- Client device is busy failing over
2028  *		MDI_NOPATH	- Client device is online, but no valid paths
2029  *				  are available to access this client device
2030  *		MDI_FAILURE	- Invalid client device or state
2031  *		MDI_DEVI_ONLINING
2032  *				- Client device (struct dev_info state) is in
2033  *				  onlining state.
2034  */
2035 
2036 /*ARGSUSED*/
2037 int
2038 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
2039     void *arg, mdi_pathinfo_t **ret_pip)
2040 {
2041 	mdi_client_t	*ct;
2042 	mdi_pathinfo_t	*pip;
2043 	mdi_pathinfo_t	*next;
2044 	mdi_pathinfo_t	*head;
2045 	mdi_pathinfo_t	*start;
2046 	client_lb_t	lbp;	/* load balancing policy */
2047 	int		sb = 1;	/* standard behavior */
2048 	int		preferred = 1;	/* preferred path */
2049 	int		cond, cont = 1;
2050 	int		retry = 0;
2051 	mdi_pathinfo_t	*start_pip;	/* request starting pathinfo */
2052 	int		path_instance;	/* request specific path instance */
2053 
2054 	/* determine type of arg based on flags */
2055 	if (flags & MDI_SELECT_PATH_INSTANCE) {
2056 		path_instance = (int)(intptr_t)arg;
2057 		start_pip = NULL;
2058 	} else {
2059 		path_instance = 0;
2060 		start_pip = (mdi_pathinfo_t *)arg;
2061 	}
2062 
2063 	if (flags != 0) {
2064 		/*
2065 		 * disable default behavior
2066 		 */
2067 		sb = 0;
2068 	}
2069 
2070 	*ret_pip = NULL;
2071 	ct = i_devi_get_client(cdip);
2072 	if (ct == NULL) {
2073 		/* mdi extensions are NULL, Nothing more to do */
2074 		return (MDI_FAILURE);
2075 	}
2076 
2077 	MDI_CLIENT_LOCK(ct);
2078 
2079 	if (sb) {
2080 		if (MDI_CLIENT_IS_FAILED(ct)) {
2081 			/*
2082 			 * Client is not ready to accept any I/O requests.
2083 			 * Fail this request.
2084 			 */
2085 			MDI_DEBUG(2, (MDI_NOTE, cdip,
2086 			    "client state offline ct = %p", (void *)ct));
2087 			MDI_CLIENT_UNLOCK(ct);
2088 			return (MDI_FAILURE);
2089 		}
2090 
2091 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
2092 			/*
2093 			 * Check whether a failover is in progress. If so,
2094 			 * tell the caller that this device is busy.
2095 			 */
2096 			MDI_DEBUG(2, (MDI_NOTE, cdip,
2097 			    "client failover in progress ct = %p",
2098 			    (void *)ct));
2099 			MDI_CLIENT_UNLOCK(ct);
2100 			return (MDI_BUSY);
2101 		}
2102 
2103 		/*
2104 		 * Check to see whether the client device is attached.
2105 		 * If not, let the vHCI driver manually select a (standby)
2106 		 * path and let the probe/attach process continue.
2107 		 */
2108 		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
2109 			MDI_DEBUG(4, (MDI_NOTE, cdip,
2110 			    "devi is onlining ct = %p", (void *)ct));
2111 			MDI_CLIENT_UNLOCK(ct);
2112 			return (MDI_DEVI_ONLINING);
2113 		}
2114 	}
2115 
2116 	/*
2117 	 * Cache the client path list head.  If the head of the list is NULL,
2118 	 * return MDI_NOPATH.
2119 	 */
2120 	head = ct->ct_path_head;
2121 	if (head == NULL) {
2122 		MDI_CLIENT_UNLOCK(ct);
2123 		return (MDI_NOPATH);
2124 	}
2125 
2126 	/* Caller is specifying a specific pathinfo path by path_instance */
2127 	if (path_instance) {
2128 		/* search for pathinfo with correct path_instance */
2129 		for (pip = head;
2130 		    pip && (mdi_pi_get_path_instance(pip) != path_instance);
2131 		    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
2132 			;
2133 
2134 		/* If path can't be selected then MDI_NOPATH is returned. */
2135 		if (pip == NULL) {
2136 			MDI_CLIENT_UNLOCK(ct);
2137 			return (MDI_NOPATH);
2138 		}
2139 
2140 		/*
2141 		 * Verify state of path. When asked to select a specific
2142 		 * path_instance, we select the requested path in any
2143 		 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT.
2144 		 * We don't however select paths where the pHCI has detached.
2145 		 * NOTE: last pathinfo node of an opened client device may
2146 		 * exist in an OFFLINE state after the pHCI associated with
2147 		 * that path has detached (but pi_phci will be NULL if that
2148 		 * has occurred).
2149 		 */
2150 		MDI_PI_LOCK(pip);
2151 		if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) ||
2152 		    (MDI_PI(pip)->pi_phci == NULL)) {
2153 			MDI_PI_UNLOCK(pip);
2154 			MDI_CLIENT_UNLOCK(ct);
2155 			return (MDI_FAILURE);
2156 		}
2157 
2158 		/* Return MDI_BUSY if we have a transient condition */
2159 		if (MDI_PI_IS_TRANSIENT(pip)) {
2160 			MDI_PI_UNLOCK(pip);
2161 			MDI_CLIENT_UNLOCK(ct);
2162 			return (MDI_BUSY);
2163 		}
2164 
2165 		/*
2166 		 * Return the path in hold state. Caller should release the
2167 		 * lock by calling mdi_rele_path()
2168 		 */
2169 		MDI_PI_HOLD(pip);
2170 		MDI_PI_UNLOCK(pip);
2171 		*ret_pip = pip;
2172 		MDI_CLIENT_UNLOCK(ct);
2173 		return (MDI_SUCCESS);
2174 	}
2175 
2176 	/*
2177 	 * for non default behavior, bypass current
2178 	 * load balancing policy and always use LOAD_BALANCE_RR
2179 	 * except that the start point will be adjusted based
2180 	 * on the provided start_pip
2181 	 */
2182 	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
2183 
2184 	switch (lbp) {
2185 	case LOAD_BALANCE_NONE:
2186 		/*
2187 		 * Load balancing is None or Alternate path mode.
2188 		 * Start looking for an online mdi_pathinfo node starting from
2189 		 * the last known selected path.
2190 		 */
2191 		preferred = 1;
2192 		pip = (mdi_pathinfo_t *)ct->ct_path_last;
2193 		if (pip == NULL) {
2194 			pip = head;
2195 		}
2196 		start = pip;
2197 		do {
2198 			MDI_PI_LOCK(pip);
2199 			/*
2200 			 * No need to explicitly check if the path is disabled,
2201 			 * since we are checking for state == ONLINE and the
2202 			 * same variable is used for DISABLE/ENABLE information.
2203 			 */
2204 			if ((MDI_PI(pip)->pi_state  ==
2205 				MDI_PATHINFO_STATE_ONLINE) &&
2206 				preferred == MDI_PI(pip)->pi_preferred) {
2207 				/*
2208 				 * Return the path in hold state. Caller should
2209 				 * release the lock by calling mdi_rele_path()
2210 				 */
2211 				MDI_PI_HOLD(pip);
2212 				MDI_PI_UNLOCK(pip);
2213 				ct->ct_path_last = pip;
2214 				*ret_pip = pip;
2215 				MDI_CLIENT_UNLOCK(ct);
2216 				return (MDI_SUCCESS);
2217 			}
2218 
2219 			/*
2220 			 * Path is busy.
2221 			 */
2222 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2223 			    MDI_PI_IS_TRANSIENT(pip))
2224 				retry = 1;
2225 			/*
2226 			 * Keep looking for a next available online path
2227 			 */
2228 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2229 			if (next == NULL) {
2230 				next = head;
2231 			}
2232 			MDI_PI_UNLOCK(pip);
2233 			pip = next;
2234 			if (start == pip && preferred) {
2235 				preferred = 0;
2236 			} else if (start == pip && !preferred) {
2237 				cont = 0;
2238 			}
2239 		} while (cont);
2240 		break;
2241 
2242 	case LOAD_BALANCE_LBA:
2243 		/*
2244 		 * Make sure we are looking
2245 		 * for an online path. Otherwise, if it is for a STANDBY
2246 		 * path request, it will go through and fetch an ONLINE
2247 		 * path which is not desirable.
2248 		 */
2249 		if ((ct->ct_lb_args != NULL) &&
2250 			    (ct->ct_lb_args->region_size) && bp &&
2251 				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
2252 			if (i_mdi_lba_lb(ct, ret_pip, bp)
2253 				    == MDI_SUCCESS) {
2254 				MDI_CLIENT_UNLOCK(ct);
2255 				return (MDI_SUCCESS);
2256 			}
2257 		}
2258 		/* FALLTHROUGH */
2259 	case LOAD_BALANCE_RR:
2260 		/*
2261 		 * Load balancing is Round Robin. Start looking for an online
2262 		 * mdi_pathinfo node starting from the last known selected path
2263 		 * as the start point.  If override flags are specified,
2264 		 * process accordingly.
2265 		 * If the search is already in effect (start_pip not null),
2266 		 * then let's just use the same path preference to continue the
2267 		 * traversal.
2268 		 */
2269 
2270 		if (start_pip != NULL) {
2271 			preferred = MDI_PI(start_pip)->pi_preferred;
2272 		} else {
2273 			preferred = 1;
2274 		}
2275 
2276 		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
2277 		if (start == NULL) {
2278 			pip = head;
2279 		} else {
2280 			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
2281 			if (pip == NULL) {
2282 				if (flags & MDI_SELECT_NO_PREFERRED) {
2283 					/*
2284 					 * Return since we hit the end of list
2285 					 */
2286 					MDI_CLIENT_UNLOCK(ct);
2287 					return (MDI_NOPATH);
2288 				}
2289 
2290 				if (!sb) {
2291 					if (preferred == 0) {
2292 						/*
2293 						 * Looks like we have completed
2294 						 * the traversal as preferred
2295 						 * value is 0. Time to bail out.
2296 						 */
2297 						*ret_pip = NULL;
2298 						MDI_CLIENT_UNLOCK(ct);
2299 						return (MDI_NOPATH);
2300 					} else {
2301 						/*
2302 						 * Looks like we reached the
2303 						 * end of the list. Lets enable
2304 						 * traversal of non preferred
2305 						 * paths.
2306 						 */
2307 						preferred = 0;
2308 					}
2309 				}
2310 				pip = head;
2311 			}
2312 		}
2313 		start = pip;
2314 		do {
2315 			MDI_PI_LOCK(pip);
2316 			if (sb) {
2317 				cond = ((MDI_PI(pip)->pi_state ==
2318 				    MDI_PATHINFO_STATE_ONLINE &&
2319 					MDI_PI(pip)->pi_preferred ==
2320 						preferred) ? 1 : 0);
2321 			} else {
2322 				if (flags == MDI_SELECT_ONLINE_PATH) {
2323 					cond = ((MDI_PI(pip)->pi_state ==
2324 					    MDI_PATHINFO_STATE_ONLINE &&
2325 						MDI_PI(pip)->pi_preferred ==
2326 						preferred) ? 1 : 0);
2327 				} else if (flags == MDI_SELECT_STANDBY_PATH) {
2328 					cond = ((MDI_PI(pip)->pi_state ==
2329 					    MDI_PATHINFO_STATE_STANDBY &&
2330 						MDI_PI(pip)->pi_preferred ==
2331 						preferred) ? 1 : 0);
2332 				} else if (flags == (MDI_SELECT_ONLINE_PATH |
2333 				    MDI_SELECT_STANDBY_PATH)) {
2334 					cond = (((MDI_PI(pip)->pi_state ==
2335 					    MDI_PATHINFO_STATE_ONLINE ||
2336 					    (MDI_PI(pip)->pi_state ==
2337 					    MDI_PATHINFO_STATE_STANDBY)) &&
2338 						MDI_PI(pip)->pi_preferred ==
2339 						preferred) ? 1 : 0);
2340 				} else if (flags ==
2341 					(MDI_SELECT_STANDBY_PATH |
2342 					MDI_SELECT_ONLINE_PATH |
2343 					MDI_SELECT_USER_DISABLE_PATH)) {
2344 					cond = (((MDI_PI(pip)->pi_state ==
2345 					    MDI_PATHINFO_STATE_ONLINE ||
2346 					    (MDI_PI(pip)->pi_state ==
2347 					    MDI_PATHINFO_STATE_STANDBY) ||
2348 						(MDI_PI(pip)->pi_state ==
2349 					    (MDI_PATHINFO_STATE_ONLINE|
2350 					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
2351 						(MDI_PI(pip)->pi_state ==
2352 					    (MDI_PATHINFO_STATE_STANDBY |
2353 					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
2354 						MDI_PI(pip)->pi_preferred ==
2355 						preferred) ? 1 : 0);
2356 				} else if (flags ==
2357 				    (MDI_SELECT_STANDBY_PATH |
2358 				    MDI_SELECT_ONLINE_PATH |
2359 				    MDI_SELECT_NO_PREFERRED)) {
2360 					cond = (((MDI_PI(pip)->pi_state ==
2361 					    MDI_PATHINFO_STATE_ONLINE) ||
2362 					    (MDI_PI(pip)->pi_state ==
2363 					    MDI_PATHINFO_STATE_STANDBY))
2364 					    ? 1 : 0);
2365 				} else {
2366 					cond = 0;
2367 				}
2368 			}
2369 			/*
2370 			 * No need to explicitly check if the path is disabled,
2371 			 * since we are checking for state == ONLINE and the
2372 			 * same variable is used for DISABLE/ENABLE information.
2373 			 */
2374 			if (cond) {
2375 				/*
2376 				 * Return the path in hold state. Caller should
2377 				 * release the lock by calling mdi_rele_path()
2378 				 */
2379 				MDI_PI_HOLD(pip);
2380 				MDI_PI_UNLOCK(pip);
2381 				if (sb)
2382 					ct->ct_path_last = pip;
2383 				*ret_pip = pip;
2384 				MDI_CLIENT_UNLOCK(ct);
2385 				return (MDI_SUCCESS);
2386 			}
2387 			/*
2388 			 * Path is busy.
2389 			 */
2390 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2391 			    MDI_PI_IS_TRANSIENT(pip))
2392 				retry = 1;
2393 
2394 			/*
2395 			 * Keep looking for a next available online path
2396 			 */
2397 do_again:
2398 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2399 			if (next == NULL) {
2400 				if (flags & MDI_SELECT_NO_PREFERRED) {
2401 					/*
2402 					 * Bail out since we hit the end of list
2403 					 */
2404 					MDI_PI_UNLOCK(pip);
2405 					break;
2406 				}
2407 
2408 				if (!sb) {
2409 					if (preferred == 1) {
2410 						/*
2411 						 * Looks like we reached the
2412 						 * end of the list. Lets enable
2413 						 * traversal of non preferred
2414 						 * paths.
2415 						 */
2416 						preferred = 0;
2417 						next = head;
2418 					} else {
2419 						/*
2420 						 * We have done both the passes
2421 						 * Preferred as well as for
2422 						 * Non-preferred. Bail out now.
2423 						 */
2424 						cont = 0;
2425 					}
2426 				} else {
2427 					/*
2428 					 * Standard behavior case.
2429 					 */
2430 					next = head;
2431 				}
2432 			}
2433 			MDI_PI_UNLOCK(pip);
2434 			if (cont == 0) {
2435 				break;
2436 			}
2437 			pip = next;
2438 
2439 			if (!sb) {
2440 				/*
2441 				 * We need to handle the selection of
2442 				 * non-preferred path in the following
2443 				 * case:
2444 				 *
2445 				 * +------+   +------+   +------+   +-----+
2446 				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
2447 				 * +------+   +------+   +------+   +-----+
2448 				 *
2449 				 * If we start the search with B, we need to
2450 				 * skip beyond B to pick C which is non -
2451 				 * preferred in the second pass. The following
2452 				 * test, if true, will allow us to skip over
2453 				 * the 'start'(B in the example) to select
2454 				 * other non preferred elements.
2455 				 */
2456 				if ((start_pip != NULL) && (start_pip == pip) &&
2457 				    (MDI_PI(start_pip)->pi_preferred
2458 				    != preferred)) {
2459 					/*
2460 					 * try again after going past the start
2461 					 * pip
2462 					 */
2463 					MDI_PI_LOCK(pip);
2464 					goto do_again;
2465 				}
2466 			} else {
2467 				/*
2468 				 * Standard behavior case
2469 				 */
2470 				if (start == pip && preferred) {
2471 					/* look for nonpreferred paths */
2472 					preferred = 0;
2473 				} else if (start == pip && !preferred) {
2474 					/*
2475 					 * Exit condition
2476 					 */
2477 					cont = 0;
2478 				}
2479 			}
2480 		} while (cont);
2481 		break;
2482 	}
2483 
2484 	MDI_CLIENT_UNLOCK(ct);
2485 	if (retry == 1) {
2486 		return (MDI_BUSY);
2487 	} else {
2488 		return (MDI_NOPATH);
2489 	}
2490 }
2491 
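/*
 * Illustrative sketch, not part of the framework: default-behavior path
 * selection as a hypothetical vHCI I/O start routine might perform it.  The
 * name example_route_io and its arguments are placeholders; flags of 0 and a
 * NULL arg request the standard load-balancing behavior.
 */
static int
example_route_io(dev_info_t *example_cdip, struct buf *bp)
{
	mdi_pathinfo_t	*pip = NULL;
	int		rv;

	rv = mdi_select_path(example_cdip, bp, 0, NULL, &pip);
	if (rv != MDI_SUCCESS)
		return (rv);	/* MDI_BUSY, MDI_NOPATH, MDI_DEVI_ONLINING... */

	/* pip is returned held (pi_ref_cnt); the I/O would be issued here */
	cmn_err(CE_CONT, "?routing lblkno %llx via %s\n",
	    (unsigned long long)bp->b_lblkno, mdi_pi_spathname(pip));

	mdi_rele_path(pip);	/* drop the hold taken by mdi_select_path() */
	return (MDI_SUCCESS);
}
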
2492 /*
2493  * For a client, return the next available path to any phci
2494  *
2495  * Note:
2496  *		The caller should hold the branch's devinfo node to get a
2497  *		consistent snapshot of the mdi_pathinfo nodes.
2498  *
2499  *		Please note that even though the list is stable, the
2500  *		mdi_pathinfo node state and properties are volatile.  The
2501  *		caller should lock and unlock the nodes by calling mdi_pi_lock()
2502  *		and mdi_pi_unlock() to get a stable view of the properties.
2503  *
2504  *		If the nodes must be used beyond the period for which the
2505  *		devinfo node is held (e.g. for I/O), then each mdi_pathinfo
2506  *		node needs to be held against unexpected removal by calling
2507  *		mdi_hold_path() and released by calling mdi_rele_path() on
2508  *		completion.  A walking sketch follows mdi_get_next_client_path().
2509  */
2510 mdi_pathinfo_t *
2511 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2512 {
2513 	mdi_client_t *ct;
2514 
2515 	if (!MDI_CLIENT(ct_dip))
2516 		return (NULL);
2517 
2518 	/*
2519 	 * Walk through client link
2520 	 */
2521 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2522 	ASSERT(ct != NULL);
2523 
2524 	if (pip == NULL)
2525 		return ((mdi_pathinfo_t *)ct->ct_path_head);
2526 
2527 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2528 }
2529 
2530 /*
2531  * For a phci, return the next available path to any client
2532  * Note: ditto mdi_get_next_phci_path()
2533  */
2534 mdi_pathinfo_t *
2535 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2536 {
2537 	mdi_phci_t *ph;
2538 
2539 	if (!MDI_PHCI(ph_dip))
2540 		return (NULL);
2541 
2542 	/*
2543 	 * Walk through pHCI link
2544 	 */
2545 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2546 	ASSERT(ph != NULL);
2547 
2548 	if (pip == NULL)
2549 		return ((mdi_pathinfo_t *)ph->ph_path_head);
2550 
2551 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2552 }
2553 
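/*
 * Illustrative sketch, not part of the framework: walking every path of a
 * client while its devinfo branch is held, as the notes above describe.  The
 * name example_walk_client_paths and its argument are placeholders.  If a
 * node had to be used after ndi_devi_exit() (e.g. for I/O), it would also be
 * held with mdi_hold_path() and later dropped with mdi_rele_path().
 */
static void
example_walk_client_paths(dev_info_t *example_cdip)
{
	mdi_pathinfo_t	*pip = NULL;

	ndi_devi_enter(example_cdip);		/* stabilize the path list */
	while ((pip = mdi_get_next_phci_path(example_cdip, pip)) != NULL) {
		mdi_pi_lock(pip);	/* state/properties are volatile */
		cmn_err(CE_CONT, "?path %s, instance %d\n",
		    mdi_pi_spathname(pip), mdi_pi_get_path_instance(pip));
		mdi_pi_unlock(pip);
	}
	ndi_devi_exit(example_cdip);
}
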
2554 /*
2555  * mdi_hold_path():
2556  *		Hold the mdi_pathinfo node against an unwanted, unexpected free.
2557  * Return Values:
2558  *		None
2559  */
2560 void
2561 mdi_hold_path(mdi_pathinfo_t *pip)
2562 {
2563 	if (pip) {
2564 		MDI_PI_LOCK(pip);
2565 		MDI_PI_HOLD(pip);
2566 		MDI_PI_UNLOCK(pip);
2567 	}
2568 }
2569 
2570 
2571 /*
2572  * mdi_rele_path():
2573  *		Release the mdi_pathinfo node which was selected
2574  *		through mdi_select_path() mechanism or manually held by
2575  *		calling mdi_hold_path().
2576  * Return Values:
2577  *		None
2578  */
2579 void
2580 mdi_rele_path(mdi_pathinfo_t *pip)
2581 {
2582 	if (pip) {
2583 		MDI_PI_LOCK(pip);
2584 		MDI_PI_RELE(pip);
2585 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
2586 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2587 		}
2588 		MDI_PI_UNLOCK(pip);
2589 	}
2590 }
2591 
2592 /*
2593  * mdi_pi_lock():
2594  *		Lock the mdi_pathinfo node.
2595  * Note:
2596  *		The caller should release the lock by calling mdi_pi_unlock()
2597  */
2598 void
2599 mdi_pi_lock(mdi_pathinfo_t *pip)
2600 {
2601 	ASSERT(pip != NULL);
2602 	if (pip) {
2603 		MDI_PI_LOCK(pip);
2604 	}
2605 }
2606 
2607 
2608 /*
2609  * mdi_pi_unlock():
2610  *		Unlock the mdi_pathinfo node.
2611  * Note:
2612  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
2613  */
2614 void
2615 mdi_pi_unlock(mdi_pathinfo_t *pip)
2616 {
2617 	ASSERT(pip != NULL);
2618 	if (pip) {
2619 		MDI_PI_UNLOCK(pip);
2620 	}
2621 }
2622 
2623 /*
2624  * mdi_pi_find():
2625  *		Search the list of mdi_pathinfo nodes attached to the
2626  *		pHCI/Client device node whose path address matches "paddr".
2627  *		Returns a pointer to the mdi_pathinfo node if a matching node is
2628  *		found.
2629  * Return Values:
2630  *		mdi_pathinfo node handle
2631  *		NULL
2632  * Notes:
2633  *		Caller need not hold any locks to call this function.
2634  */
2635 mdi_pathinfo_t *
2636 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2637 {
2638 	mdi_phci_t		*ph;
2639 	mdi_vhci_t		*vh;
2640 	mdi_client_t		*ct;
2641 	mdi_pathinfo_t		*pip = NULL;
2642 
2643 	MDI_DEBUG(2, (MDI_NOTE, pdip,
2644 	    "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
2645 	if ((pdip == NULL) || (paddr == NULL)) {
2646 		return (NULL);
2647 	}
2648 	ph = i_devi_get_phci(pdip);
2649 	if (ph == NULL) {
2650 		/*
2651 		 * Invalid pHCI device, Nothing more to do.
2652 		 */
2653 		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
2654 		return (NULL);
2655 	}
2656 
2657 	vh = ph->ph_vhci;
2658 	if (vh == NULL) {
2659 		/*
2660 		 * Invalid vHCI device, Nothing more to do.
2661 		 */
2662 		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
2663 		return (NULL);
2664 	}
2665 
2666 	/*
2667 	 * Look for pathinfo node identified by paddr.
2668 	 */
2669 	if (caddr == NULL) {
2670 		/*
2671 		 * Find a mdi_pathinfo node under pHCI list for a matching
2672 		 * unit address.
2673 		 */
2674 		MDI_PHCI_LOCK(ph);
2675 		if (MDI_PHCI_IS_OFFLINE(ph)) {
2676 			MDI_DEBUG(2, (MDI_WARN, pdip,
2677 			    "offline phci %p", (void *)ph));
2678 			MDI_PHCI_UNLOCK(ph);
2679 			return (NULL);
2680 		}
2681 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
2682 
2683 		while (pip != NULL) {
2684 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2685 				break;
2686 			}
2687 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2688 		}
2689 		MDI_PHCI_UNLOCK(ph);
2690 		MDI_DEBUG(2, (MDI_NOTE, pdip,
2691 		    "found %s %p", mdi_pi_spathname(pip), (void *)pip));
2692 		return (pip);
2693 	}
2694 
2695 	/*
2696 	 * XXX - Is the rest of the code in this function really necessary?
2697 	 * The consumers of mdi_pi_find() can search for the desired pathinfo
2698 	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2699 	 * whether the search is based on the pathinfo nodes attached to
2700 	 * the pHCI or the client node, the result will be the same.
2701 	 */
2702 
2703 	/*
2704 	 * Find the client device corresponding to 'caddr'
2705 	 */
2706 	MDI_VHCI_CLIENT_LOCK(vh);
2707 
2708 	/*
2709 	 * XXX - Passing NULL to the following function works as long as
2710 	 * the client addresses (caddr) are unique on a per-vHCI basis.
2711 	 */
2712 	ct = i_mdi_client_find(vh, NULL, caddr);
2713 	if (ct == NULL) {
2714 		/*
2715 		 * Client not found; obviously the mdi_pathinfo node has not been
2716 		 * created yet.
2717 		 */
2718 		MDI_VHCI_CLIENT_UNLOCK(vh);
2719 		MDI_DEBUG(2, (MDI_NOTE, pdip,
2720 		    "client not found for caddr @%s", caddr ? caddr : ""));
2721 		return (NULL);
2722 	}
2723 
2724 	/*
2725 	 * Hold the client lock and look for a mdi_pathinfo node with matching
2726 	 * pHCI and paddr
2727 	 */
2728 	MDI_CLIENT_LOCK(ct);
2729 
2730 	/*
2731 	 * Release the global mutex as it is no longer needed. Note: We always
2732 	 * respect the locking order while acquiring.
2733 	 */
2734 	MDI_VHCI_CLIENT_UNLOCK(vh);
2735 
2736 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2737 	while (pip != NULL) {
2738 		/*
2739 		 * Compare the unit address
2740 		 */
2741 		if ((MDI_PI(pip)->pi_phci == ph) &&
2742 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2743 			break;
2744 		}
2745 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2746 	}
2747 	MDI_CLIENT_UNLOCK(ct);
2748 	MDI_DEBUG(2, (MDI_NOTE, pdip,
2749 	    "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
2750 	return (pip);
2751 }
2752 
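/*
 * Illustrative sketch, not part of the framework: a hypothetical pHCI driver
 * checking whether a just-discovered path already has a pathinfo node before
 * allocating one.  example_path_exists and its arguments are placeholders;
 * passing caddr == NULL searches the pHCI's path list by unit address alone.
 */
static boolean_t
example_path_exists(dev_info_t *example_pdip, char *unit_address)
{
	return (mdi_pi_find(example_pdip, NULL, unit_address) != NULL);
}
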
2753 /*
2754  * mdi_pi_alloc():
2755  *		Allocate and initialize a new instance of a mdi_pathinfo node.
2756  *		The mdi_pathinfo node returned by this function identifies a
2757  *		unique device path, is capable of having properties attached,
2758  *		and can be passed to mdi_pi_online() to fully attach and online
2759  *		the path and client device node.
2760  *		The mdi_pathinfo node returned by this function must be
2761  *		destroyed using mdi_pi_free() if the path is no longer
2762  *		operational or if the caller fails to attach a client device
2763  *		node when calling mdi_pi_online(). The framework will not free
2764  *		the resources allocated.
2765  *		This function can be called from both interrupt and kernel
2766  *		contexts.  DDI_NOSLEEP flag should be used while calling
2767  *		from interrupt contexts.
2768  * Return Values:
2769  *		MDI_SUCCESS
2770  *		MDI_FAILURE
2771  *		MDI_NOMEM
2772  */
2773 /*ARGSUSED*/
2774 int
2775 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2776     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2777 {
2778 	mdi_vhci_t	*vh;
2779 	mdi_phci_t	*ph;
2780 	mdi_client_t	*ct;
2781 	mdi_pathinfo_t	*pip = NULL;
2782 	dev_info_t	*cdip;
2783 	int		rv = MDI_NOMEM;
2784 	int		path_allocated = 0;
2785 
2786 	MDI_DEBUG(2, (MDI_NOTE, pdip,
2787 	    "cname %s: caddr@%s paddr@%s",
2788 	    cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));
2789 
2790 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2791 	    ret_pip == NULL) {
2792 		/* Nothing more to do */
2793 		return (MDI_FAILURE);
2794 	}
2795 
2796 	*ret_pip = NULL;
2797 
2798 	/* No allocations on detaching pHCI */
2799 	if (DEVI_IS_DETACHING(pdip)) {
2800 		/* Invalid pHCI device, return failure */
2801 		MDI_DEBUG(1, (MDI_WARN, pdip,
2802 		    "!detaching pHCI=%p", (void *)pdip));
2803 		return (MDI_FAILURE);
2804 	}
2805 
2806 	ph = i_devi_get_phci(pdip);
2807 	ASSERT(ph != NULL);
2808 	if (ph == NULL) {
2809 		/* Invalid pHCI device, return failure */
2810 		MDI_DEBUG(1, (MDI_WARN, pdip,
2811 		    "!invalid pHCI=%p", (void *)pdip));
2812 		return (MDI_FAILURE);
2813 	}
2814 
2815 	MDI_PHCI_LOCK(ph);
2816 	vh = ph->ph_vhci;
2817 	if (vh == NULL) {
2818 		/* Invalid vHCI device, return failure */
2819 		MDI_DEBUG(1, (MDI_WARN, pdip,
2820 		    "!invalid vHCI=%p", (void *)pdip));
2821 		MDI_PHCI_UNLOCK(ph);
2822 		return (MDI_FAILURE);
2823 	}
2824 
2825 	if (MDI_PHCI_IS_READY(ph) == 0) {
2826 		/*
2827 		 * Do not allow new node creation when pHCI is in
2828 		 * offline/suspended states
2829 		 */
2830 		MDI_DEBUG(1, (MDI_WARN, pdip,
2831 		    "pHCI=%p is not ready", (void *)ph));
2832 		MDI_PHCI_UNLOCK(ph);
2833 		return (MDI_BUSY);
2834 	}
2835 	MDI_PHCI_UNSTABLE(ph);
2836 	MDI_PHCI_UNLOCK(ph);
2837 
2838 	/* look for a matching client, create one if not found */
2839 	MDI_VHCI_CLIENT_LOCK(vh);
2840 	ct = i_mdi_client_find(vh, cname, caddr);
2841 	if (ct == NULL) {
2842 		ct = i_mdi_client_alloc(vh, cname, caddr);
2843 		ASSERT(ct != NULL);
2844 	}
2845 
2846 	if (ct->ct_dip == NULL) {
2847 		/*
2848 		 * Allocate a devinfo node
2849 		 */
2850 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2851 		    compatible, ncompatible);
2852 		if (ct->ct_dip == NULL) {
2853 			(void) i_mdi_client_free(vh, ct);
2854 			goto fail;
2855 		}
2856 	}
2857 	cdip = ct->ct_dip;
2858 
2859 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2860 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2861 
2862 	MDI_CLIENT_LOCK(ct);
2863 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
2864 	while (pip != NULL) {
2865 		/*
2866 		 * Compare the unit address
2867 		 */
2868 		if ((MDI_PI(pip)->pi_phci == ph) &&
2869 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2870 			break;
2871 		}
2872 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2873 	}
2874 	MDI_CLIENT_UNLOCK(ct);
2875 
2876 	if (pip == NULL) {
2877 		/*
2878 		 * This is a new path for this client device.  Allocate and
2879 		 * initialize a new pathinfo node
2880 		 */
2881 		pip = i_mdi_pi_alloc(ph, paddr, ct);
2882 		ASSERT(pip != NULL);
2883 		path_allocated = 1;
2884 	}
2885 	rv = MDI_SUCCESS;
2886 
2887 fail:
2888 	/*
2889 	 * Release the global mutex.
2890 	 */
2891 	MDI_VHCI_CLIENT_UNLOCK(vh);
2892 
2893 	/*
2894 	 * Mark the pHCI as stable
2895 	 */
2896 	MDI_PHCI_LOCK(ph);
2897 	MDI_PHCI_STABLE(ph);
2898 	MDI_PHCI_UNLOCK(ph);
2899 	*ret_pip = pip;
2900 
2901 	MDI_DEBUG(2, (MDI_NOTE, pdip,
2902 	    "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));
2903 
2904 	if (path_allocated)
2905 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2906 
2907 	return (rv);
2908 }
2909 
2910 /*ARGSUSED*/
2911 int
2912 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2913     int flags, mdi_pathinfo_t **ret_pip)
2914 {
2915 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2916 	    flags, ret_pip));
2917 }
2918 
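/*
 * Illustrative sketch, not part of the framework: typical path discovery in a
 * hypothetical pHCI driver, allocating a pathinfo node for a newly found
 * device and onlining it.  example_add_path and its arguments are
 * placeholders; cname/caddr name the client and paddr is the unit address of
 * the path on this pHCI.
 */
static int
example_add_path(dev_info_t *example_pdip, char *cname, char *caddr,
    char *paddr)
{
	mdi_pathinfo_t	*pip = NULL;
	int		rv;

	rv = mdi_pi_alloc(example_pdip, cname, caddr, paddr, 0, &pip);
	if (rv != MDI_SUCCESS)
		return (rv);

	/* path properties could be attached to pip before onlining */
	rv = mdi_pi_online(pip, 0);
	if (rv != MDI_SUCCESS)
		(void) mdi_pi_free(pip, 0);	/* we must free what we allocated */
	return (rv);
}
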
2919 /*
2920  * i_mdi_pi_alloc():
2921  *		Allocate a mdi_pathinfo node and add to the pHCI path list
2922  * Return Values:
2923  *		mdi_pathinfo
2924  */
2925 /*ARGSUSED*/
2926 static mdi_pathinfo_t *
2927 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2928 {
2929 	mdi_pathinfo_t	*pip;
2930 	static char	path[MAXPATHLEN];	/* mdi_pathmap_mutex protects */
2931 	char		*path_persistent;
2932 	int		path_instance;
2933 	mod_hash_val_t	hv;
2934 
2935 	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
2936 
2937 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2938 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2939 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2940 	    MDI_PATHINFO_STATE_TRANSIENT;
2941 
2942 	if (MDI_PHCI_IS_USER_DISABLED(ph))
2943 		MDI_PI_SET_USER_DISABLE(pip);
2944 
2945 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2946 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2947 
2948 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
2949 		MDI_PI_SET_DRV_DISABLE(pip);
2950 
2951 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2952 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2953 	MDI_PI(pip)->pi_client = ct;
2954 	MDI_PI(pip)->pi_phci = ph;
2955 	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2956 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2957 
2958         /*
2959 	 * We form the "path" to the pathinfo node, and see if we have
2960 	 * already allocated a 'path_instance' for that "path".  If so,
2961 	 * we use the already allocated 'path_instance'.  If not, we
2962 	 * allocate a new 'path_instance' and associate it with a copy of
2963 	 * the "path" string (which is never freed). The association
2964 	 * between a 'path_instance' and this "path" string persists until
2965 	 * reboot.
2966 	 */
2967         mutex_enter(&mdi_pathmap_mutex);
2968 	(void) ddi_pathname(ph->ph_dip, path);
2969 	(void) sprintf(path + strlen(path), "/%s@%s",
2970 	    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2971         if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
2972                 path_instance = (uint_t)(intptr_t)hv;
2973         } else {
2974 		/* allocate a new 'path_instance' and persistent "path" */
2975 		path_instance = mdi_pathmap_instance++;
2976 		path_persistent = i_ddi_strdup(path, KM_SLEEP);
2977                 (void) mod_hash_insert(mdi_pathmap_bypath,
2978                     (mod_hash_key_t)path_persistent,
2979                     (mod_hash_val_t)(intptr_t)path_instance);
2980 		(void) mod_hash_insert(mdi_pathmap_byinstance,
2981 		    (mod_hash_key_t)(intptr_t)path_instance,
2982 		    (mod_hash_val_t)path_persistent);
2983 
2984 		/* create shortpath name */
2985 		(void) snprintf(path, sizeof(path), "%s%d/%s@%s",
2986 		    ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
2987 		    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2988 		path_persistent = i_ddi_strdup(path, KM_SLEEP);
2989 		(void) mod_hash_insert(mdi_pathmap_sbyinstance,
2990 		    (mod_hash_key_t)(intptr_t)path_instance,
2991 		    (mod_hash_val_t)path_persistent);
2992         }
2993         mutex_exit(&mdi_pathmap_mutex);
2994 	MDI_PI(pip)->pi_path_instance = path_instance;
2995 
2996 	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
2997 	ASSERT(MDI_PI(pip)->pi_prop != NULL);
2998 	MDI_PI(pip)->pi_pprivate = NULL;
2999 	MDI_PI(pip)->pi_cprivate = NULL;
3000 	MDI_PI(pip)->pi_vprivate = NULL;
3001 	MDI_PI(pip)->pi_client_link = NULL;
3002 	MDI_PI(pip)->pi_phci_link = NULL;
3003 	MDI_PI(pip)->pi_ref_cnt = 0;
3004 	MDI_PI(pip)->pi_kstats = NULL;
3005 	MDI_PI(pip)->pi_preferred = 1;
3006 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
3007 
3008 	/*
3009 	 * Lock both dev_info nodes against changes in parallel.
3010 	 *
3011 	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
3012 	 * This atypical operation is done to synchronize pathinfo nodes
3013 	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
3014 	 * the pathinfo nodes are children of the Client.
3015 	 */
3016 	ndi_devi_enter(ct->ct_dip);
3017 	ndi_devi_enter(ph->ph_dip);
3018 
3019 	i_mdi_phci_add_path(ph, pip);
3020 	i_mdi_client_add_path(ct, pip);
3021 
3022 	ndi_devi_exit(ph->ph_dip);
3023 	ndi_devi_exit(ct->ct_dip);
3024 
3025 	return (pip);
3026 }
3027 
3028 /*
3029  * mdi_pi_pathname_by_instance():
3030  *	Lookup of "path" by 'path_instance'. Return "path".
3031  *	NOTE: returned "path" remains valid forever (until reboot).
3032  */
3033 char *
3034 mdi_pi_pathname_by_instance(int path_instance)
3035 {
3036 	char		*path;
3037 	mod_hash_val_t	hv;
3038 
3039 	/* mdi_pathmap lookup of "path" by 'path_instance' */
3040 	mutex_enter(&mdi_pathmap_mutex);
3041 	if (mod_hash_find(mdi_pathmap_byinstance,
3042 	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3043 		path = (char *)hv;
3044 	else
3045 		path = NULL;
3046 	mutex_exit(&mdi_pathmap_mutex);
3047 	return (path);
3048 }
3049 
3050 /*
3051  * mdi_pi_spathname_by_instance():
3052  *	Lookup of "shortpath" by 'path_instance'. Return "shortpath".
3053  *	NOTE: returned "shortpath" remains valid forever (until reboot).
3054  */
3055 char *
3056 mdi_pi_spathname_by_instance(int path_instance)
3057 {
3058 	char		*path;
3059 	mod_hash_val_t	hv;
3060 
3061 	/* mdi_pathmap lookup of "path" by 'path_instance' */
3062 	mutex_enter(&mdi_pathmap_mutex);
3063 	if (mod_hash_find(mdi_pathmap_sbyinstance,
3064 	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3065 		path = (char *)hv;
3066 	else
3067 		path = NULL;
3068 	mutex_exit(&mdi_pathmap_mutex);
3069 	return (path);
3070 }
3071 
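/*
 * Illustrative sketch, not part of the framework: translating a path
 * instance (e.g. one previously obtained from mdi_pi_get_path_instance())
 * back into its full and short path names.  example_print_path_names is a
 * placeholder; both returned strings remain valid until reboot.
 */
static void
example_print_path_names(int path_instance)
{
	char	*full = mdi_pi_pathname_by_instance(path_instance);
	char	*shrt = mdi_pi_spathname_by_instance(path_instance);

	if (full != NULL && shrt != NULL)
		cmn_err(CE_CONT, "?instance %d: %s (%s)\n",
		    path_instance, full, shrt);
}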
3072 
3073 /*
3074  * i_mdi_phci_add_path():
3075  *		Add a mdi_pathinfo node to pHCI list.
3076  * Notes:
3077  *		Caller should hold per-pHCI mutex
3078  */
3079 static void
3080 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3081 {
3082 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3083 
3084 	MDI_PHCI_LOCK(ph);
3085 	if (ph->ph_path_head == NULL) {
3086 		ph->ph_path_head = pip;
3087 	} else {
3088 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
3089 	}
3090 	ph->ph_path_tail = pip;
3091 	ph->ph_path_count++;
3092 	MDI_PHCI_UNLOCK(ph);
3093 }
3094 
3095 /*
3096  * i_mdi_client_add_path():
3097  *		Add mdi_pathinfo node to client list
3098  */
3099 static void
3100 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3101 {
3102 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3103 
3104 	MDI_CLIENT_LOCK(ct);
3105 	if (ct->ct_path_head == NULL) {
3106 		ct->ct_path_head = pip;
3107 	} else {
3108 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
3109 	}
3110 	ct->ct_path_tail = pip;
3111 	ct->ct_path_count++;
3112 	MDI_CLIENT_UNLOCK(ct);
3113 }
3114 
3115 /*
3116  * mdi_pi_free():
3117  *		Free the mdi_pathinfo node, and also the client device node if
3118  *		this is the last path to the device.
3119  * Return Values:
3120  *		MDI_SUCCESS
3121  *		MDI_FAILURE
3122  *		MDI_BUSY
3123  */
3124 /*ARGSUSED*/
3125 int
3126 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
3127 {
3128 	int		rv;
3129 	mdi_vhci_t	*vh;
3130 	mdi_phci_t	*ph;
3131 	mdi_client_t	*ct;
3132 	int		(*f)();
3133 	int		client_held = 0;
3134 
3135 	MDI_PI_LOCK(pip);
3136 	ph = MDI_PI(pip)->pi_phci;
3137 	ASSERT(ph != NULL);
3138 	if (ph == NULL) {
3139 		/*
3140 		 * Invalid pHCI device, return failure
3141 		 */
3142 		MDI_DEBUG(1, (MDI_WARN, NULL,
3143 		    "!invalid pHCI: pip %s %p",
3144 		    mdi_pi_spathname(pip), (void *)pip));
3145 		MDI_PI_UNLOCK(pip);
3146 		return (MDI_FAILURE);
3147 	}
3148 
3149 	vh = ph->ph_vhci;
3150 	ASSERT(vh != NULL);
3151 	if (vh == NULL) {
3152 		/* Invalid pHCI device, return failure */
3153 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3154 		    "!invalid vHCI: pip %s %p",
3155 		    mdi_pi_spathname(pip), (void *)pip));
3156 		MDI_PI_UNLOCK(pip);
3157 		return (MDI_FAILURE);
3158 	}
3159 
3160 	ct = MDI_PI(pip)->pi_client;
3161 	ASSERT(ct != NULL);
3162 	if (ct == NULL) {
3163 		/*
3164 		 * Invalid Client device, return failure
3165 		 */
3166 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3167 		    "!invalid client: pip %s %p",
3168 		    mdi_pi_spathname(pip), (void *)pip));
3169 		MDI_PI_UNLOCK(pip);
3170 		return (MDI_FAILURE);
3171 	}
3172 
3173 	/*
3174 	 * Check for a busy condition.  A mdi_pathinfo can only be freed
3175 	 * if the node state is either offline or init and the reference count
3176 	 * is zero.
3177 	 */
3178 	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
3179 	    MDI_PI_IS_INITING(pip))) {
3180 		/*
3181 		 * Node is busy
3182 		 */
3183 		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3184 		    "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
3185 		MDI_PI_UNLOCK(pip);
3186 		return (MDI_BUSY);
3187 	}
3188 
3189 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3190 		/*
3191 		 * Give a chance for pending I/Os to complete.
3192 		 */
3193 		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3194 		    "!%d cmds still pending on path: %s %p",
3195 		    MDI_PI(pip)->pi_ref_cnt,
3196 		    mdi_pi_spathname(pip), (void *)pip));
3197 		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3198 		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3199 		    TR_CLOCK_TICK) == -1) {
3200 			/*
3201 			 * The timeout was reached without being signaled
3202 			 * that ref_cnt had dropped to zero.
3203 			 */
3204 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3205 			    "!Timeout reached on path %s %p without the cond",
3206 			    mdi_pi_spathname(pip), (void *)pip));
3207 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3208 			    "!%d cmds still pending on path %s %p",
3209 			    MDI_PI(pip)->pi_ref_cnt,
3210 			    mdi_pi_spathname(pip), (void *)pip));
3211 			MDI_PI_UNLOCK(pip);
3212 			return (MDI_BUSY);
3213 		}
3214 	}
3215 	if (MDI_PI(pip)->pi_pm_held) {
3216 		client_held = 1;
3217 	}
3218 	MDI_PI_UNLOCK(pip);
3219 
3220 	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
3221 
3222 	MDI_CLIENT_LOCK(ct);
3223 
3224 	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
3225 	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
3226 
3227 	/*
3228 	 * Wait till failover is complete before removing this node.
3229 	 */
3230 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3231 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3232 
3233 	MDI_CLIENT_UNLOCK(ct);
3234 	MDI_VHCI_CLIENT_LOCK(vh);
3235 	MDI_CLIENT_LOCK(ct);
3236 	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
3237 
3238 	rv = MDI_SUCCESS;
3239 	if (!MDI_PI_IS_INITING(pip)) {
3240 		f = vh->vh_ops->vo_pi_uninit;
3241 		if (f != NULL) {
3242 			rv = (*f)(vh->vh_dip, pip, 0);
3243 		}
3244 	}
3245 
3246 	/*
3247 	 * Proceed with freeing only if vo_pi_uninit() completed successfully.
3248 	 */
3249 	if (rv == MDI_SUCCESS) {
3250 		if (client_held) {
3251 			MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3252 			    "i_mdi_pm_rele_client\n"));
3253 			i_mdi_pm_rele_client(ct, 1);
3254 		}
3255 		i_mdi_pi_free(ph, pip, ct);
3256 		if (ct->ct_path_count == 0) {
3257 			/*
3258 			 * Client lost its last path.
3259 			 * Clean up the client device
3260 			 */
3261 			MDI_CLIENT_UNLOCK(ct);
3262 			(void) i_mdi_client_free(ct->ct_vhci, ct);
3263 			MDI_VHCI_CLIENT_UNLOCK(vh);
3264 			return (rv);
3265 		}
3266 	}
3267 	MDI_CLIENT_UNLOCK(ct);
3268 	MDI_VHCI_CLIENT_UNLOCK(vh);
3269 
3270 	if (rv == MDI_FAILURE)
3271 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
3272 
3273 	return (rv);
3274 }
3275 
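/*
 * Illustrative sketch, not part of the framework: a hypothetical pHCI driver
 * retiring a path it previously allocated.  example_remove_path is a
 * placeholder; the node must reach OFFLINE (or remain in INIT) with no
 * outstanding references before mdi_pi_free() will succeed, otherwise
 * MDI_BUSY is returned and the caller may retry later.
 */
static int
example_remove_path(mdi_pathinfo_t *pip)
{
	int	rv;

	rv = mdi_pi_offline(pip, 0);
	if (rv != MDI_SUCCESS)
		return (rv);

	/* also frees the client devinfo node if this was its last path */
	return (mdi_pi_free(pip, 0));
}
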
3276 /*
3277  * i_mdi_pi_free():
3278  *		Free the mdi_pathinfo node
3279  */
3280 static void
3281 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
3282 {
3283 	ASSERT(MDI_CLIENT_LOCKED(ct));
3284 
3285 	/*
3286 	 * remove any per-path kstats
3287 	 */
3288 	i_mdi_pi_kstat_destroy(pip);
3289 
3290 	/* See comments in i_mdi_pi_alloc() */
3291 	ndi_devi_enter(ct->ct_dip);
3292 	ndi_devi_enter(ph->ph_dip);
3293 
3294 	i_mdi_client_remove_path(ct, pip);
3295 	i_mdi_phci_remove_path(ph, pip);
3296 
3297 	ndi_devi_exit(ph->ph_dip);
3298 	ndi_devi_exit(ct->ct_dip);
3299 
3300 	mutex_destroy(&MDI_PI(pip)->pi_mutex);
3301 	cv_destroy(&MDI_PI(pip)->pi_state_cv);
3302 	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
3303 	if (MDI_PI(pip)->pi_addr) {
3304 		kmem_free(MDI_PI(pip)->pi_addr,
3305 		    strlen(MDI_PI(pip)->pi_addr) + 1);
3306 		MDI_PI(pip)->pi_addr = NULL;
3307 	}
3308 
3309 	if (MDI_PI(pip)->pi_prop) {
3310 		(void) nvlist_free(MDI_PI(pip)->pi_prop);
3311 		MDI_PI(pip)->pi_prop = NULL;
3312 	}
3313 	kmem_free(pip, sizeof (struct mdi_pathinfo));
3314 }
3315 
3316 
3317 /*
3318  * i_mdi_phci_remove_path():
3319  *		Remove a mdi_pathinfo node from pHCI list.
3320  * Notes:
3321  *		Caller should hold per-pHCI mutex
3322  */
3323 static void
3324 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3325 {
3326 	mdi_pathinfo_t	*prev = NULL;
3327 	mdi_pathinfo_t	*path = NULL;
3328 
3329 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3330 
3331 	MDI_PHCI_LOCK(ph);
3332 	path = ph->ph_path_head;
3333 	while (path != NULL) {
3334 		if (path == pip) {
3335 			break;
3336 		}
3337 		prev = path;
3338 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3339 	}
3340 
3341 	if (path) {
3342 		ph->ph_path_count--;
3343 		if (prev) {
3344 			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3345 		} else {
3346 			ph->ph_path_head =
3347 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3348 		}
3349 		if (ph->ph_path_tail == path) {
3350 			ph->ph_path_tail = prev;
3351 		}
3352 	}
3353 
3354 	/*
3355 	 * Clear the pHCI link
3356 	 */
3357 	MDI_PI(pip)->pi_phci_link = NULL;
3358 	MDI_PI(pip)->pi_phci = NULL;
3359 	MDI_PHCI_UNLOCK(ph);
3360 }
3361 
3362 /*
3363  * i_mdi_client_remove_path():
3364  *		Remove a mdi_pathinfo node from client path list.
3365  */
3366 static void
3367 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3368 {
3369 	mdi_pathinfo_t	*prev = NULL;
3370 	mdi_pathinfo_t	*path;
3371 
3372 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3373 
3374 	ASSERT(MDI_CLIENT_LOCKED(ct));
3375 	path = ct->ct_path_head;
3376 	while (path != NULL) {
3377 		if (path == pip) {
3378 			break;
3379 		}
3380 		prev = path;
3381 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3382 	}
3383 
3384 	if (path) {
3385 		ct->ct_path_count--;
3386 		if (prev) {
3387 			MDI_PI(prev)->pi_client_link =
3388 			    MDI_PI(path)->pi_client_link;
3389 		} else {
3390 			ct->ct_path_head =
3391 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3392 		}
3393 		if (ct->ct_path_tail == path) {
3394 			ct->ct_path_tail = prev;
3395 		}
3396 		if (ct->ct_path_last == path) {
3397 			ct->ct_path_last = ct->ct_path_head;
3398 		}
3399 	}
3400 	MDI_PI(pip)->pi_client_link = NULL;
3401 	MDI_PI(pip)->pi_client = NULL;
3402 }
3403 
3404 /*
3405  * i_mdi_pi_state_change():
3406  *		change the state of a mdi_pathinfo node (online, standby,
3407  *		fault or offline)
3407  *
3408  * Return Values:
3409  *		MDI_SUCCESS
3410  *		MDI_FAILURE
3411  */
3412 /*ARGSUSED*/
3413 static int
3414 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
3415 {
3416 	int		rv = MDI_SUCCESS;
3417 	mdi_vhci_t	*vh;
3418 	mdi_phci_t	*ph;
3419 	mdi_client_t	*ct;
3420 	int		(*f)();
3421 	dev_info_t	*cdip;
3422 
3423 	MDI_PI_LOCK(pip);
3424 
3425 	ph = MDI_PI(pip)->pi_phci;
3426 	ASSERT(ph);
3427 	if (ph == NULL) {
3428 		/*
3429 		 * Invalid pHCI device, fail the request
3430 		 */
3431 		MDI_PI_UNLOCK(pip);
3432 		MDI_DEBUG(1, (MDI_WARN, NULL,
3433 		    "!invalid phci: pip %s %p",
3434 		    mdi_pi_spathname(pip), (void *)pip));
3435 		return (MDI_FAILURE);
3436 	}
3437 
3438 	vh = ph->ph_vhci;
3439 	ASSERT(vh);
3440 	if (vh == NULL) {
3441 		/*
3442 		 * Invalid vHCI device, fail the request
3443 		 */
3444 		MDI_PI_UNLOCK(pip);
3445 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3446 		    "!invalid vhci: pip %s %p",
3447 		    mdi_pi_spathname(pip), (void *)pip));
3448 		return (MDI_FAILURE);
3449 	}
3450 
3451 	ct = MDI_PI(pip)->pi_client;
3452 	ASSERT(ct != NULL);
3453 	if (ct == NULL) {
3454 		/*
3455 		 * Invalid client device, fail the request
3456 		 */
3457 		MDI_PI_UNLOCK(pip);
3458 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3459 		    "!invalid client: pip %s %p",
3460 		    mdi_pi_spathname(pip), (void *)pip));
3461 		return (MDI_FAILURE);
3462 	}
3463 
3464 	/*
3465 	 * If this path has not been initialized yet, call back into the vHCI
3466 	 * driver's pathinfo node initialization entry point (vo_pi_init).
3467 	 */
3468 
3469 	if (MDI_PI_IS_INITING(pip)) {
3470 		MDI_PI_UNLOCK(pip);
3471 		f = vh->vh_ops->vo_pi_init;
3472 		if (f != NULL) {
3473 			rv = (*f)(vh->vh_dip, pip, 0);
3474 			if (rv != MDI_SUCCESS) {
3475 				MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3476 				    "!vo_pi_init failed: vHCI %p, pip %s %p",
3477 				    (void *)vh, mdi_pi_spathname(pip),
3478 				    (void *)pip));
3479 				return (MDI_FAILURE);
3480 			}
3481 		}
3482 		MDI_PI_LOCK(pip);
3483 		MDI_PI_CLEAR_TRANSIENT(pip);
3484 	}
3485 
3486 	/*
3487 	 * Do not allow state transition when pHCI is in offline/suspended
3488 	 * states
3489 	 */
3490 	i_mdi_phci_lock(ph, pip);
3491 	if (MDI_PHCI_IS_READY(ph) == 0) {
3492 		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3493 		    "!pHCI not ready, pHCI=%p", (void *)ph));
3494 		MDI_PI_UNLOCK(pip);
3495 		i_mdi_phci_unlock(ph);
3496 		return (MDI_BUSY);
3497 	}
3498 	MDI_PHCI_UNSTABLE(ph);
3499 	i_mdi_phci_unlock(ph);
3500 
3501 	/*
3502 	 * Check if the mdi_pathinfo node is in a transient state.
3503 	 * If so, offlining is in progress; wait until the transient state is
3504 	 * cleared.
3505 	 */
3506 	if (MDI_PI_IS_TRANSIENT(pip)) {
3507 		while (MDI_PI_IS_TRANSIENT(pip)) {
3508 			cv_wait(&MDI_PI(pip)->pi_state_cv,
3509 			    &MDI_PI(pip)->pi_mutex);
3510 		}
3511 	}
3512 
3513 	/*
3514 	 * Grab the client lock in reverse order sequence and release the
3515 	 * mdi_pathinfo mutex.
3516 	 */
3517 	i_mdi_client_lock(ct, pip);
3518 	MDI_PI_UNLOCK(pip);
3519 
3520 	/*
3521 	 * Wait till failover state is cleared
3522 	 */
3523 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3524 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3525 
3526 	/*
3527 	 * Mark the mdi_pathinfo node state as transient
3528 	 */
3529 	MDI_PI_LOCK(pip);
3530 	switch (state) {
3531 	case MDI_PATHINFO_STATE_ONLINE:
3532 		MDI_PI_SET_ONLINING(pip);
3533 		break;
3534 
3535 	case MDI_PATHINFO_STATE_STANDBY:
3536 		MDI_PI_SET_STANDBYING(pip);
3537 		break;
3538 
3539 	case MDI_PATHINFO_STATE_FAULT:
3540 		/*
3541 		 * Mark the pathinfo state as FAULTED
3542 		 */
3543 		MDI_PI_SET_FAULTING(pip);
3544 		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
3545 		break;
3546 
3547 	case MDI_PATHINFO_STATE_OFFLINE:
3548 		/*
3549 		 * ndi_devi_offline() cannot hold pip or ct locks.
3550 		 */
3551 		MDI_PI_UNLOCK(pip);
3552 
3553 		/*
3554 		 * If this is a user initiated path online->offline operation
3555 		 * whose success would transition a client from DEGRADED to
3556 		 * FAILED, then only proceed if we can offline the client first.
3557 		 */
3558 		cdip = ct->ct_dip;
3559 		if ((flag & NDI_USER_REQ) &&
3560 		    MDI_PI_IS_ONLINE(pip) &&
3561 		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
3562 			i_mdi_client_unlock(ct);
3563 			rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
3564 			if (rv != NDI_SUCCESS) {
3565 				/*
3566 				 * Convert to MDI error code
3567 				 */
3568 				switch (rv) {
3569 				case NDI_BUSY:
3570 					rv = MDI_BUSY;
3571 					break;
3572 				default:
3573 					rv = MDI_FAILURE;
3574 					break;
3575 				}
3576 				goto state_change_exit;
3577 			} else {
3578 				i_mdi_client_lock(ct, NULL);
3579 			}
3580 		}
3581 		/*
3582 		 * Mark the mdi_pathinfo node state as transient
3583 		 */
3584 		MDI_PI_LOCK(pip);
3585 		MDI_PI_SET_OFFLINING(pip);
3586 		break;
3587 
3588 	case MDI_PATHINFO_STATE_INIT:
3589 		/*
3590 		 * Callers are not allowed to ask us to change the state to the
3591 		 * initial state.
3592 		 */
3593 		rv = MDI_FAILURE;
3594 		MDI_PI_UNLOCK(pip);
3595 		goto state_change_exit;
3596 
3597 	}
3598 	MDI_PI_UNLOCK(pip);
3599 	MDI_CLIENT_UNSTABLE(ct);
3600 	i_mdi_client_unlock(ct);
3601 
3602 	f = vh->vh_ops->vo_pi_state_change;
3603 	if (f != NULL)
3604 		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
3605 
3606 	MDI_CLIENT_LOCK(ct);
3607 	MDI_PI_LOCK(pip);
3608 	if (rv == MDI_NOT_SUPPORTED) {
3609 		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
3610 	}
3611 	if (rv != MDI_SUCCESS) {
3612 		MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
3613 		    "vo_pi_state_change failed: rv %x", rv));
3614 	}
3615 	if (MDI_PI_IS_TRANSIENT(pip)) {
3616 		if (rv == MDI_SUCCESS) {
3617 			MDI_PI_CLEAR_TRANSIENT(pip);
3618 		} else {
3619 			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
3620 		}
3621 	}
3622 
3623 	/*
3624 	 * Wake anyone waiting for this mdi_pathinfo node
3625 	 */
3626 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3627 	MDI_PI_UNLOCK(pip);
3628 
3629 	/*
3630 	 * Mark the client device as stable
3631 	 */
3632 	MDI_CLIENT_STABLE(ct);
3633 	if (rv == MDI_SUCCESS) {
3634 		if (ct->ct_unstable == 0) {
3635 			cdip = ct->ct_dip;
3636 
3637 			/*
3638 			 * Onlining the mdi_pathinfo node will impact the
3639 			 * client state.  Update the client and dev_info node
3640 			 * state accordingly
3641 			 */
3642 			rv = NDI_SUCCESS;
3643 			i_mdi_client_update_state(ct);
3644 			switch (MDI_CLIENT_STATE(ct)) {
3645 			case MDI_CLIENT_STATE_OPTIMAL:
3646 			case MDI_CLIENT_STATE_DEGRADED:
3647 				if (cdip && !i_ddi_devi_attached(cdip) &&
3648 				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
3649 				    (state == MDI_PATHINFO_STATE_STANDBY))) {
3650 
3651 					/*
3652 					 * Must do ndi_devi_online() through
3653 					 * hotplug thread for deferred
3654 					 * attach mechanism to work
3655 					 */
3656 					MDI_CLIENT_UNLOCK(ct);
3657 					rv = ndi_devi_online(cdip, 0);
3658 					MDI_CLIENT_LOCK(ct);
3659 					if ((rv != NDI_SUCCESS) &&
3660 					    (MDI_CLIENT_STATE(ct) ==
3661 					    MDI_CLIENT_STATE_DEGRADED)) {
3662 						MDI_DEBUG(1, (MDI_WARN, cdip,
3663 						    "!ndi_devi_online failed "
3664 						    "error %x", rv));
3665 					}
3666 					rv = NDI_SUCCESS;
3667 				}
3668 				break;
3669 
3670 			case MDI_CLIENT_STATE_FAILED:
3671 				/*
3672 				 * This is the last path case for
3673 				 * non-user initiated events.
3674 				 */
3675 				if (((flag & NDI_USER_REQ) == 0) &&
3676 				    cdip && (i_ddi_node_state(cdip) >=
3677 				    DS_INITIALIZED)) {
3678 					MDI_CLIENT_UNLOCK(ct);
3679 					rv = ndi_devi_offline(cdip,
3680 					    NDI_DEVFS_CLEAN);
3681 					MDI_CLIENT_LOCK(ct);
3682 
3683 					if (rv != NDI_SUCCESS) {
3684 						/*
3685 						 * ndi_devi_offline failed.
3686 						 * Reset client flags to
3687 						 * online as the path could not
3688 						 * be offlined.
3689 						 */
3690 						MDI_DEBUG(1, (MDI_WARN, cdip,
3691 						    "!ndi_devi_offline failed: "
3692 						    "error %x", rv));
3693 						MDI_CLIENT_SET_ONLINE(ct);
3694 					}
3695 				}
3696 				break;
3697 			}
3698 			/*
3699 			 * Convert to MDI error code
3700 			 */
3701 			switch (rv) {
3702 			case NDI_SUCCESS:
3703 				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3704 				i_mdi_report_path_state(ct, pip);
3705 				rv = MDI_SUCCESS;
3706 				break;
3707 			case NDI_BUSY:
3708 				rv = MDI_BUSY;
3709 				break;
3710 			default:
3711 				rv = MDI_FAILURE;
3712 				break;
3713 			}
3714 		}
3715 	}
3716 	MDI_CLIENT_UNLOCK(ct);
3717 
3718 state_change_exit:
3719 	/*
3720 	 * Mark the pHCI as stable again.
3721 	 */
3722 	MDI_PHCI_LOCK(ph);
3723 	MDI_PHCI_STABLE(ph);
3724 	MDI_PHCI_UNLOCK(ph);
3725 	return (rv);
3726 }
3727 
3728 /*
3729  * mdi_pi_online():
3730  *		Place the path_info node in the online state.  The path is
3731  *		now available to be selected by mdi_select_path() for
3732  *		transporting I/O requests to client devices.
3733  * Return Values:
3734  *		MDI_SUCCESS
3735  *		MDI_FAILURE
3736  */
3737 int
3738 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3739 {
3740 	mdi_client_t	*ct = MDI_PI(pip)->pi_client;
3741 	int		client_held = 0;
3742 	int		rv;
3743 
3744 	ASSERT(ct != NULL);
3745 	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3746 	if (rv != MDI_SUCCESS)
3747 		return (rv);
3748 
3749 	MDI_PI_LOCK(pip);
3750 	if (MDI_PI(pip)->pi_pm_held == 0) {
3751 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3752 		    "i_mdi_pm_hold_pip %p", (void *)pip));
3753 		i_mdi_pm_hold_pip(pip);
3754 		client_held = 1;
3755 	}
3756 	MDI_PI_UNLOCK(pip);
3757 
3758 	if (client_held) {
3759 		MDI_CLIENT_LOCK(ct);
3760 		if (ct->ct_power_cnt == 0) {
3761 			rv = i_mdi_power_all_phci(ct);
3762 		}
3763 
3764 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3765 		    "i_mdi_pm_hold_client %p", (void *)ct));
3766 		i_mdi_pm_hold_client(ct, 1);
3767 		MDI_CLIENT_UNLOCK(ct);
3768 	}
3769 
3770 	return (rv);
3771 }
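
/*
 * Illustrative usage sketch (assumes a held mdi_pathinfo_t pointer "pip"):
 *
 *	if (mdi_pi_online(pip, 0) != MDI_SUCCESS) {
 *		cmn_err(CE_WARN, "!failed to online path %s",
 *		    mdi_pi_spathname(pip));
 *	}
 *
 * On success the path becomes eligible for selection by mdi_select_path().
 */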
3772 
3773 /*
3774  * mdi_pi_standby():
3775  *		Place the mdi_pathinfo node in standby state
3776  *
3777  * Return Values:
3778  *		MDI_SUCCESS
3779  *		MDI_FAILURE
3780  */
3781 int
3782 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3783 {
3784 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3785 }
3786 
3787 /*
3788  * mdi_pi_fault():
3789  *		Place the mdi_pathinfo node in fault'ed state
3790  * Return Values:
3791  *		MDI_SUCCESS
3792  *		MDI_FAILURE
3793  */
3794 int
3795 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3796 {
3797 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3798 }
3799 
3800 /*
3801  * mdi_pi_offline():
3802  *		Offline a mdi_pathinfo node.
3803  * Return Values:
3804  *		MDI_SUCCESS
3805  *		MDI_FAILURE
3806  */
3807 int
3808 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3809 {
3810 	int	ret, client_held = 0;
3811 	mdi_client_t	*ct;
3812 
3813 	/*
3814 	 * Original code overloaded NDI_DEVI_REMOVE to this interface, and
3815 	 * used it to mean "user initiated operation" (i.e. devctl). Callers
3816 	 * should now just use NDI_USER_REQ.
3817 	 */
3818 	if (flags & NDI_DEVI_REMOVE) {
3819 		flags &= ~NDI_DEVI_REMOVE;
3820 		flags |= NDI_USER_REQ;
3821 	}
3822 
3823 	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3824 
3825 	if (ret == MDI_SUCCESS) {
3826 		MDI_PI_LOCK(pip);
3827 		if (MDI_PI(pip)->pi_pm_held) {
3828 			client_held = 1;
3829 		}
3830 		MDI_PI_UNLOCK(pip);
3831 
3832 		if (client_held) {
3833 			ct = MDI_PI(pip)->pi_client;
3834 			MDI_CLIENT_LOCK(ct);
3835 			MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3836 			    "i_mdi_pm_rele_client"));
3837 			i_mdi_pm_rele_client(ct, 1);
3838 			MDI_CLIENT_UNLOCK(ct);
3839 		}
3840 	}
3841 
3842 	return (ret);
3843 }
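
/*
 * Illustrative usage sketch (assumes a held mdi_pathinfo_t pointer "pip"):
 * a user-initiated (devctl-style) removal passes NDI_USER_REQ and should be
 * prepared to retry on MDI_BUSY, e.g. when I/O is still pending or this is
 * the last path to an attached client:
 *
 *	int rv = mdi_pi_offline(pip, NDI_USER_REQ);
 *	if (rv == MDI_BUSY)
 *		(defer and retry the offline later)
 */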
3844 
3845 /*
3846  * i_mdi_pi_offline():
3847  *		Offline a mdi_pathinfo node and call the vHCI driver's callback
3848  */
3849 static int
3850 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3851 {
3852 	dev_info_t	*vdip = NULL;
3853 	mdi_vhci_t	*vh = NULL;
3854 	mdi_client_t	*ct = NULL;
3855 	int		(*f)();
3856 	int		rv;
3857 
3858 	MDI_PI_LOCK(pip);
3859 	ct = MDI_PI(pip)->pi_client;
3860 	ASSERT(ct != NULL);
3861 
3862 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
3863 		/*
3864 		 * Give a chance for pending I/Os to complete.
3865 		 */
3866 		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3867 		    "!%d cmds still pending on path %s %p",
3868 		    MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip),
3869 		    (void *)pip));
3870 		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3871 		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3872 		    TR_CLOCK_TICK) == -1) {
3873 			/*
3874 			 * The timeout expired without ref_cnt dropping to
3875 			 * zero and the condition being signaled.
3876 			 */
3877 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3878 			    "!Timeout reached on path %s %p without the cond",
3879 			    mdi_pi_spathname(pip), (void *)pip));
3880 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3881 			    "!%d cmds still pending on path %s %p",
3882 			    MDI_PI(pip)->pi_ref_cnt,
3883 			    mdi_pi_spathname(pip), (void *)pip));
3884 		}
3885 	}
3886 	vh = ct->ct_vhci;
3887 	vdip = vh->vh_dip;
3888 
3889 	/*
3890 	 * Notify the vHCI that has registered for this event
3891 	 */
3892 	ASSERT(vh->vh_ops);
3893 	f = vh->vh_ops->vo_pi_state_change;
3894 
3895 	rv = MDI_SUCCESS;
3896 	if (f != NULL) {
3897 		MDI_PI_UNLOCK(pip);
3898 		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3899 		    flags)) != MDI_SUCCESS) {
3900 			MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3901 			    "!vo_path_offline failed: vdip %s%d %p: path %s %p",
3902 			    ddi_driver_name(vdip), ddi_get_instance(vdip),
3903 			    (void *)vdip, mdi_pi_spathname(pip), (void *)pip));
3904 		}
3905 		MDI_PI_LOCK(pip);
3906 	}
3907 
3908 	/*
3909 	 * Set the mdi_pathinfo node state and clear the transient condition
3910 	 */
3911 	MDI_PI_SET_OFFLINE(pip);
3912 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3913 	MDI_PI_UNLOCK(pip);
3914 
3915 	MDI_CLIENT_LOCK(ct);
3916 	if (rv == MDI_SUCCESS) {
3917 		if (ct->ct_unstable == 0) {
3918 			dev_info_t	*cdip = ct->ct_dip;
3919 
3920 			/*
3921 			 * Offlining the mdi_pathinfo node will impact the
3922 			 * client state.  Update the client and dev_info node
3923 			 * state accordingly.
3924 			 */
3925 			i_mdi_client_update_state(ct);
3926 			rv = NDI_SUCCESS;
3927 			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3928 				if (cdip &&
3929 				    (i_ddi_node_state(cdip) >=
3930 				    DS_INITIALIZED)) {
3931 					MDI_CLIENT_UNLOCK(ct);
3932 					rv = ndi_devi_offline(cdip,
3933 					    NDI_DEVFS_CLEAN);
3934 					MDI_CLIENT_LOCK(ct);
3935 					if (rv != NDI_SUCCESS) {
3936 						/*
3937 						 * ndi_devi_offline failed.
3938 						 * Reset client flags to
3939 						 * online.
3940 						 */
3941 						MDI_DEBUG(4, (MDI_WARN, cdip,
3942 						    "ndi_devi_offline failed: "
3943 						    "error %x", rv));
3944 						MDI_CLIENT_SET_ONLINE(ct);
3945 					}
3946 				}
3947 			}
3948 			/*
3949 			 * Convert to MDI error code
3950 			 */
3951 			switch (rv) {
3952 			case NDI_SUCCESS:
3953 				rv = MDI_SUCCESS;
3954 				break;
3955 			case NDI_BUSY:
3956 				rv = MDI_BUSY;
3957 				break;
3958 			default:
3959 				rv = MDI_FAILURE;
3960 				break;
3961 			}
3962 		}
3963 		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3964 		i_mdi_report_path_state(ct, pip);
3965 	}
3966 
3967 	MDI_CLIENT_UNLOCK(ct);
3968 
3969 	/*
3970 	 * Change in the mdi_pathinfo node state will impact the client state
3971 	 */
3972 	MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
3973 	    "ct = %p pip = %p", (void *)ct, (void *)pip));
3974 	return (rv);
3975 }
3976 
3977 /*
3978  * i_mdi_pi_online():
3979  *		Online a mdi_pathinfo node and call the vHCI driver's callback
3980  */
3981 static int
3982 i_mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3983 {
3984 	mdi_vhci_t	*vh = NULL;
3985 	mdi_client_t	*ct = NULL;
3986 	mdi_phci_t	*ph;
3987 	int		(*f)();
3988 	int		rv;
3989 
3990 	MDI_PI_LOCK(pip);
3991 	ph = MDI_PI(pip)->pi_phci;
3992 	vh = ph->ph_vhci;
3993 	ct = MDI_PI(pip)->pi_client;
3994 	MDI_PI_SET_ONLINING(pip)
3995 	MDI_PI_UNLOCK(pip);
3996 	f = vh->vh_ops->vo_pi_state_change;
3997 	rv = MDI_SUCCESS;
3998 	if (f != NULL)
3999 		rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0, flags);
4000 	MDI_CLIENT_LOCK(ct);
4001 	MDI_PI_LOCK(pip);
4002 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
4003 	MDI_PI_UNLOCK(pip);
4004 	if (rv == MDI_SUCCESS) {
4005 		dev_info_t	*cdip = ct->ct_dip;
4006 
4007 		i_mdi_client_update_state(ct);
4008 		if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL ||
4009 		    MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4010 			if (cdip && !i_ddi_devi_attached(cdip)) {
4011 				MDI_CLIENT_UNLOCK(ct);
4012 				rv = ndi_devi_online(cdip, 0);
4013 				MDI_CLIENT_LOCK(ct);
4014 				if ((rv != NDI_SUCCESS) &&
4015 				    (MDI_CLIENT_STATE(ct) ==
4016 				    MDI_CLIENT_STATE_DEGRADED)) {
4017 					MDI_CLIENT_SET_OFFLINE(ct);
4018 				}
4019 				if (rv != NDI_SUCCESS) {
4020 					/* Reset the path state */
4021 					MDI_PI_LOCK(pip);
4022 					MDI_PI(pip)->pi_state =
4023 					    MDI_PI_OLD_STATE(pip);
4024 					MDI_PI_UNLOCK(pip);
4025 				}
4026 			}
4027 		}
4028 		switch (rv) {
4029 		case NDI_SUCCESS:
4030 			MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
4031 			i_mdi_report_path_state(ct, pip);
4032 			rv = MDI_SUCCESS;
4033 			break;
4034 		case NDI_BUSY:
4035 			rv = MDI_BUSY;
4036 			break;
4037 		default:
4038 			rv = MDI_FAILURE;
4039 			break;
4040 		}
4041 	} else {
4042 		/* Reset the path state */
4043 		MDI_PI_LOCK(pip);
4044 		MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
4045 		MDI_PI_UNLOCK(pip);
4046 	}
4047 	MDI_CLIENT_UNLOCK(ct);
4048 	return (rv);
4049 }
4050 
4051 /*
4052  * mdi_pi_get_node_name():
4053  *              Get the name associated with a mdi_pathinfo node.
4054  *              Since pathinfo nodes are not directly named, we
4055  *              return the node_name of the client.
4056  *
4057  * Return Values:
4058  *              char *
4059  */
4060 char *
4061 mdi_pi_get_node_name(mdi_pathinfo_t *pip)
4062 {
4063 	mdi_client_t    *ct;
4064 
4065 	if (pip == NULL)
4066 		return (NULL);
4067 	ct = MDI_PI(pip)->pi_client;
4068 	if ((ct == NULL) || (ct->ct_dip == NULL))
4069 		return (NULL);
4070 	return (ddi_node_name(ct->ct_dip));
4071 }
4072 
4073 /*
4074  * mdi_pi_get_addr():
4075  *		Get the unit address associated with a mdi_pathinfo node
4076  *
4077  * Return Values:
4078  *		char *
4079  */
4080 char *
4081 mdi_pi_get_addr(mdi_pathinfo_t *pip)
4082 {
4083 	if (pip == NULL)
4084 		return (NULL);
4085 
4086 	return (MDI_PI(pip)->pi_addr);
4087 }
4088 
4089 /*
4090  * mdi_pi_get_path_instance():
4091  *		Get the 'path_instance' of a mdi_pathinfo node
4092  *
4093  * Return Values:
4094  *		path_instance
4095  */
4096 int
4097 mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
4098 {
4099 	if (pip == NULL)
4100 		return (0);
4101 
4102 	return (MDI_PI(pip)->pi_path_instance);
4103 }
4104 
4105 /*
4106  * mdi_pi_pathname():
4107  *		Return pointer to path to pathinfo node.
4108  */
4109 char *
4110 mdi_pi_pathname(mdi_pathinfo_t *pip)
4111 {
4112 	if (pip == NULL)
4113 		return (NULL);
4114 	return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
4115 }
4116 
4117 /*
4118  * mdi_pi_spathname():
4119  *		Return pointer to shortpath to pathinfo node. Used for debug
4120  *		messages, so return "" instead of NULL when unknown.
4121  */
4122 char *
4123 mdi_pi_spathname(mdi_pathinfo_t *pip)
4124 {
4125 	char	*spath = "";
4126 
4127 	if (pip) {
4128 		spath = mdi_pi_spathname_by_instance(
4129 		    mdi_pi_get_path_instance(pip));
4130 		if (spath == NULL)
4131 			spath = "";
4132 	}
4133 	return (spath);
4134 }
4135 
4136 char *
4137 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
4138 {
4139 	char *obp_path = NULL;
4140 	if ((pip == NULL) || (path == NULL))
4141 		return (NULL);
4142 
4143 	if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) {
4144 		(void) strcpy(path, obp_path);
4145 		(void) mdi_prop_free(obp_path);
4146 	} else {
4147 		path = NULL;
4148 	}
4149 	return (path);
4150 }
4151 
4152 int
4153 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
4154 {
4155 	dev_info_t *pdip;
4156 	char *obp_path = NULL;
4157 	int rc = MDI_FAILURE;
4158 
4159 	if (pip == NULL)
4160 		return (MDI_FAILURE);
4161 
4162 	pdip = mdi_pi_get_phci(pip);
4163 	if (pdip == NULL)
4164 		return (MDI_FAILURE);
4165 
4166 	obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4167 
4168 	if (ddi_pathname_obp(pdip, obp_path) == NULL) {
4169 		(void) ddi_pathname(pdip, obp_path);
4170 	}
4171 
4172 	if (component) {
4173 		(void) strncat(obp_path, "/", MAXPATHLEN);
4174 		(void) strncat(obp_path, component, MAXPATHLEN);
4175 	}
4176 	rc = mdi_prop_update_string(pip, "obp-path", obp_path);
4177 
4178 	if (obp_path)
4179 		kmem_free(obp_path, MAXPATHLEN);
4180 	return (rc);
4181 }
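
/*
 * Illustrative usage sketch (the component string below is hypothetical):
 * a driver recording the firmware path of a target child reachable through
 * this pathinfo node might do
 *
 *	(void) mdi_pi_pathname_obp_set(pip, "disk@w5000c50012345678,0");
 *
 * which stores the "obp-path" string property that mdi_pi_pathname_obp()
 * later copies back into a caller-supplied MAXPATHLEN buffer.
 */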
4182 
4183 /*
4184  * mdi_pi_get_client():
4185  *		Get the client devinfo associated with a mdi_pathinfo node
4186  *
4187  * Return Values:
4188  *		Handle to client device dev_info node
4189  */
4190 dev_info_t *
4191 mdi_pi_get_client(mdi_pathinfo_t *pip)
4192 {
4193 	dev_info_t	*dip = NULL;
4194 	if (pip) {
4195 		dip = MDI_PI(pip)->pi_client->ct_dip;
4196 	}
4197 	return (dip);
4198 }
4199 
4200 /*
4201  * mdi_pi_get_phci():
4202  *		Get the pHCI devinfo associated with the mdi_pathinfo node
4203  * Return Values:
4204  *		Handle to dev_info node
4205  */
4206 dev_info_t *
4207 mdi_pi_get_phci(mdi_pathinfo_t *pip)
4208 {
4209 	dev_info_t	*dip = NULL;
4210 	mdi_phci_t	*ph;
4211 
4212 	if (pip) {
4213 		ph = MDI_PI(pip)->pi_phci;
4214 		if (ph)
4215 			dip = ph->ph_dip;
4216 	}
4217 	return (dip);
4218 }
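
/*
 * Illustrative usage sketch: the two accessors above are typically paired
 * when reporting which pHCI a given client path runs through (both may
 * return NULL, so real callers must check first):
 *
 *	dev_info_t	*cdip = mdi_pi_get_client(pip);
 *	dev_info_t	*pdip = mdi_pi_get_phci(pip);
 *
 *	cmn_err(CE_CONT, "?%s%d via %s%d\n",
 *	    ddi_driver_name(cdip), ddi_get_instance(cdip),
 *	    ddi_driver_name(pdip), ddi_get_instance(pdip));
 */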
4219 
4220 /*
4221  * mdi_pi_get_client_private():
4222  *		Get the client private information associated with the
4223  *		mdi_pathinfo node
4224  */
4225 void *
4226 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
4227 {
4228 	void *cprivate = NULL;
4229 	if (pip) {
4230 		cprivate = MDI_PI(pip)->pi_cprivate;
4231 	}
4232 	return (cprivate);
4233 }
4234 
4235 /*
4236  * mdi_pi_set_client_private():
4237  *		Set the client private information in the mdi_pathinfo node
4238  */
4239 void
4240 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
4241 {
4242 	if (pip) {
4243 		MDI_PI(pip)->pi_cprivate = priv;
4244 	}
4245 }
4246 
4247 /*
4248  * mdi_pi_get_phci_private():
4249  *		Get the pHCI private information associated with the
4250  *		mdi_pathinfo node
4251  */
4252 caddr_t
4253 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
4254 {
4255 	caddr_t	pprivate = NULL;
4256 
4257 	if (pip) {
4258 		pprivate = MDI_PI(pip)->pi_pprivate;
4259 	}
4260 	return (pprivate);
4261 }
4262 
4263 /*
4264  * mdi_pi_set_phci_private():
4265  *		Set the pHCI private information in the mdi_pathinfo node
4266  */
4267 void
4268 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
4269 {
4270 	if (pip) {
4271 		MDI_PI(pip)->pi_pprivate = priv;
4272 	}
4273 }
4274 
4275 /*
4276  * mdi_pi_get_state():
4277  *		Get the mdi_pathinfo node state. Transient states are internal
4278  *		and not provided to the users
4279  */
4280 mdi_pathinfo_state_t
4281 mdi_pi_get_state(mdi_pathinfo_t *pip)
4282 {
4283 	mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
4284 
4285 	if (pip) {
4286 		if (MDI_PI_IS_TRANSIENT(pip)) {
4287 			/*
4288 			 * mdi_pathinfo is in state transition.  Return the
4289 			 * last good state.
4290 			 */
4291 			state = MDI_PI_OLD_STATE(pip);
4292 		} else {
4293 			state = MDI_PI_STATE(pip);
4294 		}
4295 	}
4296 	return (state);
4297 }
4298 
4299 /*
4300  * mdi_pi_get_flags():
4301  *		Get the mdi_pathinfo node flags.
4302  */
4303 uint_t
4304 mdi_pi_get_flags(mdi_pathinfo_t *pip)
4305 {
4306 	return (pip ? MDI_PI(pip)->pi_flags : 0);
4307 }
4308 
4309 /*
4310  * Note that the following function needs to be the new interface for
4311  * mdi_pi_get_state when mpxio gets integrated to ON.
4312  */
4313 int
4314 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
4315 		uint32_t *ext_state)
4316 {
4317 	*state = MDI_PATHINFO_STATE_INIT;
4318 
4319 	if (pip) {
4320 		if (MDI_PI_IS_TRANSIENT(pip)) {
4321 			/*
4322 			 * mdi_pathinfo is in state transition.  Return the
4323 			 * last good state.
4324 			 */
4325 			*state = MDI_PI_OLD_STATE(pip);
4326 			*ext_state = MDI_PI_OLD_EXT_STATE(pip);
4327 		} else {
4328 			*state = MDI_PI_STATE(pip);
4329 			*ext_state = MDI_PI_EXT_STATE(pip);
4330 		}
4331 	}
4332 	return (MDI_SUCCESS);
4333 }
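
/*
 * Illustrative usage sketch for the newer interface above:
 *
 *	mdi_pathinfo_state_t	state;
 *	uint32_t		ext_state;
 *
 *	(void) mdi_pi_get_state2(pip, &state, &ext_state);
 *	if (state == MDI_PATHINFO_STATE_ONLINE)
 *		(path is usable; ext_state carries the extended state bits)
 */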
4334 
4335 /*
4336  * mdi_pi_get_preferred:
4337  *	Get the preferred path flag
4338  */
4339 int
4340 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
4341 {
4342 	if (pip) {
4343 		return (MDI_PI(pip)->pi_preferred);
4344 	}
4345 	return (0);
4346 }
4347 
4348 /*
4349  * mdi_pi_set_preferred:
4350  *	Set the preferred path flag
4351  */
4352 void
4353 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
4354 {
4355 	if (pip) {
4356 		MDI_PI(pip)->pi_preferred = preferred;
4357 	}
4358 }
4359 
4360 /*
4361  * mdi_pi_set_state():
4362  *		Set the mdi_pathinfo node state
4363  */
4364 void
4365 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
4366 {
4367 	uint32_t	ext_state;
4368 
4369 	if (pip) {
4370 		ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
4371 		MDI_PI(pip)->pi_state = state;
4372 		MDI_PI(pip)->pi_state |= ext_state;
4373 
4374 		/* Path has changed state, invalidate DINFOCACHE snap shot. */
4375 		i_ddi_di_cache_invalidate();
4376 	}
4377 }
4378 
4379 /*
4380  * Property functions:
4381  */
4382 int
4383 i_map_nvlist_error_to_mdi(int val)
4384 {
4385 	int rv;
4386 
4387 	switch (val) {
4388 	case 0:
4389 		rv = DDI_PROP_SUCCESS;
4390 		break;
4391 	case EINVAL:
4392 	case ENOTSUP:
4393 		rv = DDI_PROP_INVAL_ARG;
4394 		break;
4395 	case ENOMEM:
4396 		rv = DDI_PROP_NO_MEMORY;
4397 		break;
4398 	default:
4399 		rv = DDI_PROP_NOT_FOUND;
4400 		break;
4401 	}
4402 	return (rv);
4403 }
4404 
4405 /*
4406  * mdi_pi_get_next_prop():
4407  *		Property walk function.  The caller should hold mdi_pi_lock()
4408  *		and release by calling mdi_pi_unlock() at the end of walk to
4409  *		get a consistent value.
4410  */
4411 nvpair_t *
4412 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
4413 {
4414 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4415 		return (NULL);
4416 	}
4417 	ASSERT(MDI_PI_LOCKED(pip));
4418 	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
4419 }
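
/*
 * Illustrative walk sketch, following the locking rule noted above:
 *
 *	nvpair_t	*nvp;
 *
 *	mdi_pi_lock(pip);
 *	for (nvp = mdi_pi_get_next_prop(pip, NULL); nvp != NULL;
 *	    nvp = mdi_pi_get_next_prop(pip, nvp)) {
 *		(inspect nvpair_name(nvp) and nvpair_type(nvp) here)
 *	}
 *	mdi_pi_unlock(pip);
 */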
4420 
4421 /*
4422  * mdi_prop_remove():
4423  *		Remove the named property from the named list.
4424  */
4425 int
4426 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
4427 {
4428 	if (pip == NULL) {
4429 		return (DDI_PROP_NOT_FOUND);
4430 	}
4431 	ASSERT(!MDI_PI_LOCKED(pip));
4432 	MDI_PI_LOCK(pip);
4433 	if (MDI_PI(pip)->pi_prop == NULL) {
4434 		MDI_PI_UNLOCK(pip);
4435 		return (DDI_PROP_NOT_FOUND);
4436 	}
4437 	if (name) {
4438 		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
4439 	} else {
4440 		char		nvp_name[MAXNAMELEN];
4441 		nvpair_t	*nvp;
4442 		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
4443 		while (nvp) {
4444 			nvpair_t	*next;
4445 			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
4446 			(void) snprintf(nvp_name, sizeof(nvp_name), "%s",
4447 			    nvpair_name(nvp));
4448 			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
4449 			    nvp_name);
4450 			nvp = next;
4451 		}
4452 	}
4453 	MDI_PI_UNLOCK(pip);
4454 	return (DDI_PROP_SUCCESS);
4455 }
4456 
4457 /*
4458  * mdi_prop_size():
4459  *		Get buffer size needed to pack the property data.
4460  *		Caller should hold the mdi_pathinfo_t lock to get a consistent
4461  *		buffer size.
4462  */
4463 int
4464 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
4465 {
4466 	int	rv;
4467 	size_t	bufsize;
4468 
4469 	*buflenp = 0;
4470 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4471 		return (DDI_PROP_NOT_FOUND);
4472 	}
4473 	ASSERT(MDI_PI_LOCKED(pip));
4474 	rv = nvlist_size(MDI_PI(pip)->pi_prop,
4475 	    &bufsize, NV_ENCODE_NATIVE);
4476 	*buflenp = bufsize;
4477 	return (i_map_nvlist_error_to_mdi(rv));
4478 }
4479 
4480 /*
4481  * mdi_prop_pack():
4482  *		pack the property list.  The caller should hold the
4483  *		mdi_pathinfo_t node lock to get consistent data.
4484  */
4485 int
4486 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
4487 {
4488 	int	rv;
4489 	size_t	bufsize;
4490 
4491 	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
4492 		return (DDI_PROP_NOT_FOUND);
4493 	}
4494 
4495 	ASSERT(MDI_PI_LOCKED(pip));
4496 
4497 	bufsize = buflen;
4498 	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
4499 	    NV_ENCODE_NATIVE, KM_SLEEP);
4500 
4501 	return (i_map_nvlist_error_to_mdi(rv));
4502 }
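
/*
 * Illustrative size-then-pack sketch: the mdi_pathinfo lock is held across
 * both calls so the reported size and the packed contents agree:
 *
 *	size_t	len;
 *	char	*buf;
 *
 *	mdi_pi_lock(pip);
 *	if (mdi_prop_size(pip, &len) == DDI_PROP_SUCCESS) {
 *		buf = kmem_alloc(len, KM_SLEEP);
 *		(void) mdi_prop_pack(pip, &buf, len);
 *		(copy the packed buffer out, then kmem_free(buf, len))
 *	}
 *	mdi_pi_unlock(pip);
 */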
4503 
4504 /*
4505  * mdi_prop_update_byte():
4506  *		Create/Update a byte property
4507  */
4508 int
4509 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
4510 {
4511 	int rv;
4512 
4513 	if (pip == NULL) {
4514 		return (DDI_PROP_INVAL_ARG);
4515 	}
4516 	ASSERT(!MDI_PI_LOCKED(pip));
4517 	MDI_PI_LOCK(pip);
4518 	if (MDI_PI(pip)->pi_prop == NULL) {
4519 		MDI_PI_UNLOCK(pip);
4520 		return (DDI_PROP_NOT_FOUND);
4521 	}
4522 	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
4523 	MDI_PI_UNLOCK(pip);
4524 	return (i_map_nvlist_error_to_mdi(rv));
4525 }
4526 
4527 /*
4528  * mdi_prop_update_byte_array():
4529  *		Create/Update a byte array property
4530  */
4531 int
4532 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
4533     uint_t nelements)
4534 {
4535 	int rv;
4536 
4537 	if (pip == NULL) {
4538 		return (DDI_PROP_INVAL_ARG);
4539 	}
4540 	ASSERT(!MDI_PI_LOCKED(pip));
4541 	MDI_PI_LOCK(pip);
4542 	if (MDI_PI(pip)->pi_prop == NULL) {
4543 		MDI_PI_UNLOCK(pip);
4544 		return (DDI_PROP_NOT_FOUND);
4545 	}
4546 	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
4547 	MDI_PI_UNLOCK(pip);
4548 	return (i_map_nvlist_error_to_mdi(rv));
4549 }
4550 
4551 /*
4552  * mdi_prop_update_int():
4553  *		Create/Update a 32 bit integer property
4554  */
4555 int
4556 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
4557 {
4558 	int rv;
4559 
4560 	if (pip == NULL) {
4561 		return (DDI_PROP_INVAL_ARG);
4562 	}
4563 	ASSERT(!MDI_PI_LOCKED(pip));
4564 	MDI_PI_LOCK(pip);
4565 	if (MDI_PI(pip)->pi_prop == NULL) {
4566 		MDI_PI_UNLOCK(pip);
4567 		return (DDI_PROP_NOT_FOUND);
4568 	}
4569 	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
4570 	MDI_PI_UNLOCK(pip);
4571 	return (i_map_nvlist_error_to_mdi(rv));
4572 }
4573 
4574 /*
4575  * mdi_prop_update_int64():
4576  *		Create/Update a 64 bit integer property
4577  */
4578 int
4579 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
4580 {
4581 	int rv;
4582 
4583 	if (pip == NULL) {
4584 		return (DDI_PROP_INVAL_ARG);
4585 	}
4586 	ASSERT(!MDI_PI_LOCKED(pip));
4587 	MDI_PI_LOCK(pip);
4588 	if (MDI_PI(pip)->pi_prop == NULL) {
4589 		MDI_PI_UNLOCK(pip);
4590 		return (DDI_PROP_NOT_FOUND);
4591 	}
4592 	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
4593 	MDI_PI_UNLOCK(pip);
4594 	return (i_map_nvlist_error_to_mdi(rv));
4595 }
4596 
4597 /*
4598  * mdi_prop_update_int_array():
4599  *		Create/Update a int array property
4600  */
4601 int
4602 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4603 	    uint_t nelements)
4604 {
4605 	int rv;
4606 
4607 	if (pip == NULL) {
4608 		return (DDI_PROP_INVAL_ARG);
4609 	}
4610 	ASSERT(!MDI_PI_LOCKED(pip));
4611 	MDI_PI_LOCK(pip);
4612 	if (MDI_PI(pip)->pi_prop == NULL) {
4613 		MDI_PI_UNLOCK(pip);
4614 		return (DDI_PROP_NOT_FOUND);
4615 	}
4616 	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4617 	    nelements);
4618 	MDI_PI_UNLOCK(pip);
4619 	return (i_map_nvlist_error_to_mdi(rv));
4620 }
4621 
4622 /*
4623  * mdi_prop_update_string():
4624  *		Create/Update a string property
4625  */
4626 int
4627 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4628 {
4629 	int rv;
4630 
4631 	if (pip == NULL) {
4632 		return (DDI_PROP_INVAL_ARG);
4633 	}
4634 	ASSERT(!MDI_PI_LOCKED(pip));
4635 	MDI_PI_LOCK(pip);
4636 	if (MDI_PI(pip)->pi_prop == NULL) {
4637 		MDI_PI_UNLOCK(pip);
4638 		return (DDI_PROP_NOT_FOUND);
4639 	}
4640 	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4641 	MDI_PI_UNLOCK(pip);
4642 	return (i_map_nvlist_error_to_mdi(rv));
4643 }
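
/*
 * Illustrative usage sketch (the property name and value are only examples):
 * a driver tagging a freshly created path before onlining it might do
 *
 *	if (mdi_prop_update_string(pip, "path-class", "primary") !=
 *	    DDI_PROP_SUCCESS)
 *		(the node has no property list or the nvlist update failed)
 */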
4644 
4645 /*
4646  * mdi_prop_update_string_array():
4647  *		Create/Update a string array property
4648  */
4649 int
4650 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4651     uint_t nelements)
4652 {
4653 	int rv;
4654 
4655 	if (pip == NULL) {
4656 		return (DDI_PROP_INVAL_ARG);
4657 	}
4658 	ASSERT(!MDI_PI_LOCKED(pip));
4659 	MDI_PI_LOCK(pip);
4660 	if (MDI_PI(pip)->pi_prop == NULL) {
4661 		MDI_PI_UNLOCK(pip);
4662 		return (DDI_PROP_NOT_FOUND);
4663 	}
4664 	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4665 	    nelements);
4666 	MDI_PI_UNLOCK(pip);
4667 	return (i_map_nvlist_error_to_mdi(rv));
4668 }
4669 
4670 /*
4671  * mdi_prop_lookup_byte():
4672  *		Look for byte property identified by name.  The data returned
4673  *		is the actual property and valid as long as mdi_pathinfo_t node
4674  *		is alive.
4675  */
4676 int
4677 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4678 {
4679 	int rv;
4680 
4681 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4682 		return (DDI_PROP_NOT_FOUND);
4683 	}
4684 	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4685 	return (i_map_nvlist_error_to_mdi(rv));
4686 }
4687 
4688 
4689 /*
4690  * mdi_prop_lookup_byte_array():
4691  *		Look for byte array property identified by name.  The data
4692  *		returned is the actual property and valid as long as
4693  *		mdi_pathinfo_t node is alive.
4694  */
4695 int
4696 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4697     uint_t *nelements)
4698 {
4699 	int rv;
4700 
4701 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4702 		return (DDI_PROP_NOT_FOUND);
4703 	}
4704 	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4705 	    nelements);
4706 	return (i_map_nvlist_error_to_mdi(rv));
4707 }
4708 
4709 /*
4710  * mdi_prop_lookup_int():
4711  *		Look for int property identified by name.  The data returned
4712  *		is the actual property and valid as long as mdi_pathinfo_t
4713  *		node is alive.
4714  */
4715 int
4716 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4717 {
4718 	int rv;
4719 
4720 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4721 		return (DDI_PROP_NOT_FOUND);
4722 	}
4723 	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4724 	return (i_map_nvlist_error_to_mdi(rv));
4725 }
4726 
4727 /*
4728  * mdi_prop_lookup_int64():
4729  *		Look for int64 property identified by name.  The data returned
4730  *		is the actual property and valid as long as mdi_pathinfo_t node
4731  *		is alive.
4732  */
4733 int
4734 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4735 {
4736 	int rv;
4737 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4738 		return (DDI_PROP_NOT_FOUND);
4739 	}
4740 	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4741 	return (i_map_nvlist_error_to_mdi(rv));
4742 }
4743 
4744 /*
4745  * mdi_prop_lookup_int_array():
4746  *		Look for int array property identified by name.  The data
4747  *		returned is the actual property and valid as long as
4748  *		mdi_pathinfo_t node is alive.
4749  */
4750 int
4751 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4752     uint_t *nelements)
4753 {
4754 	int rv;
4755 
4756 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4757 		return (DDI_PROP_NOT_FOUND);
4758 	}
4759 	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4760 	    (int32_t **)data, nelements);
4761 	return (i_map_nvlist_error_to_mdi(rv));
4762 }
4763 
4764 /*
4765  * mdi_prop_lookup_string():
4766  *		Look for string property identified by name.  The data
4767  *		returned is the actual property and valid as long as
4768  *		mdi_pathinfo_t node is alive.
4769  */
4770 int
4771 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4772 {
4773 	int rv;
4774 
4775 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4776 		return (DDI_PROP_NOT_FOUND);
4777 	}
4778 	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4779 	return (i_map_nvlist_error_to_mdi(rv));
4780 }
4781 
4782 /*
4783  * mdi_prop_lookup_string_array():
4784  *		Look for string array property identified by name.  The data
4785  *		returned is the actual property and valid as long as
4786  *		mdi_pathinfo_t node is alive.
4787  */
4788 int
4789 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4790     uint_t *nelements)
4791 {
4792 	int rv;
4793 
4794 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4795 		return (DDI_PROP_NOT_FOUND);
4796 	}
4797 	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4798 	    nelements);
4799 	return (i_map_nvlist_error_to_mdi(rv));
4800 }
4801 
4802 /*
4803  * mdi_prop_free():
4804  *		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4805  *		functions return the pointer to actual property data and not a
4806  *		copy of it.  So the data returned is valid as long as
4807  *		mdi_pathinfo_t node is valid.
4808  */
4809 /*ARGSUSED*/
4810 int
4811 mdi_prop_free(void *data)
4812 {
4813 	return (DDI_PROP_SUCCESS);
4814 }
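
/*
 * Illustrative lookup sketch (the property name is only an example): the
 * lookup returns a pointer into the node's own nvlist, so mdi_prop_free()
 * is effectively a no-op kept for symmetry with ddi_prop_free():
 *
 *	char	*val;
 *
 *	if (mdi_prop_lookup_string(pip, "path-class", &val) ==
 *	    DDI_PROP_SUCCESS) {
 *		(val stays valid while the mdi_pathinfo node is alive)
 *		(void) mdi_prop_free(val);
 *	}
 */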
4815 
4816 /*ARGSUSED*/
4817 static void
4818 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4819 {
4820 	char		*ct_path;
4821 	char		*ct_status;
4822 	char		*status;
4823 	dev_info_t	*cdip = ct->ct_dip;
4824 	char		lb_buf[64];
4825 	int		report_lb_c = 0, report_lb_p = 0;
4826 
4827 	ASSERT(MDI_CLIENT_LOCKED(ct));
4828 	if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
4829 	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4830 		return;
4831 	}
4832 	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4833 		ct_status = "optimal";
4834 		report_lb_c = 1;
4835 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4836 		ct_status = "degraded";
4837 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4838 		ct_status = "failed";
4839 	} else {
4840 		ct_status = "unknown";
4841 	}
4842 
4843 	lb_buf[0] = 0;		/* not interested in load balancing config */
4844 
4845 	if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
4846 		status = "removed";
4847 	} else if (MDI_PI_IS_OFFLINE(pip)) {
4848 		status = "offline";
4849 	} else if (MDI_PI_IS_ONLINE(pip)) {
4850 		status = "online";
4851 		report_lb_p = 1;
4852 	} else if (MDI_PI_IS_STANDBY(pip)) {
4853 		status = "standby";
4854 	} else if (MDI_PI_IS_FAULT(pip)) {
4855 		status = "faulted";
4856 	} else {
4857 		status = "unknown";
4858 	}
4859 
4860 	if (cdip) {
4861 		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4862 
4863 		/*
4864 		 * NOTE: Keeping "multipath status: %s" and
4865 		 * "Load balancing: %s" format unchanged in case someone
4866 		 * scrubs /var/adm/messages looking for these messages.
4867 		 */
4868 		if (report_lb_c && report_lb_p) {
4869 			if (ct->ct_lb == LOAD_BALANCE_LBA) {
4870 				(void) snprintf(lb_buf, sizeof (lb_buf),
4871 				    "%s, region-size: %d", mdi_load_balance_lba,
4872 				    ct->ct_lb_args->region_size);
4873 			} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4874 				(void) snprintf(lb_buf, sizeof (lb_buf),
4875 				    "%s", mdi_load_balance_none);
4876 			} else {
4877 				(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4878 				    mdi_load_balance_rr);
4879 			}
4880 
4881 			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
4882 			    "?%s (%s%d) multipath status: %s: "
4883 			    "path %d %s is %s: Load balancing: %s\n",
4884 			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
4885 			    ddi_get_instance(cdip), ct_status,
4886 			    mdi_pi_get_path_instance(pip),
4887 			    mdi_pi_spathname(pip), status, lb_buf);
4888 		} else {
4889 			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
4890 			    "?%s (%s%d) multipath status: %s: "
4891 			    "path %d %s is %s\n",
4892 			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
4893 			    ddi_get_instance(cdip), ct_status,
4894 			    mdi_pi_get_path_instance(pip),
4895 			    mdi_pi_spathname(pip), status);
4896 		}
4897 
4898 		kmem_free(ct_path, MAXPATHLEN);
4899 		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4900 	}
4901 }
4902 
4903 #ifdef	DEBUG
4904 /*
4905  * i_mdi_log():
4906  *		Utility function for error message management
4907  *
4908  *		NOTE: Implementation takes care of trailing \n for cmn_err,
4909  *		MDI_DEBUG should not terminate fmt strings with \n.
4910  *
4911  *		NOTE: If the level is >= 2, and there is no leading !?^
4912 		 *		then a leading ! is implied (but can be overridden via
4913  *		mdi_debug_consoleonly). If you are using kmdb on the console,
4914  *		consider setting mdi_debug_consoleonly to 1 as an aid.
4915  */
4916 /*PRINTFLIKE4*/
4917 static void
4918 i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
4919 {
4920 	char		name[MAXNAMELEN];
4921 	char		buf[512];
4922 	char		*bp;
4923 	va_list		ap;
4924 	int		log_only = 0;
4925 	int		boot_only = 0;
4926 	int		console_only = 0;
4927 
4928 	if (dip) {
4929 		(void) snprintf(name, sizeof(name), "%s%d: ",
4930 		    ddi_driver_name(dip), ddi_get_instance(dip));
4931 	} else {
4932 		name[0] = 0;
4933 	}
4934 
4935 	va_start(ap, fmt);
4936 	(void) vsnprintf(buf, sizeof(buf), fmt, ap);
4937 	va_end(ap);
4938 
4939 	switch (buf[0]) {
4940 	case '!':
4941 		bp = &buf[1];
4942 		log_only = 1;
4943 		break;
4944 	case '?':
4945 		bp = &buf[1];
4946 		boot_only = 1;
4947 		break;
4948 	case '^':
4949 		bp = &buf[1];
4950 		console_only = 1;
4951 		break;
4952 	default:
4953 		if (level >= 2)
4954 			log_only = 1;		/* ! implied */
4955 		bp = buf;
4956 		break;
4957 	}
4958 	if (mdi_debug_logonly) {
4959 		log_only = 1;
4960 		boot_only = 0;
4961 		console_only = 0;
4962 	}
4963 	if (mdi_debug_consoleonly) {
4964 		log_only = 0;
4965 		boot_only = 0;
4966 		console_only = 1;
4967 		level = CE_NOTE;
4968 		goto console;
4969 	}
4970 
4971 	switch (level) {
4972 	case CE_NOTE:
4973 		level = CE_CONT;
4974 		/* FALLTHROUGH */
4975 	case CE_CONT:
4976 		if (boot_only) {
4977 			cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
4978 		} else if (console_only) {
4979 			cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
4980 		} else if (log_only) {
4981 			cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
4982 		} else {
4983 			cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
4984 		}
4985 		break;
4986 
4987 	case CE_WARN:
4988 	case CE_PANIC:
4989 	console:
4990 		if (boot_only) {
4991 			cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
4992 		} else if (console_only) {
4993 			cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
4994 		} else if (log_only) {
4995 			cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
4996 		} else {
4997 			cmn_err(level, "mdi: %s%s: %s", name, func, bp);
4998 		}
4999 		break;
5000 	default:
5001 		cmn_err(level, "mdi: %s%s", name, bp);
5002 		break;
5003 	}
5004 }
5005 #endif	/* DEBUG */
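
/*
 * Illustrative MDI_DEBUG usage sketch, following the prefix conventions
 * documented for i_mdi_log() above: no trailing newline in the format
 * string, and a leading '!' keeps the message out of the console (implied
 * anyway for debug levels >= 2):
 *
 *	MDI_DEBUG(2, (MDI_WARN, dip, "!unexpected path state %d", state));
 */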
5006 
5007 void
5008 i_mdi_client_online(dev_info_t *ct_dip)
5009 {
5010 	mdi_client_t	*ct;
5011 
5012 	/*
5013 	 * Client online notification.  Mark the client state as online and
5014 	 * restore our binding with the dev_info node.
5015 	 */
5016 	ct = i_devi_get_client(ct_dip);
5017 	ASSERT(ct != NULL);
5018 	MDI_CLIENT_LOCK(ct);
5019 	MDI_CLIENT_SET_ONLINE(ct);
5020 	/* catch for any memory leaks */
5021 	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
5022 	ct->ct_dip = ct_dip;
5023 
5024 	if (ct->ct_power_cnt == 0)
5025 		(void) i_mdi_power_all_phci(ct);
5026 
5027 	MDI_DEBUG(4, (MDI_NOTE, ct_dip,
5028 	    "i_mdi_pm_hold_client %p", (void *)ct));
5029 	i_mdi_pm_hold_client(ct, 1);
5030 
5031 	MDI_CLIENT_UNLOCK(ct);
5032 }
5033 
5034 void
5035 i_mdi_phci_online(dev_info_t *ph_dip)
5036 {
5037 	mdi_phci_t	*ph;
5038 
5039 	/* pHCI online notification. Mark state accordingly */
5040 	ph = i_devi_get_phci(ph_dip);
5041 	ASSERT(ph != NULL);
5042 	MDI_PHCI_LOCK(ph);
5043 	MDI_PHCI_SET_ONLINE(ph);
5044 	MDI_PHCI_UNLOCK(ph);
5045 }
5046 
5047 /*
5048  * mdi_devi_online():
5049  *		Online notification from NDI framework on pHCI/client
5050  *		device online.
5051  * Return Values:
5052  *		NDI_SUCCESS
5053  *		MDI_FAILURE
5054  */
5055 /*ARGSUSED*/
5056 int
5057 mdi_devi_online(dev_info_t *dip, uint_t flags)
5058 {
5059 	if (MDI_PHCI(dip)) {
5060 		i_mdi_phci_online(dip);
5061 	}
5062 
5063 	if (MDI_CLIENT(dip)) {
5064 		i_mdi_client_online(dip);
5065 	}
5066 	return (NDI_SUCCESS);
5067 }
5068 
5069 /*
5070  * mdi_devi_offline():
5071  *		Offline notification from NDI framework on pHCI/Client device
5072  *		offline.
5073  *
5074  * Return Values:
5075  *		NDI_SUCCESS
5076  *		NDI_FAILURE
5077  */
5078 /*ARGSUSED*/
5079 int
5080 mdi_devi_offline(dev_info_t *dip, uint_t flags)
5081 {
5082 	int		rv = NDI_SUCCESS;
5083 
5084 	if (MDI_CLIENT(dip)) {
5085 		rv = i_mdi_client_offline(dip, flags);
5086 		if (rv != NDI_SUCCESS)
5087 			return (rv);
5088 	}
5089 
5090 	if (MDI_PHCI(dip)) {
5091 		rv = i_mdi_phci_offline(dip, flags);
5092 
5093 		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
5094 			/* set client back online */
5095 			i_mdi_client_online(dip);
5096 		}
5097 	}
5098 
5099 	return (rv);
5100 }
5101 
5102 /*ARGSUSED*/
5103 static int
5104 i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
5105 {
5106 	int		rv = NDI_SUCCESS;
5107 	mdi_phci_t	*ph;
5108 	mdi_client_t	*ct;
5109 	mdi_pathinfo_t	*pip;
5110 	mdi_pathinfo_t	*next;
5111 	mdi_pathinfo_t	*failed_pip = NULL;
5112 	dev_info_t	*cdip;
5113 
5114 	/*
5115 	 * pHCI component offline notification
5116 	 * Make sure that this pHCI instance is free to be offlined.
5117 	 * If it is OK to proceed, Offline and remove all the child
5118 	 * mdi_pathinfo nodes.  This process automatically offlines
5119 	 * corresponding client devices, for which this pHCI provides
5120 	 * critical services.
5121 	 */
5122 	ph = i_devi_get_phci(dip);
5123 	MDI_DEBUG(2, (MDI_NOTE, dip,
5124 	    "called %p %p", (void *)dip, (void *)ph));
5125 	if (ph == NULL) {
5126 		return (rv);
5127 	}
5128 
5129 	MDI_PHCI_LOCK(ph);
5130 
5131 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5132 		MDI_DEBUG(1, (MDI_WARN, dip,
5133 		    "!pHCI already offlined: %p", (void *)dip));
5134 		MDI_PHCI_UNLOCK(ph);
5135 		return (NDI_SUCCESS);
5136 	}
5137 
5138 	/*
5139 	 * Check to see if the pHCI can be offlined
5140 	 */
5141 	if (ph->ph_unstable) {
5142 		MDI_DEBUG(1, (MDI_WARN, dip,
5143 		    "!One or more target devices are in transient state. "
5144 		    "This device can not be removed at this moment. "
5145 		    "Please try again later."));
5146 		MDI_PHCI_UNLOCK(ph);
5147 		return (NDI_BUSY);
5148 	}
5149 
5150 	pip = ph->ph_path_head;
5151 	while (pip != NULL) {
5152 		MDI_PI_LOCK(pip);
5153 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5154 
5155 		/*
5156 		 * The mdi_pathinfo state is OK. Check the client state.
5157 		 * If failover in progress fail the pHCI from offlining
5158 		 */
5159 		ct = MDI_PI(pip)->pi_client;
5160 		i_mdi_client_lock(ct, pip);
5161 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5162 		    (ct->ct_unstable)) {
5163 			/*
5164 			 * Failover is in progress, Fail the DR
5165 			 */
5166 			MDI_DEBUG(1, (MDI_WARN, dip,
5167 			    "!pHCI device is busy. "
5168 			    "This device can not be removed at this moment. "
5169 			    "Please try again later."));
5170 			MDI_PI_UNLOCK(pip);
5171 			i_mdi_client_unlock(ct);
5172 			MDI_PHCI_UNLOCK(ph);
5173 			return (NDI_BUSY);
5174 		}
5175 		MDI_PI_UNLOCK(pip);
5176 
5177 		/*
5178 		 * Check to see if we are removing the last path of this
5179 		 * client device...
5180 		 */
5181 		cdip = ct->ct_dip;
5182 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5183 		    (i_mdi_client_compute_state(ct, ph) ==
5184 		    MDI_CLIENT_STATE_FAILED)) {
5185 			i_mdi_client_unlock(ct);
5186 			MDI_PHCI_UNLOCK(ph);
5187 			if (ndi_devi_offline(cdip,
5188 			    NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
5189 				/*
5190 				 * ndi_devi_offline() failed.
5191 				 * This pHCI provides the critical path
5192 				 * to one or more client devices.
5193 				 * Return busy.
5194 				 */
5195 				MDI_PHCI_LOCK(ph);
5196 				MDI_DEBUG(1, (MDI_WARN, dip,
5197 				    "!pHCI device is busy. "
5198 				    "This device can not be removed at this "
5199 				    "moment. Please try again later."));
5200 				failed_pip = pip;
5201 				break;
5202 			} else {
5203 				MDI_PHCI_LOCK(ph);
5204 				pip = next;
5205 			}
5206 		} else {
5207 			i_mdi_client_unlock(ct);
5208 			pip = next;
5209 		}
5210 	}
5211 
5212 	if (failed_pip) {
5213 		pip = ph->ph_path_head;
5214 		while (pip != failed_pip) {
5215 			MDI_PI_LOCK(pip);
5216 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5217 			ct = MDI_PI(pip)->pi_client;
5218 			i_mdi_client_lock(ct, pip);
5219 			cdip = ct->ct_dip;
5220 			switch (MDI_CLIENT_STATE(ct)) {
5221 			case MDI_CLIENT_STATE_OPTIMAL:
5222 			case MDI_CLIENT_STATE_DEGRADED:
5223 				if (cdip) {
5224 					MDI_PI_UNLOCK(pip);
5225 					i_mdi_client_unlock(ct);
5226 					MDI_PHCI_UNLOCK(ph);
5227 					(void) ndi_devi_online(cdip, 0);
5228 					MDI_PHCI_LOCK(ph);
5229 					pip = next;
5230 					continue;
5231 				}
5232 				break;
5233 
5234 			case MDI_CLIENT_STATE_FAILED:
5235 				if (cdip) {
5236 					MDI_PI_UNLOCK(pip);
5237 					i_mdi_client_unlock(ct);
5238 					MDI_PHCI_UNLOCK(ph);
5239 					(void) ndi_devi_offline(cdip,
5240 						NDI_DEVFS_CLEAN);
5241 					MDI_PHCI_LOCK(ph);
5242 					pip = next;
5243 					continue;
5244 				}
5245 				break;
5246 			}
5247 			MDI_PI_UNLOCK(pip);
5248 			i_mdi_client_unlock(ct);
5249 			pip = next;
5250 		}
5251 		MDI_PHCI_UNLOCK(ph);
5252 		return (NDI_BUSY);
5253 	}
5254 
5255 	/*
5256 	 * Mark the pHCI as offline
5257 	 */
5258 	MDI_PHCI_SET_OFFLINE(ph);
5259 
5260 	/*
5261 	 * Mark the child mdi_pathinfo nodes as transient
5262 	 */
5263 	pip = ph->ph_path_head;
5264 	while (pip != NULL) {
5265 		MDI_PI_LOCK(pip);
5266 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5267 		MDI_PI_SET_OFFLINING(pip);
5268 		MDI_PI_UNLOCK(pip);
5269 		pip = next;
5270 	}
5271 	MDI_PHCI_UNLOCK(ph);
5272 	/*
5273 	 * Give a chance for any pending commands to execute
5274 	 */
5275 	delay_random(mdi_delay);
5276 	MDI_PHCI_LOCK(ph);
5277 	pip = ph->ph_path_head;
5278 	while (pip != NULL) {
5279 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5280 		(void) i_mdi_pi_offline(pip, flags);
5281 		MDI_PI_LOCK(pip);
5282 		ct = MDI_PI(pip)->pi_client;
5283 		if (!MDI_PI_IS_OFFLINE(pip)) {
5284 			MDI_DEBUG(1, (MDI_WARN, dip,
5285 			    "!pHCI device is busy. "
5286 			    "This device can not be removed at this moment. "
5287 			    "Please try again later."));
5288 			MDI_PI_UNLOCK(pip);
5289 			MDI_PHCI_SET_ONLINE(ph);
5290 			MDI_PHCI_UNLOCK(ph);
5291 			return (NDI_BUSY);
5292 		}
5293 		MDI_PI_UNLOCK(pip);
5294 		pip = next;
5295 	}
5296 	MDI_PHCI_UNLOCK(ph);
5297 
5298 	return (rv);
5299 }
5300 
5301 void
5302 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
5303 {
5304 	mdi_phci_t	*ph;
5305 	mdi_client_t	*ct;
5306 	mdi_pathinfo_t	*pip;
5307 	mdi_pathinfo_t	*next;
5308 	dev_info_t	*cdip;
5309 
5310 	if (!MDI_PHCI(dip))
5311 		return;
5312 
5313 	ph = i_devi_get_phci(dip);
5314 	if (ph == NULL) {
5315 		return;
5316 	}
5317 
5318 	MDI_PHCI_LOCK(ph);
5319 
5320 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5321 		/* has no last path */
5322 		MDI_PHCI_UNLOCK(ph);
5323 		return;
5324 	}
5325 
5326 	pip = ph->ph_path_head;
5327 	while (pip != NULL) {
5328 		MDI_PI_LOCK(pip);
5329 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5330 
5331 		ct = MDI_PI(pip)->pi_client;
5332 		i_mdi_client_lock(ct, pip);
5333 		MDI_PI_UNLOCK(pip);
5334 
5335 		cdip = ct->ct_dip;
5336 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5337 		    (i_mdi_client_compute_state(ct, ph) ==
5338 		    MDI_CLIENT_STATE_FAILED)) {
5339 			/* Last path. Mark client dip as retiring */
5340 			i_mdi_client_unlock(ct);
5341 			MDI_PHCI_UNLOCK(ph);
5342 			(void) e_ddi_mark_retiring(cdip, cons_array);
5343 			MDI_PHCI_LOCK(ph);
5344 			pip = next;
5345 		} else {
5346 			i_mdi_client_unlock(ct);
5347 			pip = next;
5348 		}
5349 	}
5350 
5351 	MDI_PHCI_UNLOCK(ph);
5352 
5353 	return;
5354 }
5355 
5356 void
5357 mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
5358 {
5359 	mdi_phci_t	*ph;
5360 	mdi_client_t	*ct;
5361 	mdi_pathinfo_t	*pip;
5362 	mdi_pathinfo_t	*next;
5363 	dev_info_t	*cdip;
5364 
5365 	if (!MDI_PHCI(dip))
5366 		return;
5367 
5368 	ph = i_devi_get_phci(dip);
5369 	if (ph == NULL)
5370 		return;
5371 
5372 	MDI_PHCI_LOCK(ph);
5373 
5374 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5375 		MDI_PHCI_UNLOCK(ph);
5376 		/* not last path */
5377 		return;
5378 	}
5379 
5380 	if (ph->ph_unstable) {
5381 		MDI_PHCI_UNLOCK(ph);
5382 		/* can't check for constraints */
5383 		*constraint = 0;
5384 		return;
5385 	}
5386 
5387 	pip = ph->ph_path_head;
5388 	while (pip != NULL) {
5389 		MDI_PI_LOCK(pip);
5390 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5391 
5392 		/*
5393 		 * The mdi_pathinfo state is OK. Check the client state.
5394 		 * If failover in progress fail the pHCI from offlining
5395 		 */
5396 		ct = MDI_PI(pip)->pi_client;
5397 		i_mdi_client_lock(ct, pip);
5398 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5399 		    (ct->ct_unstable)) {
5400 			/*
5401 			 * Failover is in progress, can't check for constraints
5402 			 */
5403 			MDI_PI_UNLOCK(pip);
5404 			i_mdi_client_unlock(ct);
5405 			MDI_PHCI_UNLOCK(ph);
5406 			*constraint = 0;
5407 			return;
5408 		}
5409 		MDI_PI_UNLOCK(pip);
5410 
5411 		/*
5412 		 * Check to see if we are retiring the last path of this
5413 		 * client device...
5414 		 */
5415 		cdip = ct->ct_dip;
5416 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5417 		    (i_mdi_client_compute_state(ct, ph) ==
5418 		    MDI_CLIENT_STATE_FAILED)) {
5419 			i_mdi_client_unlock(ct);
5420 			MDI_PHCI_UNLOCK(ph);
5421 			(void) e_ddi_retire_notify(cdip, constraint);
5422 			MDI_PHCI_LOCK(ph);
5423 			pip = next;
5424 		} else {
5425 			i_mdi_client_unlock(ct);
5426 			pip = next;
5427 		}
5428 	}
5429 
5430 	MDI_PHCI_UNLOCK(ph);
5431 
5432 	return;
5433 }
5434 
5435 /*
5436  * offline the path(s) hanging off the pHCI. If the
5437  * last path to any client, check that constraints
5438  * have been applied.
5439  *
5440  * If constraint is 0, we aren't going to retire the
5441  * pHCI. However we still need to go through the paths
5442  * calling e_ddi_retire_finalize() to clear their
5443  * contract barriers.
5444  */
5445 void
5446 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint)
5447 {
5448 	mdi_phci_t	*ph;
5449 	mdi_client_t	*ct;
5450 	mdi_pathinfo_t	*pip;
5451 	mdi_pathinfo_t	*next;
5452 	dev_info_t	*cdip;
5453 	int		unstable = 0;
5454 	int		tmp_constraint;
5455 
5456 	if (!MDI_PHCI(dip))
5457 		return;
5458 
5459 	ph = i_devi_get_phci(dip);
5460 	if (ph == NULL) {
5461 		/* no last path and no pips */
5462 		return;
5463 	}
5464 
5465 	MDI_PHCI_LOCK(ph);
5466 
5467 	if (MDI_PHCI_IS_OFFLINE(ph)) {
5468 		MDI_PHCI_UNLOCK(ph);
5469 		/* no last path and no pips */
5470 		return;
5471 	}
5472 
5473 	/*
5474 	 * Check to see if the pHCI can be offlined
5475 	 */
5476 	if (ph->ph_unstable) {
5477 		unstable = 1;
5478 	}
5479 
5480 	pip = ph->ph_path_head;
5481 	while (pip != NULL) {
5482 		MDI_PI_LOCK(pip);
5483 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5484 
5485 		/*
5486 		 * if failover in progress fail the pHCI from offlining
5487 		 */
5488 		ct = MDI_PI(pip)->pi_client;
5489 		i_mdi_client_lock(ct, pip);
5490 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5491 		    (ct->ct_unstable)) {
5492 			unstable = 1;
5493 		}
5494 		MDI_PI_UNLOCK(pip);
5495 
5496 		/*
5497 		 * Check to see if we are removing the last path of this
5498 		 * client device...
5499 		 */
5500 		cdip = ct->ct_dip;
5501 		if (!phci_only && cdip &&
5502 		    (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5503 		    (i_mdi_client_compute_state(ct, ph) ==
5504 		    MDI_CLIENT_STATE_FAILED)) {
5505 			i_mdi_client_unlock(ct);
5506 			MDI_PHCI_UNLOCK(ph);
5507 			/*
5508 			 * This is the last path to this client.
5509 			 *
5510 			 * Constraint will only be set to 1 if this client can
5511 			 * be retired (as already determined by
5512 			 * mdi_phci_retire_notify). However we don't actually
5513 			 * need to retire the client (we just retire the last
5514 			 * path - MPXIO will then fail all I/Os to the client).
5515 			 * But we still need to call e_ddi_retire_finalize so
5516 			 * the contract barriers can be cleared. Therefore we
5517 			 * temporarily set constraint = 0 so that the client
5518 			 * dip is not retired.
5519 			 */
5520 			tmp_constraint = 0;
5521 			(void) e_ddi_retire_finalize(cdip, &tmp_constraint);
5522 			MDI_PHCI_LOCK(ph);
5523 			pip = next;
5524 		} else {
5525 			i_mdi_client_unlock(ct);
5526 			pip = next;
5527 		}
5528 	}
5529 
5530 	if (!phci_only && *((int *)constraint) == 0) {
5531 		MDI_PHCI_UNLOCK(ph);
5532 		return;
5533 	}
5534 
5535 	/*
5536 	 * Cannot offline pip(s)
5537 	 */
5538 	if (unstable) {
5539 		cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: "
5540 		    "pHCI in transient state, cannot retire",
5541 		    ddi_driver_name(dip), ddi_get_instance(dip));
5542 		MDI_PHCI_UNLOCK(ph);
5543 		return;
5544 	}
5545 
5546 	/*
5547 	 * Mark the pHCI as offline
5548 	 */
5549 	MDI_PHCI_SET_OFFLINE(ph);
5550 
5551 	/*
5552 	 * Mark the child mdi_pathinfo nodes as transient
5553 	 */
5554 	pip = ph->ph_path_head;
5555 	while (pip != NULL) {
5556 		MDI_PI_LOCK(pip);
5557 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5558 		MDI_PI_SET_OFFLINING(pip);
5559 		MDI_PI_UNLOCK(pip);
5560 		pip = next;
5561 	}
5562 	MDI_PHCI_UNLOCK(ph);
5563 	/*
5564 	 * Give a chance for any pending commands to execute
5565 	 */
5566 	delay_random(mdi_delay);
5567 	MDI_PHCI_LOCK(ph);
5568 	pip = ph->ph_path_head;
5569 	while (pip != NULL) {
5570 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5571 		(void) i_mdi_pi_offline(pip, 0);
5572 		MDI_PI_LOCK(pip);
5573 		ct = MDI_PI(pip)->pi_client;
5574 		if (!MDI_PI_IS_OFFLINE(pip)) {
5575 			cmn_err(CE_WARN, "mdi_phci_retire_finalize: "
5576 			    "path %d %s busy, cannot offline",
5577 			    mdi_pi_get_path_instance(pip),
5578 			    mdi_pi_spathname(pip));
5579 			MDI_PI_UNLOCK(pip);
5580 			MDI_PHCI_SET_ONLINE(ph);
5581 			MDI_PHCI_UNLOCK(ph);
5582 			return;
5583 		}
5584 		MDI_PI_UNLOCK(pip);
5585 		pip = next;
5586 	}
5587 	MDI_PHCI_UNLOCK(ph);
5588 
5589 	return;
5590 }
5591 
5592 void
5593 mdi_phci_unretire(dev_info_t *dip)
5594 {
5595 	mdi_phci_t	*ph;
5596 	mdi_pathinfo_t	*pip;
5597 	mdi_pathinfo_t	*next;
5598 
5599 	ASSERT(MDI_PHCI(dip));
5600 
5601 	/*
5602 	 * Online the phci
5603 	 */
5604 	i_mdi_phci_online(dip);
5605 
5606 	ph = i_devi_get_phci(dip);
5607 	MDI_PHCI_LOCK(ph);
5608 	pip = ph->ph_path_head;
5609 	while (pip != NULL) {
5610 		MDI_PI_LOCK(pip);
5611 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5612 		MDI_PI_UNLOCK(pip);
5613 		(void) i_mdi_pi_online(pip, 0);
5614 		pip = next;
5615 	}
5616 	MDI_PHCI_UNLOCK(ph);
5617 }
5618 
5619 /*ARGSUSED*/
5620 static int
5621 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
5622 {
5623 	int		rv = NDI_SUCCESS;
5624 	mdi_client_t	*ct;
5625 
5626 	/*
5627 	 * Client component to go offline.  Make sure that we are
5628 	 * not in failing over state and update client state
5629 	 * not in a failover state and update the client state
5630 	 * accordingly.
5631 	ct = i_devi_get_client(dip);
5632 	MDI_DEBUG(2, (MDI_NOTE, dip,
5633 	    "called %p %p", (void *)dip, (void *)ct));
5634 	if (ct != NULL) {
5635 		MDI_CLIENT_LOCK(ct);
5636 		if (ct->ct_unstable) {
5637 			/*
5638 			 * One or more paths are in transient state,
5639 			 * don't allow offline of a client device.
5640 			 */
5641 			MDI_DEBUG(1, (MDI_WARN, dip,
5642 			    "!One or more paths to "
5643 			    "this device are in transient state. "
5644 			    "This device can not be removed at this moment. "
5645 			    "Please try again later."));
5646 			MDI_CLIENT_UNLOCK(ct);
5647 			return (NDI_BUSY);
5648 		}
5649 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
5650 			/*
5651 			 * Failover is in progress, don't allow DR of
5652 			 * a client device
5653 			 */
5654 			MDI_DEBUG(1, (MDI_WARN, dip,
5655 			    "!Client device is Busy. "
5656 			    "This device can not be removed at this moment. "
5657 			    "Please try again later."));
5658 			MDI_CLIENT_UNLOCK(ct);
5659 			return (NDI_BUSY);
5660 		}
5661 		MDI_CLIENT_SET_OFFLINE(ct);
5662 
5663 		/*
5664 		 * Unbind our relationship with the dev_info node
5665 		 */
5666 		if (flags & NDI_DEVI_REMOVE) {
5667 			ct->ct_dip = NULL;
5668 		}
5669 		MDI_CLIENT_UNLOCK(ct);
5670 	}
5671 	return (rv);
5672 }
5673 
5674 /*
5675  * mdi_pre_attach():
5676  *		Pre attach() notification handler
5677  */
5678 /*ARGSUSED*/
5679 int
5680 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5681 {
5682 	/* don't support old DDI_PM_RESUME */
5683 	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
5684 	    (cmd == DDI_PM_RESUME))
5685 		return (DDI_FAILURE);
5686 
5687 	return (DDI_SUCCESS);
5688 }
5689 
5690 /*
5691  * mdi_post_attach():
5692  *		Post attach() notification handler
5693  */
5694 /*ARGSUSED*/
5695 void
5696 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
5697 {
5698 	mdi_phci_t	*ph;
5699 	mdi_client_t	*ct;
5700 	mdi_vhci_t	*vh;
5701 
5702 	if (MDI_PHCI(dip)) {
5703 		ph = i_devi_get_phci(dip);
5704 		ASSERT(ph != NULL);
5705 
5706 		MDI_PHCI_LOCK(ph);
5707 		switch (cmd) {
5708 		case DDI_ATTACH:
5709 			MDI_DEBUG(2, (MDI_NOTE, dip,
5710 			    "phci post_attach called %p", (void *)ph));
5711 			if (error == DDI_SUCCESS) {
5712 				MDI_PHCI_SET_ATTACH(ph);
5713 			} else {
5714 				MDI_DEBUG(1, (MDI_NOTE, dip,
5715 				    "!pHCI post_attach failed: error %d",
5716 				    error));
5717 				MDI_PHCI_SET_DETACH(ph);
5718 			}
5719 			break;
5720 
5721 		case DDI_RESUME:
5722 		case DDI_PM_RESUME:
5723 			MDI_DEBUG(2, (MDI_NOTE, dip,
5724 			    "pHCI post_resume: called %p", (void *)ph));
5725 			if (error == DDI_SUCCESS) {
5726 				MDI_PHCI_SET_RESUME(ph);
5727 			} else {
5728 				MDI_DEBUG(1, (MDI_NOTE, dip,
5729 				    "!pHCI post_resume failed: error %d",
5730 				    error));
5731 				MDI_PHCI_SET_SUSPEND(ph);
5732 			}
5733 			break;
5734 		}
5735 		MDI_PHCI_UNLOCK(ph);
5736 	}
5737 
5738 	if (MDI_CLIENT(dip)) {
5739 		ct = i_devi_get_client(dip);
5740 		ASSERT(ct != NULL);
5741 
5742 		MDI_CLIENT_LOCK(ct);
5743 		switch (cmd) {
5744 		case DDI_ATTACH:
5745 			MDI_DEBUG(2, (MDI_NOTE, dip,
5746 			    "client post_attach called %p", (void *)ct));
5747 			if (error != DDI_SUCCESS) {
5748 				MDI_DEBUG(1, (MDI_NOTE, dip,
5749 				    "!client post_attach failed: error %d",
5750 				    error));
5751 				MDI_CLIENT_SET_DETACH(ct);
5752 				MDI_DEBUG(4, (MDI_WARN, dip,
5753 				    "i_mdi_pm_reset_client"));
5754 				i_mdi_pm_reset_client(ct);
5755 				break;
5756 			}
5757 
5758 			/*
5759 			 * Client device has successfully attached, inform
5760 			 * the vhci.
5761 			 */
5762 			vh = ct->ct_vhci;
5763 			if (vh->vh_ops->vo_client_attached)
5764 				(*vh->vh_ops->vo_client_attached)(dip);
5765 
5766 			MDI_CLIENT_SET_ATTACH(ct);
5767 			break;
5768 
5769 		case DDI_RESUME:
5770 		case DDI_PM_RESUME:
5771 			MDI_DEBUG(2, (MDI_NOTE, dip,
5772 			    "client post_attach: called %p", (void *)ct));
5773 			if (error == DDI_SUCCESS) {
5774 				MDI_CLIENT_SET_RESUME(ct);
5775 			} else {
5776 				MDI_DEBUG(1, (MDI_NOTE, dip,
5777 				    "!client post_resume failed: error %d",
5778 				    error));
5779 				MDI_CLIENT_SET_SUSPEND(ct);
5780 			}
5781 			break;
5782 		}
5783 		MDI_CLIENT_UNLOCK(ct);
5784 	}
5785 }
5786 
5787 /*
5788  * mdi_pre_detach():
5789  *		Pre detach notification handler
5790  */
5791 /*ARGSUSED*/
5792 int
5793 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5794 {
5795 	int rv = DDI_SUCCESS;
5796 
5797 	if (MDI_CLIENT(dip)) {
5798 		(void) i_mdi_client_pre_detach(dip, cmd);
5799 	}
5800 
5801 	if (MDI_PHCI(dip)) {
5802 		rv = i_mdi_phci_pre_detach(dip, cmd);
5803 	}
5804 
5805 	return (rv);
5806 }
5807 
5808 /*ARGSUSED*/
5809 static int
5810 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5811 {
5812 	int		rv = DDI_SUCCESS;
5813 	mdi_phci_t	*ph;
5814 	mdi_client_t	*ct;
5815 	mdi_pathinfo_t	*pip;
5816 	mdi_pathinfo_t	*failed_pip = NULL;
5817 	mdi_pathinfo_t	*next;
5818 
5819 	ph = i_devi_get_phci(dip);
5820 	if (ph == NULL) {
5821 		return (rv);
5822 	}
5823 
5824 	MDI_PHCI_LOCK(ph);
5825 	switch (cmd) {
5826 	case DDI_DETACH:
5827 		MDI_DEBUG(2, (MDI_NOTE, dip,
5828 		    "pHCI pre_detach: called %p", (void *)ph));
5829 		if (!MDI_PHCI_IS_OFFLINE(ph)) {
5830 			/*
5831 			 * mdi_pathinfo nodes are still attached to
5832 			 * this pHCI. Fail the detach for this pHCI.
5833 			 */
5834 			MDI_DEBUG(2, (MDI_WARN, dip,
5835 			    "pHCI pre_detach: paths are still attached %p",
5836 			    (void *)ph));
5837 			rv = DDI_FAILURE;
5838 			break;
5839 		}
5840 		MDI_PHCI_SET_DETACH(ph);
5841 		break;
5842 
5843 	case DDI_SUSPEND:
5844 		/*
5845 		 * pHCI is getting suspended.  Since mpxio client
5846 		 * devices may not be suspended at this point, to avoid
5847 		 * a potential stack overflow, it is important to suspend
5848 		 * client devices before pHCI can be suspended.
5849 		 */
5850 
5851 		MDI_DEBUG(2, (MDI_NOTE, dip,
5852 		    "pHCI pre_suspend: called %p", (void *)ph));
5853 		/*
5854 		 * Suspend all the client devices accessible through this pHCI
5855 		 */
5856 		pip = ph->ph_path_head;
5857 		while (pip != NULL && rv == DDI_SUCCESS) {
5858 			dev_info_t *cdip;
5859 			MDI_PI_LOCK(pip);
5860 			next =
5861 			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5862 			ct = MDI_PI(pip)->pi_client;
5863 			i_mdi_client_lock(ct, pip);
5864 			cdip = ct->ct_dip;
5865 			MDI_PI_UNLOCK(pip);
5866 			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
5867 			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
5868 				i_mdi_client_unlock(ct);
5869 				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
5870 				    DDI_SUCCESS) {
5871 					/*
5872 					 * Suspend of one of the client
5873 					 * device has failed.
5874 					 */
5875 					MDI_DEBUG(1, (MDI_WARN, dip,
5876 					    "!suspend of device (%s%d) failed.",
5877 					    ddi_driver_name(cdip),
5878 					    ddi_get_instance(cdip)));
5879 					failed_pip = pip;
5880 					break;
5881 				}
5882 			} else {
5883 				i_mdi_client_unlock(ct);
5884 			}
5885 			pip = next;
5886 		}
5887 
5888 		if (rv == DDI_SUCCESS) {
5889 			/*
5890 			 * Suspend of client devices is complete. Proceed
5891 			 * with pHCI suspend.
5892 			 */
5893 			MDI_PHCI_SET_SUSPEND(ph);
5894 		} else {
5895 			/*
5896 			 * Revert back all the suspended client device states
5897 			 * Suspend of a client device failed; resume the
5898 			 * client devices that were already suspended.
5899 			pip = ph->ph_path_head;
5900 			while (pip != failed_pip) {
5901 				dev_info_t *cdip;
5902 				MDI_PI_LOCK(pip);
5903 				next =
5904 				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5905 				ct = MDI_PI(pip)->pi_client;
5906 				i_mdi_client_lock(ct, pip);
5907 				cdip = ct->ct_dip;
5908 				MDI_PI_UNLOCK(pip);
5909 				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
5910 					i_mdi_client_unlock(ct);
5911 					(void) devi_attach(cdip, DDI_RESUME);
5912 				} else {
5913 					i_mdi_client_unlock(ct);
5914 				}
5915 				pip = next;
5916 			}
5917 		}
5918 		break;
5919 
5920 	default:
5921 		rv = DDI_FAILURE;
5922 		break;
5923 	}
5924 	MDI_PHCI_UNLOCK(ph);
5925 	return (rv);
5926 }
5927 
5928 /*ARGSUSED*/
5929 static int
5930 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5931 {
5932 	int		rv = DDI_SUCCESS;
5933 	mdi_client_t	*ct;
5934 
5935 	ct = i_devi_get_client(dip);
5936 	if (ct == NULL) {
5937 		return (rv);
5938 	}
5939 
5940 	MDI_CLIENT_LOCK(ct);
5941 	switch (cmd) {
5942 	case DDI_DETACH:
5943 		MDI_DEBUG(2, (MDI_NOTE, dip,
5944 		    "client pre_detach: called %p",
5945 		     (void *)ct));
5946 		MDI_CLIENT_SET_DETACH(ct);
5947 		break;
5948 
5949 	case DDI_SUSPEND:
5950 		MDI_DEBUG(2, (MDI_NOTE, dip,
5951 		    "client pre_suspend: called %p",
5952 		    (void *)ct));
5953 		MDI_CLIENT_SET_SUSPEND(ct);
5954 		break;
5955 
5956 	default:
5957 		rv = DDI_FAILURE;
5958 		break;
5959 	}
5960 	MDI_CLIENT_UNLOCK(ct);
5961 	return (rv);
5962 }
5963 
5964 /*
5965  * mdi_post_detach():
5966  *		Post detach notification handler
5967  */
5968 /*ARGSUSED*/
5969 void
5970 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5971 {
5972 	/*
5973 	 * Detach/Suspend of mpxio component failed. Update our state
5974 	 * too
5975 	 */
5976 	if (MDI_PHCI(dip))
5977 		i_mdi_phci_post_detach(dip, cmd, error);
5978 
5979 	if (MDI_CLIENT(dip))
5980 		i_mdi_client_post_detach(dip, cmd, error);
5981 }
5982 
5983 /*ARGSUSED*/
5984 static void
5985 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5986 {
5987 	mdi_phci_t	*ph;
5988 
5989 	/*
5990 	 * Detach/Suspend of phci component failed. Update our state
5991 	 * too
5992 	 */
5993 	ph = i_devi_get_phci(dip);
5994 	if (ph == NULL) {
5995 		return;
5996 	}
5997 
5998 	MDI_PHCI_LOCK(ph);
5999 	/*
6000 	 * Detach/suspend of the pHCI failed; restore the
6001 	 * previous state.
6002 	 */
6003 	switch (cmd) {
6004 	case DDI_DETACH:
6005 		MDI_DEBUG(2, (MDI_NOTE, dip,
6006 		    "pHCI post_detach: called %p",
6007 		    (void *)ph));
6008 		if (error != DDI_SUCCESS)
6009 			MDI_PHCI_SET_ATTACH(ph);
6010 		break;
6011 
6012 	case DDI_SUSPEND:
6013 	case DDI_PM_SUSPEND:
6014 		MDI_DEBUG(2, (MDI_NOTE, dip,
6015 		    "pHCI post_suspend: called %p",
6016 		    (void *)ph));
6017 		if (error != DDI_SUCCESS)
6018 			MDI_PHCI_SET_RESUME(ph);
6019 		break;
6020 	case DDI_HOTPLUG_DETACH:
6021 		break;
6022 	}
6023 	MDI_PHCI_UNLOCK(ph);
6024 }
6025 
6026 /*ARGSUSED*/
6027 static void
6028 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
6029 {
6030 	mdi_client_t	*ct;
6031 
6032 	ct = i_devi_get_client(dip);
6033 	if (ct == NULL) {
6034 		return;
6035 	}
6036 	MDI_CLIENT_LOCK(ct);
6037 	/*
6038 	 * Detach/suspend of the client failed; restore the
6039 	 * previous state.
6040 	 */
6041 	switch (cmd) {
6042 	case DDI_DETACH:
6043 		MDI_DEBUG(2, (MDI_NOTE, dip,
6044 		    "client post_detach: called %p", (void *)ct));
6045 		if (DEVI_IS_ATTACHING(dip)) {
6046 			MDI_DEBUG(4, (MDI_NOTE, dip,
6047 			    "i_mdi_pm_rele_client\n"));
6048 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
6049 		} else {
6050 			MDI_DEBUG(4, (MDI_NOTE, dip,
6051 			    "i_mdi_pm_reset_client\n"));
6052 			i_mdi_pm_reset_client(ct);
6053 		}
6054 		if (error != DDI_SUCCESS)
6055 			MDI_CLIENT_SET_ATTACH(ct);
6056 		break;
6057 
6058 	case DDI_SUSPEND:
6059 	case DDI_PM_SUSPEND:
6060 		MDI_DEBUG(2, (MDI_NOTE, dip,
6061 		    "called %p", (void *)ct));
6062 		if (error != DDI_SUCCESS)
6063 			MDI_CLIENT_SET_RESUME(ct);
6064 		break;
6065 	case DDI_HOTPLUG_DETACH:
6066 		break;
6067 	}
6068 	MDI_CLIENT_UNLOCK(ct);
6069 }
6070 
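/*
 * Return 1 if per-path kstats have already been created for this
 * mdi_pathinfo node, 0 otherwise.
 */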
6071 int
6072 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
6073 {
6074 	return (MDI_PI(pip)->pi_kstats ? 1 : 0);
6075 }
6076 
6077 /*
6078  * create and install per-path (client - pHCI) statistics
6079  * I/O stats supported: nread, nwritten, reads, and writes
6080  * Error stats - hard errors, soft errors, & transport errors
6081  */
6082 int
6083 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
6084 {
6085 	kstat_t			*kiosp, *kerrsp;
6086 	struct pi_errs		*nsp;
6087 	struct mdi_pi_kstats	*mdi_statp;
6088 
6089 	if (MDI_PI(pip)->pi_kstats != NULL)
6090 		return (MDI_SUCCESS);
6091 
6092 	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
6093 	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
6094 		return (MDI_FAILURE);
6095 	}
6096 
6097 	(void) strcat(ksname, ",err");
6098 	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
6099 	    KSTAT_TYPE_NAMED,
6100 	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
6101 	if (kerrsp == NULL) {
6102 		kstat_delete(kiosp);
6103 		return (MDI_FAILURE);
6104 	}
6105 
6106 	nsp = (struct pi_errs *)kerrsp->ks_data;
6107 	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
6108 	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
6109 	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
6110 	    KSTAT_DATA_UINT32);
6111 	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
6112 	    KSTAT_DATA_UINT32);
6113 	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
6114 	    KSTAT_DATA_UINT32);
6115 	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
6116 	    KSTAT_DATA_UINT32);
6117 	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
6118 	    KSTAT_DATA_UINT32);
6119 	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
6120 	    KSTAT_DATA_UINT32);
6121 	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
6122 	    KSTAT_DATA_UINT32);
6123 	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
6124 
6125 	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
6126 	mdi_statp->pi_kstat_ref = 1;
6127 	mdi_statp->pi_kstat_iostats = kiosp;
6128 	mdi_statp->pi_kstat_errstats = kerrsp;
6129 	kstat_install(kiosp);
6130 	kstat_install(kerrsp);
6131 	MDI_PI(pip)->pi_kstats = mdi_statp;
6132 	return (MDI_SUCCESS);
6133 }
6134 
6135 /*
6136  * destroy per-path (client - pHCI) statistics
6137  */
6138 static void
6139 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
6140 {
6141 
6142 	struct mdi_pi_kstats *mdi_statp;
6143 
6144 	if (MDI_PI(pip)->pi_kstats == NULL)
6145 		return;
6146 	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
6147 		return;
6148 
6149 	MDI_PI(pip)->pi_kstats = NULL;
6150 
6151 	/*
6152 	 * the kstat may be shared between multiple pathinfo nodes
6153 	 * decrement this pathinfo's usage, removing the kstats
6154 	 * themselves when the last pathinfo reference is removed.
6155 	 */
6156 	ASSERT(mdi_statp->pi_kstat_ref > 0);
6157 	if (--mdi_statp->pi_kstat_ref != 0)
6158 		return;
6159 
6160 	kstat_delete(mdi_statp->pi_kstat_iostats);
6161 	kstat_delete(mdi_statp->pi_kstat_errstats);
6162 	kmem_free(mdi_statp, sizeof (*mdi_statp));
6163 }
6164 
6165 /*
6166  * update I/O paths KSTATS
6167  */
6168 void
6169 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
6170 {
6171 	kstat_t *iostatp;
6172 	size_t xfer_cnt;
6173 
6174 	ASSERT(pip != NULL);
6175 
6176 	/*
6177 	 * I/O can be driven across a path prior to having path
6178 	 * statistics available, i.e. probe(9e).
6179 	 */
6180 	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
6181 		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
6182 		xfer_cnt = bp->b_bcount - bp->b_resid;
6183 		if (bp->b_flags & B_READ) {
6184 			KSTAT_IO_PTR(iostatp)->reads++;
6185 			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
6186 		} else {
6187 			KSTAT_IO_PTR(iostatp)->writes++;
6188 			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
6189 		}
6190 	}
6191 }
6192 
6193 /*
6194  * Enable the path (specific client/target/initiator).
6195  * Enabling a path means that MPxIO may select the enabled path for routing
6196  * future I/O requests, subject to other path state constraints.
6197  */
6198 int
6199 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
6200 {
6201 	mdi_phci_t	*ph;
6202 
6203 	ph = MDI_PI(pip)->pi_phci;
6204 	if (ph == NULL) {
6205 		MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6206 		    "!failed: path %s %p: NULL ph",
6207 		    mdi_pi_spathname(pip), (void *)pip));
6208 		return (MDI_FAILURE);
6209 	}
6210 
6211 	(void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
6212 		MDI_ENABLE_OP);
6213 	MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6214 	    "!returning success pip = %p. ph = %p",
6215 	    (void *)pip, (void *)ph));
6216 	return (MDI_SUCCESS);
6217 
6218 }
6219 
6220 /*
6221  * Disable the path (specific client/target/initiator)
6222  * Disabling a path means that MPxIO will not select the disabled path for
6223  * routing any new I/O requests.
6224  */
6225 int
6226 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
6227 {
6228 	mdi_phci_t	*ph;
6229 
6230 	ph = MDI_PI(pip)->pi_phci;
6231 	if (ph == NULL) {
6232 		MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6233 		    "!failed: path %s %p: NULL ph",
6234 		    mdi_pi_spathname(pip), (void *)pip));
6235 		return (MDI_FAILURE);
6236 	}
6237 
6238 	(void) i_mdi_enable_disable_path(pip,
6239 	    ph->ph_vhci, flags, MDI_DISABLE_OP);
6240 	MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6241 	    "!returning success pip = %p. ph = %p",
6242 	    (void *)pip, (void *)ph));
6243 	return (MDI_SUCCESS);
6244 }
6245 
6246 /*
6247  * disable the path to a particular pHCI (pHCI specified in the phci_path
6248  * argument) for a particular client (specified in the client_path argument).
6249  * Disabling a path means that MPxIO will not select the disabled path for
6250  * routing any new I/O requests.
6251  * NOTE: this will be removed once the NWS files are changed to use the new
6252  * mdi_{enable,disable}_path interfaces
6253  */
6254 int
6255 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6256 {
6257 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
6258 }
6259 
6260 /*
6261  * Enable the path to a particular pHCI (pHCI specified in the phci_path
6262  * argument) for a particular client (specified in the client_path argument).
6263  * Enabling a path means that MPxIO may select the enabled path for routing
6264  * future I/O requests, subject to other path state constraints.
6265  * NOTE: this will be removed once the NWS files are changed to use the new
6266  * mdi_{enable,disable}_path interfaces
6267  */
6268 
6269 int
6270 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6271 {
6272 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
6273 }
6274 
6275 /*
6276  * Common routine for doing enable/disable.
6277  */
6278 static mdi_pathinfo_t *
6279 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
6280 		int op)
6281 {
6282 	int		sync_flag = 0;
6283 	int		rv;
6284 	mdi_pathinfo_t	*next;
6285 	int		(*f)() = NULL;
6286 
6287 	/*
6288 	 * Check to make sure the path is not already in the
6289 	 * requested state. If it is just return the next path
6290 	 * as we have nothing to do here.
6291 	 */
6292 	if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) ||
6293 	    (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) {
6294 		MDI_PI_LOCK(pip);
6295 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6296 		MDI_PI_UNLOCK(pip);
6297 		return (next);
6298 	}
6299 
6300 	f = vh->vh_ops->vo_pi_state_change;
6301 
6302 	sync_flag = (flags << 8) & 0xf00;
6303 
6304 	/*
6305 	 * Do a callback into the mdi consumer to let it
6306 	 * know that path is about to get enabled/disabled.
6307 	 */
6308 	rv = MDI_SUCCESS;
6309 	if (f != NULL) {
6310 		rv = (*f)(vh->vh_dip, pip, 0,
6311 			MDI_PI_EXT_STATE(pip),
6312 			MDI_EXT_STATE_CHANGE | sync_flag |
6313 			op | MDI_BEFORE_STATE_CHANGE);
6314 		if (rv != MDI_SUCCESS) {
6315 			MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6316 			    "vo_pi_state_change: failed rv = %x", rv));
6317 		}
6318 	}
6319 	MDI_PI_LOCK(pip);
6320 	next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6321 
6322 	switch (flags) {
6323 		case USER_DISABLE:
6324 			if (op == MDI_DISABLE_OP) {
6325 				MDI_PI_SET_USER_DISABLE(pip);
6326 			} else {
6327 				MDI_PI_SET_USER_ENABLE(pip);
6328 			}
6329 			break;
6330 		case DRIVER_DISABLE:
6331 			if (op == MDI_DISABLE_OP) {
6332 				MDI_PI_SET_DRV_DISABLE(pip);
6333 			} else {
6334 				MDI_PI_SET_DRV_ENABLE(pip);
6335 			}
6336 			break;
6337 		case DRIVER_DISABLE_TRANSIENT:
6338 			if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
6339 				MDI_PI_SET_DRV_DISABLE_TRANS(pip);
6340 			} else {
6341 				MDI_PI_SET_DRV_ENABLE_TRANS(pip);
6342 			}
6343 			break;
6344 	}
6345 	MDI_PI_UNLOCK(pip);
6346 	/*
6347 	 * Do a callback into the mdi consumer to let it
6348 	 * know that path is now enabled/disabled.
6349 	 */
6350 	if (f != NULL) {
6351 		rv = (*f)(vh->vh_dip, pip, 0,
6352 			MDI_PI_EXT_STATE(pip),
6353 			MDI_EXT_STATE_CHANGE | sync_flag |
6354 			op | MDI_AFTER_STATE_CHANGE);
6355 		if (rv != MDI_SUCCESS) {
6356 			MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6357 			    "vo_pi_state_change failed: rv = %x", rv));
6358 		}
6359 	}
6360 	return (next);
6361 }
6362 
6363 /*
6364  * Common routine for doing enable/disable.
6365  * NOTE: this will be removed once the NWS files are changed to use the new
6366  * mdi_{enable,disable}_path interfaces
6367  */
6368 int
6369 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
6370 {
6371 
6372 	mdi_phci_t	*ph;
6373 	mdi_vhci_t	*vh = NULL;
6374 	mdi_client_t	*ct;
6375 	mdi_pathinfo_t	*next, *pip;
6376 	int		found_it;
6377 
6378 	ph = i_devi_get_phci(pdip);
6379 	MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6380 	    "!op = %d pdip = %p cdip = %p", op, (void *)pdip,
6381 	    (void *)cdip));
6382 	if (ph == NULL) {
6383 		MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6384 		    "!failed: operation %d: NULL ph", op));
6385 		return (MDI_FAILURE);
6386 	}
6387 
6388 	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
6389 		MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6390 		    "!failed: invalid operation %d", op));
6391 		return (MDI_FAILURE);
6392 	}
6393 
6394 	vh = ph->ph_vhci;
6395 
6396 	if (cdip == NULL) {
6397 		/*
6398 		 * Need to mark the Phci as enabled/disabled.
6399 		 */
6400 		MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip,
6401 		    "op %d for the phci", op));
6402 		MDI_PHCI_LOCK(ph);
6403 		switch (flags) {
6404 			case USER_DISABLE:
6405 				if (op == MDI_DISABLE_OP) {
6406 					MDI_PHCI_SET_USER_DISABLE(ph);
6407 				} else {
6408 					MDI_PHCI_SET_USER_ENABLE(ph);
6409 				}
6410 				break;
6411 			case DRIVER_DISABLE:
6412 				if (op == MDI_DISABLE_OP) {
6413 					MDI_PHCI_SET_DRV_DISABLE(ph);
6414 				} else {
6415 					MDI_PHCI_SET_DRV_ENABLE(ph);
6416 				}
6417 				break;
6418 			case DRIVER_DISABLE_TRANSIENT:
6419 				if (op == MDI_DISABLE_OP) {
6420 					MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
6421 				} else {
6422 					MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
6423 				}
6424 				break;
6425 			default:
6426 				MDI_PHCI_UNLOCK(ph);
6427 				MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6428 				    "!invalid flag argument= %d", flags));
6429 		}
6430 
6431 		/*
6432 		 * The pHCI state has been updated. Now enable/disable
6433 		 * the pathinfo nodes for each client.
6434 		 */
6435 		pip = ph->ph_path_head;
6436 		while (pip != NULL) {
6437 			pip = i_mdi_enable_disable_path(pip, vh, flags, op);
6438 		}
6439 		MDI_PHCI_UNLOCK(ph);
6440 	} else {
6441 
6442 		/*
6443 		 * Disable a specific client.
6444 		 */
6445 		ct = i_devi_get_client(cdip);
6446 		if (ct == NULL) {
6447 			MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6448 			    "!failed: operation = %d: NULL ct", op));
6449 			return (MDI_FAILURE);
6450 		}
6451 
6452 		MDI_CLIENT_LOCK(ct);
6453 		pip = ct->ct_path_head;
6454 		found_it = 0;
6455 		while (pip != NULL) {
6456 			MDI_PI_LOCK(pip);
6457 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6458 			if (MDI_PI(pip)->pi_phci == ph) {
6459 				MDI_PI_UNLOCK(pip);
6460 				found_it = 1;
6461 				break;
6462 			}
6463 			MDI_PI_UNLOCK(pip);
6464 			pip = next;
6465 		}
6466 
6467 
6468 		MDI_CLIENT_UNLOCK(ct);
6469 		if (found_it == 0) {
6470 			MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6471 			    "!failed. Could not find corresponding pip\n"));
6472 			return (MDI_FAILURE);
6473 		}
6474 
6475 		(void) i_mdi_enable_disable_path(pip, vh, flags, op);
6476 	}
6477 
6478 	MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6479 	    "!op %d returning success pdip = %p cdip = %p",
6480 	    op, (void *)pdip, (void *)cdip));
6481 	return (MDI_SUCCESS);
6482 }
6483 
6484 /*
6485  * Ensure phci powered up
6486  */
6487 static void
6488 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
6489 {
6490 	dev_info_t	*ph_dip;
6491 
6492 	ASSERT(pip != NULL);
6493 	ASSERT(MDI_PI_LOCKED(pip));
6494 
6495 	if (MDI_PI(pip)->pi_pm_held) {
6496 		return;
6497 	}
6498 
6499 	ph_dip = mdi_pi_get_phci(pip);
6500 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6501 	    "%s %p", mdi_pi_spathname(pip), (void *)pip));
6502 	if (ph_dip == NULL) {
6503 		return;
6504 	}
6505 
6506 	MDI_PI_UNLOCK(pip);
6507 	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
6508 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6509 	pm_hold_power(ph_dip);
6510 	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
6511 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
6512 	MDI_PI_LOCK(pip);
6513 
6514 	/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
6515 	if (DEVI(ph_dip)->devi_pm_info)
6516 		MDI_PI(pip)->pi_pm_held = 1;
6517 }
6518 
6519 /*
6520  * Allow phci powered down
6521  */
6522 static void
6523 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
6524 {
6525 	dev_info_t	*ph_dip = NULL;
6526 
6527 	ASSERT(pip != NULL);
6528 	ASSERT(MDI_PI_LOCKED(pip));
6529 
6530 	if (MDI_PI(pip)->pi_pm_held == 0) {
6531 		return;
6532 	}
6533 
6534 	ph_dip = mdi_pi_get_phci(pip);
6535 	ASSERT(ph_dip != NULL);
6536 
6537 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6538 	    "%s %p", mdi_pi_spathname(pip), (void *)pip));
6539 
6540 	MDI_PI_UNLOCK(pip);
6541 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6542 	    "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6543 	pm_rele_power(ph_dip);
6544 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6545 	    "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6546 	MDI_PI_LOCK(pip);
6547 
6548 	MDI_PI(pip)->pi_pm_held = 0;
6549 }
6550 
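/*
 * Add 'incr' to the client's power hold count; caller must hold the
 * client lock.
 */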
6551 static void
6552 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
6553 {
6554 	ASSERT(MDI_CLIENT_LOCKED(ct));
6555 
6556 	ct->ct_power_cnt += incr;
6557 	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6558 	    "%p ct_power_cnt = %d incr = %d",
6559 	    (void *)ct, ct->ct_power_cnt, incr));
6560 	ASSERT(ct->ct_power_cnt >= 0);
6561 }
6562 
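/*
 * Release the pHCI power hold on each path of the client.
 */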
6563 static void
6564 i_mdi_rele_all_phci(mdi_client_t *ct)
6565 {
6566 	mdi_pathinfo_t  *pip;
6567 
6568 	ASSERT(MDI_CLIENT_LOCKED(ct));
6569 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
6570 	while (pip != NULL) {
6571 		mdi_hold_path(pip);
6572 		MDI_PI_LOCK(pip);
6573 		i_mdi_pm_rele_pip(pip);
6574 		MDI_PI_UNLOCK(pip);
6575 		mdi_rele_path(pip);
6576 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6577 	}
6578 }
6579 
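/*
 * Drop 'decr' power holds from an attached client; once the hold count
 * reaches zero, release the power holds on all of its pHCIs.
 */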
6580 static void
6581 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
6582 {
6583 	ASSERT(MDI_CLIENT_LOCKED(ct));
6584 
6585 	if (i_ddi_devi_attached(ct->ct_dip)) {
6586 		ct->ct_power_cnt -= decr;
6587 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6588 		    "%p ct_power_cnt = %d decr = %d",
6589 		    (void *)ct, ct->ct_power_cnt, decr));
6590 	}
6591 
6592 	ASSERT(ct->ct_power_cnt >= 0);
6593 	if (ct->ct_power_cnt == 0) {
6594 		i_mdi_rele_all_phci(ct);
6595 		return;
6596 	}
6597 }
6598 
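/*
 * Reset the client power accounting: zero the hold count, release the
 * power holds on all pHCIs, and clear the config/unconfig hold flags.
 */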
6599 static void
6600 i_mdi_pm_reset_client(mdi_client_t *ct)
6601 {
6602 	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6603 	    "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
6604 	ASSERT(MDI_CLIENT_LOCKED(ct));
6605 	ct->ct_power_cnt = 0;
6606 	i_mdi_rele_all_phci(ct);
6607 	ct->ct_powercnt_config = 0;
6608 	ct->ct_powercnt_unconfig = 0;
6609 	ct->ct_powercnt_reset = 1;
6610 }
6611 
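/*
 * Hold and power up all components of the pHCI associated with the
 * given path; returns MDI_FAILURE if pm_powerup() fails.
 */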
6612 static int
6613 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
6614 {
6615 	int		ret;
6616 	dev_info_t	*ph_dip;
6617 
6618 	MDI_PI_LOCK(pip);
6619 	i_mdi_pm_hold_pip(pip);
6620 
6621 	ph_dip = mdi_pi_get_phci(pip);
6622 	MDI_PI_UNLOCK(pip);
6623 
6624 	/* bring all components of phci to full power */
6625 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6626 	    "pm_powerup for %s%d %p", ddi_driver_name(ph_dip),
6627 	    ddi_get_instance(ph_dip), (void *)pip));
6628 
6629 	ret = pm_powerup(ph_dip);
6630 
6631 	if (ret == DDI_FAILURE) {
6632 		MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6633 		    "pm_powerup FAILED for %s%d %p",
6634 		    ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
6635 		    (void *)pip));
6636 
6637 		MDI_PI_LOCK(pip);
6638 		i_mdi_pm_rele_pip(pip);
6639 		MDI_PI_UNLOCK(pip);
6640 		return (MDI_FAILURE);
6641 	}
6642 
6643 	return (MDI_SUCCESS);
6644 }
6645 
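/*
 * Power up the pHCI of every path to the client that is in the init,
 * online or standby state; succeeds if at least one pHCI powers up.
 */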
6646 static int
6647 i_mdi_power_all_phci(mdi_client_t *ct)
6648 {
6649 	mdi_pathinfo_t  *pip;
6650 	int		succeeded = 0;
6651 
6652 	ASSERT(MDI_CLIENT_LOCKED(ct));
6653 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
6654 	while (pip != NULL) {
6655 		/*
6656 		 * Don't power if MDI_PATHINFO_STATE_FAULT
6657 		 * or MDI_PATHINFO_STATE_OFFLINE.
6658 		 */
6659 		if (MDI_PI_IS_INIT(pip) ||
6660 		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
6661 			mdi_hold_path(pip);
6662 			MDI_CLIENT_UNLOCK(ct);
6663 			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
6664 				succeeded = 1;
6665 
6666 			ASSERT(ct == MDI_PI(pip)->pi_client);
6667 			MDI_CLIENT_LOCK(ct);
6668 			mdi_rele_path(pip);
6669 		}
6670 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6671 	}
6672 
6673 	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
6674 }
6675 
6676 /*
6677  * mdi_bus_power():
6678  *		1. Place the phci(s) into powered up state so that
6679  *		   client can do power management
6680  *		2. Ensure the phci(s) remain powered up while the client
 *		   is power managing
6681  * Return Values:
6682  *		MDI_SUCCESS
6683  *		MDI_FAILURE
6684  */
6685 int
6686 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
6687     void *arg, void *result)
6688 {
6689 	int			ret = MDI_SUCCESS;
6690 	pm_bp_child_pwrchg_t	*bpc;
6691 	mdi_client_t		*ct;
6692 	dev_info_t		*cdip;
6693 	pm_bp_has_changed_t	*bphc;
6694 
6695 	/*
6696 	 * BUS_POWER_NOINVOL not supported
6697 	 */
6698 	if (op == BUS_POWER_NOINVOL)
6699 		return (MDI_FAILURE);
6700 
6701 	/*
6702 	 * ignore other OPs.
6703 	 * return quickly to save CPU cycles on the ct processing
6704 	 */
6705 	switch (op) {
6706 	case BUS_POWER_PRE_NOTIFICATION:
6707 	case BUS_POWER_POST_NOTIFICATION:
6708 		bpc = (pm_bp_child_pwrchg_t *)arg;
6709 		cdip = bpc->bpc_dip;
6710 		break;
6711 	case BUS_POWER_HAS_CHANGED:
6712 		bphc = (pm_bp_has_changed_t *)arg;
6713 		cdip = bphc->bphc_dip;
6714 		break;
6715 	default:
6716 		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
6717 	}
6718 
6719 	ASSERT(MDI_CLIENT(cdip));
6720 
6721 	ct = i_devi_get_client(cdip);
6722 	if (ct == NULL)
6723 		return (MDI_FAILURE);
6724 
6725 	/*
6726 	 * wait till the mdi_pathinfo node state changes are processed
6727 	 */
6728 	MDI_CLIENT_LOCK(ct);
6729 	switch (op) {
6730 	case BUS_POWER_PRE_NOTIFICATION:
6731 		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6732 		    "BUS_POWER_PRE_NOTIFICATION:"
6733 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
6734 		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6735 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));
6736 
6737 		/* serialize power level change per client */
6738 		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6739 			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6740 
6741 		MDI_CLIENT_SET_POWER_TRANSITION(ct);
6742 
6743 		if (ct->ct_power_cnt == 0) {
6744 			ret = i_mdi_power_all_phci(ct);
6745 		}
6746 
6747 		/*
6748 		 * if new_level > 0:
6749 		 *	- hold phci(s)
6750 		 *	- power up phci(s) if not already
6751 		 * ignore power down
6752 		 */
6753 		if (bpc->bpc_nlevel > 0) {
6754 			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
6755 				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6756 				    "i_mdi_pm_hold_client\n"));
6757 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
6758 			}
6759 		}
6760 		break;
6761 	case BUS_POWER_POST_NOTIFICATION:
6762 		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6763 		    "BUS_POWER_POST_NOTIFICATION:"
6764 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
6765 		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6766 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
6767 		    *(int *)result));
6768 
6769 		if (*(int *)result == DDI_SUCCESS) {
6770 			if (bpc->bpc_nlevel > 0) {
6771 				MDI_CLIENT_SET_POWER_UP(ct);
6772 			} else {
6773 				MDI_CLIENT_SET_POWER_DOWN(ct);
6774 			}
6775 		}
6776 
6777 		/* release the hold we did in pre-notification */
6778 		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
6779 		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
6780 			MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6781 			    "i_mdi_pm_rele_client\n"));
6782 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
6783 		}
6784 
6785 		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
6786 			/* another thread might have started attaching */
6787 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6788 				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6789 				    "i_mdi_pm_rele_client\n"));
6790 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
6791 			/* detaching was taken care of in pm_post_unconfig */
6792 			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
6793 				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6794 				    "i_mdi_pm_reset_client\n"));
6795 				i_mdi_pm_reset_client(ct);
6796 			}
6797 		}
6798 
6799 		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
6800 		cv_broadcast(&ct->ct_powerchange_cv);
6801 
6802 		break;
6803 
6804 	/* need to do more */
6805 	case BUS_POWER_HAS_CHANGED:
6806 		MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6807 		    "BUS_POWER_HAS_CHANGED:"
6808 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
6809 		    ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
6810 		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));
6811 
6812 		if (bphc->bphc_nlevel > 0 &&
6813 		    bphc->bphc_nlevel > bphc->bphc_olevel) {
6814 			if (ct->ct_power_cnt == 0) {
6815 				ret = i_mdi_power_all_phci(ct);
6816 			}
6817 			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6818 			    "i_mdi_pm_hold_client\n"));
6819 			i_mdi_pm_hold_client(ct, ct->ct_path_count);
6820 		}
6821 
6822 		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
6823 			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6824 			    "i_mdi_pm_rele_client\n"));
6825 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
6826 		}
6827 		break;
6828 	default:
6829 		dev_err(parent, CE_WARN, "!unhandled bus power operation: 0x%x",
6830 		    op);
6831 		break;
6832 	}
6833 
6834 	MDI_CLIENT_UNLOCK(ct);
6835 	return (ret);
6836 }
6837 
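/*
 * Pre-config power handling for a single client: wait for any power
 * transition to complete, then power up and hold the pHCIs before the
 * client is configured.
 */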
6838 static int
6839 i_mdi_pm_pre_config_one(dev_info_t *child)
6840 {
6841 	int		ret = MDI_SUCCESS;
6842 	mdi_client_t	*ct;
6843 
6844 	ct = i_devi_get_client(child);
6845 	if (ct == NULL)
6846 		return (MDI_FAILURE);
6847 
6848 	MDI_CLIENT_LOCK(ct);
6849 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6850 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6851 
6852 	if (!MDI_CLIENT_IS_FAILED(ct)) {
6853 		MDI_CLIENT_UNLOCK(ct);
6854 		MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
6855 		return (MDI_SUCCESS);
6856 	}
6857 
6858 	if (ct->ct_powercnt_config) {
6859 		MDI_CLIENT_UNLOCK(ct);
6860 		MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
6861 		return (MDI_SUCCESS);
6862 	}
6863 
6864 	if (ct->ct_power_cnt == 0) {
6865 		ret = i_mdi_power_all_phci(ct);
6866 	}
6867 	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6868 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6869 	ct->ct_powercnt_config = 1;
6870 	ct->ct_powercnt_reset = 0;
6871 	MDI_CLIENT_UNLOCK(ct);
6872 	return (ret);
6873 }
6874 
6875 static int
6876 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
6877 {
6878 	int			ret = MDI_SUCCESS;
6879 	dev_info_t		*cdip;
6880 
6881 	ASSERT(MDI_VHCI(vdip));
6882 
6883 	/* ndi_devi_config_one */
6884 	if (child) {
6885 		ASSERT(DEVI_BUSY_OWNED(vdip));
6886 		return (i_mdi_pm_pre_config_one(child));
6887 	}
6888 
6889 	/* devi_config_common */
6890 	ndi_devi_enter(vdip);
6891 	cdip = ddi_get_child(vdip);
6892 	while (cdip) {
6893 		dev_info_t *next = ddi_get_next_sibling(cdip);
6894 
6895 		ret = i_mdi_pm_pre_config_one(cdip);
6896 		if (ret != MDI_SUCCESS)
6897 			break;
6898 		cdip = next;
6899 	}
6900 	ndi_devi_exit(vdip);
6901 	return (ret);
6902 }
6903 
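/*
 * Pre-unconfig power handling for a single client: power up and hold
 * the pHCIs so the client can be unconfigured; '*held' is set when a
 * hold was taken and must be released in post-unconfig.
 */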
6904 static int
6905 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
6906 {
6907 	int		ret = MDI_SUCCESS;
6908 	mdi_client_t	*ct;
6909 
6910 	ct = i_devi_get_client(child);
6911 	if (ct == NULL)
6912 		return (MDI_FAILURE);
6913 
6914 	MDI_CLIENT_LOCK(ct);
6915 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6916 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6917 
6918 	if (!i_ddi_devi_attached(child)) {
6919 		MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
6920 		MDI_CLIENT_UNLOCK(ct);
6921 		return (MDI_SUCCESS);
6922 	}
6923 
6924 	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6925 	    (flags & NDI_AUTODETACH)) {
6926 		MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
6927 		MDI_CLIENT_UNLOCK(ct);
6928 		return (MDI_FAILURE);
6929 	}
6930 
6931 	if (ct->ct_powercnt_unconfig) {
6932 		MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
6933 		MDI_CLIENT_UNLOCK(ct);
6934 		*held = 1;
6935 		return (MDI_SUCCESS);
6936 	}
6937 
6938 	if (ct->ct_power_cnt == 0) {
6939 		ret = i_mdi_power_all_phci(ct);
6940 	}
6941 	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6942 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
6943 	ct->ct_powercnt_unconfig = 1;
6944 	ct->ct_powercnt_reset = 0;
6945 	MDI_CLIENT_UNLOCK(ct);
6946 	if (ret == MDI_SUCCESS)
6947 		*held = 1;
6948 	return (ret);
6949 }
6950 
6951 static int
6952 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
6953     int flags)
6954 {
6955 	int			ret = MDI_SUCCESS;
6956 	dev_info_t		*cdip;
6957 
6958 	ASSERT(MDI_VHCI(vdip));
6959 	*held = 0;
6960 
6961 	/* ndi_devi_unconfig_one */
6962 	if (child) {
6963 		ASSERT(DEVI_BUSY_OWNED(vdip));
6964 		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6965 	}
6966 
6967 	/* devi_unconfig_common */
6968 	ndi_devi_enter(vdip);
6969 	cdip = ddi_get_child(vdip);
6970 	while (cdip) {
6971 		dev_info_t *next = ddi_get_next_sibling(cdip);
6972 
6973 		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6974 		cdip = next;
6975 	}
6976 	ndi_devi_exit(vdip);
6977 
6978 	if (*held)
6979 		ret = MDI_SUCCESS;
6980 
6981 	return (ret);
6982 }
6983 
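/*
 * Post-config power handling for a single client: release the power
 * holds taken in pre-config, or reset the power accounting depending
 * on the resulting attach/power state of the client.
 */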
6984 static void
6985 i_mdi_pm_post_config_one(dev_info_t *child)
6986 {
6987 	mdi_client_t	*ct;
6988 
6989 	ct = i_devi_get_client(child);
6990 	if (ct == NULL)
6991 		return;
6992 
6993 	MDI_CLIENT_LOCK(ct);
6994 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6995 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6996 
6997 	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
6998 		MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
6999 		MDI_CLIENT_UNLOCK(ct);
7000 		return;
7001 	}
7002 
7003 	/* client has not been updated */
7004 	if (MDI_CLIENT_IS_FAILED(ct)) {
7005 		MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
7006 		MDI_CLIENT_UNLOCK(ct);
7007 		return;
7008 	}
7009 
7010 	/* another thread might have powered it down or detached it */
7011 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
7012 	    !DEVI_IS_ATTACHING(child)) ||
7013 	    (!i_ddi_devi_attached(child) &&
7014 	    !DEVI_IS_ATTACHING(child))) {
7015 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
7016 		i_mdi_pm_reset_client(ct);
7017 	} else {
7018 		mdi_pathinfo_t  *pip, *next;
7019 		int	valid_path_count = 0;
7020 
7021 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
7022 		pip = ct->ct_path_head;
7023 		while (pip != NULL) {
7024 			MDI_PI_LOCK(pip);
7025 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
7026 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
7027 				valid_path_count ++;
7028 			MDI_PI_UNLOCK(pip);
7029 			pip = next;
7030 		}
7031 		i_mdi_pm_rele_client(ct, valid_path_count);
7032 	}
7033 	ct->ct_powercnt_config = 0;
7034 	MDI_CLIENT_UNLOCK(ct);
7035 }
7036 
7037 static void
7038 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
7039 {
7040 	dev_info_t	*cdip;
7041 
7042 	ASSERT(MDI_VHCI(vdip));
7043 
7044 	/* ndi_devi_config_one */
7045 	if (child) {
7046 		ASSERT(DEVI_BUSY_OWNED(vdip));
7047 		i_mdi_pm_post_config_one(child);
7048 		return;
7049 	}
7050 
7051 	/* devi_config_common */
7052 	ndi_devi_enter(vdip);
7053 	cdip = ddi_get_child(vdip);
7054 	while (cdip) {
7055 		dev_info_t *next = ddi_get_next_sibling(cdip);
7056 
7057 		i_mdi_pm_post_config_one(cdip);
7058 		cdip = next;
7059 	}
7060 	ndi_devi_exit(vdip);
7061 }
7062 
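/*
 * Post-unconfig power handling for a single client: release the power
 * holds taken in pre-unconfig, or reset the power accounting depending
 * on the resulting attach/power state of the client.
 */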
7063 static void
7064 i_mdi_pm_post_unconfig_one(dev_info_t *child)
7065 {
7066 	mdi_client_t	*ct;
7067 
7068 	ct = i_devi_get_client(child);
7069 	if (ct == NULL)
7070 		return;
7071 
7072 	MDI_CLIENT_LOCK(ct);
7073 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
7074 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
7075 
7076 	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
7077 		MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
7078 		MDI_CLIENT_UNLOCK(ct);
7079 		return;
7080 	}
7081 
7082 	/* failure detaching or another thread just attached it */
7083 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
7084 	    i_ddi_devi_attached(child)) ||
7085 	    (!i_ddi_devi_attached(child) &&
7086 	    !DEVI_IS_ATTACHING(child))) {
7087 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
7088 		i_mdi_pm_reset_client(ct);
7089 	} else {
7090 		mdi_pathinfo_t  *pip, *next;
7091 		int	valid_path_count = 0;
7092 
7093 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
7094 		pip = ct->ct_path_head;
7095 		while (pip != NULL) {
7096 			MDI_PI_LOCK(pip);
7097 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
7098 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
7099 				valid_path_count ++;
7100 			MDI_PI_UNLOCK(pip);
7101 			pip = next;
7102 		}
7103 		i_mdi_pm_rele_client(ct, valid_path_count);
7104 		ct->ct_powercnt_unconfig = 0;
7105 	}
7106 
7107 	MDI_CLIENT_UNLOCK(ct);
7108 }
7109 
7110 static void
7111 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
7112 {
7113 	dev_info_t		*cdip;
7114 
7115 	ASSERT(MDI_VHCI(vdip));
7116 
7117 	if (!held) {
7118 		MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
7119 		return;
7120 	}
7121 
7122 	if (child) {
7123 		ASSERT(DEVI_BUSY_OWNED(vdip));
7124 		i_mdi_pm_post_unconfig_one(child);
7125 		return;
7126 	}
7127 
7128 	ndi_devi_enter(vdip);
7129 	cdip = ddi_get_child(vdip);
7130 	while (cdip) {
7131 		dev_info_t *next = ddi_get_next_sibling(cdip);
7132 
7133 		i_mdi_pm_post_unconfig_one(cdip);
7134 		cdip = next;
7135 	}
7136 	ndi_devi_exit(vdip);
7137 }
7138 
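/*
 * mdi_power():
 *		vHCI power management entry point for client
 *		(un)configuration; dispatches to the pre/post config and
 *		unconfig handlers above, or directly holds/releases client
 *		power for MDI_PM_HOLD_POWER/MDI_PM_RELE_POWER.
 */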
7139 int
7140 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
7141 {
7142 	int			ret = MDI_SUCCESS;
7143 	dev_info_t		*client_dip = NULL;
7144 	mdi_client_t		*ct;
7145 
7146 	/*
7147 	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
7148 	 * Power up pHCI for the named client device.
7149 	 * Note: Before the client is enumerated under vhci by phci,
7150 	 * client_dip can be NULL. Then proceed to power up all the
7151 	 * pHCIs.
7152 	 */
7153 	if (devnm != NULL) {
7154 		ndi_devi_enter(vdip);
7155 		client_dip = ndi_devi_findchild(vdip, devnm);
7156 	}
7157 
7158 	MDI_DEBUG(4, (MDI_NOTE, vdip,
7159 	    "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip));
7160 
7161 	switch (op) {
7162 	case MDI_PM_PRE_CONFIG:
7163 		ret = i_mdi_pm_pre_config(vdip, client_dip);
7164 		break;
7165 
7166 	case MDI_PM_PRE_UNCONFIG:
7167 		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
7168 		    flags);
7169 		break;
7170 
7171 	case MDI_PM_POST_CONFIG:
7172 		i_mdi_pm_post_config(vdip, client_dip);
7173 		break;
7174 
7175 	case MDI_PM_POST_UNCONFIG:
7176 		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
7177 		break;
7178 
7179 	case MDI_PM_HOLD_POWER:
7180 	case MDI_PM_RELE_POWER:
7181 		ASSERT(args);
7182 
7183 		client_dip = (dev_info_t *)args;
7184 		ASSERT(MDI_CLIENT(client_dip));
7185 
7186 		ct = i_devi_get_client(client_dip);
7187 		MDI_CLIENT_LOCK(ct);
7188 
7189 		if (op == MDI_PM_HOLD_POWER) {
7190 			if (ct->ct_power_cnt == 0) {
7191 				(void) i_mdi_power_all_phci(ct);
7192 				MDI_DEBUG(4, (MDI_NOTE, client_dip,
7193 				    "i_mdi_pm_hold_client\n"));
7194 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
7195 			}
7196 		} else {
7197 			if (DEVI_IS_ATTACHING(client_dip)) {
7198 				MDI_DEBUG(4, (MDI_NOTE, client_dip,
7199 				    "i_mdi_pm_rele_client\n"));
7200 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
7201 			} else {
7202 				MDI_DEBUG(4, (MDI_NOTE, client_dip,
7203 				    "i_mdi_pm_reset_client\n"));
7204 				i_mdi_pm_reset_client(ct);
7205 			}
7206 		}
7207 
7208 		MDI_CLIENT_UNLOCK(ct);
7209 		break;
7210 
7211 	default:
7212 		break;
7213 	}
7214 
7215 	if (devnm)
7216 		ndi_devi_exit(vdip);
7217 
7218 	return (ret);
7219 }
7220 
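/*
 * The following interfaces report whether a dev_info node is an mdi
 * vHCI, pHCI or client component and optionally return the associated
 * vHCI class name.
 */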
7221 int
7222 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
7223 {
7224 	mdi_vhci_t *vhci;
7225 
7226 	if (!MDI_VHCI(dip))
7227 		return (MDI_FAILURE);
7228 
7229 	if (mdi_class) {
7230 		vhci = DEVI(dip)->devi_mdi_xhci;
7231 		ASSERT(vhci);
7232 		*mdi_class = vhci->vh_class;
7233 	}
7234 
7235 	return (MDI_SUCCESS);
7236 }
7237 
7238 int
7239 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
7240 {
7241 	mdi_phci_t *phci;
7242 
7243 	if (!MDI_PHCI(dip))
7244 		return (MDI_FAILURE);
7245 
7246 	if (mdi_class) {
7247 		phci = DEVI(dip)->devi_mdi_xhci;
7248 		ASSERT(phci);
7249 		*mdi_class = phci->ph_vhci->vh_class;
7250 	}
7251 
7252 	return (MDI_SUCCESS);
7253 }
7254 
7255 int
7256 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
7257 {
7258 	mdi_client_t *client;
7259 
7260 	if (!MDI_CLIENT(dip))
7261 		return (MDI_FAILURE);
7262 
7263 	if (mdi_class) {
7264 		client = DEVI(dip)->devi_mdi_client;
7265 		ASSERT(client);
7266 		*mdi_class = client->ct_vhci->vh_class;
7267 	}
7268 
7269 	return (MDI_SUCCESS);
7270 }
7271 
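/*
 * mdi_client_get_vhci_private() / mdi_client_set_vhci_private():
 *		Get/set the vHCI private data associated with a client node.
 */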
7272 void *
7273 mdi_client_get_vhci_private(dev_info_t *dip)
7274 {
7275 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7276 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7277 		mdi_client_t	*ct;
7278 		ct = i_devi_get_client(dip);
7279 		return (ct->ct_vprivate);
7280 	}
7281 	return (NULL);
7282 }
7283 
7284 void
7285 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
7286 {
7287 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7288 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7289 		mdi_client_t	*ct;
7290 		ct = i_devi_get_client(dip);
7291 		ct->ct_vprivate = data;
7292 	}
7293 }
7294 /*
7295  * mdi_pi_get_vhci_private():
7296  *		Get the vhci private information associated with the
7297  *		mdi_pathinfo node
7298  */
7299 void *
7300 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
7301 {
7302 	caddr_t	vprivate = NULL;
7303 	if (pip) {
7304 		vprivate = MDI_PI(pip)->pi_vprivate;
7305 	}
7306 	return (vprivate);
7307 }
7308 
7309 /*
7310  * mdi_pi_set_vhci_private():
7311  *		Set the vhci private information in the mdi_pathinfo node
7312  */
7313 void
7314 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
7315 {
7316 	if (pip) {
7317 		MDI_PI(pip)->pi_vprivate = priv;
7318 	}
7319 }
7320 
7321 /*
7322  * mdi_phci_get_vhci_private():
7323  *		Get the vhci private information associated with the
7324  *		mdi_phci node
7325  */
7326 void *
7327 mdi_phci_get_vhci_private(dev_info_t *dip)
7328 {
7329 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7330 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7331 		mdi_phci_t	*ph;
7332 		ph = i_devi_get_phci(dip);
7333 		return (ph->ph_vprivate);
7334 	}
7335 	return (NULL);
7336 }
7337 
7338 /*
7339  * mdi_phci_set_vhci_private():
7340  *		Set the vhci private information in the mdi_phci node
7341  */
7342 void
7343 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
7344 {
7345 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7346 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7347 		mdi_phci_t	*ph;
7348 		ph = i_devi_get_phci(dip);
7349 		ph->ph_vprivate = priv;
7350 	}
7351 }
7352 
7353 int
7354 mdi_pi_ishidden(mdi_pathinfo_t *pip)
7355 {
7356 	return (MDI_PI_FLAGS_IS_HIDDEN(pip));
7357 }
7358 
7359 int
7360 mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
7361 {
7362 	return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip));
7363 }
7364 
7365 /* Return 1 if all client paths are device_removed */
7366 static int
7367 i_mdi_client_all_devices_removed(mdi_client_t *ct)
7368 {
7369 	mdi_pathinfo_t  *pip;
7370 	int		all_devices_removed = 1;
7371 
7372 	MDI_CLIENT_LOCK(ct);
7373 	for (pip = ct->ct_path_head; pip;
7374 	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) {
7375 		if (!mdi_pi_device_isremoved(pip)) {
7376 			all_devices_removed = 0;
7377 			break;
7378 		}
7379 	}
7380 	MDI_CLIENT_UNLOCK(ct);
7381 	return (all_devices_removed);
7382 }
7383 
7384 /*
7385  * When processing path hotunplug, represent device removal.
7386  */
7387 int
7388 mdi_pi_device_remove(mdi_pathinfo_t *pip)
7389 {
7390 	mdi_client_t	*ct;
7391 
7392 	MDI_PI_LOCK(pip);
7393 	if (mdi_pi_device_isremoved(pip)) {
7394 		MDI_PI_UNLOCK(pip);
7395 		return (0);
7396 	}
7397 	MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip);
7398 	MDI_PI_FLAGS_SET_HIDDEN(pip);
7399 	MDI_PI_UNLOCK(pip);
7400 
7401 	/*
7402 	 * If all paths associated with the client are now DEVICE_REMOVED,
7403 	 * reflect DEVICE_REMOVED in the client.
7404 	 */
7405 	ct = MDI_PI(pip)->pi_client;
7406 	if (ct && ct->ct_dip && i_mdi_client_all_devices_removed(ct))
7407 		(void) ndi_devi_device_remove(ct->ct_dip);
7408 	else
7409 		i_ddi_di_cache_invalidate();
7410 
7411 	return (1);
7412 }
7413 
7414 /*
7415  * When processing hotplug, if a path marked mdi_pi_device_isremoved()
7416  * is now accessible, then this interface is used to represent device insertion.
7417  */
7418 int
7419 mdi_pi_device_insert(mdi_pathinfo_t *pip)
7420 {
7421 	MDI_PI_LOCK(pip);
7422 	if (!mdi_pi_device_isremoved(pip)) {
7423 		MDI_PI_UNLOCK(pip);
7424 		return (0);
7425 	}
7426 	MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip);
7427 	MDI_PI_FLAGS_CLR_HIDDEN(pip);
7428 	MDI_PI_UNLOCK(pip);
7429 
7430 	i_ddi_di_cache_invalidate();
7431 
7432 	return (1);
7433 }
7434 
7435 /*
7436  * List of vhci class names:
7437  * A vhci class name must be in this list only if the corresponding vhci
7438  * driver intends to use the mdi provided bus config implementation
7439  * (i.e., mdi_vhci_bus_config()).
7440  */
7441 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
7442 #define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))
7443 
7444 /*
7445  * During boot time, the on-disk vhci cache for every vhci class is read
7446  * in the form of an nvlist and stored here.
7447  */
7448 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];
7449 
7450 /* nvpair names in vhci cache nvlist */
7451 #define	MDI_VHCI_CACHE_VERSION	1
7452 #define	MDI_NVPNAME_VERSION	"version"
7453 #define	MDI_NVPNAME_PHCIS	"phcis"
7454 #define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
7455 
7456 /*
7457  * Given vhci class name, return its on-disk vhci cache filename.
7458  * Memory for the returned filename which includes the full path is allocated
7459  * by this function.
7460  */
7461 static char *
7462 vhclass2vhcache_filename(char *vhclass)
7463 {
7464 	char *filename;
7465 	int len;
7466 	static char *fmt = "/etc/devices/mdi_%s_cache";
7467 
7468 	/*
7469 	 * fmt contains the on-disk vhci cache file name format;
7470 	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
7471 	 */
7472 
7473 	/* the -1 below is to account for "%s" in the format string */
7474 	len = strlen(fmt) + strlen(vhclass) - 1;
7475 	filename = kmem_alloc(len, KM_SLEEP);
7476 	(void) snprintf(filename, len, fmt, vhclass);
7477 	ASSERT(len == (strlen(filename) + 1));
7478 	return (filename);
7479 }
7480 
7481 /*
7482  * initialize the vhci cache related data structures and read the on-disk
7483  * vhci cached data into memory.
7484  */
7485 static void
7486 setup_vhci_cache(mdi_vhci_t *vh)
7487 {
7488 	mdi_vhci_config_t *vhc;
7489 	mdi_vhci_cache_t *vhcache;
7490 	int i;
7491 	nvlist_t *nvl = NULL;
7492 
7493 	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
7494 	vh->vh_config = vhc;
7495 	vhcache = &vhc->vhc_vhcache;
7496 
7497 	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
7498 
7499 	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
7500 	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
7501 
7502 	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
7503 
7504 	/*
7505 	 * Create string hash; same as mod_hash_create_strhash() except that
7506 	 * we use NULL key destructor.
7507 	 */
7508 	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
7509 	    mdi_bus_config_cache_hash_size,
7510 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
7511 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
7512 
7513 	/*
7514 	 * The on-disk vhci cache is read during booting prior to the
7515 	 * lights-out period by mdi_read_devices_files().
7516 	 */
7517 	for (i = 0; i < N_VHCI_CLASSES; i++) {
7518 		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
7519 			nvl = vhcache_nvl[i];
7520 			vhcache_nvl[i] = NULL;
7521 			break;
7522 		}
7523 	}
7524 
7525 	/*
7526 	 * this is to cover the case of someone manually causing unloading
7527 	 * (or detaching) and reloading (or attaching) of a vhci driver.
7528 	 */
7529 	if (nvl == NULL && modrootloaded)
7530 		nvl = read_on_disk_vhci_cache(vh->vh_class);
7531 
7532 	if (nvl != NULL) {
7533 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7534 		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
7535 			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
7536 		else  {
7537 			cmn_err(CE_WARN,
7538 			    "%s: data file corrupted, will recreate",
7539 			    vhc->vhc_vhcache_filename);
7540 		}
7541 		rw_exit(&vhcache->vhcache_lock);
7542 		nvlist_free(nvl);
7543 	}
7544 
7545 	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
7546 	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
7547 
7548 	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
7549 	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
7550 }
7551 
7552 /*
7553  * free all vhci cache related resources
7554  */
7555 static int
7556 destroy_vhci_cache(mdi_vhci_t *vh)
7557 {
7558 	mdi_vhci_config_t *vhc = vh->vh_config;
7559 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7560 	mdi_vhcache_phci_t *cphci, *cphci_next;
7561 	mdi_vhcache_client_t *cct, *cct_next;
7562 	mdi_vhcache_pathinfo_t *cpi, *cpi_next;
7563 
7564 	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
7565 		return (MDI_FAILURE);
7566 
7567 	kmem_free(vhc->vhc_vhcache_filename,
7568 	    strlen(vhc->vhc_vhcache_filename) + 1);
7569 
7570 	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
7571 
7572 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7573 	    cphci = cphci_next) {
7574 		cphci_next = cphci->cphci_next;
7575 		free_vhcache_phci(cphci);
7576 	}
7577 
7578 	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
7579 		cct_next = cct->cct_next;
7580 		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
7581 			cpi_next = cpi->cpi_next;
7582 			free_vhcache_pathinfo(cpi);
7583 		}
7584 		free_vhcache_client(cct);
7585 	}
7586 
7587 	rw_destroy(&vhcache->vhcache_lock);
7588 
7589 	mutex_destroy(&vhc->vhc_lock);
7590 	cv_destroy(&vhc->vhc_cv);
7591 	kmem_free(vhc, sizeof (mdi_vhci_config_t));
7592 	return (MDI_SUCCESS);
7593 }
7594 
7595 /*
7596  * Stop all vhci cache related async threads and free their resources.
7597  */
7598 static int
7599 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
7600 {
7601 	mdi_async_client_config_t *acc, *acc_next;
7602 
7603 	mutex_enter(&vhc->vhc_lock);
7604 	vhc->vhc_flags |= MDI_VHC_EXIT;
7605 	ASSERT(vhc->vhc_acc_thrcount >= 0);
7606 	cv_broadcast(&vhc->vhc_cv);
7607 
7608 	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
7609 	    vhc->vhc_acc_thrcount != 0) {
7610 		mutex_exit(&vhc->vhc_lock);
7611 		delay_random(mdi_delay);
7612 		mutex_enter(&vhc->vhc_lock);
7613 	}
7614 
7615 	vhc->vhc_flags &= ~MDI_VHC_EXIT;
7616 
7617 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
7618 		acc_next = acc->acc_next;
7619 		free_async_client_config(acc);
7620 	}
7621 	vhc->vhc_acc_list_head = NULL;
7622 	vhc->vhc_acc_list_tail = NULL;
7623 	vhc->vhc_acc_count = 0;
7624 
7625 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7626 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7627 		mutex_exit(&vhc->vhc_lock);
7628 		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
7629 			vhcache_dirty(vhc);
7630 			return (MDI_FAILURE);
7631 		}
7632 	} else
7633 		mutex_exit(&vhc->vhc_lock);
7634 
7635 	if (callb_delete(vhc->vhc_cbid) != 0)
7636 		return (MDI_FAILURE);
7637 
7638 	return (MDI_SUCCESS);
7639 }
7640 
7641 /*
7642  * Stop vhci cache flush thread
7643  */
7644 /* ARGSUSED */
7645 static boolean_t
7646 stop_vhcache_flush_thread(void *arg, int code)
7647 {
7648 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7649 
7650 	mutex_enter(&vhc->vhc_lock);
7651 	vhc->vhc_flags |= MDI_VHC_EXIT;
7652 	cv_broadcast(&vhc->vhc_cv);
7653 
7654 	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7655 		mutex_exit(&vhc->vhc_lock);
7656 		delay_random(mdi_delay);
7657 		mutex_enter(&vhc->vhc_lock);
7658 	}
7659 
7660 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7661 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7662 		mutex_exit(&vhc->vhc_lock);
7663 		(void) flush_vhcache(vhc, 1);
7664 	} else
7665 		mutex_exit(&vhc->vhc_lock);
7666 
7667 	return (B_TRUE);
7668 }
7669 
7670 /*
7671  * Enqueue the vhcache phci (cphci) at the tail of the list
7672  */
7673 static void
7674 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
7675 {
7676 	cphci->cphci_next = NULL;
7677 	if (vhcache->vhcache_phci_head == NULL)
7678 		vhcache->vhcache_phci_head = cphci;
7679 	else
7680 		vhcache->vhcache_phci_tail->cphci_next = cphci;
7681 	vhcache->vhcache_phci_tail = cphci;
7682 }
7683 
7684 /*
7685  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
7686  */
7687 static void
7688 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7689     mdi_vhcache_pathinfo_t *cpi)
7690 {
7691 	cpi->cpi_next = NULL;
7692 	if (cct->cct_cpi_head == NULL)
7693 		cct->cct_cpi_head = cpi;
7694 	else
7695 		cct->cct_cpi_tail->cpi_next = cpi;
7696 	cct->cct_cpi_tail = cpi;
7697 }
7698 
7699 /*
7700  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
7701  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7702  * flag set come at the beginning of the list. All cpis which have this
7703  * flag set come at the end of the list.
7704  */
7705 static void
7706 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7707     mdi_vhcache_pathinfo_t *newcpi)
7708 {
7709 	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
7710 
7711 	if (cct->cct_cpi_head == NULL ||
7712 	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
7713 		enqueue_tail_vhcache_pathinfo(cct, newcpi);
7714 	else {
7715 		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
7716 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
7717 		    prev_cpi = cpi, cpi = cpi->cpi_next)
7718 			;
7719 
7720 		if (prev_cpi == NULL)
7721 			cct->cct_cpi_head = newcpi;
7722 		else
7723 			prev_cpi->cpi_next = newcpi;
7724 
7725 		newcpi->cpi_next = cpi;
7726 
7727 		if (cpi == NULL)
7728 			cct->cct_cpi_tail = newcpi;
7729 	}
7730 }
7731 
7732 /*
7733  * Enqueue the vhcache client (cct) at the tail of the list
7734  */
7735 static void
7736 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
7737     mdi_vhcache_client_t *cct)
7738 {
7739 	cct->cct_next = NULL;
7740 	if (vhcache->vhcache_client_head == NULL)
7741 		vhcache->vhcache_client_head = cct;
7742 	else
7743 		vhcache->vhcache_client_tail->cct_next = cct;
7744 	vhcache->vhcache_client_tail = cct;
7745 }
7746 
7747 static void
7748 free_string_array(char **str, int nelem)
7749 {
7750 	int i;
7751 
7752 	if (str) {
7753 		for (i = 0; i < nelem; i++) {
7754 			if (str[i])
7755 				kmem_free(str[i], strlen(str[i]) + 1);
7756 		}
7757 		kmem_free(str, sizeof (char *) * nelem);
7758 	}
7759 }
7760 
7761 static void
7762 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
7763 {
7764 	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
7765 	kmem_free(cphci, sizeof (*cphci));
7766 }
7767 
7768 static void
7769 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
7770 {
7771 	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
7772 	kmem_free(cpi, sizeof (*cpi));
7773 }
7774 
7775 static void
7776 free_vhcache_client(mdi_vhcache_client_t *cct)
7777 {
7778 	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
7779 	kmem_free(cct, sizeof (*cct));
7780 }
7781 
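/*
 * vhcache_mknameaddr() builds the "<clientname>@<clientaddress>" string that
 * is used as the vhcache client hash key; for example, ct_name "ssd" and
 * ct_addr "2000002037cd9f72" yield "ssd@2000002037cd9f72".
 */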
7782 static char *
7783 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
7784 {
7785 	char *name_addr;
7786 	int len;
7787 
7788 	len = strlen(ct_name) + strlen(ct_addr) + 2;
7789 	name_addr = kmem_alloc(len, KM_SLEEP);
7790 	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
7791 
7792 	if (ret_len)
7793 		*ret_len = len;
7794 	return (name_addr);
7795 }
7796 
7797 /*
7798  * Copy the contents of paddrnvl to vhci cache.
7799  * paddrnvl nvlist contains path information for a vhci client.
7800  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
7801  */
7802 static void
7803 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
7804     mdi_vhcache_client_t *cct)
7805 {
7806 	nvpair_t *nvp = NULL;
7807 	mdi_vhcache_pathinfo_t *cpi;
7808 	uint_t nelem;
7809 	uint32_t *val;
7810 
7811 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7812 		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
7813 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7814 		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7815 		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
7816 		ASSERT(nelem == 2);
7817 		cpi->cpi_cphci = cphci_list[val[0]];
7818 		cpi->cpi_flags = val[1];
7819 		enqueue_tail_vhcache_pathinfo(cct, cpi);
7820 	}
7821 }
7822 
7823 /*
7824  * Copy the contents of caddrmapnvl to vhci cache.
7825  * caddrmapnvl nvlist contains vhci client address to phci client address
7826  * mappings. See the comment in mainnvl_to_vhcache() for the format of
7827  * this nvlist.
7828  */
7829 static void
7830 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
7831     mdi_vhcache_phci_t *cphci_list[])
7832 {
7833 	nvpair_t *nvp = NULL;
7834 	nvlist_t *paddrnvl;
7835 	mdi_vhcache_client_t *cct;
7836 
7837 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7838 		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
7839 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7840 		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7841 		(void) nvpair_value_nvlist(nvp, &paddrnvl);
7842 		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
7843 		/* the client must contain at least one path */
7844 		ASSERT(cct->cct_cpi_head != NULL);
7845 
7846 		enqueue_vhcache_client(vhcache, cct);
7847 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
7848 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7849 	}
7850 }
7851 
7852 /*
7853  * Copy the contents of the main nvlist to vhci cache.
7854  *
7855  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
7856  * The nvlist contains the mappings between the vhci client addresses and
7857  * their corresponding phci client addresses.
7858  *
7859  * The structure of the nvlist is as follows:
7860  *
7861  * Main nvlist:
7862  *	NAME		TYPE		DATA
7863  *	version		int32		version number
7864  *	phcis		string array	array of phci paths
7865  *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
7866  *
7867  * structure of c2paddrs_nvl:
7868  *	NAME		TYPE		DATA
7869  *	caddr1		nvlist_t	paddrs_nvl1
7870  *	caddr2		nvlist_t	paddrs_nvl2
7871  *	...
7872  * where caddr1, caddr2, ... are vhci client name and addresses in the
7873  * form of "<clientname>@<clientaddress>".
7874  * (for example: "ssd@2000002037cd9f72");
7875  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
7876  *
7877  * structure of paddrs_nvl:
7878  *	NAME		TYPE		DATA
7879  *	pi_addr1	uint32_array	(phci-id, cpi_flags)
7880  *	pi_addr2	uint32_array	(phci-id, cpi_flags)
7881  *	...
7882  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
7883  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
7884  * phci-ids are integers that identify the pHCIs to which the
7885  * bus specific addresses belong. These integers are used as an index
7886  * into the phcis string array in the main nvlist to get the pHCI path.
7887  */
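/*
 * For illustration (with a hypothetical pHCI path), a cache holding one
 * client that is reachable through a single path would look roughly like:
 *
 *	version		= MDI_VHCI_CACHE_VERSION
 *	phcis		= [ "/pci@0,0/fibre-channel@1" ]	(phci-id 0)
 *	clientaddrmap:
 *		"ssd@2000002037cd9f72":
 *			"w2100002037cd9f72,0"	= (0, 0)
 *
 * i.e. the path goes through phcis[0] and its cpi_flags are clear, meaning
 * the path is expected to exist.
 */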
7888 static int
7889 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
7890 {
7891 	char **phcis, **phci_namep;
7892 	uint_t nphcis;
7893 	mdi_vhcache_phci_t *cphci, **cphci_list;
7894 	nvlist_t *caddrmapnvl;
7895 	int32_t ver;
7896 	int i;
7897 	size_t cphci_list_size;
7898 
7899 	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
7900 
7901 	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
7902 	    ver != MDI_VHCI_CACHE_VERSION)
7903 		return (MDI_FAILURE);
7904 
7905 	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
7906 	    &nphcis) != 0)
7907 		return (MDI_SUCCESS);
7908 
7909 	ASSERT(nphcis > 0);
7910 
7911 	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
7912 	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
7913 	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
7914 		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
7915 		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
7916 		enqueue_vhcache_phci(vhcache, cphci);
7917 		cphci_list[i] = cphci;
7918 	}
7919 
7920 	ASSERT(vhcache->vhcache_phci_head != NULL);
7921 
7922 	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
7923 		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
7924 
7925 	kmem_free(cphci_list, cphci_list_size);
7926 	return (MDI_SUCCESS);
7927 }
7928 
7929 /*
7930  * Build paddrnvl for the specified client using the information in the
7931  * vhci cache and add it to the caddrmapnnvl.
7932  * Returns 0 on success, errno on failure.
7933  */
7934 static int
7935 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7936     nvlist_t *caddrmapnvl)
7937 {
7938 	mdi_vhcache_pathinfo_t *cpi;
7939 	nvlist_t *nvl;
7940 	int err;
7941 	uint32_t val[2];
7942 
7943 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7944 
7945 	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7946 		return (err);
7947 
7948 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7949 		val[0] = cpi->cpi_cphci->cphci_id;
7950 		val[1] = cpi->cpi_flags;
7951 		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7952 		    != 0)
7953 			goto out;
7954 	}
7955 
7956 	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7957 out:
7958 	nvlist_free(nvl);
7959 	return (err);
7960 }
7961 
7962 /*
7963  * Build caddrmapnvl using the information in the vhci cache
7964  * and add it to the mainnvl.
7965  * Returns 0 on success, errno on failure.
7966  */
7967 static int
7968 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7969 {
7970 	mdi_vhcache_client_t *cct;
7971 	nvlist_t *nvl;
7972 	int err;
7973 
7974 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7975 
7976 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7977 		return (err);
7978 
7979 	for (cct = vhcache->vhcache_client_head; cct != NULL;
7980 	    cct = cct->cct_next) {
7981 		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7982 			goto out;
7983 	}
7984 
7985 	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7986 out:
7987 	nvlist_free(nvl);
7988 	return (err);
7989 }
7990 
7991 /*
7992  * Build nvlist using the information in the vhci cache.
7993  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7994  * Returns nvl on success, NULL on failure.
7995  */
7996 static nvlist_t *
7997 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7998 {
7999 	mdi_vhcache_phci_t *cphci;
8000 	uint_t phci_count;
8001 	char **phcis;
8002 	nvlist_t *nvl;
8003 	int err, i;
8004 
8005 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
8006 		nvl = NULL;
8007 		goto out;
8008 	}
8009 
8010 	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
8011 	    MDI_VHCI_CACHE_VERSION)) != 0)
8012 		goto out;
8013 
8014 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8015 	if (vhcache->vhcache_phci_head == NULL) {
8016 		rw_exit(&vhcache->vhcache_lock);
8017 		return (nvl);
8018 	}
8019 
8020 	phci_count = 0;
8021 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8022 	    cphci = cphci->cphci_next)
8023 		cphci->cphci_id = phci_count++;
8024 
8025 	/* build phci pathname list */
8026 	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
8027 	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
8028 	    cphci = cphci->cphci_next, i++)
8029 		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
8030 
8031 	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
8032 	    phci_count);
8033 	free_string_array(phcis, phci_count);
8034 
8035 	if (err == 0 &&
8036 	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
8037 		rw_exit(&vhcache->vhcache_lock);
8038 		return (nvl);
8039 	}
8040 
8041 	rw_exit(&vhcache->vhcache_lock);
8042 out:
8043 	nvlist_free(nvl);
8044 	return (NULL);
8045 }
8046 
8047 /*
8048  * Lookup vhcache phci structure for the specified phci path.
8049  */
8050 static mdi_vhcache_phci_t *
8051 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
8052 {
8053 	mdi_vhcache_phci_t *cphci;
8054 
8055 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8056 
8057 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8058 	    cphci = cphci->cphci_next) {
8059 		if (strcmp(cphci->cphci_path, phci_path) == 0)
8060 			return (cphci);
8061 	}
8062 
8063 	return (NULL);
8064 }
8065 
8066 /*
8067  * Lookup vhcache phci structure for the specified phci.
8068  */
8069 static mdi_vhcache_phci_t *
8070 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
8071 {
8072 	mdi_vhcache_phci_t *cphci;
8073 
8074 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8075 
8076 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8077 	    cphci = cphci->cphci_next) {
8078 		if (cphci->cphci_phci == ph)
8079 			return (cphci);
8080 	}
8081 
8082 	return (NULL);
8083 }
8084 
8085 /*
8086  * Add the specified phci to the vhci cache if not already present.
8087  */
8088 static void
8089 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8090 {
8091 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8092 	mdi_vhcache_phci_t *cphci;
8093 	char *pathname;
8094 	int cache_updated;
8095 
8096 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8097 
8098 	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8099 	(void) ddi_pathname(ph->ph_dip, pathname);
8100 	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
8101 	    != NULL) {
8102 		cphci->cphci_phci = ph;
8103 		cache_updated = 0;
8104 	} else {
8105 		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
8106 		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
8107 		cphci->cphci_phci = ph;
8108 		enqueue_vhcache_phci(vhcache, cphci);
8109 		cache_updated = 1;
8110 	}
8111 
8112 	rw_exit(&vhcache->vhcache_lock);
8113 
8114 	/*
8115 	 * Since a new phci has been added, reset
8116 	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
8117 	 * during next vhcache_discover_paths().
8118 	 */
8119 	mutex_enter(&vhc->vhc_lock);
8120 	vhc->vhc_path_discovery_cutoff_time = 0;
8121 	mutex_exit(&vhc->vhc_lock);
8122 
8123 	kmem_free(pathname, MAXPATHLEN);
8124 	if (cache_updated)
8125 		vhcache_dirty(vhc);
8126 }
8127 
8128 /*
8129  * Remove the reference to the specified phci from the vhci cache.
8130  */
8131 static void
8132 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8133 {
8134 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8135 	mdi_vhcache_phci_t *cphci;
8136 
8137 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8138 	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
8139 		/* do not remove the actual mdi_vhcache_phci structure */
8140 		cphci->cphci_phci = NULL;
8141 	}
8142 	rw_exit(&vhcache->vhcache_lock);
8143 }
8144 
8145 static void
8146 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
8147     mdi_vhcache_lookup_token_t *src)
8148 {
8149 	if (src == NULL) {
8150 		dst->lt_cct = NULL;
8151 		dst->lt_cct_lookup_time = 0;
8152 	} else {
8153 		dst->lt_cct = src->lt_cct;
8154 		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
8155 	}
8156 }
8157 
8158 /*
8159  * Look up vhcache client for the specified client.
8160  */
8161 static mdi_vhcache_client_t *
8162 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
8163     mdi_vhcache_lookup_token_t *token)
8164 {
8165 	mod_hash_val_t hv;
8166 	char *name_addr;
8167 	int len;
8168 
8169 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8170 
8171 	/*
8172 	 * If no vhcache clean occurred since the last lookup, we can
8173 	 * simply return the cct from the last lookup operation.
8174 	 * It works because ccts are never freed except during the vhcache
8175 	 * cleanup operation.
8176 	 */
8177 	if (token != NULL &&
8178 	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
8179 		return (token->lt_cct);
8180 
8181 	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
8182 	if (mod_hash_find(vhcache->vhcache_client_hash,
8183 	    (mod_hash_key_t)name_addr, &hv) == 0) {
8184 		if (token) {
8185 			token->lt_cct = (mdi_vhcache_client_t *)hv;
8186 			token->lt_cct_lookup_time = ddi_get_lbolt64();
8187 		}
8188 	} else {
8189 		if (token) {
8190 			token->lt_cct = NULL;
8191 			token->lt_cct_lookup_time = 0;
8192 		}
8193 		hv = NULL;
8194 	}
8195 	kmem_free(name_addr, len);
8196 	return ((mdi_vhcache_client_t *)hv);
8197 }
8198 
8199 /*
8200  * Add the specified path to the vhci cache if not already present.
8201  * Also add the vhcache client for the client corresponding to this path
8202  * if it doesn't already exist.
8203  */
8204 static void
8205 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8206 {
8207 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8208 	mdi_vhcache_client_t *cct;
8209 	mdi_vhcache_pathinfo_t *cpi;
8210 	mdi_phci_t *ph = pip->pi_phci;
8211 	mdi_client_t *ct = pip->pi_client;
8212 	int cache_updated = 0;
8213 
8214 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8215 
8216 	/* if vhcache client for this pip doesn't already exist, add it */
8217 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8218 	    NULL)) == NULL) {
8219 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
8220 		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
8221 		    ct->ct_guid, NULL);
8222 		enqueue_vhcache_client(vhcache, cct);
8223 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
8224 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
8225 		cache_updated = 1;
8226 	}
8227 
8228 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8229 		if (cpi->cpi_cphci->cphci_phci == ph &&
8230 		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
8231 			cpi->cpi_pip = pip;
8232 			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
8233 				cpi->cpi_flags &=
8234 				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8235 				sort_vhcache_paths(cct);
8236 				cache_updated = 1;
8237 			}
8238 			break;
8239 		}
8240 	}
8241 
8242 	if (cpi == NULL) {
8243 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
8244 		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
8245 		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
8246 		ASSERT(cpi->cpi_cphci != NULL);
8247 		cpi->cpi_pip = pip;
8248 		enqueue_vhcache_pathinfo(cct, cpi);
8249 		cache_updated = 1;
8250 	}
8251 
8252 	rw_exit(&vhcache->vhcache_lock);
8253 
8254 	if (cache_updated)
8255 		vhcache_dirty(vhc);
8256 }
8257 
8258 /*
8259  * Remove the reference to the specified path from the vhci cache.
8260  */
8261 static void
8262 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8263 {
8264 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8265 	mdi_client_t *ct = pip->pi_client;
8266 	mdi_vhcache_client_t *cct;
8267 	mdi_vhcache_pathinfo_t *cpi;
8268 
8269 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8270 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8271 	    NULL)) != NULL) {
8272 		for (cpi = cct->cct_cpi_head; cpi != NULL;
8273 		    cpi = cpi->cpi_next) {
8274 			if (cpi->cpi_pip == pip) {
8275 				cpi->cpi_pip = NULL;
8276 				break;
8277 			}
8278 		}
8279 	}
8280 	rw_exit(&vhcache->vhcache_lock);
8281 }
8282 
8283 /*
8284  * Flush the vhci cache to disk.
8285  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
8286  */
8287 static int
8288 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
8289 {
8290 	nvlist_t *nvl;
8291 	int err;
8292 	int rv;
8293 
8294 	/*
8295 	 * It is possible that the system may shut down before
8296 	 * i_ddi_io_initialized (during stmsboot for example). To allow for
8297 	 * flushing the cache in this case do not check for
8298 	 * i_ddi_io_initialized when the force flag is set.
8299 	 */
8300 	if (force_flag == 0 && !i_ddi_io_initialized())
8301 		return (MDI_FAILURE);
8302 
8303 	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
8304 		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
8305 		nvlist_free(nvl);
8306 	} else
8307 		err = EFAULT;
8308 
8309 	rv = MDI_SUCCESS;
8310 	mutex_enter(&vhc->vhc_lock);
8311 	if (err != 0) {
8312 		if (err == EROFS) {
8313 			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
8314 			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
8315 			    MDI_VHC_VHCACHE_DIRTY);
8316 		} else {
8317 			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
8318 				cmn_err(CE_CONT, "%s: update failed\n",
8319 				    vhc->vhc_vhcache_filename);
8320 				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
8321 			}
8322 			rv = MDI_FAILURE;
8323 		}
8324 	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
8325 		cmn_err(CE_CONT,
8326 		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
8327 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
8328 	}
8329 	mutex_exit(&vhc->vhc_lock);
8330 
8331 	return (rv);
8332 }
8333 
8334 /*
8335  * Call flush_vhcache() to flush the vhci cache at the scheduled time.
8336  * Exits itself if left idle for the idle timeout period.
8337  */
8338 static void
8339 vhcache_flush_thread(void *arg)
8340 {
8341 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8342 	clock_t idle_time, quit_at_ticks;
8343 	callb_cpr_t cprinfo;
8344 
8345 	/* number of seconds to sleep idle before exiting */
8346 	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
8347 
8348 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
8349 	    "mdi_vhcache_flush");
8350 	mutex_enter(&vhc->vhc_lock);
8351 	for (; ; ) {
8352 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8353 		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
8354 			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
8355 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
8356 				(void) cv_timedwait(&vhc->vhc_cv,
8357 				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
8358 				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8359 			} else {
8360 				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
8361 				mutex_exit(&vhc->vhc_lock);
8362 
8363 				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
8364 					vhcache_dirty(vhc);
8365 
8366 				mutex_enter(&vhc->vhc_lock);
8367 			}
8368 		}
8369 
8370 		quit_at_ticks = ddi_get_lbolt() + idle_time;
8371 
8372 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8373 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
8374 		    ddi_get_lbolt() < quit_at_ticks) {
8375 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
8376 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8377 			    quit_at_ticks);
8378 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8379 		}
8380 
8381 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
8382 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
8383 			goto out;
8384 	}
8385 
8386 out:
8387 	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
8388 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
8389 	CALLB_CPR_EXIT(&cprinfo);
8390 }
8391 
8392 /*
8393  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
8394  */
8395 static void
8396 vhcache_dirty(mdi_vhci_config_t *vhc)
8397 {
8398 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8399 	int create_thread;
8400 
8401 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8402 	/* do not flush cache until the cache is fully built */
8403 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8404 		rw_exit(&vhcache->vhcache_lock);
8405 		return;
8406 	}
8407 	rw_exit(&vhcache->vhcache_lock);
8408 
8409 	mutex_enter(&vhc->vhc_lock);
8410 	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
8411 		mutex_exit(&vhc->vhc_lock);
8412 		return;
8413 	}
8414 
8415 	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
8416 	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
8417 	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
8418 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
8419 		cv_broadcast(&vhc->vhc_cv);
8420 		create_thread = 0;
8421 	} else {
8422 		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
8423 		create_thread = 1;
8424 	}
8425 	mutex_exit(&vhc->vhc_lock);
8426 
8427 	if (create_thread)
8428 		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
8429 		    0, &p0, TS_RUN, minclsyspri);
8430 }
8431 
8432 /*
8433  * phci bus config structure - one for each phci bus config operation that
8434  * we initiate on behalf of a vhci.
8435  */
8436 typedef struct mdi_phci_bus_config_s {
8437 	char *phbc_phci_path;
8438 	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
8439 	struct mdi_phci_bus_config_s *phbc_next;
8440 } mdi_phci_bus_config_t;
8441 
8442 /* vhci bus config structure - one for each vhci bus config operation */
8443 typedef struct mdi_vhci_bus_config_s {
8444 	ddi_bus_config_op_t vhbc_op;	/* bus config op */
8445 	major_t vhbc_op_major;		/* bus config op major */
8446 	uint_t vhbc_op_flags;		/* bus config op flags */
8447 	kmutex_t vhbc_lock;
8448 	kcondvar_t vhbc_cv;
8449 	int vhbc_thr_count;
8450 } mdi_vhci_bus_config_t;
8451 
8452 /*
8453  * bus config the specified phci
8454  */
8455 static void
8456 bus_config_phci(void *arg)
8457 {
8458 	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
8459 	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
8460 	dev_info_t *ph_dip;
8461 
8462 	/*
8463 	 * first configure all path components up to the phci and then configure
8464 	 * the phci children.
8465 	 */
8466 	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
8467 	    != NULL) {
8468 		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
8469 		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
8470 			(void) ndi_devi_config_driver(ph_dip,
8471 			    vhbc->vhbc_op_flags,
8472 			    vhbc->vhbc_op_major);
8473 		} else
8474 			(void) ndi_devi_config(ph_dip,
8475 			    vhbc->vhbc_op_flags);
8476 
8477 		/* release the hold that e_ddi_hold_devi_by_path() placed */
8478 		ndi_rele_devi(ph_dip);
8479 	}
8480 
8481 	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
8482 	kmem_free(phbc, sizeof (*phbc));
8483 
8484 	mutex_enter(&vhbc->vhbc_lock);
8485 	vhbc->vhbc_thr_count--;
8486 	if (vhbc->vhbc_thr_count == 0)
8487 		cv_broadcast(&vhbc->vhbc_cv);
8488 	mutex_exit(&vhbc->vhbc_lock);
8489 }
8490 
8491 /*
8492  * Bus config all phcis associated with the vhci in parallel.
8493  * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
8494  */
8495 static void
8496 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
8497     ddi_bus_config_op_t op, major_t maj)
8498 {
8499 	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
8500 	mdi_vhci_bus_config_t *vhbc;
8501 	mdi_vhcache_phci_t *cphci;
8502 
8503 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8504 	if (vhcache->vhcache_phci_head == NULL) {
8505 		rw_exit(&vhcache->vhcache_lock);
8506 		return;
8507 	}
8508 
8509 	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
8510 
8511 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8512 	    cphci = cphci->cphci_next) {
8513 		/* skip phcis that haven't attached before root is available */
8514 		if (!modrootloaded && (cphci->cphci_phci == NULL))
8515 			continue;
8516 		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
8517 		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
8518 		    KM_SLEEP);
8519 		phbc->phbc_vhbusconfig = vhbc;
8520 		phbc->phbc_next = phbc_head;
8521 		phbc_head = phbc;
8522 		vhbc->vhbc_thr_count++;
8523 	}
8524 	rw_exit(&vhcache->vhcache_lock);
8525 
8526 	vhbc->vhbc_op = op;
8527 	vhbc->vhbc_op_major = maj;
8528 	vhbc->vhbc_op_flags = NDI_NO_EVENT |
8529 	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
8530 	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
8531 	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
8532 
8533 	/* now create threads to initiate bus config on all phcis in parallel */
8534 	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
8535 		phbc_next = phbc->phbc_next;
8536 		if (mdi_mtc_off)
8537 			bus_config_phci((void *)phbc);
8538 		else
8539 			(void) thread_create(NULL, 0, bus_config_phci, phbc,
8540 			    0, &p0, TS_RUN, minclsyspri);
8541 	}
8542 
8543 	mutex_enter(&vhbc->vhbc_lock);
8544 	/* wait until all threads exit */
8545 	while (vhbc->vhbc_thr_count > 0)
8546 		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
8547 	mutex_exit(&vhbc->vhbc_lock);
8548 
8549 	mutex_destroy(&vhbc->vhbc_lock);
8550 	cv_destroy(&vhbc->vhbc_cv);
8551 	kmem_free(vhbc, sizeof (*vhbc));
8552 }
8553 
8554 /*
8555  * Single threaded version of bus_config_all_phcis()
8556  */
8557 static void
8558 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
8559     ddi_bus_config_op_t op, major_t maj)
8560 {
8561 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8562 
8563 	single_threaded_vhconfig_enter(vhc);
8564 	bus_config_all_phcis(vhcache, flags, op, maj);
8565 	single_threaded_vhconfig_exit(vhc);
8566 }
8567 
8568 /*
8569  * Perform BUS_CONFIG_ONE on the specified child of the phci.
8570  * The path includes the child component in addition to the phci path.
8571  */
8572 static int
8573 bus_config_one_phci_child(char *path)
8574 {
8575 	dev_info_t *ph_dip, *child;
8576 	char *devnm;
8577 	int rv = MDI_FAILURE;
8578 
8579 	/* extract the child component of the phci */
8580 	devnm = strrchr(path, '/');
8581 	*devnm++ = '\0';
8582 
8583 	/*
8584 	 * first configure all path components up to the phci and then
8585 	 * configure the phci child.
8586 	 */
8587 	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
8588 		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
8589 		    NDI_SUCCESS) {
8590 			/*
8591 			 * release the hold that ndi_devi_config_one() placed
8592 			 */
8593 			ndi_rele_devi(child);
8594 			rv = MDI_SUCCESS;
8595 		}
8596 
8597 		/* release the hold that e_ddi_hold_devi_by_path() placed */
8598 		ndi_rele_devi(ph_dip);
8599 	}
8600 
8601 	devnm--;
8602 	*devnm = '/';
8603 	return (rv);
8604 }
8605 
8606 /*
8607  * Build a list of phci client paths for the specified vhci client.
8608  * The list includes only those phci client paths which aren't configured yet.
8609  */
8610 static mdi_phys_path_t *
8611 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
8612 {
8613 	mdi_vhcache_pathinfo_t *cpi;
8614 	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
8615 	int config_path, len;
8616 
8617 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8618 		/*
8619 		 * include only those paths that aren't configured.
8620 		 */
8621 		config_path = 0;
8622 		if (cpi->cpi_pip == NULL)
8623 			config_path = 1;
8624 		else {
8625 			MDI_PI_LOCK(cpi->cpi_pip);
8626 			if (MDI_PI_IS_INIT(cpi->cpi_pip))
8627 				config_path = 1;
8628 			MDI_PI_UNLOCK(cpi->cpi_pip);
8629 		}
8630 
8631 		if (config_path) {
8632 			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
8633 			len = strlen(cpi->cpi_cphci->cphci_path) +
8634 			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
8635 			pp->phys_path = kmem_alloc(len, KM_SLEEP);
8636 			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
8637 			    cpi->cpi_cphci->cphci_path, ct_name,
8638 			    cpi->cpi_addr);
8639 			pp->phys_path_next = NULL;
8640 
8641 			if (pp_head == NULL)
8642 				pp_head = pp;
8643 			else
8644 				pp_tail->phys_path_next = pp;
8645 			pp_tail = pp;
8646 		}
8647 	}
8648 
8649 	return (pp_head);
8650 }
8651 
8652 /*
8653  * Free the memory allocated for phci client path list.
8654  */
8655 static void
8656 free_phclient_path_list(mdi_phys_path_t *pp_head)
8657 {
8658 	mdi_phys_path_t *pp, *pp_next;
8659 
8660 	for (pp = pp_head; pp != NULL; pp = pp_next) {
8661 		pp_next = pp->phys_path_next;
8662 		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
8663 		kmem_free(pp, sizeof (*pp));
8664 	}
8665 }
8666 
8667 /*
8668  * Allocate an async client structure and initialize it with the specified values.
8669  */
8670 static mdi_async_client_config_t *
8671 alloc_async_client_config(char *ct_name, char *ct_addr,
8672     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8673 {
8674 	mdi_async_client_config_t *acc;
8675 
8676 	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
8677 	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
8678 	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
8679 	acc->acc_phclient_path_list_head = pp_head;
8680 	init_vhcache_lookup_token(&acc->acc_token, tok);
8681 	acc->acc_next = NULL;
8682 	return (acc);
8683 }
8684 
8685 /*
8686  * Free the memory allocated for the async client structure and its members.
8687  */
8688 static void
8689 free_async_client_config(mdi_async_client_config_t *acc)
8690 {
8691 	if (acc->acc_phclient_path_list_head)
8692 		free_phclient_path_list(acc->acc_phclient_path_list_head);
8693 	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
8694 	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
8695 	kmem_free(acc, sizeof (*acc));
8696 }
8697 
8698 /*
8699  * Sort vhcache pathinfos (cpis) of the specified client.
8700  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
8701  * flag set come at the beginning of the list. All cpis which have this
8702  * flag set come at the end of the list.
8703  */
8704 static void
8705 sort_vhcache_paths(mdi_vhcache_client_t *cct)
8706 {
8707 	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
8708 
8709 	cpi_head = cct->cct_cpi_head;
8710 	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8711 	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8712 		cpi_next = cpi->cpi_next;
8713 		enqueue_vhcache_pathinfo(cct, cpi);
8714 	}
8715 }
8716 
8717 /*
8718  * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
8719  * every vhcache pathinfo of the specified client. If not adjust the flag
8720  * setting appropriately.
8721  *
8722  * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
8723  * on-disk vhci cache. So every time this flag is updated the cache must be
8724  * flushed.
8725  */
8726 static void
8727 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8728     mdi_vhcache_lookup_token_t *tok)
8729 {
8730 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8731 	mdi_vhcache_client_t *cct;
8732 	mdi_vhcache_pathinfo_t *cpi;
8733 
8734 	rw_enter(&vhcache->vhcache_lock, RW_READER);
8735 	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
8736 	    == NULL) {
8737 		rw_exit(&vhcache->vhcache_lock);
8738 		return;
8739 	}
8740 
8741 	/*
8742 	 * to avoid unnecessary on-disk cache updates, first check if an
8743 	 * update is really needed. If no update is needed simply return.
8744 	 */
8745 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8746 		if ((cpi->cpi_pip != NULL &&
8747 		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
8748 		    (cpi->cpi_pip == NULL &&
8749 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
8750 			break;
8751 		}
8752 	}
8753 	if (cpi == NULL) {
8754 		rw_exit(&vhcache->vhcache_lock);
8755 		return;
8756 	}
8757 
8758 	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
8759 		rw_exit(&vhcache->vhcache_lock);
8760 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8761 		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
8762 		    tok)) == NULL) {
8763 			rw_exit(&vhcache->vhcache_lock);
8764 			return;
8765 		}
8766 	}
8767 
8768 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8769 		if (cpi->cpi_pip != NULL)
8770 			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8771 		else
8772 			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8773 	}
8774 	sort_vhcache_paths(cct);
8775 
8776 	rw_exit(&vhcache->vhcache_lock);
8777 	vhcache_dirty(vhc);
8778 }
8779 
8780 /*
8781  * Configure all specified paths of the client.
8782  */
8783 static void
8784 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8785     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8786 {
8787 	mdi_phys_path_t *pp;
8788 
8789 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
8790 		(void) bus_config_one_phci_child(pp->phys_path);
8791 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
8792 }
8793 
8794 /*
8795  * Dequeue elements from vhci async client config list and bus configure
8796  * their corresponding phci clients.
8797  */
8798 static void
8799 config_client_paths_thread(void *arg)
8800 {
8801 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8802 	mdi_async_client_config_t *acc;
8803 	clock_t quit_at_ticks;
8804 	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
8805 	callb_cpr_t cprinfo;
8806 
8807 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
8808 	    "mdi_config_client_paths");
8809 
8810 	for (; ; ) {
8811 		quit_at_ticks = ddi_get_lbolt() + idle_time;
8812 
8813 		mutex_enter(&vhc->vhc_lock);
8814 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8815 		    vhc->vhc_acc_list_head == NULL &&
8816 		    ddi_get_lbolt() < quit_at_ticks) {
8817 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
8818 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8819 			    quit_at_ticks);
8820 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8821 		}
8822 
8823 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
8824 		    vhc->vhc_acc_list_head == NULL)
8825 			goto out;
8826 
8827 		acc = vhc->vhc_acc_list_head;
8828 		vhc->vhc_acc_list_head = acc->acc_next;
8829 		if (vhc->vhc_acc_list_head == NULL)
8830 			vhc->vhc_acc_list_tail = NULL;
8831 		vhc->vhc_acc_count--;
8832 		mutex_exit(&vhc->vhc_lock);
8833 
8834 		config_client_paths_sync(vhc, acc->acc_ct_name,
8835 		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
8836 		    &acc->acc_token);
8837 
8838 		free_async_client_config(acc);
8839 	}
8840 
8841 out:
8842 	vhc->vhc_acc_thrcount--;
8843 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
8844 	CALLB_CPR_EXIT(&cprinfo);
8845 }
8846 
8847 /*
8848  * Arrange for all the phci client paths (pp_head) for the specified client
8849  * to be bus configured asynchronously by a thread.
8850  */
8851 static void
8852 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8853     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8854 {
8855 	mdi_async_client_config_t *acc, *newacc;
8856 	int create_thread;
8857 
8858 	if (pp_head == NULL)
8859 		return;
8860 
8861 	if (mdi_mtc_off) {
8862 		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
8863 		free_phclient_path_list(pp_head);
8864 		return;
8865 	}
8866 
8867 	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
8868 	ASSERT(newacc);
8869 
8870 	mutex_enter(&vhc->vhc_lock);
8871 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
8872 		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
8873 		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
8874 			free_async_client_config(newacc);
8875 			mutex_exit(&vhc->vhc_lock);
8876 			return;
8877 		}
8878 	}
8879 
8880 	if (vhc->vhc_acc_list_head == NULL)
8881 		vhc->vhc_acc_list_head = newacc;
8882 	else
8883 		vhc->vhc_acc_list_tail->acc_next = newacc;
8884 	vhc->vhc_acc_list_tail = newacc;
8885 	vhc->vhc_acc_count++;
8886 	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
8887 		cv_broadcast(&vhc->vhc_cv);
8888 		create_thread = 0;
8889 	} else {
8890 		vhc->vhc_acc_thrcount++;
8891 		create_thread = 1;
8892 	}
8893 	mutex_exit(&vhc->vhc_lock);
8894 
8895 	if (create_thread)
8896 		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
8897 		    0, &p0, TS_RUN, minclsyspri);
8898 }
8899 
8900 /*
8901  * Return number of online paths for the specified client.
8902  */
8903 static int
8904 nonline_paths(mdi_vhcache_client_t *cct)
8905 {
8906 	mdi_vhcache_pathinfo_t *cpi;
8907 	int online_count = 0;
8908 
8909 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8910 		if (cpi->cpi_pip != NULL) {
8911 			MDI_PI_LOCK(cpi->cpi_pip);
8912 			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8913 				online_count++;
8914 			MDI_PI_UNLOCK(cpi->cpi_pip);
8915 		}
8916 	}
8917 
8918 	return (online_count);
8919 }
8920 
8921 /*
8922  * Bus configure all paths for the specified vhci client.
8923  * If at least one path for the client is already online, the remaining paths
8924  * will be configured asynchronously. Otherwise, it synchronously configures
8925  * the paths until at least one path is online and then rest of the paths
8926  * will be configured asynchronously.
8927  */
8928 static void
8929 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
8930 {
8931 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8932 	mdi_phys_path_t *pp_head, *pp;
8933 	mdi_vhcache_client_t *cct;
8934 	mdi_vhcache_lookup_token_t tok;
8935 
8936 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8937 
8938 	init_vhcache_lookup_token(&tok, NULL);
8939 
8940 	if (ct_name == NULL || ct_addr == NULL ||
8941 	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
8942 	    == NULL ||
8943 	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
8944 		rw_exit(&vhcache->vhcache_lock);
8945 		return;
8946 	}
8947 
8948 	/* if at least one path is online, configure the rest asynchronously */
8949 	if (nonline_paths(cct) > 0) {
8950 		rw_exit(&vhcache->vhcache_lock);
8951 		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
8952 		return;
8953 	}
8954 
8955 	rw_exit(&vhcache->vhcache_lock);
8956 
8957 	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
8958 		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
8959 			rw_enter(&vhcache->vhcache_lock, RW_READER);
8960 
8961 			if ((cct = lookup_vhcache_client(vhcache, ct_name,
8962 			    ct_addr, &tok)) == NULL) {
8963 				rw_exit(&vhcache->vhcache_lock);
8964 				goto out;
8965 			}
8966 
8967 			if (nonline_paths(cct) > 0 &&
8968 			    pp->phys_path_next != NULL) {
8969 				rw_exit(&vhcache->vhcache_lock);
8970 				config_client_paths_async(vhc, ct_name, ct_addr,
8971 				    pp->phys_path_next, &tok);
8972 				pp->phys_path_next = NULL;
8973 				goto out;
8974 			}
8975 
8976 			rw_exit(&vhcache->vhcache_lock);
8977 		}
8978 	}
8979 
8980 	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
8981 out:
8982 	free_phclient_path_list(pp_head);
8983 }
8984 
8985 static void
8986 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8987 {
8988 	mutex_enter(&vhc->vhc_lock);
8989 	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8990 		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8991 	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8992 	mutex_exit(&vhc->vhc_lock);
8993 }
8994 
8995 static void
8996 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
8997 {
8998 	mutex_enter(&vhc->vhc_lock);
8999 	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
9000 	cv_broadcast(&vhc->vhc_cv);
9001 	mutex_exit(&vhc->vhc_lock);
9002 }
9003 
9004 typedef struct mdi_phci_driver_info {
9005 	char	*phdriver_name;	/* name of the phci driver */
9006 
9007 	/* set to non zero if the phci driver supports root device */
9008 	int	phdriver_root_support;
9009 } mdi_phci_driver_info_t;
9010 
9011 /*
9012  * vhci class and root support capability of a phci driver can be
9013  * specified using ddi-vhci-class and ddi-no-root-support properties in the
9014  * phci driver.conf file. The built-in tables below contain this information
9015  * for those phci drivers whose driver.conf files don't yet contain this info.
9016  *
9017  * All phci drivers except iscsi have root device support.
9018  */
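/*
 * For example, a scsi_vhci-managed phci driver that cannot be used for the
 * root device might declare the following in its driver.conf file (sketch
 * only, shown for illustration):
 *
 *	ddi-vhci-class="scsi_vhci";
 *	ddi-no-root-support=1;
 */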
9019 static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
9020 	{ "fp", 1 },
9021 	{ "iscsi", 0 },
9022 	{ "ibsrp", 1 }
9023 	};
9024 
9025 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 };
9026 
9027 static void *
9028 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
9029 {
9030 	void *new_ptr;
9031 
9032 	new_ptr = kmem_zalloc(new_size, KM_SLEEP);
9033 	if (old_ptr) {
9034 		bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
9035 		kmem_free(old_ptr, old_size);
9036 	}
9037 	return (new_ptr);
9038 }
9039 
9040 static void
9041 add_to_phci_list(char ***driver_list, int **root_support_list,
9042     int *cur_elements, int *max_elements, char *driver_name, int root_support)
9043 {
9044 	ASSERT(*cur_elements <= *max_elements);
9045 	if (*cur_elements == *max_elements) {
9046 		*max_elements += 10;
9047 		*driver_list = mdi_realloc(*driver_list,
9048 		    sizeof (char *) * (*cur_elements),
9049 		    sizeof (char *) * (*max_elements));
9050 		*root_support_list = mdi_realloc(*root_support_list,
9051 		    sizeof (int) * (*cur_elements),
9052 		    sizeof (int) * (*max_elements));
9053 	}
9054 	(*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
9055 	(*root_support_list)[*cur_elements] = root_support;
9056 	(*cur_elements)++;
9057 }
9058 
9059 static void
9060 get_phci_driver_list(char *vhci_class, char ***driver_list,
9061     int **root_support_list, int *cur_elements, int *max_elements)
9062 {
9063 	mdi_phci_driver_info_t	*st_driver_list, *p;
9064 	int		st_ndrivers, root_support, i, j, driver_conf_count;
9065 	major_t		m;
9066 	struct devnames	*dnp;
9067 	ddi_prop_t	*propp;
9068 
9069 	*driver_list = NULL;
9070 	*root_support_list = NULL;
9071 	*cur_elements = 0;
9072 	*max_elements = 0;
9073 
9074 	/* add the phci drivers derived from the phci driver.conf files */
9075 	for (m = 0; m < devcnt; m++) {
9076 		dnp = &devnamesp[m];
9077 
9078 		if (dnp->dn_flags & DN_PHCI_DRIVER) {
9079 			LOCK_DEV_OPS(&dnp->dn_lock);
9080 			if (dnp->dn_global_prop_ptr != NULL &&
9081 			    (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
9082 			    DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
9083 			    &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
9084 			    strcmp(propp->prop_val, vhci_class) == 0) {
9085 
9086 				root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
9087 				    DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
9088 				    &dnp->dn_global_prop_ptr->prop_list)
9089 				    == NULL) ? 1 : 0;
9090 
9091 				add_to_phci_list(driver_list, root_support_list,
9092 				    cur_elements, max_elements, dnp->dn_name,
9093 				    root_support);
9094 
9095 				UNLOCK_DEV_OPS(&dnp->dn_lock);
9096 			} else
9097 				UNLOCK_DEV_OPS(&dnp->dn_lock);
9098 		}
9099 	}
9100 
9101 	driver_conf_count = *cur_elements;
9102 
9103 	/* add the phci drivers specified in the built-in tables */
9104 	if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
9105 		st_driver_list = scsi_phci_driver_list;
9106 		st_ndrivers = sizeof (scsi_phci_driver_list) /
9107 		    sizeof (mdi_phci_driver_info_t);
9108 	} else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
9109 		st_driver_list = ib_phci_driver_list;
9110 		st_ndrivers = sizeof (ib_phci_driver_list) /
9111 		    sizeof (mdi_phci_driver_info_t);
9112 	} else {
9113 		st_driver_list = NULL;
9114 		st_ndrivers = 0;
9115 	}
9116 
9117 	for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
9118 		/* add this phci driver if not already added before */
9119 		for (j = 0; j < driver_conf_count; j++) {
9120 			if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
9121 				break;
9122 		}
9123 		if (j == driver_conf_count) {
9124 			add_to_phci_list(driver_list, root_support_list,
9125 			    cur_elements, max_elements, p->phdriver_name,
9126 			    p->phdriver_root_support);
9127 		}
9128 	}
9129 }
9130 
9131 /*
9132  * Attach the phci driver instances associated with the specified vhci class.
9133  * If root is mounted attach all phci driver instances.
9134  * If root is not mounted, attach the instances of only those phci
9135  * drivers that have the root support.
9136  */
9137 static void
9138 attach_phci_drivers(char *vhci_class)
9139 {
9140 	char	**driver_list, **p;
9141 	int	*root_support_list;
9142 	int	cur_elements, max_elements, i;
9143 	major_t	m;
9144 
9145 	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9146 	    &cur_elements, &max_elements);
9147 
9148 	for (i = 0; i < cur_elements; i++) {
9149 		if (modrootloaded || root_support_list[i]) {
9150 			m = ddi_name_to_major(driver_list[i]);
9151 			if (m != DDI_MAJOR_T_NONE &&
9152 			    ddi_hold_installed_driver(m))
9153 				ddi_rele_driver(m);
9154 		}
9155 	}
9156 
9157 	if (driver_list) {
9158 		for (i = 0, p = driver_list; i < cur_elements; i++, p++)
9159 			kmem_free(*p, strlen(*p) + 1);
9160 		kmem_free(driver_list, sizeof (char *) * max_elements);
9161 		kmem_free(root_support_list, sizeof (int) * max_elements);
9162 	}
9163 }
9164 
9165 /*
9166  * Build vhci cache:
9167  *
9168  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
9169  * the phci driver instances. During this process the cache gets built.
9170  *
9171  * Cache is built fully if the root is mounted.
9172  * If the root is not mounted, phci drivers that do not have root support
9173  * are not attached. As a result the cache is built partially. The entries
9174  * in the cache reflect only those phci drivers that have root support.
9175  */
9176 static int
9177 build_vhci_cache(mdi_vhci_t *vh)
9178 {
9179 	mdi_vhci_config_t *vhc = vh->vh_config;
9180 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9181 
9182 	single_threaded_vhconfig_enter(vhc);
9183 
9184 	rw_enter(&vhcache->vhcache_lock, RW_READER);
9185 	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
9186 		rw_exit(&vhcache->vhcache_lock);
9187 		single_threaded_vhconfig_exit(vhc);
9188 		return (0);
9189 	}
9190 	rw_exit(&vhcache->vhcache_lock);
9191 
9192 	attach_phci_drivers(vh->vh_class);
9193 	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
9194 	    BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9195 
9196 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9197 	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
9198 	rw_exit(&vhcache->vhcache_lock);
9199 
9200 	single_threaded_vhconfig_exit(vhc);
9201 	vhcache_dirty(vhc);
9202 	return (1);
9203 }
9204 
9205 /*
9206  * Determine if discovery of paths is needed.
9207  */
9208 static int
9209 vhcache_do_discovery(mdi_vhci_config_t *vhc)
9210 {
9211 	int rv = 1;
9212 
9213 	mutex_enter(&vhc->vhc_lock);
9214 	if (i_ddi_io_initialized() == 0) {
9215 		if (vhc->vhc_path_discovery_boot > 0) {
9216 			vhc->vhc_path_discovery_boot--;
9217 			goto out;
9218 		}
9219 	} else {
9220 		if (vhc->vhc_path_discovery_postboot > 0) {
9221 			vhc->vhc_path_discovery_postboot--;
9222 			goto out;
9223 		}
9224 	}
9225 
9226 	/*
9227 	 * Do full path discovery at most once per mdi_path_discovery_interval.
9228 	 * This is to avoid a series of full path discoveries when opening
9229 	 * stale /dev/[r]dsk links.
9230 	 */
9231 	if (mdi_path_discovery_interval != -1 &&
9232 	    ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time)
9233 		goto out;
9234 
9235 	rv = 0;
9236 out:
9237 	mutex_exit(&vhc->vhc_lock);
9238 	return (rv);
9239 }
9240 
9241 /*
9242  * Discover all paths:
9243  *
9244  * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
9245  * driver instances. During this process all paths will be discovered.
9246  */
9247 static int
9248 vhcache_discover_paths(mdi_vhci_t *vh)
9249 {
9250 	mdi_vhci_config_t *vhc = vh->vh_config;
9251 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9252 	int rv = 0;
9253 
9254 	single_threaded_vhconfig_enter(vhc);
9255 
9256 	if (vhcache_do_discovery(vhc)) {
9257 		attach_phci_drivers(vh->vh_class);
9258 		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
9259 		    NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9260 
9261 		mutex_enter(&vhc->vhc_lock);
9262 		vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() +
9263 		    mdi_path_discovery_interval * TICKS_PER_SECOND;
9264 		mutex_exit(&vhc->vhc_lock);
9265 		rv = 1;
9266 	}
9267 
9268 	single_threaded_vhconfig_exit(vhc);
9269 	return (rv);
9270 }
9271 
9272 /*
9273  * Generic vhci bus config implementation:
9274  *
9275  * Parameters
9276  *	vdip	vhci dip
9277  *	flags	bus config flags
9278  *	op	bus config operation
9279  *	The remaining parameters are bus config operation specific
9280  *
9281  * for BUS_CONFIG_ONE
9282  *	arg	pointer to name@addr
9283  *	child	upon successful return from this function, *child will be
9284  *		set to the configured and held devinfo child node of vdip.
9285  *	ct_addr	pointer to client address (i.e. GUID)
9286  *
9287  * for BUS_CONFIG_DRIVER
9288  *	arg	major number of the driver
9289  *	child and ct_addr parameters are ignored
9290  *
9291  * for BUS_CONFIG_ALL
9292  *	arg, child, and ct_addr parameters are ignored
9293  *
9294  * Note that for the rest of the bus config operations, this function simply
9295  * calls the framework-provided default bus config routine.
9296  */
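/*
 * Illustrative sketch, for exposition only: a vHCI driver's bus_config(9E)
 * entry point would typically relay into this routine.  The names
 * xx_bus_config, xx_devnm_to_guid, vhci_dip and guid below are hypothetical;
 * for BUS_CONFIG_ONE the client address (GUID) is derived from the name@addr
 * argument by whatever convention the vHCI uses.
 *
 *	static int
 *	xx_bus_config(dev_info_t *vhci_dip, uint_t flags,
 *	    ddi_bus_config_op_t op, void *arg, dev_info_t **childp)
 *	{
 *		char *guid = NULL;
 *
 *		if (op == BUS_CONFIG_ONE)
 *			guid = xx_devnm_to_guid((char *)arg);
 *
 *		return (mdi_vhci_bus_config(vhci_dip, flags, op, arg,
 *		    childp, guid) == MDI_SUCCESS ? NDI_SUCCESS : NDI_FAILURE);
 *	}
 */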
9297 int
9298 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
9299     void *arg, dev_info_t **child, char *ct_addr)
9300 {
9301 	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
9302 	mdi_vhci_config_t *vhc = vh->vh_config;
9303 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9304 	int rv = 0;
9305 	int params_valid = 0;
9306 	char *cp;
9307 
9308 	/*
9309 	 * To bus config a vhci we relay the operation, possibly using another
9310 	 * thread, to phcis. The phci driver then interacts with MDI to cause
9311 	 * vhci child nodes to be enumerated under the vhci node.  Adding a
9312 	 * vhci child requires an ndi_devi_enter of the vhci. Since another
9313 	 * thread may be adding the child, to avoid deadlock we can't wait
9314 	 * for the relayed operations to complete if we have already entered
9315 	 * the vhci node.
9316 	 */
9317 	if (DEVI_BUSY_OWNED(vdip)) {
9318 		MDI_DEBUG(2, (MDI_NOTE, vdip,
9319 		    "vhci dip is busy owned %p", (void *)vdip));
9320 		goto default_bus_config;
9321 	}
9322 
9323 	rw_enter(&vhcache->vhcache_lock, RW_READER);
9324 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
9325 		rw_exit(&vhcache->vhcache_lock);
9326 		rv = build_vhci_cache(vh);
9327 		rw_enter(&vhcache->vhcache_lock, RW_READER);
9328 	}
9329 
9330 	switch (op) {
9331 	case BUS_CONFIG_ONE:
9332 		if (arg != NULL && ct_addr != NULL) {
9333 			/* extract node name */
9334 			cp = (char *)arg;
9335 			while (*cp != '\0' && *cp != '@')
9336 				cp++;
9337 			if (*cp == '@') {
9338 				params_valid = 1;
9339 				*cp = '\0';
9340 				config_client_paths(vhc, (char *)arg, ct_addr);
9341 				/* config_client_paths() releases cache_lock */
9342 				*cp = '@';
9343 				break;
9344 			}
9345 		}
9346 
9347 		rw_exit(&vhcache->vhcache_lock);
9348 		break;
9349 
9350 	case BUS_CONFIG_DRIVER:
9351 		rw_exit(&vhcache->vhcache_lock);
9352 		if (rv == 0)
9353 			st_bus_config_all_phcis(vhc, flags, op,
9354 			    (major_t)(uintptr_t)arg);
9355 		break;
9356 
9357 	case BUS_CONFIG_ALL:
9358 		rw_exit(&vhcache->vhcache_lock);
9359 		if (rv == 0)
9360 			st_bus_config_all_phcis(vhc, flags, op, -1);
9361 		break;
9362 
9363 	default:
9364 		rw_exit(&vhcache->vhcache_lock);
9365 		break;
9366 	}
9367 
9368 
9369 default_bus_config:
9370 	/*
9371 	 * All requested child nodes are enumerated under the vhci.
9372 	 * Now configure them.
9373 	 */
9374 	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
9375 	    NDI_SUCCESS) {
9376 		return (MDI_SUCCESS);
9377 	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
9378 		/* discover all paths and try configuring again */
9379 		if (vhcache_discover_paths(vh) &&
9380 		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
9381 		    NDI_SUCCESS)
9382 			return (MDI_SUCCESS);
9383 	}
9384 
9385 	return (MDI_FAILURE);
9386 }
9387 
9388 /*
9389  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
9390  */
9391 static nvlist_t *
9392 read_on_disk_vhci_cache(char *vhci_class)
9393 {
9394 	nvlist_t *nvl;
9395 	int err;
9396 	char *filename;
9397 
9398 	filename = vhclass2vhcache_filename(vhci_class);
9399 
9400 	if ((err = fread_nvlist(filename, &nvl)) == 0) {
9401 		kmem_free(filename, strlen(filename) + 1);
9402 		return (nvl);
9403 	} else if (err == EIO)
9404 		cmn_err(CE_WARN, "%s: I/O error, will recreate", filename);
9405 	else if (err == EINVAL)
9406 		cmn_err(CE_WARN,
9407 		    "%s: data file corrupted, will recreate", filename);
9408 
9409 	kmem_free(filename, strlen(filename) + 1);
9410 	return (NULL);
9411 }
9412 
9413 /*
9414  * Read on-disk vhci cache into nvlists for all vhci classes.
9415  * Called during booting by i_ddi_read_devices_files().
9416  */
9417 void
9418 mdi_read_devices_files(void)
9419 {
9420 	int i;
9421 
9422 	for (i = 0; i < N_VHCI_CLASSES; i++)
9423 		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
9424 }
9425 
9426 /*
9427  * Remove all stale entries from vhci cache.
9428  */
9429 static void
9430 clean_vhcache(mdi_vhci_config_t *vhc)
9431 {
9432 	mdi_vhci_cache_t	*vhcache = &vhc->vhc_vhcache;
9433 	mdi_vhcache_phci_t	*phci, *nxt_phci;
9434 	mdi_vhcache_client_t	*client, *nxt_client;
9435 	mdi_vhcache_pathinfo_t	*path, *nxt_path;
9436 
9437 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9438 
9439 	client = vhcache->vhcache_client_head;
9440 	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
9441 	for ( ; client != NULL; client = nxt_client) {
9442 		nxt_client = client->cct_next;
9443 
9444 		path = client->cct_cpi_head;
9445 		client->cct_cpi_head = client->cct_cpi_tail = NULL;
9446 		for ( ; path != NULL; path = nxt_path) {
9447 			nxt_path = path->cpi_next;
9448 			if ((path->cpi_cphci->cphci_phci != NULL) &&
9449 			    (path->cpi_pip != NULL)) {
9450 				enqueue_tail_vhcache_pathinfo(client, path);
9451 			} else if (path->cpi_pip != NULL) {
9452 				/* Not valid to have a path without a phci. */
9453 				free_vhcache_pathinfo(path);
9454 			}
9455 		}
9456 
9457 		if (client->cct_cpi_head != NULL)
9458 			enqueue_vhcache_client(vhcache, client);
9459 		else {
9460 			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
9461 			    (mod_hash_key_t)client->cct_name_addr);
9462 			free_vhcache_client(client);
9463 		}
9464 	}
9465 
9466 	phci = vhcache->vhcache_phci_head;
9467 	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
9468 	for ( ; phci != NULL; phci = nxt_phci) {
9469 
9470 		nxt_phci = phci->cphci_next;
9471 		if (phci->cphci_phci != NULL)
9472 			enqueue_vhcache_phci(vhcache, phci);
9473 		else
9474 			free_vhcache_phci(phci);
9475 	}
9476 
9477 	vhcache->vhcache_clean_time = ddi_get_lbolt64();
9478 	rw_exit(&vhcache->vhcache_lock);
9479 	vhcache_dirty(vhc);
9480 }
9481 
9482 /*
9483  * Remove all stale entries from vhci cache.
9484  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
9485  */
9486 void
9487 mdi_clean_vhcache(void)
9488 {
9489 	mdi_vhci_t *vh;
9490 
9491 	mutex_enter(&mdi_mutex);
9492 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9493 		vh->vh_refcnt++;
9494 		mutex_exit(&mdi_mutex);
9495 		clean_vhcache(vh->vh_config);
9496 		mutex_enter(&mdi_mutex);
9497 		vh->vh_refcnt--;
9498 	}
9499 	mutex_exit(&mdi_mutex);
9500 }
9501 
9502 /*
9503  * mdi_vhci_walk_clients():
9504  *		Walker routine to traverse client dev_info nodes
9505  * ddi_walk_devs(ddi_get_child(vdip), f, arg) would walk the entire tree
9506  * below each client, including nexus devices, which we don't want.
9507  * So we just traverse the immediate siblings, starting from the first client.
9508  */
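/*
 * Illustrative sketch, for exposition only: callbacks passed to the walker
 * routines below return DDI_WALK_CONTINUE to keep walking or
 * DDI_WALK_TERMINATE to stop early.  The xx_count_client callback and the
 * nclients counter are hypothetical.
 *
 *	static int
 *	xx_count_client(dev_info_t *cdip, void *arg)
 *	{
 *		int *countp = arg;
 *
 *		(*countp)++;
 *		return (DDI_WALK_CONTINUE);
 *	}
 *
 *	int nclients = 0;
 *	mdi_vhci_walk_clients(vdip, xx_count_client, &nclients);
 */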
9509 void
9510 mdi_vhci_walk_clients(dev_info_t *vdip,
9511     int (*f)(dev_info_t *, void *), void *arg)
9512 {
9513 	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
9514 	dev_info_t	*cdip;
9515 	mdi_client_t	*ct;
9516 
9517 	MDI_VHCI_CLIENT_LOCK(vh);
9518 	cdip = ddi_get_child(vdip);
9519 	while (cdip) {
9520 		ct = i_devi_get_client(cdip);
9521 		MDI_CLIENT_LOCK(ct);
9522 
9523 		if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
9524 			cdip = ddi_get_next_sibling(cdip);
9525 		else
9526 			cdip = NULL;
9527 
9528 		MDI_CLIENT_UNLOCK(ct);
9529 	}
9530 	MDI_VHCI_CLIENT_UNLOCK(vh);
9531 }
9532 
9533 /*
9534  * mdi_vhci_walk_phcis():
9535  *		Walker routine to traverse phci dev_info nodes
9536  */
9537 void
9538 mdi_vhci_walk_phcis(dev_info_t *vdip,
9539     int (*f)(dev_info_t *, void *), void *arg)
9540 {
9541 	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
9542 	mdi_phci_t	*ph, *next;
9543 
9544 	MDI_VHCI_PHCI_LOCK(vh);
9545 	ph = vh->vh_phci_head;
9546 	while (ph) {
9547 		MDI_PHCI_LOCK(ph);
9548 
9549 		if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
9550 			next = ph->ph_next;
9551 		else
9552 			next = NULL;
9553 
9554 		MDI_PHCI_UNLOCK(ph);
9555 		ph = next;
9556 	}
9557 	MDI_VHCI_PHCI_UNLOCK(vh);
9558 }
9559 
9560 
9561 /*
9562  * mdi_walk_vhcis():
9563  *		Walker routine to traverse vhci dev_info nodes
9564  */
9565 void
9566 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
9567 {
9568 	mdi_vhci_t	*vh = NULL;
9569 
9570 	mutex_enter(&mdi_mutex);
9571 	/*
9572 	 * Walk all registered vhcis, calling (*f)() on each
9573 	 */
9574 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9575 		vh->vh_refcnt++;
9576 		mutex_exit(&mdi_mutex);
9577 		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
9578 			mutex_enter(&mdi_mutex);
9579 			vh->vh_refcnt--;
9580 			break;
9581 		} else {
9582 			mutex_enter(&mdi_mutex);
9583 			vh->vh_refcnt--;
9584 		}
9585 	}
9586 
9587 	mutex_exit(&mdi_mutex);
9588 }
9589 
9590 /*
9591  * i_mdi_log_sysevent():
9592  *		Logs events for pickup by syseventd
9593  */
9594 static void
9595 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
9596 {
9597 	char		*path_name;
9598 	nvlist_t	*attr_list;
9599 
9600 	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
9601 	    KM_SLEEP) != DDI_SUCCESS) {
9602 		goto alloc_failed;
9603 	}
9604 
9605 	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
9606 	(void) ddi_pathname(dip, path_name);
9607 
9608 	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
9609 	    ddi_driver_name(dip)) != DDI_SUCCESS) {
9610 		goto error;
9611 	}
9612 
9613 	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
9614 	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
9615 		goto error;
9616 	}
9617 
9618 	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
9619 	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
9620 		goto error;
9621 	}
9622 
9623 	if (nvlist_add_string(attr_list, DDI_PATHNAME,
9624 	    path_name) != DDI_SUCCESS) {
9625 		goto error;
9626 	}
9627 
9628 	if (nvlist_add_string(attr_list, DDI_CLASS,
9629 	    ph_vh_class) != DDI_SUCCESS) {
9630 		goto error;
9631 	}
9632 
9633 	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
9634 	    attr_list, NULL, DDI_SLEEP);
9635 
9636 error:
9637 	kmem_free(path_name, MAXPATHLEN);
9638 	nvlist_free(attr_list);
9639 	return;
9640 
9641 alloc_failed:
9642 	MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent"));
9643 }
9644 
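/*
 * mdi_get_phci_driver_list():
 *		Return an array of pHCI driver names registered for the
 *		given vHCI class; *ndrivers is set to the number of entries.
 *		The array and the strings it points to must be freed by the
 *		caller via mdi_free_phci_driver_list().
 */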
9645 char **
9646 mdi_get_phci_driver_list(char *vhci_class, int *ndrivers)
9647 {
9648 	char	**driver_list, **ret_driver_list = NULL;
9649 	int	*root_support_list;
9650 	int	cur_elements, max_elements;
9651 
9652 	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9653 	    &cur_elements, &max_elements);
9654 
9655 
9656 	if (driver_list) {
9657 		kmem_free(root_support_list, sizeof (int) * max_elements);
9658 		ret_driver_list = mdi_realloc(driver_list, sizeof (char *)
9659 		    * max_elements, sizeof (char *) * cur_elements);
9660 	}
9661 	*ndrivers = cur_elements;
9662 
9663 	return (ret_driver_list);
9664 
9665 }
9666 
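/*
 * mdi_free_phci_driver_list():
 *		Free a driver name array returned by
 *		mdi_get_phci_driver_list(); each of the ndrivers name strings
 *		is freed along with the array itself.
 */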
9667 void
9668 mdi_free_phci_driver_list(char **driver_list, int ndrivers)
9669 {
9670 	char	**p;
9671 	int	i;
9672 
9673 	if (driver_list) {
9674 		for (i = 0, p = driver_list; i < ndrivers; i++, p++)
9675 			kmem_free(*p, strlen(*p) + 1);
9676 		kmem_free(driver_list, sizeof (char *) * ndrivers);
9677 	}
9678 }
9679 
9680 /*
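/*
 * Illustrative sketch, for exposition only: typical pairing of the two
 * interfaces above, assuming a vHCI class name in vhci_class:
 *
 *	int ndrivers;
 *	char **drivers;
 *
 *	drivers = mdi_get_phci_driver_list(vhci_class, &ndrivers);
 *	if (drivers != NULL) {
 *		(process drivers[0] .. drivers[ndrivers - 1])
 *		mdi_free_phci_driver_list(drivers, ndrivers);
 *	}
 */
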
9681  * mdi_is_dev_supported():
9682  *		function called by pHCI bus config operation to determine if a
9683  *		device should be represented as a child of the vHCI or the
9684  *		pHCI.  This decision is made by the vHCI, using cinfo identity
9685  *		information passed by the pHCI - specifics of the cinfo
9686  *		representation are by agreement between the pHCI and vHCI.
9687  * Return Values:
9688  *		MDI_SUCCESS
9689  *		MDI_FAILURE
9690  */
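/*
 * Illustrative sketch, for exposition only: a pHCI bus config implementation
 * might ask the vHCI whether a newly discovered device should be enumerated
 * as a vHCI client before creating its own child node.  Here pdip is the
 * pHCI dip and cinfo points to identity data whose layout is private to the
 * pHCI/vHCI pair; MDI_HCI_CLASS_SCSI is the standard SCSI vHCI class.
 *
 *	if (mdi_is_dev_supported(MDI_HCI_CLASS_SCSI, pdip, cinfo) ==
 *	    MDI_SUCCESS) {
 *		(let the vHCI enumerate the device as a client)
 *	} else {
 *		(enumerate the device directly under the pHCI)
 *	}
 */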
9691 int
9692 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo)
9693 {
9694 	mdi_vhci_t	*vh;
9695 
9696 	ASSERT(class && pdip);
9697 
9698 	/*
9699 	 * For dev_supported, mdi_phci_register() must have established pdip as
9700 	 * a pHCI.
9701 	 *
9702 	 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and
9703 	 * MDI_PHCI(pdip) will return false if mpxio is disabled.
9704 	 */
9705 	if (!MDI_PHCI(pdip))
9706 		return (MDI_FAILURE);
9707 
9708 	/* Return MDI_FAILURE if vHCI does not support asking the question. */
9709 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
9710 	if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) {
9711 		return (MDI_FAILURE);
9712 	}
9713 
9714 	/* Return vHCI answer */
9715 	return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo));
9716 }
9717 
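/*
 * mdi_dc_return_dev_state():
 *		Translate the mdi_pathinfo state of pip into a DEVCTL device
 *		state (DEVICE_ONLINE, DEVICE_OFFLINE or DEVICE_DOWN, possibly
 *		ORed with DEVICE_BUSY) and copy it out to the devctl_iocdata
 *		cpyout buffer.  Returns MDI_SUCCESS or MDI_FAILURE.
 */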
9718 int
9719 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp)
9720 {
9721 	uint_t devstate = 0;
9722 	dev_info_t *cdip;
9723 
9724 	if ((pip == NULL) || (dcp == NULL))
9725 		return (MDI_FAILURE);
9726 
9727 	cdip = mdi_pi_get_client(pip);
9728 
9729 	switch (mdi_pi_get_state(pip)) {
9730 	case MDI_PATHINFO_STATE_INIT:
9731 		devstate = DEVICE_DOWN;
9732 		break;
9733 	case MDI_PATHINFO_STATE_ONLINE:
9734 		devstate = DEVICE_ONLINE;
9735 		if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED))
9736 			devstate |= DEVICE_BUSY;
9737 		break;
9738 	case MDI_PATHINFO_STATE_STANDBY:
9739 		devstate = DEVICE_ONLINE;
9740 		break;
9741 	case MDI_PATHINFO_STATE_FAULT:
9742 		devstate = DEVICE_DOWN;
9743 		break;
9744 	case MDI_PATHINFO_STATE_OFFLINE:
9745 		devstate = DEVICE_OFFLINE;
9746 		break;
9747 	default:
9748 		ASSERT(MDI_PI(pip)->pi_state);
9749 	}
9750 
9751 	if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0)
9752 		return (MDI_FAILURE);
9753 
9754 	return (MDI_SUCCESS);
9755 }
9756