xref: /illumos-gate/usr/src/uts/sun4v/io/vnet_gen.c (revision ccd81fdda071e031209c777983199d191c35b0a2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/errno.h>
28 #include <sys/sysmacros.h>
29 #include <sys/param.h>
30 #include <sys/machsystm.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/note.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ldc.h>
48 #include <sys/mach_descrip.h>
49 #include <sys/mdeg.h>
50 #include <net/if.h>
51 #include <sys/vnet.h>
52 #include <sys/vio_mailbox.h>
53 #include <sys/vio_common.h>
54 #include <sys/vnet_common.h>
55 #include <sys/vnet_mailbox.h>
56 #include <sys/vio_util.h>
57 #include <sys/vnet_gen.h>
58 #include <sys/atomic.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 #include <sys/vlan.h>
64 
65 /*
66  * Implementation of the mac provider functionality for vnet using the
67  * generic(default) transport layer of sun4v Logical Domain Channels(LDC).
68  */
69 
70 /* Entry Points */
71 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
72     const uint8_t *macaddr, void **vgenhdl);
73 int vgen_init_mdeg(void *arg);
74 void vgen_uninit(void *arg);
75 int vgen_dds_tx(void *arg, void *dmsg);
76 int vgen_enable_intr(void *arg);
77 int vgen_disable_intr(void *arg);
78 mblk_t *vgen_rx_poll(void *arg, int bytes_to_pickup);
79 static int vgen_start(void *arg);
80 static void vgen_stop(void *arg);
81 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
82 static int vgen_multicst(void *arg, boolean_t add,
83 	const uint8_t *mca);
84 static int vgen_promisc(void *arg, boolean_t on);
85 static int vgen_unicst(void *arg, const uint8_t *mca);
86 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
87 static void vgen_ioctl(void *arg, queue_t *q, mblk_t *mp);
88 #ifdef	VNET_IOC_DEBUG
89 static int vgen_force_link_state(vgen_port_t *portp, int link_state);
90 #endif
91 
92 /* Port/LDC Configuration */
93 static int vgen_read_mdprops(vgen_t *vgenp);
94 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
95 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
96 	mde_cookie_t node);
97 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
98 	uint32_t *mtu);
99 static void vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
100 	boolean_t *pls);
101 static void vgen_detach_ports(vgen_t *vgenp);
102 static void vgen_port_detach(vgen_port_t *portp);
103 static void vgen_port_list_insert(vgen_port_t *portp);
104 static void vgen_port_list_remove(vgen_port_t *portp);
105 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
106 	int port_num);
107 static int vgen_mdeg_reg(vgen_t *vgenp);
108 static void vgen_mdeg_unreg(vgen_t *vgenp);
109 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
110 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
111 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
112 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
113 	mde_cookie_t mdex);
114 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
115 static int vgen_port_attach(vgen_port_t *portp);
116 static void vgen_port_detach_mdeg(vgen_port_t *portp);
117 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
118 	mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
119 static uint64_t	vgen_port_stat(vgen_port_t *portp, uint_t stat);
120 static void vgen_port_reset(vgen_port_t *portp);
121 static void vgen_reset_vsw_port(vgen_t *vgenp);
122 static int vgen_ldc_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
123 static void vgen_ldc_up(vgen_ldc_t *ldcp);
124 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
125 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
126 static void vgen_port_init(vgen_port_t *portp);
127 static void vgen_port_uninit(vgen_port_t *portp);
128 static int vgen_ldc_init(vgen_ldc_t *ldcp);
129 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
130 static uint64_t	vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
131 
132 /* I/O Processing */
133 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
134 static int vgen_ldcsend(void *arg, mblk_t *mp);
135 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
136 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
137 static void vgen_tx_watchdog(void *arg);
138 
139 /*  Dring Configuration */
140 static int vgen_create_dring(vgen_ldc_t *ldcp);
141 static void vgen_destroy_dring(vgen_ldc_t *ldcp);
142 static int vgen_map_dring(vgen_ldc_t *ldcp, void *pkt);
143 static void vgen_unmap_dring(vgen_ldc_t *ldcp);
144 
145 /* VIO Message Processing */
146 static int vgen_handshake(vgen_ldc_t *ldcp);
147 static int vgen_handshake_done(vgen_ldc_t *ldcp);
148 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
149 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
150 static int vgen_handshake_phase3(vgen_ldc_t *ldcp);
151 static void vgen_setup_handshake_params(vgen_ldc_t *ldcp);
152 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
153 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
154 static int vgen_send_rx_dring_reg(vgen_ldc_t *ldcp);
155 static int vgen_send_tx_dring_reg(vgen_ldc_t *ldcp);
156 static void vgen_init_dring_reg_msg(vgen_ldc_t *ldcp, vio_dring_reg_msg_t *msg,
157 	uint8_t option);
158 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
159 static int vgen_send_dringdata(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
160 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
161 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
162 	vio_msg_tag_t *tagp);
163 static int vgen_handle_attr_msg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
164 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg);
165 static int vgen_handle_attr_ack(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg);
166 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
167 static int vgen_handle_dring_reg_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
168 static int vgen_handle_dring_reg_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
169 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
170 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
172 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
173 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
174 	uint32_t msglen);
175 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
176 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
177 static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
178 static int vgen_process_reset(vgen_ldc_t *ldcp, int flags);
179 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
180 static void vgen_hwatchdog(void *arg);
181 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
182 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
183 static void vgen_link_update(vgen_t *vgenp, link_state_t link_state);
184 
185 /* VLANs */
186 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
187 	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
188 	uint16_t *nvidsp, uint16_t *default_idp);
189 static void vgen_vlan_create_hash(vgen_port_t *portp);
190 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
191 static void vgen_vlan_add_ids(vgen_port_t *portp);
192 static void vgen_vlan_remove_ids(vgen_port_t *portp);
193 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
194 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
195 	uint16_t *vidp);
196 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
197 	boolean_t is_tagged, uint16_t vid);
198 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
199 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
200 
201 /* Exported functions */
202 int vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller);
203 int vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
204 void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
205 void vgen_destroy_rxpools(void *arg);
206 
207 /* Externs */
208 extern void vnet_dds_rx(void *arg, void *dmsg);
209 extern void vnet_dds_cleanup_hio(vnet_t *vnetp);
210 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
211 extern void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
212 extern int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg,  size_t msglen,
213     boolean_t caller_holds_lock);
214 extern void vgen_stop_msg_thread(vgen_ldc_t *ldcp);
215 extern int vgen_create_tx_dring(vgen_ldc_t *ldcp);
216 extern void vgen_destroy_tx_dring(vgen_ldc_t *ldcp);
217 extern int vgen_map_rx_dring(vgen_ldc_t *ldcp, void *pkt);
218 extern void vgen_unmap_rx_dring(vgen_ldc_t *ldcp);
219 extern int vgen_create_rx_dring(vgen_ldc_t *ldcp);
220 extern void vgen_destroy_rx_dring(vgen_ldc_t *ldcp);
221 extern int vgen_map_tx_dring(vgen_ldc_t *ldcp, void *pkt);
222 extern void vgen_unmap_tx_dring(vgen_ldc_t *ldcp);
223 extern int vgen_map_data(vgen_ldc_t *ldcp, void *pkt);
224 extern int vgen_handle_dringdata_shm(void *arg1, void *arg2);
225 extern int vgen_handle_dringdata(void *arg1, void *arg2);
226 extern int vgen_dringsend_shm(void *arg, mblk_t *mp);
227 extern int vgen_dringsend(void *arg, mblk_t *mp);
228 extern void vgen_ldc_msg_worker(void *arg);
229 extern int vgen_send_dringack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
230     uint32_t start, int32_t end, uint8_t pstate);
231 extern mblk_t *vgen_poll_rcv_shm(vgen_ldc_t *ldcp, int bytes_to_pickup);
232 extern mblk_t *vgen_poll_rcv(vgen_ldc_t *ldcp, int bytes_to_pickup);
233 extern int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
234 
235 #define	VGEN_PRI_ETH_DEFINED(vgenp)	((vgenp)->pri_num_types != 0)
236 
/*
 * Acquire/release every per-channel lock in one step.
 *
 * LDC_LOCK takes the locks in the fixed order cblock, rxlock, wrlock,
 * txlock, tclock; LDC_UNLOCK drops them in exactly the reverse order.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to a single
 * statement. Without the wrapper, a use such as
 *	if (cond)
 *		LDC_LOCK(ldcp);
 * would guard only the first mutex_enter() and take the remaining locks
 * unconditionally. Invoke the macros as ordinary statements, i.e. followed
 * by a semicolon.
 */
#define	LDC_LOCK(ldcp)	\
	do {						\
		mutex_enter(&((ldcp)->cblock));		\
		mutex_enter(&((ldcp)->rxlock));		\
		mutex_enter(&((ldcp)->wrlock));		\
		mutex_enter(&((ldcp)->txlock));		\
		mutex_enter(&((ldcp)->tclock));		\
	} while (0)
#define	LDC_UNLOCK(ldcp)	\
	do {						\
		mutex_exit(&((ldcp)->tclock));		\
		mutex_exit(&((ldcp)->txlock));		\
		mutex_exit(&((ldcp)->wrlock));		\
		mutex_exit(&((ldcp)->rxlock));		\
		mutex_exit(&((ldcp)->cblock));		\
	} while (0)
249 
/*
 * Compare the locally negotiated protocol version of a channel
 * (local_hparams.ver_major/ver_minor) against a given major.minor pair.
 * Each macro evaluates to a non-zero value when the relation holds and
 * to 0 otherwise. Arguments may be evaluated more than once.
 */

/* negotiated version == major.minor */
#define	VGEN_VER_EQ(ldcp, major, minor)	\
	(((ldcp)->local_hparams.ver_major == (major)) &&	\
	    ((ldcp)->local_hparams.ver_minor == (minor)))

/* negotiated version < major.minor */
#define	VGEN_VER_LT(ldcp, major, minor)	\
	(((ldcp)->local_hparams.ver_major < (major)) ||	\
	    (((ldcp)->local_hparams.ver_major == (major)) &&	\
	    ((ldcp)->local_hparams.ver_minor < (minor))))

/* negotiated version >= major.minor, i.e. not less than it */
#define	VGEN_VER_GTEQ(ldcp, major, minor)	\
	(!VGEN_VER_LT((ldcp), (major), (minor)))
263 
264 /*
265  * Property names
266  */
267 static char macaddr_propname[] = "mac-address";
268 static char rmacaddr_propname[] = "remote-mac-address";
269 static char channel_propname[] = "channel-endpoint";
270 static char reg_propname[] = "reg";
271 static char port_propname[] = "port";
272 static char swport_propname[] = "switch-port";
273 static char id_propname[] = "id";
274 static char vdev_propname[] = "virtual-device";
275 static char vnet_propname[] = "network";
276 static char pri_types_propname[] = "priority-ether-types";
277 static char vgen_pvid_propname[] = "port-vlan-id";
278 static char vgen_vid_propname[] = "vlan-id";
279 static char vgen_dvid_propname[] = "default-vlan-id";
280 static char port_pvid_propname[] = "remote-port-vlan-id";
281 static char port_vid_propname[] = "remote-vlan-id";
282 static char vgen_mtu_propname[] = "mtu";
283 static char vgen_linkprop_propname[] = "linkprop";
284 
285 /*
286  * VIO Protocol Version Info:
287  *
288  * The version specified below represents the version of protocol currently
289  * supported in the driver. It means the driver can negotiate with peers with
290  * versions <= this version. Here is a summary of the feature(s) that are
291  * supported at each version of the protocol:
292  *
293  * 1.0			Basic VIO protocol.
294  * 1.1			vDisk protocol update (no virtual network update).
295  * 1.2			Support for priority frames (priority-ether-types).
296  * 1.3			VLAN and HybridIO support.
297  * 1.4			Jumbo Frame support.
298  * 1.5			Link State Notification support with optional support
299  * 			for Physical Link information.
300  * 1.6			Support for RxDringData mode.
301  */
302 static vgen_ver_t vgen_versions[VGEN_NUM_VER] =  { {1, 6} };
303 
304 /* Tunables */
305 uint32_t vgen_hwd_interval = 5;		/* handshake watchdog freq in sec */
306 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
307 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
308 uint32_t vgen_ldccl_retries = 5;	/* max # of ldc_close() retries */
309 uint32_t vgen_tx_delay = 0x30;		/* delay when tx descr not available */
310 uint32_t vgen_ldc_mtu = VGEN_LDC_MTU;		/* ldc mtu */
311 uint32_t vgen_txwd_interval = VGEN_TXWD_INTERVAL; /* watchdog freq in msec */
312 uint32_t vgen_txwd_timeout = VGEN_TXWD_TIMEOUT;   /* tx timeout in msec */
313 
314 /*
315  * Max # of channel resets allowed during handshake.
316  */
317 uint32_t vgen_ldc_max_resets = 5;
318 
319 /*
320  * We provide a tunable to enable RxDringData mode for versions >= 1.6. By
321  * default, this tunable is set to 1 (VIO_TX_DRING). To enable RxDringData mode
322  * set this tunable to 4 (VIO_RX_DRING_DATA).
323  * See comments in vsw.c for details on the dring modes supported.
324  */
325 uint8_t  vgen_dring_mode = VIO_TX_DRING;
326 
327 /*
328  * In RxDringData mode, # of buffers is determined by multiplying the # of
329  * descriptors with the factor below. Note that the factor must be > 1; i.e,
330  * the # of buffers must always be > # of descriptors. This is needed because,
331  * while the shared memory buffers are sent up the stack on the receiver, the
332  * sender needs additional buffers that can be used for further transmits.
333  * See vgen_create_rx_dring() for details.
334  */
335 uint32_t vgen_nrbufs_factor = 2;
336 
337 /*
338  * Retry delay used while destroying rx mblk pools. Used in both Dring modes.
339  */
340 int vgen_rxpool_cleanup_delay = 100000;	/* 100ms */
341 
342 /*
343  * Delay when rx descr not ready; used in TxDring mode only.
344  */
345 uint32_t vgen_recv_delay = 1;
346 
347 /*
348  * Retry when rx descr not ready; used in TxDring mode only.
349  */
350 uint32_t vgen_recv_retries = 10;
351 
352 /*
353  * Max # of packets accumulated prior to sending them up. It is best
354  * to keep this at 60% of the number of receive buffers. Used in TxDring mode
355  * by the msg worker thread. Used in RxDringData mode while in interrupt mode
356  * (not used in polled mode).
357  */
358 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
359 
360 /*
361  * Internal tunables for receive buffer pools, that is,  the size and number of
362  * mblks for each pool. At least 3 sizes must be specified if these are used.
363  * The sizes must be specified in increasing order. Non-zero value of the first
364  * size will be used as a hint to use these values instead of the algorithm
365  * that determines the sizes based on MTU. Used in TxDring mode only.
366  */
367 uint32_t vgen_rbufsz1 = 0;
368 uint32_t vgen_rbufsz2 = 0;
369 uint32_t vgen_rbufsz3 = 0;
370 uint32_t vgen_rbufsz4 = 0;
371 
372 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
373 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
374 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
375 uint32_t vgen_nrbufs4 = VGEN_NRBUFS;
376 
377 /*
378  * In the absence of "priority-ether-types" property in MD, the following
379  * internal tunable can be set to specify a single priority ethertype.
380  */
381 uint64_t vgen_pri_eth_type = 0;
382 
383 /*
384  * Number of transmit priority buffers that are preallocated per device.
385  * This number is chosen to be a small value to throttle transmission
386  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
387  */
388 uint32_t vgen_pri_tx_nmblks = 64;
389 
390 uint32_t	vgen_vlan_nchains = 4;	/* # of chains in vlan id hash table */
391 
392 /*
393  * Matching criteria passed to the MDEG to register interest
394  * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
395  * by their 'name' and 'cfg-handle' properties.
396  */
397 static md_prop_match_t vdev_prop_match[] = {
398 	{ MDET_PROP_STR,    "name"   },
399 	{ MDET_PROP_VAL,    "cfg-handle" },
400 	{ MDET_LIST_END,    NULL    }
401 };
402 
403 static mdeg_node_match_t vdev_match = { "virtual-device",
404 						vdev_prop_match };
405 
406 /* MD update matching structure */
407 static md_prop_match_t	vport_prop_match[] = {
408 	{ MDET_PROP_VAL,	"id" },
409 	{ MDET_LIST_END,	NULL }
410 };
411 
412 static mdeg_node_match_t vport_match = { "virtual-device-port",
413 					vport_prop_match };
414 
415 /* Template for matching a particular vnet instance */
416 static mdeg_prop_spec_t vgen_prop_template[] = {
417 	{ MDET_PROP_STR,	"name",		"network" },
418 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
419 	{ MDET_LIST_END,	NULL,		NULL }
420 };
421 
422 #define	VGEN_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val)
423 
424 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
425 
426 #ifdef	VNET_IOC_DEBUG
427 #define	VGEN_M_CALLBACK_FLAGS	(MC_IOCTL)
428 #else
429 #define	VGEN_M_CALLBACK_FLAGS	(0)
430 #endif
431 
432 static mac_callbacks_t vgen_m_callbacks = {
433 	VGEN_M_CALLBACK_FLAGS,
434 	vgen_stat,
435 	vgen_start,
436 	vgen_stop,
437 	vgen_promisc,
438 	vgen_multicst,
439 	vgen_unicst,
440 	vgen_tx,
441 	NULL,
442 	vgen_ioctl,
443 	NULL,
444 	NULL
445 };
446 
447 /* Externs */
448 extern pri_t	maxclsyspri;
449 extern proc_t	p0;
450 extern uint32_t	vnet_ethermtu;
451 extern uint16_t	vnet_default_vlan_id;
452 
453 #ifdef DEBUG
454 
455 #define	DEBUG_PRINTF	vgen_debug_printf
456 
457 extern int vnet_dbglevel;
458 
459 void vgen_debug_printf(const char *fname, vgen_t *vgenp,
460 	vgen_ldc_t *ldcp, const char *fmt, ...);
461 
462 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
463 int vgendbg_ldcid = -1;
464 
465 /* Flags to simulate error conditions for debugging */
466 int vgen_inject_err_flag = 0;
467 
468 
469 boolean_t
470 vgen_inject_error(vgen_ldc_t *ldcp, int error)
471 {
472 	if ((vgendbg_ldcid == ldcp->ldc_id) &&
473 	    (vgen_inject_err_flag & error)) {
474 		return (B_TRUE);
475 	}
476 	return (B_FALSE);
477 }
478 
479 #endif
480 
481 /*
482  * vgen_init() is called by an instance of vnet driver to initialize the
483  * corresponding generic transport layer. This layer uses Logical Domain
484  * Channels (LDCs) to communicate with the virtual switch in the service domain
485  * and also with peer vnets in other guest domains in the system.
486  *
487  * Arguments:
488  *   vnetp:   an opaque pointer to the vnet instance
489  *   regprop: frame to be transmitted
490  *   vnetdip: dip of the vnet device
491  *   macaddr: mac address of the vnet device
492  *
493  * Returns:
494  *	Sucess:  a handle to the vgen instance (vgen_t)
495  *	Failure: NULL
496  */
497 int
498 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
499     const uint8_t *macaddr, void **vgenhdl)
500 {
501 	vgen_t	*vgenp;
502 	int	instance;
503 	int	rv;
504 	char	qname[TASKQ_NAMELEN];
505 
506 	if ((vnetp == NULL) || (vnetdip == NULL))
507 		return (DDI_FAILURE);
508 
509 	instance = ddi_get_instance(vnetdip);
510 
511 	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
512 
513 	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
514 
515 	vgenp->vnetp = vnetp;
516 	vgenp->instance = instance;
517 	vgenp->regprop = regprop;
518 	vgenp->vnetdip = vnetdip;
519 	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
520 	vgenp->phys_link_state = LINK_STATE_UNKNOWN;
521 
522 	/* allocate multicast table */
523 	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
524 	    sizeof (struct ether_addr), KM_SLEEP);
525 	vgenp->mccount = 0;
526 	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
527 
528 	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
529 	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
530 
531 	(void) snprintf(qname, TASKQ_NAMELEN, "rxpool_taskq%d",
532 	    instance);
533 	if ((vgenp->rxp_taskq = ddi_taskq_create(vnetdip, qname, 1,
534 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
535 		cmn_err(CE_WARN, "!vnet%d: Unable to create rx pool task queue",
536 		    instance);
537 		goto vgen_init_fail;
538 	}
539 
540 	rv = vgen_read_mdprops(vgenp);
541 	if (rv != 0) {
542 		goto vgen_init_fail;
543 	}
544 	*vgenhdl = (void *)vgenp;
545 
546 	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
547 	return (DDI_SUCCESS);
548 
549 vgen_init_fail:
550 	rw_destroy(&vgenp->vgenports.rwlock);
551 	mutex_destroy(&vgenp->lock);
552 	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
553 	    sizeof (struct ether_addr));
554 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
555 		kmem_free(vgenp->pri_types,
556 		    sizeof (uint16_t) * vgenp->pri_num_types);
557 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
558 	}
559 	if (vgenp->rxp_taskq != NULL) {
560 		ddi_taskq_destroy(vgenp->rxp_taskq);
561 		vgenp->rxp_taskq = NULL;
562 	}
563 	KMEM_FREE(vgenp);
564 	return (DDI_FAILURE);
565 }
566 
567 int
568 vgen_init_mdeg(void *arg)
569 {
570 	vgen_t	*vgenp = (vgen_t *)arg;
571 
572 	/* register with MD event generator */
573 	return (vgen_mdeg_reg(vgenp));
574 }
575 
576 /*
577  * Called by vnet to undo the initializations done by vgen_init().
578  * The handle provided by generic transport during vgen_init() is the argument.
579  */
580 void
581 vgen_uninit(void *arg)
582 {
583 	vgen_t	*vgenp = (vgen_t *)arg;
584 
585 	if (vgenp == NULL) {
586 		return;
587 	}
588 
589 	DBG1(vgenp, NULL, "enter\n");
590 
591 	/* Unregister with MD event generator */
592 	vgen_mdeg_unreg(vgenp);
593 
594 	mutex_enter(&vgenp->lock);
595 
596 	/*
597 	 * Detach all ports from the device; note that the device should have
598 	 * been unplumbed by this time (See vnet_unattach() for the sequence)
599 	 * and thus vgen_stop() has already been invoked on all the ports.
600 	 */
601 	vgen_detach_ports(vgenp);
602 
603 	/*
604 	 * We now destroy the taskq used to clean up rx mblk pools that
605 	 * couldn't be destroyed when the ports/channels were detached.
606 	 * We implicitly wait for those tasks to complete in
607 	 * ddi_taskq_destroy().
608 	 */
609 	if (vgenp->rxp_taskq != NULL) {
610 		ddi_taskq_destroy(vgenp->rxp_taskq);
611 		vgenp->rxp_taskq = NULL;
612 	}
613 
614 	/* Free multicast table */
615 	kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
616 
617 	/* Free pri_types table */
618 	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
619 		kmem_free(vgenp->pri_types,
620 		    sizeof (uint16_t) * vgenp->pri_num_types);
621 		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
622 	}
623 
624 	mutex_exit(&vgenp->lock);
625 	rw_destroy(&vgenp->vgenports.rwlock);
626 	mutex_destroy(&vgenp->lock);
627 
628 	DBG1(vgenp, NULL, "exit\n");
629 	KMEM_FREE(vgenp);
630 }
631 
632 /* enable transmit/receive for the device */
633 int
634 vgen_start(void *arg)
635 {
636 	vgen_port_t	*portp = (vgen_port_t *)arg;
637 	vgen_t		*vgenp = portp->vgenp;
638 
639 	DBG1(vgenp, NULL, "enter\n");
640 	mutex_enter(&portp->lock);
641 	vgen_port_init(portp);
642 	portp->flags |= VGEN_STARTED;
643 	mutex_exit(&portp->lock);
644 	DBG1(vgenp, NULL, "exit\n");
645 
646 	return (DDI_SUCCESS);
647 }
648 
649 /* stop transmit/receive */
650 void
651 vgen_stop(void *arg)
652 {
653 	vgen_port_t	*portp = (vgen_port_t *)arg;
654 	vgen_t		*vgenp = portp->vgenp;
655 
656 	DBG1(vgenp, NULL, "enter\n");
657 
658 	mutex_enter(&portp->lock);
659 	if (portp->flags & VGEN_STARTED) {
660 		vgen_port_uninit(portp);
661 		portp->flags &= ~(VGEN_STARTED);
662 	}
663 	mutex_exit(&portp->lock);
664 	DBG1(vgenp, NULL, "exit\n");
665 
666 }
667 
668 /* vgen transmit function */
669 static mblk_t *
670 vgen_tx(void *arg, mblk_t *mp)
671 {
672 	vgen_port_t	*portp;
673 	int		status;
674 
675 	portp = (vgen_port_t *)arg;
676 	status = vgen_portsend(portp, mp);
677 	if (status != VGEN_SUCCESS) {
678 		/* failure */
679 		return (mp);
680 	}
681 	/* success */
682 	return (NULL);
683 }
684 
685 /*
686  * This function provides any necessary tagging/untagging of the frames
687  * that are being transmitted over the port. It first verifies the vlan
688  * membership of the destination(port) and drops the packet if the
689  * destination doesn't belong to the given vlan.
690  *
691  * Arguments:
692  *   portp:     port over which the frames should be transmitted
693  *   mp:        frame to be transmitted
694  *   is_tagged:
695  *              B_TRUE: indicates frame header contains the vlan tag already.
696  *              B_FALSE: indicates frame is untagged.
697  *   vid:       vlan in which the frame should be transmitted.
698  *
699  * Returns:
700  *              Sucess: frame(mblk_t *) after doing the necessary tag/untag.
701  *              Failure: NULL
702  */
703 static mblk_t *
704 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
705 	uint16_t vid)
706 {
707 	vgen_t		*vgenp;
708 	boolean_t	dst_tagged;
709 	int		rv;
710 
711 	vgenp = portp->vgenp;
712 
713 	/*
714 	 * If the packet is going to a vnet:
715 	 *   Check if the destination vnet is in the same vlan.
716 	 *   Check the frame header if tag or untag is needed.
717 	 *
718 	 * We do not check the above conditions if the packet is going to vsw:
719 	 *   vsw must be present implicitly in all the vlans that a vnet device
720 	 *   is configured into; even if vsw itself is not assigned to those
721 	 *   vlans as an interface. For instance, the packet might be destined
722 	 *   to another vnet(indirectly through vsw) or to an external host
723 	 *   which is in the same vlan as this vnet and vsw itself may not be
724 	 *   present in that vlan. Similarly packets going to vsw must be
725 	 *   always tagged(unless in the default-vlan) if not already tagged,
726 	 *   as we do not know the final destination. This is needed because
727 	 *   vsw must always invoke its switching function only after tagging
728 	 *   the packet; otherwise after switching function determines the
729 	 *   destination we cannot figure out if the destination belongs to the
730 	 *   the same vlan that the frame originated from and if it needs tag/
731 	 *   untag. Note that vsw will tag the packet itself when it receives
732 	 *   it over the channel from a client if needed. However, that is
733 	 *   needed only in the case of vlan unaware clients such as obp or
734 	 *   earlier versions of vnet.
735 	 *
736 	 */
737 	if (portp != vgenp->vsw_portp) {
738 		/*
739 		 * Packet going to a vnet. Check if the destination vnet is in
740 		 * the same vlan. Then check the frame header if tag/untag is
741 		 * needed.
742 		 */
743 		rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
744 		if (rv == B_FALSE) {
745 			/* drop the packet */
746 			freemsg(mp);
747 			return (NULL);
748 		}
749 
750 		/* is the destination tagged or untagged in this vlan? */
751 		(vid == portp->pvid) ? (dst_tagged = B_FALSE) :
752 		    (dst_tagged = B_TRUE);
753 
754 		if (is_tagged == dst_tagged) {
755 			/* no tagging/untagging needed */
756 			return (mp);
757 		}
758 
759 		if (is_tagged == B_TRUE) {
760 			/* frame is tagged; destination needs untagged */
761 			mp = vnet_vlan_remove_tag(mp);
762 			return (mp);
763 		}
764 
765 		/* (is_tagged == B_FALSE): fallthru to tag tx packet: */
766 	}
767 
768 	/*
769 	 * Packet going to a vnet needs tagging.
770 	 * OR
771 	 * If the packet is going to vsw, then it must be tagged in all cases:
772 	 * unknown unicast, broadcast/multicast or to vsw interface.
773 	 */
774 
775 	if (is_tagged == B_FALSE) {
776 		mp = vnet_vlan_insert_tag(mp, vid);
777 	}
778 
779 	return (mp);
780 }
781 
782 /* transmit packets over the given port */
783 static int
784 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
785 {
786 	vgen_ldc_t		*ldcp;
787 	int			status;
788 	int			rv = VGEN_SUCCESS;
789 	vgen_t			*vgenp = portp->vgenp;
790 	vnet_t			*vnetp = vgenp->vnetp;
791 	boolean_t		is_tagged;
792 	boolean_t		dec_refcnt = B_FALSE;
793 	uint16_t		vlan_id;
794 	struct ether_header	*ehp;
795 
796 	if (portp == NULL) {
797 		return (VGEN_FAILURE);
798 	}
799 
800 	if (portp->use_vsw_port) {
801 		(void) atomic_inc_32(&vgenp->vsw_port_refcnt);
802 		portp = portp->vgenp->vsw_portp;
803 		ASSERT(portp != NULL);
804 		dec_refcnt = B_TRUE;
805 	}
806 
807 	/*
808 	 * Determine the vlan id that the frame belongs to.
809 	 */
810 	ehp = (struct ether_header *)mp->b_rptr;
811 	is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
812 
813 	if (vlan_id == vnetp->default_vlan_id) {
814 
815 		/* Frames in default vlan must be untagged */
816 		ASSERT(is_tagged == B_FALSE);
817 
818 		/*
819 		 * If the destination is a vnet-port verify it belongs to the
820 		 * default vlan; otherwise drop the packet. We do not need
821 		 * this check for vsw-port, as it should implicitly belong to
822 		 * this vlan; see comments in vgen_vlan_frame_fixtag().
823 		 */
824 		if (portp != vgenp->vsw_portp &&
825 		    portp->pvid != vnetp->default_vlan_id) {
826 			freemsg(mp);
827 			goto portsend_ret;
828 		}
829 
830 	} else {	/* frame not in default-vlan */
831 
832 		mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
833 		if (mp == NULL) {
834 			goto portsend_ret;
835 		}
836 
837 	}
838 
839 	ldcp = portp->ldcp;
840 	status = ldcp->tx(ldcp, mp);
841 
842 	if (status != VGEN_TX_SUCCESS) {
843 		rv = VGEN_FAILURE;
844 	}
845 
846 portsend_ret:
847 	if (dec_refcnt == B_TRUE) {
848 		(void) atomic_dec_32(&vgenp->vsw_port_refcnt);
849 	}
850 	return (rv);
851 }
852 
853 /*
854  * Wrapper function to transmit normal and/or priority frames over the channel.
855  */
856 static int
857 vgen_ldcsend(void *arg, mblk_t *mp)
858 {
859 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
860 	int			status;
861 	struct ether_header	*ehp;
862 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
863 	uint32_t		num_types;
864 	uint16_t		*types;
865 	int			i;
866 
867 	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
868 
869 	num_types = vgenp->pri_num_types;
870 	types = vgenp->pri_types;
871 	ehp = (struct ether_header *)mp->b_rptr;
872 
873 	for (i = 0; i < num_types; i++) {
874 
875 		if (ehp->ether_type == types[i]) {
876 			/* priority frame, use pri tx function */
877 			vgen_ldcsend_pkt(ldcp, mp);
878 			return (VGEN_SUCCESS);
879 		}
880 
881 	}
882 
883 	if (ldcp->tx_dringdata == NULL) {
884 		freemsg(mp);
885 		return (VGEN_SUCCESS);
886 	}
887 
888 	status  = ldcp->tx_dringdata(ldcp, mp);
889 	return (status);
890 }
891 
892 /*
893  * This function transmits the frame in the payload of a raw data
894  * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
895  * send special frames with high priorities, without going through
896  * the normal data path which uses descriptor ring mechanism.
897  */
898 static void
899 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
900 {
901 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
902 	vio_raw_data_msg_t	*pkt;
903 	mblk_t			*bp;
904 	mblk_t			*nmp = NULL;
905 	vio_mblk_t		*vmp;
906 	caddr_t			dst;
907 	uint32_t		mblksz;
908 	uint32_t		size;
909 	uint32_t		nbytes;
910 	int			rv;
911 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
912 	vgen_stats_t		*statsp = &ldcp->stats;
913 
914 	/* drop the packet if ldc is not up or handshake is not done */
915 	if (ldcp->ldc_status != LDC_UP) {
916 		(void) atomic_inc_32(&statsp->tx_pri_fail);
917 		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
918 		    ldcp->ldc_status);
919 		goto send_pkt_exit;
920 	}
921 
922 	if (ldcp->hphase != VH_DONE) {
923 		(void) atomic_inc_32(&statsp->tx_pri_fail);
924 		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
925 		    ldcp->hphase);
926 		goto send_pkt_exit;
927 	}
928 
929 	size = msgsize(mp);
930 
931 	/* frame size bigger than available payload len of raw data msg ? */
932 	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
933 		(void) atomic_inc_32(&statsp->tx_pri_fail);
934 		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
935 		goto send_pkt_exit;
936 	}
937 
938 	if (size < ETHERMIN)
939 		size = ETHERMIN;
940 
941 	/* alloc space for a raw data message */
942 	vmp = vio_allocb(vgenp->pri_tx_vmp);
943 	if (vmp == NULL) {
944 		(void) atomic_inc_32(&statsp->tx_pri_fail);
945 		DWARN(vgenp, ldcp, "vio_allocb failed\n");
946 		goto send_pkt_exit;
947 	} else {
948 		nmp = vmp->mp;
949 	}
950 	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
951 
952 	/* copy frame into the payload of raw data message */
953 	dst = (caddr_t)pkt->data;
954 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
955 		mblksz = MBLKL(bp);
956 		bcopy(bp->b_rptr, dst, mblksz);
957 		dst += mblksz;
958 	}
959 
960 	vmp->state = VIO_MBLK_HAS_DATA;
961 
962 	/* setup the raw data msg */
963 	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
964 	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
965 	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
966 	pkt->tag.vio_sid = ldcp->local_sid;
967 	nbytes = VIO_PKT_DATA_HDRSIZE + size;
968 
969 	/* send the msg over ldc */
970 	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
971 	if (rv != VGEN_SUCCESS) {
972 		(void) atomic_inc_32(&statsp->tx_pri_fail);
973 		DWARN(vgenp, ldcp, "Error sending priority frame\n");
974 		if (rv == ECONNRESET) {
975 			(void) vgen_handle_evt_reset(ldcp, VGEN_OTHER);
976 		}
977 		goto send_pkt_exit;
978 	}
979 
980 	/* update stats */
981 	(void) atomic_inc_64(&statsp->tx_pri_packets);
982 	(void) atomic_add_64(&statsp->tx_pri_bytes, size);
983 
984 send_pkt_exit:
985 	if (nmp != NULL)
986 		freemsg(nmp);
987 	freemsg(mp);
988 }
989 
990 /*
991  * enable/disable a multicast address
992  * note that the cblock of the ldc channel connected to the vsw is used for
993  * synchronization of the mctab.
994  */
995 int
996 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
997 {
998 	vgen_t			*vgenp;
999 	vnet_mcast_msg_t	mcastmsg;
1000 	vio_msg_tag_t		*tagp;
1001 	vgen_port_t		*portp;
1002 	vgen_ldc_t		*ldcp;
1003 	struct ether_addr	*addrp;
1004 	int			rv = DDI_FAILURE;
1005 	uint32_t		i;
1006 
1007 	portp = (vgen_port_t *)arg;
1008 	vgenp = portp->vgenp;
1009 
1010 	if (portp->is_vsw_port != B_TRUE) {
1011 		return (DDI_SUCCESS);
1012 	}
1013 
1014 	addrp = (struct ether_addr *)mca;
1015 	tagp = &mcastmsg.tag;
1016 	bzero(&mcastmsg, sizeof (mcastmsg));
1017 
1018 	ldcp = portp->ldcp;
1019 	if (ldcp == NULL) {
1020 		return (DDI_FAILURE);
1021 	}
1022 
1023 	mutex_enter(&ldcp->cblock);
1024 
1025 	if (ldcp->hphase == VH_DONE) {
1026 		/*
1027 		 * If handshake is done, send a msg to vsw to add/remove
1028 		 * the multicast address. Otherwise, we just update this
1029 		 * mcast address in our table and the table will be sync'd
1030 		 * with vsw when handshake completes.
1031 		 */
1032 		tagp->vio_msgtype = VIO_TYPE_CTRL;
1033 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
1034 		tagp->vio_subtype_env = VNET_MCAST_INFO;
1035 		tagp->vio_sid = ldcp->local_sid;
1036 		bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
1037 		mcastmsg.set = add;
1038 		mcastmsg.count = 1;
1039 		if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
1040 		    B_FALSE) != VGEN_SUCCESS) {
1041 			DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
1042 			rv = DDI_FAILURE;
1043 			goto vgen_mcast_exit;
1044 		}
1045 	}
1046 
1047 	if (add) {
1048 
1049 		/* expand multicast table if necessary */
1050 		if (vgenp->mccount >= vgenp->mcsize) {
1051 			struct ether_addr	*newtab;
1052 			uint32_t		newsize;
1053 
1054 
1055 			newsize = vgenp->mcsize * 2;
1056 
1057 			newtab = kmem_zalloc(newsize *
1058 			    sizeof (struct ether_addr), KM_NOSLEEP);
1059 			if (newtab == NULL)
1060 				goto vgen_mcast_exit;
1061 			bcopy(vgenp->mctab, newtab, vgenp->mcsize *
1062 			    sizeof (struct ether_addr));
1063 			kmem_free(vgenp->mctab,
1064 			    vgenp->mcsize * sizeof (struct ether_addr));
1065 
1066 			vgenp->mctab = newtab;
1067 			vgenp->mcsize = newsize;
1068 		}
1069 
1070 		/* add address to the table */
1071 		vgenp->mctab[vgenp->mccount++] = *addrp;
1072 
1073 	} else {
1074 
1075 		/* delete address from the table */
1076 		for (i = 0; i < vgenp->mccount; i++) {
1077 			if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
1078 
1079 				/*
1080 				 * If there's more than one address in this
1081 				 * table, delete the unwanted one by moving
1082 				 * the last one in the list over top of it;
1083 				 * otherwise, just remove it.
1084 				 */
1085 				if (vgenp->mccount > 1) {
1086 					vgenp->mctab[i] =
1087 					    vgenp->mctab[vgenp->mccount-1];
1088 				}
1089 				vgenp->mccount--;
1090 				break;
1091 			}
1092 		}
1093 	}
1094 
1095 	rv = DDI_SUCCESS;
1096 
1097 vgen_mcast_exit:
1098 
1099 	mutex_exit(&ldcp->cblock);
1100 	return (rv);
1101 }
1102 
1103 /* set or clear promiscuous mode on the device */
1104 static int
1105 vgen_promisc(void *arg, boolean_t on)
1106 {
1107 	_NOTE(ARGUNUSED(arg, on))
1108 	return (DDI_SUCCESS);
1109 }
1110 
1111 /* set the unicast mac address of the device */
1112 static int
1113 vgen_unicst(void *arg, const uint8_t *mca)
1114 {
1115 	_NOTE(ARGUNUSED(arg, mca))
1116 	return (DDI_SUCCESS);
1117 }
1118 
1119 /* get device statistics */
1120 int
1121 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1122 {
1123 	vgen_port_t	*portp = (vgen_port_t *)arg;
1124 
1125 	*val = vgen_port_stat(portp, stat);
1126 	return (0);
1127 }
1128 
1129 /* vgen internal functions */
1130 /* detach all ports from the device */
1131 static void
1132 vgen_detach_ports(vgen_t *vgenp)
1133 {
1134 	vgen_port_t	*portp;
1135 	vgen_portlist_t	*plistp;
1136 
1137 	plistp = &(vgenp->vgenports);
1138 	WRITE_ENTER(&plistp->rwlock);
1139 	while ((portp = plistp->headp) != NULL) {
1140 		vgen_port_detach(portp);
1141 	}
1142 	RW_EXIT(&plistp->rwlock);
1143 }
1144 
1145 /*
1146  * detach the given port.
1147  */
1148 static void
1149 vgen_port_detach(vgen_port_t *portp)
1150 {
1151 	vgen_t		*vgenp;
1152 	int		port_num;
1153 
1154 	vgenp = portp->vgenp;
1155 	port_num = portp->port_num;
1156 
1157 	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
1158 
1159 	/*
1160 	 * If this port is connected to the vswitch, then
1161 	 * potentially there could be ports that may be using
1162 	 * this port to transmit packets. To address this do
1163 	 * the following:
1164 	 *	- First set vgenp->vsw_portp to NULL, so that
1165 	 *	  its not used after that.
1166 	 *	- Then wait for the refcnt to go down to 0.
1167 	 *	- Now we can safely detach this port.
1168 	 */
1169 	if (vgenp->vsw_portp == portp) {
1170 		vgenp->vsw_portp = NULL;
1171 		while (vgenp->vsw_port_refcnt > 0) {
1172 			delay(drv_usectohz(vgen_tx_delay));
1173 		}
1174 		(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
1175 	}
1176 
1177 	if (portp->vhp != NULL) {
1178 		vio_net_resource_unreg(portp->vhp);
1179 		portp->vhp = NULL;
1180 	}
1181 
1182 	vgen_vlan_destroy_hash(portp);
1183 
1184 	/* remove it from port list */
1185 	vgen_port_list_remove(portp);
1186 
1187 	/* detach channels from this port */
1188 	vgen_ldc_detach(portp->ldcp);
1189 
1190 	if (portp->num_ldcs != 0) {
1191 		kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
1192 		portp->num_ldcs = 0;
1193 	}
1194 
1195 	mutex_destroy(&portp->lock);
1196 	KMEM_FREE(portp);
1197 
1198 	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
1199 }
1200 
1201 /* add a port to port list */
1202 static void
1203 vgen_port_list_insert(vgen_port_t *portp)
1204 {
1205 	vgen_portlist_t	*plistp;
1206 	vgen_t		*vgenp;
1207 
1208 	vgenp = portp->vgenp;
1209 	plistp = &(vgenp->vgenports);
1210 
1211 	if (plistp->headp == NULL) {
1212 		plistp->headp = portp;
1213 	} else {
1214 		plistp->tailp->nextp = portp;
1215 	}
1216 	plistp->tailp = portp;
1217 	portp->nextp = NULL;
1218 }
1219 
1220 /* remove a port from port list */
1221 static void
1222 vgen_port_list_remove(vgen_port_t *portp)
1223 {
1224 	vgen_port_t	*prevp;
1225 	vgen_port_t	*nextp;
1226 	vgen_portlist_t	*plistp;
1227 	vgen_t		*vgenp;
1228 
1229 	vgenp = portp->vgenp;
1230 
1231 	plistp = &(vgenp->vgenports);
1232 
1233 	if (plistp->headp == NULL)
1234 		return;
1235 
1236 	if (portp == plistp->headp) {
1237 		plistp->headp = portp->nextp;
1238 		if (portp == plistp->tailp)
1239 			plistp->tailp = plistp->headp;
1240 	} else {
1241 		for (prevp = plistp->headp;
1242 		    ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1243 		    prevp = nextp)
1244 			;
1245 		if (nextp == portp) {
1246 			prevp->nextp = portp->nextp;
1247 		}
1248 		if (portp == plistp->tailp)
1249 			plistp->tailp = prevp;
1250 	}
1251 }
1252 
1253 /* lookup a port in the list based on port_num */
1254 static vgen_port_t *
1255 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1256 {
1257 	vgen_port_t *portp = NULL;
1258 
1259 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1260 		if (portp->port_num == port_num) {
1261 			break;
1262 		}
1263 	}
1264 
1265 	return (portp);
1266 }
1267 
1268 static void
1269 vgen_port_init(vgen_port_t *portp)
1270 {
1271 	/* Add the port to the specified vlans */
1272 	vgen_vlan_add_ids(portp);
1273 
1274 	/* Bring up the channel */
1275 	(void) vgen_ldc_init(portp->ldcp);
1276 }
1277 
1278 static void
1279 vgen_port_uninit(vgen_port_t *portp)
1280 {
1281 	vgen_ldc_uninit(portp->ldcp);
1282 
1283 	/* remove the port from vlans it has been assigned to */
1284 	vgen_vlan_remove_ids(portp);
1285 }
1286 
1287 /*
1288  * Scan the machine description for this instance of vnet
1289  * and read its properties. Called only from vgen_init().
1290  * Returns: 0 on success, 1 on failure.
1291  */
1292 static int
1293 vgen_read_mdprops(vgen_t *vgenp)
1294 {
1295 	vnet_t		*vnetp = vgenp->vnetp;
1296 	md_t		*mdp = NULL;
1297 	mde_cookie_t	rootnode;
1298 	mde_cookie_t	*listp = NULL;
1299 	uint64_t	cfgh;
1300 	char		*name;
1301 	int		rv = 1;
1302 	int		num_nodes = 0;
1303 	int		num_devs = 0;
1304 	int		listsz = 0;
1305 	int		i;
1306 
1307 	if ((mdp = md_get_handle()) == NULL) {
1308 		return (rv);
1309 	}
1310 
1311 	num_nodes = md_node_count(mdp);
1312 	ASSERT(num_nodes > 0);
1313 
1314 	listsz = num_nodes * sizeof (mde_cookie_t);
1315 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1316 
1317 	rootnode = md_root_node(mdp);
1318 
1319 	/* search for all "virtual_device" nodes */
1320 	num_devs = md_scan_dag(mdp, rootnode,
1321 	    md_find_name(mdp, vdev_propname),
1322 	    md_find_name(mdp, "fwd"), listp);
1323 	if (num_devs <= 0) {
1324 		goto vgen_readmd_exit;
1325 	}
1326 
1327 	/*
1328 	 * Now loop through the list of virtual-devices looking for
1329 	 * devices with name "network" and for each such device compare
1330 	 * its instance with what we have from the 'reg' property to
1331 	 * find the right node in MD and then read all its properties.
1332 	 */
1333 	for (i = 0; i < num_devs; i++) {
1334 
1335 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1336 			goto vgen_readmd_exit;
1337 		}
1338 
1339 		/* is this a "network" device? */
1340 		if (strcmp(name, vnet_propname) != 0)
1341 			continue;
1342 
1343 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1344 			goto vgen_readmd_exit;
1345 		}
1346 
1347 		/* is this the required instance of vnet? */
1348 		if (vgenp->regprop != cfgh)
1349 			continue;
1350 
1351 		/*
1352 		 * Read the 'linkprop' property to know if this vnet
1353 		 * device should get physical link updates from vswitch.
1354 		 */
1355 		vgen_linkprop_read(vgenp, mdp, listp[i],
1356 		    &vnetp->pls_update);
1357 
1358 		/*
1359 		 * Read the mtu. Note that we set the mtu of vnet device within
1360 		 * this routine itself, after validating the range.
1361 		 */
1362 		vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
1363 		if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
1364 			vnetp->mtu = ETHERMTU;
1365 		}
1366 		vgenp->max_frame_size = vnetp->mtu +
1367 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1368 
1369 		/* read priority ether types */
1370 		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1371 
1372 		/* read vlan id properties of this vnet instance */
1373 		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
1374 		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
1375 		    &vnetp->default_vlan_id);
1376 
1377 		rv = 0;
1378 		break;
1379 	}
1380 
1381 vgen_readmd_exit:
1382 
1383 	kmem_free(listp, listsz);
1384 	(void) md_fini_handle(mdp);
1385 	return (rv);
1386 }
1387 
1388 /*
1389  * Read vlan id properties of the given MD node.
1390  * Arguments:
1391  *   arg:          device argument(vnet device or a port)
1392  *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1393  *   mdp:          machine description
1394  *   node:         md node cookie
1395  *
1396  * Returns:
1397  *   pvidp:        port-vlan-id of the node
1398  *   vidspp:       list of vlan-ids of the node
1399  *   nvidsp:       # of vlan-ids in the list
1400  *   default_idp:  default-vlan-id of the node(if node is vnet device)
1401  */
1402 static void
1403 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1404 	uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1405 	uint16_t *default_idp)
1406 {
1407 	vgen_t		*vgenp;
1408 	vnet_t		*vnetp;
1409 	vgen_port_t	*portp;
1410 	char		*pvid_propname;
1411 	char		*vid_propname;
1412 	uint_t		nvids;
1413 	uint32_t	vids_size;
1414 	int		rv;
1415 	int		i;
1416 	uint64_t	*data;
1417 	uint64_t	val;
1418 	int		size;
1419 	int		inst;
1420 
1421 	if (type == VGEN_LOCAL) {
1422 
1423 		vgenp = (vgen_t *)arg;
1424 		vnetp = vgenp->vnetp;
1425 		pvid_propname = vgen_pvid_propname;
1426 		vid_propname = vgen_vid_propname;
1427 		inst = vnetp->instance;
1428 
1429 	} else if (type == VGEN_PEER) {
1430 
1431 		portp = (vgen_port_t *)arg;
1432 		vgenp = portp->vgenp;
1433 		vnetp = vgenp->vnetp;
1434 		pvid_propname = port_pvid_propname;
1435 		vid_propname = port_vid_propname;
1436 		inst = portp->port_num;
1437 
1438 	} else {
1439 		return;
1440 	}
1441 
1442 	if (type == VGEN_LOCAL && default_idp != NULL) {
1443 		rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
1444 		if (rv != 0) {
1445 			DWARN(vgenp, NULL, "prop(%s) not found",
1446 			    vgen_dvid_propname);
1447 
1448 			*default_idp = vnet_default_vlan_id;
1449 		} else {
1450 			*default_idp = val & 0xFFF;
1451 			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
1452 			    inst, *default_idp);
1453 		}
1454 	}
1455 
1456 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1457 	if (rv != 0) {
1458 		DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
1459 		*pvidp = vnet_default_vlan_id;
1460 	} else {
1461 
1462 		*pvidp = val & 0xFFF;
1463 		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
1464 		    pvid_propname, inst, *pvidp);
1465 	}
1466 
1467 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1468 	    &size);
1469 	if (rv != 0) {
1470 		DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
1471 		size = 0;
1472 	} else {
1473 		size /= sizeof (uint64_t);
1474 	}
1475 	nvids = size;
1476 
1477 	if (nvids != 0) {
1478 		DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
1479 		vids_size = sizeof (uint16_t) * nvids;
1480 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1481 		for (i = 0; i < nvids; i++) {
1482 			(*vidspp)[i] = data[i] & 0xFFFF;
1483 			DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
1484 		}
1485 		DBG2(vgenp, NULL, "\n");
1486 	}
1487 
1488 	*nvidsp = nvids;
1489 }
1490 
1491 /*
1492  * Create a vlan id hash table for the given port.
1493  */
1494 static void
1495 vgen_vlan_create_hash(vgen_port_t *portp)
1496 {
1497 	char		hashname[MAXNAMELEN];
1498 
1499 	(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1500 	    portp->port_num);
1501 
1502 	portp->vlan_nchains = vgen_vlan_nchains;
1503 	portp->vlan_hashp = mod_hash_create_idhash(hashname,
1504 	    portp->vlan_nchains, mod_hash_null_valdtor);
1505 }
1506 
1507 /*
1508  * Destroy the vlan id hash table in the given port.
1509  */
1510 static void
1511 vgen_vlan_destroy_hash(vgen_port_t *portp)
1512 {
1513 	if (portp->vlan_hashp != NULL) {
1514 		mod_hash_destroy_hash(portp->vlan_hashp);
1515 		portp->vlan_hashp = NULL;
1516 		portp->vlan_nchains = 0;
1517 	}
1518 }
1519 
1520 /*
1521  * Add a port to the vlans specified in its port properites.
1522  */
1523 static void
1524 vgen_vlan_add_ids(vgen_port_t *portp)
1525 {
1526 	int		rv;
1527 	int		i;
1528 
1529 	rv = mod_hash_insert(portp->vlan_hashp,
1530 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1531 	    (mod_hash_val_t)B_TRUE);
1532 	ASSERT(rv == 0);
1533 
1534 	for (i = 0; i < portp->nvids; i++) {
1535 		rv = mod_hash_insert(portp->vlan_hashp,
1536 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1537 		    (mod_hash_val_t)B_TRUE);
1538 		ASSERT(rv == 0);
1539 	}
1540 }
1541 
1542 /*
1543  * Remove a port from the vlans it has been assigned to.
1544  */
1545 static void
1546 vgen_vlan_remove_ids(vgen_port_t *portp)
1547 {
1548 	int		rv;
1549 	int		i;
1550 	mod_hash_val_t	vp;
1551 
1552 	rv = mod_hash_remove(portp->vlan_hashp,
1553 	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1554 	    (mod_hash_val_t *)&vp);
1555 	ASSERT(rv == 0);
1556 
1557 	for (i = 0; i < portp->nvids; i++) {
1558 		rv = mod_hash_remove(portp->vlan_hashp,
1559 		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1560 		    (mod_hash_val_t *)&vp);
1561 		ASSERT(rv == 0);
1562 	}
1563 }
1564 
1565 /*
1566  * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1567  * then the vlan-id is available in the tag; otherwise, its vlan id is
1568  * implicitly obtained from the port-vlan-id of the vnet device.
1569  * The vlan id determined is returned in vidp.
1570  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1571  */
1572 static boolean_t
1573 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1574 {
1575 	struct ether_vlan_header	*evhp;
1576 
1577 	/* If it's a tagged frame, get the vlan id from vlan header */
1578 	if (ehp->ether_type == ETHERTYPE_VLAN) {
1579 
1580 		evhp = (struct ether_vlan_header *)ehp;
1581 		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
1582 		return (B_TRUE);
1583 	}
1584 
1585 	/* Untagged frame, vlan-id is the pvid of vnet device */
1586 	*vidp = vnetp->pvid;
1587 	return (B_FALSE);
1588 }
1589 
1590 /*
1591  * Find the given vlan id in the hash table.
1592  * Return: B_TRUE if the id is found; B_FALSE if not found.
1593  */
1594 static boolean_t
1595 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1596 {
1597 	int		rv;
1598 	mod_hash_val_t	vp;
1599 
1600 	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1601 
1602 	if (rv != 0)
1603 		return (B_FALSE);
1604 
1605 	return (B_TRUE);
1606 }
1607 
1608 /*
1609  * This function reads "priority-ether-types" property from md. This property
1610  * is used to enable support for priority frames. Applications which need
1611  * guaranteed and timely delivery of certain high priority frames to/from
1612  * a vnet or vsw within ldoms, should configure this property by providing
1613  * the ether type(s) for which the priority facility is needed.
1614  * Normal data frames are delivered over a ldc channel using the descriptor
1615  * ring mechanism which is constrained by factors such as descriptor ring size,
1616  * the rate at which the ring is processed at the peer ldc end point, etc.
1617  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1618  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1619  * descriptor ring path and enables a more reliable and timely delivery of
1620  * frames to the peer.
1621  */
1622 static void
1623 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1624 {
1625 	int		rv;
1626 	uint16_t	*types;
1627 	uint64_t	*data;
1628 	int		size;
1629 	int		i;
1630 	size_t		mblk_sz;
1631 
1632 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1633 	    (uint8_t **)&data, &size);
1634 	if (rv != 0) {
1635 		/*
1636 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1637 		 * Check if 'vgen_pri_eth_type' has been set in that case.
1638 		 */
1639 		if (vgen_pri_eth_type != 0) {
1640 			size = sizeof (vgen_pri_eth_type);
1641 			data = &vgen_pri_eth_type;
1642 		} else {
1643 			DBG2(vgenp, NULL,
1644 			    "prop(%s) not found", pri_types_propname);
1645 			size = 0;
1646 		}
1647 	}
1648 
1649 	if (size == 0) {
1650 		vgenp->pri_num_types = 0;
1651 		return;
1652 	}
1653 
1654 	/*
1655 	 * we have some priority-ether-types defined;
1656 	 * allocate a table of these types and also
1657 	 * allocate a pool of mblks to transmit these
1658 	 * priority packets.
1659 	 */
1660 	size /= sizeof (uint64_t);
1661 	vgenp->pri_num_types = size;
1662 	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1663 	for (i = 0, types = vgenp->pri_types; i < size; i++) {
1664 		types[i] = data[i] & 0xFFFF;
1665 	}
1666 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
1667 	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz, NULL,
1668 	    &vgenp->pri_tx_vmp);
1669 }
1670 
1671 static void
1672 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1673 {
1674 	int		rv;
1675 	uint64_t	val;
1676 	char		*mtu_propname;
1677 
1678 	mtu_propname = vgen_mtu_propname;
1679 
1680 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1681 	if (rv != 0) {
1682 		DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
1683 		*mtu = vnet_ethermtu;
1684 	} else {
1685 
1686 		*mtu = val & 0xFFFF;
1687 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
1688 		    vgenp->instance, *mtu);
1689 	}
1690 }
1691 
1692 static void
1693 vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
1694 	boolean_t *pls)
1695 {
1696 	int		rv;
1697 	uint64_t	val;
1698 	char		*linkpropname;
1699 
1700 	linkpropname = vgen_linkprop_propname;
1701 
1702 	rv = md_get_prop_val(mdp, node, linkpropname, &val);
1703 	if (rv != 0) {
1704 		DWARN(vgenp, NULL, "prop(%s) not found", linkpropname);
1705 		*pls = B_FALSE;
1706 	} else {
1707 
1708 		*pls = (val & 0x1) ?  B_TRUE : B_FALSE;
1709 		DBG2(vgenp, NULL, "%s(%d): (%d)\n", linkpropname,
1710 		    vgenp->instance, *pls);
1711 	}
1712 }
1713 
1714 /* register with MD event generator */
1715 static int
1716 vgen_mdeg_reg(vgen_t *vgenp)
1717 {
1718 	mdeg_prop_spec_t	*pspecp;
1719 	mdeg_node_spec_t	*parentp;
1720 	uint_t			templatesz;
1721 	int			rv;
1722 	mdeg_handle_t		dev_hdl = NULL;
1723 	mdeg_handle_t		port_hdl = NULL;
1724 
1725 	templatesz = sizeof (vgen_prop_template);
1726 	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1727 	if (pspecp == NULL) {
1728 		return (DDI_FAILURE);
1729 	}
1730 	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1731 	if (parentp == NULL) {
1732 		kmem_free(pspecp, templatesz);
1733 		return (DDI_FAILURE);
1734 	}
1735 
1736 	bcopy(vgen_prop_template, pspecp, templatesz);
1737 
1738 	/*
1739 	 * NOTE: The instance here refers to the value of "reg" property and
1740 	 * not the dev_info instance (ddi_get_instance()) of vnet.
1741 	 */
1742 	VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
1743 
1744 	parentp->namep = "virtual-device";
1745 	parentp->specp = pspecp;
1746 
1747 	/* save parentp in vgen_t */
1748 	vgenp->mdeg_parentp = parentp;
1749 
1750 	/*
1751 	 * Register an interest in 'virtual-device' nodes with a
1752 	 * 'name' property of 'network'
1753 	 */
1754 	rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
1755 	if (rv != MDEG_SUCCESS) {
1756 		DERR(vgenp, NULL, "mdeg_register failed\n");
1757 		goto mdeg_reg_fail;
1758 	}
1759 
1760 	/* Register an interest in 'port' nodes */
1761 	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
1762 	    &port_hdl);
1763 	if (rv != MDEG_SUCCESS) {
1764 		DERR(vgenp, NULL, "mdeg_register failed\n");
1765 		goto mdeg_reg_fail;
1766 	}
1767 
1768 	/* save mdeg handle in vgen_t */
1769 	vgenp->mdeg_dev_hdl = dev_hdl;
1770 	vgenp->mdeg_port_hdl = port_hdl;
1771 
1772 	return (DDI_SUCCESS);
1773 
1774 mdeg_reg_fail:
1775 	if (dev_hdl != NULL) {
1776 		(void) mdeg_unregister(dev_hdl);
1777 	}
1778 	KMEM_FREE(parentp);
1779 	kmem_free(pspecp, templatesz);
1780 	vgenp->mdeg_parentp = NULL;
1781 	return (DDI_FAILURE);
1782 }
1783 
1784 /* unregister with MD event generator */
1785 static void
1786 vgen_mdeg_unreg(vgen_t *vgenp)
1787 {
1788 	if (vgenp->mdeg_dev_hdl != NULL) {
1789 		(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
1790 		vgenp->mdeg_dev_hdl = NULL;
1791 	}
1792 	if (vgenp->mdeg_port_hdl != NULL) {
1793 		(void) mdeg_unregister(vgenp->mdeg_port_hdl);
1794 		vgenp->mdeg_port_hdl = NULL;
1795 	}
1796 
1797 	if (vgenp->mdeg_parentp != NULL) {
1798 		kmem_free(vgenp->mdeg_parentp->specp,
1799 		    sizeof (vgen_prop_template));
1800 		KMEM_FREE(vgenp->mdeg_parentp);
1801 		vgenp->mdeg_parentp = NULL;
1802 	}
1803 }
1804 
1805 /* mdeg callback function for the port node */
1806 static int
1807 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
1808 {
1809 	int		idx;
1810 	int		vsw_idx = -1;
1811 	uint64_t 	val;
1812 	vgen_t		*vgenp;
1813 
1814 	if ((resp == NULL) || (cb_argp == NULL)) {
1815 		return (MDEG_FAILURE);
1816 	}
1817 
1818 	vgenp = (vgen_t *)cb_argp;
1819 	DBG1(vgenp, NULL, "enter\n");
1820 
1821 	mutex_enter(&vgenp->lock);
1822 
1823 	DBG1(vgenp, NULL, "ports: removed(%x), "
1824 	"added(%x), updated(%x)\n", resp->removed.nelem,
1825 	    resp->added.nelem, resp->match_curr.nelem);
1826 
1827 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1828 		(void) vgen_remove_port(vgenp, resp->removed.mdp,
1829 		    resp->removed.mdep[idx]);
1830 	}
1831 
1832 	if (vgenp->vsw_portp == NULL) {
1833 		/*
1834 		 * find vsw_port and add it first, because other ports need
1835 		 * this when adding fdb entry (see vgen_port_init()).
1836 		 */
1837 		for (idx = 0; idx < resp->added.nelem; idx++) {
1838 			if (!(md_get_prop_val(resp->added.mdp,
1839 			    resp->added.mdep[idx], swport_propname, &val))) {
1840 				if (val == 0) {
1841 					/*
1842 					 * This port is connected to the
1843 					 * vsw on service domain.
1844 					 */
1845 					vsw_idx = idx;
1846 					if (vgen_add_port(vgenp,
1847 					    resp->added.mdp,
1848 					    resp->added.mdep[idx]) !=
1849 					    DDI_SUCCESS) {
1850 						cmn_err(CE_NOTE, "vnet%d Could "
1851 						    "not initialize virtual "
1852 						    "switch port.",
1853 						    vgenp->instance);
1854 						mutex_exit(&vgenp->lock);
1855 						return (MDEG_FAILURE);
1856 					}
1857 					break;
1858 				}
1859 			}
1860 		}
1861 		if (vsw_idx == -1) {
1862 			DWARN(vgenp, NULL, "can't find vsw_port\n");
1863 			mutex_exit(&vgenp->lock);
1864 			return (MDEG_FAILURE);
1865 		}
1866 	}
1867 
1868 	for (idx = 0; idx < resp->added.nelem; idx++) {
1869 		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
1870 			continue;
1871 
1872 		/* If this port can't be added just skip it. */
1873 		(void) vgen_add_port(vgenp, resp->added.mdp,
1874 		    resp->added.mdep[idx]);
1875 	}
1876 
1877 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1878 		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
1879 		    resp->match_curr.mdep[idx],
1880 		    resp->match_prev.mdp,
1881 		    resp->match_prev.mdep[idx]);
1882 	}
1883 
1884 	mutex_exit(&vgenp->lock);
1885 	DBG1(vgenp, NULL, "exit\n");
1886 	return (MDEG_SUCCESS);
1887 }
1888 
1889 /* mdeg callback function for the vnet node */
1890 static int
1891 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1892 {
1893 	vgen_t		*vgenp;
1894 	vnet_t		*vnetp;
1895 	md_t		*mdp;
1896 	mde_cookie_t	node;
1897 	uint64_t	inst;
1898 	char		*node_name = NULL;
1899 
1900 	if ((resp == NULL) || (cb_argp == NULL)) {
1901 		return (MDEG_FAILURE);
1902 	}
1903 
1904 	vgenp = (vgen_t *)cb_argp;
1905 	vnetp = vgenp->vnetp;
1906 
1907 	DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
1908 	    " : prev matched %d", resp->added.nelem, resp->removed.nelem,
1909 	    resp->match_curr.nelem, resp->match_prev.nelem);
1910 
1911 	mutex_enter(&vgenp->lock);
1912 
1913 	/*
1914 	 * We get an initial callback for this node as 'added' after
1915 	 * registering with mdeg. Note that we would have already gathered
1916 	 * information about this vnet node by walking MD earlier during attach
1917 	 * (in vgen_read_mdprops()). So, there is a window where the properties
1918 	 * of this node might have changed when we get this initial 'added'
1919 	 * callback. We handle this as if an update occured and invoke the same
1920 	 * function which handles updates to the properties of this vnet-node
1921 	 * if any. A non-zero 'match' value indicates that the MD has been
1922 	 * updated and that a 'network' node is present which may or may not
1923 	 * have been updated. It is up to the clients to examine their own
1924 	 * nodes and determine if they have changed.
1925 	 */
1926 	if (resp->added.nelem != 0) {
1927 
1928 		if (resp->added.nelem != 1) {
1929 			cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
1930 			    "invalid: %d\n", vnetp->instance,
1931 			    resp->added.nelem);
1932 			goto vgen_mdeg_cb_err;
1933 		}
1934 
1935 		mdp = resp->added.mdp;
1936 		node = resp->added.mdep[0];
1937 
1938 	} else if (resp->match_curr.nelem != 0) {
1939 
1940 		if (resp->match_curr.nelem != 1) {
1941 			cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
1942 			    "invalid: %d\n", vnetp->instance,
1943 			    resp->match_curr.nelem);
1944 			goto vgen_mdeg_cb_err;
1945 		}
1946 
1947 		mdp = resp->match_curr.mdp;
1948 		node = resp->match_curr.mdep[0];
1949 
1950 	} else {
1951 		goto vgen_mdeg_cb_err;
1952 	}
1953 
1954 	/* Validate name and instance */
1955 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1956 		DERR(vgenp, NULL, "unable to get node name\n");
1957 		goto vgen_mdeg_cb_err;
1958 	}
1959 
1960 	/* is this a virtual-network device? */
1961 	if (strcmp(node_name, vnet_propname) != 0) {
1962 		DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
1963 		goto vgen_mdeg_cb_err;
1964 	}
1965 
1966 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1967 		DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
1968 		goto vgen_mdeg_cb_err;
1969 	}
1970 
1971 	/* is this the right instance of vnet? */
1972 	if (inst != vgenp->regprop) {
1973 		DERR(vgenp, NULL,  "Invalid cfg-handle: %lx\n", inst);
1974 		goto vgen_mdeg_cb_err;
1975 	}
1976 
1977 	vgen_update_md_prop(vgenp, mdp, node);
1978 
1979 	mutex_exit(&vgenp->lock);
1980 	return (MDEG_SUCCESS);
1981 
1982 vgen_mdeg_cb_err:
1983 	mutex_exit(&vgenp->lock);
1984 	return (MDEG_FAILURE);
1985 }
1986 
1987 /*
1988  * Check to see if the relevant properties in the specified node have
1989  * changed, and if so take the appropriate action.
1990  */
1991 static void
1992 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1993 {
1994 	uint16_t	pvid;
1995 	uint16_t	*vids;
1996 	uint16_t	nvids;
1997 	vnet_t		*vnetp = vgenp->vnetp;
1998 	uint32_t	mtu;
1999 	boolean_t	pls_update;
2000 	enum		{ MD_init = 0x1,
2001 			    MD_vlans = 0x2,
2002 			    MD_mtu = 0x4,
2003 			    MD_pls = 0x8 } updated;
2004 	int		rv;
2005 
2006 	updated = MD_init;
2007 
2008 	/* Read the vlan ids */
2009 	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
2010 	    &nvids, NULL);
2011 
2012 	/* Determine if there are any vlan id updates */
2013 	if ((pvid != vnetp->pvid) ||		/* pvid changed? */
2014 	    (nvids != vnetp->nvids) ||		/* # of vids changed? */
2015 	    ((nvids != 0) && (vnetp->nvids != 0) &&	/* vids changed? */
2016 	    bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
2017 		updated |= MD_vlans;
2018 	}
2019 
2020 	/* Read mtu */
2021 	vgen_mtu_read(vgenp, mdp, mdex, &mtu);
2022 	if (mtu != vnetp->mtu) {
2023 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2024 			updated |= MD_mtu;
2025 		} else {
2026 			cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
2027 			    " as the specified value:%d is invalid\n",
2028 			    vnetp->instance, mtu);
2029 		}
2030 	}
2031 
2032 	/*
2033 	 * Read the 'linkprop' property.
2034 	 */
2035 	vgen_linkprop_read(vgenp, mdp, mdex, &pls_update);
2036 	if (pls_update != vnetp->pls_update) {
2037 		updated |= MD_pls;
2038 	}
2039 
2040 	/* Now process the updated props */
2041 
2042 	if (updated & MD_vlans) {
2043 
2044 		/* save the new vlan ids */
2045 		vnetp->pvid = pvid;
2046 		if (vnetp->nvids != 0) {
2047 			kmem_free(vnetp->vids,
2048 			    sizeof (uint16_t) * vnetp->nvids);
2049 			vnetp->nvids = 0;
2050 		}
2051 		if (nvids != 0) {
2052 			vnetp->nvids = nvids;
2053 			vnetp->vids = vids;
2054 		}
2055 
2056 		/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
2057 		vgen_reset_vlan_unaware_ports(vgenp);
2058 
2059 	} else {
2060 
2061 		if (nvids != 0) {
2062 			kmem_free(vids, sizeof (uint16_t) * nvids);
2063 		}
2064 	}
2065 
2066 	if (updated & MD_mtu) {
2067 
2068 		DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
2069 		    vnetp->mtu, mtu);
2070 
2071 		rv = vnet_mtu_update(vnetp, mtu);
2072 		if (rv == 0) {
2073 			vgenp->max_frame_size = mtu +
2074 			    sizeof (struct ether_header) + VLAN_TAGSZ;
2075 		}
2076 	}
2077 
2078 	if (updated & MD_pls) {
2079 		/* enable/disable physical link state updates */
2080 		vnetp->pls_update = pls_update;
2081 		mutex_exit(&vgenp->lock);
2082 
2083 		/* reset vsw-port to re-negotiate with the updated prop. */
2084 		vgen_reset_vsw_port(vgenp);
2085 
2086 		mutex_enter(&vgenp->lock);
2087 	}
2088 }
2089 
2090 /* add a new port to the device */
2091 static int
2092 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2093 {
2094 	vgen_port_t	*portp;
2095 	int		rv;
2096 
2097 	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2098 
2099 	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2100 	if (rv != DDI_SUCCESS) {
2101 		KMEM_FREE(portp);
2102 		return (DDI_FAILURE);
2103 	}
2104 
2105 	rv = vgen_port_attach(portp);
2106 	if (rv != DDI_SUCCESS) {
2107 		return (DDI_FAILURE);
2108 	}
2109 
2110 	return (DDI_SUCCESS);
2111 }
2112 
2113 /* read properties of the port from its md node */
2114 static int
2115 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
2116 	mde_cookie_t mdex)
2117 {
2118 	uint64_t		port_num;
2119 	uint64_t		*ldc_ids;
2120 	uint64_t		macaddr;
2121 	uint64_t		val;
2122 	int			num_ldcs;
2123 	int			i;
2124 	int			addrsz;
2125 	int			num_nodes = 0;
2126 	int			listsz = 0;
2127 	mde_cookie_t		*listp = NULL;
2128 	uint8_t			*addrp;
2129 	struct ether_addr	ea;
2130 
2131 	/* read "id" property to get the port number */
2132 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2133 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2134 		return (DDI_FAILURE);
2135 	}
2136 
2137 	/*
2138 	 * Find the channel endpoint node(s) under this port node.
2139 	 */
2140 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2141 		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
2142 		    num_nodes);
2143 		return (DDI_FAILURE);
2144 	}
2145 
2146 	/* allocate space for node list */
2147 	listsz = num_nodes * sizeof (mde_cookie_t);
2148 	listp = kmem_zalloc(listsz, KM_NOSLEEP);
2149 	if (listp == NULL)
2150 		return (DDI_FAILURE);
2151 
2152 	num_ldcs = md_scan_dag(mdp, mdex,
2153 	    md_find_name(mdp, channel_propname),
2154 	    md_find_name(mdp, "fwd"), listp);
2155 
2156 	if (num_ldcs <= 0) {
2157 		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
2158 		kmem_free(listp, listsz);
2159 		return (DDI_FAILURE);
2160 	}
2161 
2162 	if (num_ldcs > 1) {
2163 		DWARN(vgenp, NULL, "Port %d: Number of channels %d > 1\n",
2164 		    port_num, num_ldcs);
2165 	}
2166 
2167 	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
2168 	if (ldc_ids == NULL) {
2169 		kmem_free(listp, listsz);
2170 		return (DDI_FAILURE);
2171 	}
2172 
2173 	for (i = 0; i < num_ldcs; i++) {
2174 		/* read channel ids */
2175 		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
2176 			DWARN(vgenp, NULL, "prop(%s) not found\n",
2177 			    id_propname);
2178 			kmem_free(listp, listsz);
2179 			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2180 			return (DDI_FAILURE);
2181 		}
2182 		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
2183 	}
2184 
2185 	kmem_free(listp, listsz);
2186 
2187 	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
2188 	    &addrsz)) {
2189 		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
2190 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2191 		return (DDI_FAILURE);
2192 	}
2193 
2194 	if (addrsz < ETHERADDRL) {
2195 		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
2196 		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2197 		return (DDI_FAILURE);
2198 	}
2199 
2200 	macaddr = *((uint64_t *)addrp);
2201 
2202 	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
2203 
2204 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2205 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2206 		macaddr >>= 8;
2207 	}
2208 
2209 	if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
2210 		if (val == 0) {
2211 			/* This port is connected to the vswitch */
2212 			portp->is_vsw_port = B_TRUE;
2213 		} else {
2214 			portp->is_vsw_port = B_FALSE;
2215 		}
2216 	}
2217 
2218 	/* now update all properties into the port */
2219 	portp->vgenp = vgenp;
2220 	portp->port_num = port_num;
2221 	ether_copy(&ea, &portp->macaddr);
2222 	portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
2223 	bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
2224 	portp->num_ldcs = num_ldcs;
2225 
2226 	/* read vlan id properties of this port node */
2227 	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
2228 	    &portp->vids, &portp->nvids, NULL);
2229 
2230 	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2231 
2232 	return (DDI_SUCCESS);
2233 }
2234 
2235 /* remove a port from the device */
2236 static int
2237 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2238 {
2239 	uint64_t	port_num;
2240 	vgen_port_t	*portp;
2241 	vgen_portlist_t	*plistp;
2242 
2243 	/* read "id" property to get the port number */
2244 	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2245 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2246 		return (DDI_FAILURE);
2247 	}
2248 
2249 	plistp = &(vgenp->vgenports);
2250 
2251 	WRITE_ENTER(&plistp->rwlock);
2252 	portp = vgen_port_lookup(plistp, (int)port_num);
2253 	if (portp == NULL) {
2254 		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2255 		RW_EXIT(&plistp->rwlock);
2256 		return (DDI_FAILURE);
2257 	}
2258 
2259 	vgen_port_detach_mdeg(portp);
2260 	RW_EXIT(&plistp->rwlock);
2261 
2262 	return (DDI_SUCCESS);
2263 }
2264 
2265 /* attach a port to the device based on mdeg data */
2266 static int
2267 vgen_port_attach(vgen_port_t *portp)
2268 {
2269 	vgen_portlist_t		*plistp;
2270 	vgen_t			*vgenp;
2271 	uint64_t		*ldcids;
2272 	mac_register_t		*macp;
2273 	vio_net_res_type_t	type;
2274 	int			rv;
2275 
2276 	ASSERT(portp != NULL);
2277 	vgenp = portp->vgenp;
2278 	ldcids = portp->ldc_ids;
2279 
2280 	DBG2(vgenp, NULL, "port_num(%d), ldcid(%lx)\n",
2281 	    portp->port_num, ldcids[0]);
2282 
2283 	mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
2284 
2285 	/*
2286 	 * attach the channel under the port using its channel id;
2287 	 * note that we only support one channel per port for now.
2288 	 */
2289 	if (vgen_ldc_attach(portp, ldcids[0]) == DDI_FAILURE) {
2290 		vgen_port_detach(portp);
2291 		return (DDI_FAILURE);
2292 	}
2293 
2294 	/* create vlan id hash table */
2295 	vgen_vlan_create_hash(portp);
2296 
2297 	if (portp->is_vsw_port == B_TRUE) {
2298 		/* This port is connected to the switch port */
2299 		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
2300 		type = VIO_NET_RES_LDC_SERVICE;
2301 	} else {
2302 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
2303 		type = VIO_NET_RES_LDC_GUEST;
2304 	}
2305 
2306 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2307 		vgen_port_detach(portp);
2308 		return (DDI_FAILURE);
2309 	}
2310 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2311 	macp->m_driver = portp;
2312 	macp->m_dip = vgenp->vnetdip;
2313 	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
2314 	macp->m_callbacks = &vgen_m_callbacks;
2315 	macp->m_min_sdu = 0;
2316 	macp->m_max_sdu = ETHERMTU;
2317 
2318 	mutex_enter(&portp->lock);
2319 	rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
2320 	    portp->macaddr, &portp->vhp, &portp->vcb);
2321 	mutex_exit(&portp->lock);
2322 	mac_free(macp);
2323 
2324 	if (rv == 0) {
2325 		/* link it into the list of ports */
2326 		plistp = &(vgenp->vgenports);
2327 		WRITE_ENTER(&plistp->rwlock);
2328 		vgen_port_list_insert(portp);
2329 		RW_EXIT(&plistp->rwlock);
2330 
2331 		if (portp->is_vsw_port == B_TRUE) {
2332 			/* We now have the vswitch port attached */
2333 			vgenp->vsw_portp = portp;
2334 			(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
2335 		}
2336 	} else {
2337 		DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
2338 		    portp);
2339 		vgen_port_detach(portp);
2340 	}
2341 
2342 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2343 	return (DDI_SUCCESS);
2344 }
2345 
2346 /* detach a port from the device based on mdeg data */
2347 static void
2348 vgen_port_detach_mdeg(vgen_port_t *portp)
2349 {
2350 	vgen_t *vgenp = portp->vgenp;
2351 
2352 	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
2353 
2354 	mutex_enter(&portp->lock);
2355 
2356 	/* stop the port if needed */
2357 	if (portp->flags & VGEN_STARTED) {
2358 		vgen_port_uninit(portp);
2359 		portp->flags &= ~(VGEN_STARTED);
2360 	}
2361 
2362 	mutex_exit(&portp->lock);
2363 	vgen_port_detach(portp);
2364 
2365 	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2366 }
2367 
2368 static int
2369 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2370 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2371 {
2372 	uint64_t	cport_num;
2373 	uint64_t	pport_num;
2374 	vgen_portlist_t	*plistp;
2375 	vgen_port_t	*portp;
2376 	boolean_t	updated_vlans = B_FALSE;
2377 	uint16_t	pvid;
2378 	uint16_t	*vids;
2379 	uint16_t	nvids;
2380 
2381 	/*
2382 	 * For now, we get port updates only if vlan ids changed.
2383 	 * We read the port num and do some sanity check.
2384 	 */
2385 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2386 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2387 		return (DDI_FAILURE);
2388 	}
2389 
2390 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2391 		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2392 		return (DDI_FAILURE);
2393 	}
2394 	if (cport_num != pport_num)
2395 		return (DDI_FAILURE);
2396 
2397 	plistp = &(vgenp->vgenports);
2398 
2399 	READ_ENTER(&plistp->rwlock);
2400 
2401 	portp = vgen_port_lookup(plistp, (int)cport_num);
2402 	if (portp == NULL) {
2403 		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
2404 		RW_EXIT(&plistp->rwlock);
2405 		return (DDI_FAILURE);
2406 	}
2407 
2408 	/* Read the vlan ids */
2409 	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
2410 	    &nvids, NULL);
2411 
2412 	/* Determine if there are any vlan id updates */
2413 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2414 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2415 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2416 	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2417 		updated_vlans = B_TRUE;
2418 	}
2419 
2420 	if (updated_vlans == B_FALSE) {
2421 		RW_EXIT(&plistp->rwlock);
2422 		return (DDI_FAILURE);
2423 	}
2424 
2425 	/* remove the port from vlans it has been assigned to */
2426 	vgen_vlan_remove_ids(portp);
2427 
2428 	/* save the new vlan ids */
2429 	portp->pvid = pvid;
2430 	if (portp->nvids != 0) {
2431 		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2432 		portp->nvids = 0;
2433 	}
2434 	if (nvids != 0) {
2435 		portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2436 		bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2437 		portp->nvids = nvids;
2438 		kmem_free(vids, sizeof (uint16_t) * nvids);
2439 	}
2440 
2441 	/* add port to the new vlans */
2442 	vgen_vlan_add_ids(portp);
2443 
2444 	/* reset the port if it is vlan unaware (ver < 1.3) */
2445 	vgen_vlan_unaware_port_reset(portp);
2446 
2447 	RW_EXIT(&plistp->rwlock);
2448 
2449 	return (DDI_SUCCESS);
2450 }
2451 
2452 static uint64_t
2453 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2454 {
2455 	return (vgen_ldc_stat(portp->ldcp, stat));
2456 }
2457 
2458 /* attach the channel corresponding to the given ldc_id to the port */
2459 static int
2460 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
2461 {
2462 	vgen_t 		*vgenp;
2463 	vgen_ldc_t 	*ldcp;
2464 	ldc_attr_t 	attr;
2465 	int 		status;
2466 	ldc_status_t	istatus;
2467 	char		kname[MAXNAMELEN];
2468 	int		instance;
2469 	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
2470 		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
2471 		AST_ldc_reg_cb = 0x8 } attach_state;
2472 
2473 	attach_state = AST_init;
2474 	vgenp = portp->vgenp;
2475 
2476 	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
2477 	if (ldcp == NULL) {
2478 		goto ldc_attach_failed;
2479 	}
2480 	ldcp->ldc_id = ldc_id;
2481 	ldcp->portp = portp;
2482 
2483 	attach_state |= AST_ldc_alloc;
2484 
2485 	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
2486 	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
2487 	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
2488 	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
2489 	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
2490 	mutex_init(&ldcp->pollq_lock, NULL, MUTEX_DRIVER, NULL);
2491 	mutex_init(&ldcp->msg_thr_lock, NULL, MUTEX_DRIVER, NULL);
2492 	cv_init(&ldcp->msg_thr_cv, NULL, CV_DRIVER, NULL);
2493 
2494 	attach_state |= AST_mutex_init;
2495 
2496 	attr.devclass = LDC_DEV_NT;
2497 	attr.instance = vgenp->instance;
2498 	attr.mode = LDC_MODE_UNRELIABLE;
2499 	attr.mtu = vgen_ldc_mtu;
2500 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
2501 	if (status != 0) {
2502 		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
2503 		goto ldc_attach_failed;
2504 	}
2505 	attach_state |= AST_ldc_init;
2506 
2507 	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
2508 	if (status != 0) {
2509 		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
2510 		    status);
2511 		goto ldc_attach_failed;
2512 	}
2513 	/*
2514 	 * allocate a message for ldc_read()s, big enough to hold ctrl and
2515 	 * data msgs, including raw data msgs used to recv priority frames.
2516 	 */
2517 	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
2518 	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
2519 	attach_state |= AST_ldc_reg_cb;
2520 
2521 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2522 	ASSERT(istatus == LDC_INIT);
2523 	ldcp->ldc_status = istatus;
2524 
2525 	/* Setup kstats for the channel */
2526 	instance = vgenp->instance;
2527 	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
2528 	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
2529 	if (ldcp->ksp == NULL) {
2530 		goto ldc_attach_failed;
2531 	}
2532 
2533 	/* initialize vgen_versions supported */
2534 	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
2535 	vgen_reset_vnet_proto_ops(ldcp);
2536 
2537 	/* Link this channel to the port */
2538 	portp->ldcp = ldcp;
2539 
2540 	ldcp->link_state = LINK_STATE_UNKNOWN;
2541 #ifdef	VNET_IOC_DEBUG
2542 	ldcp->link_down_forced = B_FALSE;
2543 #endif
2544 	ldcp->flags |= CHANNEL_ATTACHED;
2545 	return (DDI_SUCCESS);
2546 
2547 ldc_attach_failed:
2548 	if (attach_state & AST_ldc_reg_cb) {
2549 		(void) ldc_unreg_callback(ldcp->ldc_handle);
2550 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
2551 	}
2552 
2553 	if (attach_state & AST_ldc_init) {
2554 		(void) ldc_fini(ldcp->ldc_handle);
2555 	}
2556 	if (attach_state & AST_mutex_init) {
2557 		mutex_destroy(&ldcp->tclock);
2558 		mutex_destroy(&ldcp->txlock);
2559 		mutex_destroy(&ldcp->cblock);
2560 		mutex_destroy(&ldcp->wrlock);
2561 		mutex_destroy(&ldcp->rxlock);
2562 		mutex_destroy(&ldcp->pollq_lock);
2563 	}
2564 	if (attach_state & AST_ldc_alloc) {
2565 		KMEM_FREE(ldcp);
2566 	}
2567 	return (DDI_FAILURE);
2568 }
2569 
2570 /* detach a channel from the port */
2571 static void
2572 vgen_ldc_detach(vgen_ldc_t *ldcp)
2573 {
2574 	vgen_port_t	*portp;
2575 	vgen_t 		*vgenp;
2576 
2577 	ASSERT(ldcp != NULL);
2578 
2579 	portp = ldcp->portp;
2580 	vgenp = portp->vgenp;
2581 
2582 	if (ldcp->ldc_status != LDC_INIT) {
2583 		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
2584 	}
2585 
2586 	if (ldcp->flags & CHANNEL_ATTACHED) {
2587 		ldcp->flags &= ~(CHANNEL_ATTACHED);
2588 
2589 		(void) ldc_unreg_callback(ldcp->ldc_handle);
2590 		(void) ldc_fini(ldcp->ldc_handle);
2591 
2592 		kmem_free(ldcp->ldcmsg, ldcp->msglen);
2593 		vgen_destroy_kstats(ldcp->ksp);
2594 		ldcp->ksp = NULL;
2595 		mutex_destroy(&ldcp->tclock);
2596 		mutex_destroy(&ldcp->txlock);
2597 		mutex_destroy(&ldcp->cblock);
2598 		mutex_destroy(&ldcp->wrlock);
2599 		mutex_destroy(&ldcp->rxlock);
2600 		mutex_destroy(&ldcp->pollq_lock);
2601 		mutex_destroy(&ldcp->msg_thr_lock);
2602 		cv_destroy(&ldcp->msg_thr_cv);
2603 
2604 		KMEM_FREE(ldcp);
2605 	}
2606 }
2607 
2608 /* enable transmit/receive on the channel */
2609 static int
2610 vgen_ldc_init(vgen_ldc_t *ldcp)
2611 {
2612 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
2613 	ldc_status_t	istatus;
2614 	int		rv;
2615 	enum		{ ST_init = 0x0, ST_ldc_open = 0x1,
2616 			    ST_cb_enable = 0x2} init_state;
2617 	int		flag = 0;
2618 
2619 	init_state = ST_init;
2620 
2621 	DBG1(vgenp, ldcp, "enter\n");
2622 	LDC_LOCK(ldcp);
2623 
2624 	rv = ldc_open(ldcp->ldc_handle);
2625 	if (rv != 0) {
2626 		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
2627 		goto ldcinit_failed;
2628 	}
2629 	init_state |= ST_ldc_open;
2630 
2631 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2632 	if (istatus != LDC_OPEN && istatus != LDC_READY) {
2633 		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
2634 		goto ldcinit_failed;
2635 	}
2636 	ldcp->ldc_status = istatus;
2637 
2638 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
2639 	if (rv != 0) {
2640 		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
2641 		goto ldcinit_failed;
2642 	}
2643 
2644 	init_state |= ST_cb_enable;
2645 
2646 	vgen_ldc_up(ldcp);
2647 
2648 	(void) ldc_status(ldcp->ldc_handle, &istatus);
2649 	if (istatus == LDC_UP) {
2650 		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
2651 	}
2652 
2653 	ldcp->ldc_status = istatus;
2654 
2655 	ldcp->hphase = VH_PHASE0;
2656 	ldcp->hstate = 0;
2657 	ldcp->flags |= CHANNEL_STARTED;
2658 
2659 	vgen_setup_handshake_params(ldcp);
2660 
2661 	/* if channel is already UP - start handshake */
2662 	if (istatus == LDC_UP) {
2663 		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2664 		if (ldcp->portp != vgenp->vsw_portp) {
2665 			/*
2666 			 * As the channel is up, use this port from now on.
2667 			 */
2668 			(void) atomic_swap_32(
2669 			    &ldcp->portp->use_vsw_port, B_FALSE);
2670 		}
2671 
2672 		/* Initialize local session id */
2673 		ldcp->local_sid = ddi_get_lbolt();
2674 
2675 		/* clear peer session id */
2676 		ldcp->peer_sid = 0;
2677 
2678 		mutex_exit(&ldcp->tclock);
2679 		mutex_exit(&ldcp->txlock);
2680 		mutex_exit(&ldcp->wrlock);
2681 		mutex_exit(&ldcp->rxlock);
2682 		rv = vgen_handshake(vh_nextphase(ldcp));
2683 		mutex_exit(&ldcp->cblock);
2684 		if (rv != 0) {
2685 			flag = (rv == ECONNRESET) ? VGEN_FLAG_EVT_RESET :
2686 			    VGEN_FLAG_NEED_LDCRESET;
2687 			(void) vgen_process_reset(ldcp, flag);
2688 		}
2689 	} else {
2690 		LDC_UNLOCK(ldcp);
2691 	}
2692 
2693 	return (DDI_SUCCESS);
2694 
2695 ldcinit_failed:
2696 	if (init_state & ST_cb_enable) {
2697 		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
2698 	}
2699 	if (init_state & ST_ldc_open) {
2700 		(void) ldc_close(ldcp->ldc_handle);
2701 	}
2702 	LDC_UNLOCK(ldcp);
2703 	DBG1(vgenp, ldcp, "exit\n");
2704 	return (DDI_FAILURE);
2705 }
2706 
2707 /* stop transmit/receive on the channel */
2708 static void
2709 vgen_ldc_uninit(vgen_ldc_t *ldcp)
2710 {
2711 	vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2712 
2713 	DBG1(vgenp, ldcp, "enter\n");
2714 
2715 	LDC_LOCK(ldcp);
2716 
2717 	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
2718 		LDC_UNLOCK(ldcp);
2719 		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
2720 		return;
2721 	}
2722 
2723 	LDC_UNLOCK(ldcp);
2724 
2725 	while (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
2726 		delay(drv_usectohz(VGEN_LDC_UNINIT_DELAY));
2727 	}
2728 
2729 	(void) vgen_process_reset(ldcp, VGEN_FLAG_UNINIT);
2730 
2731 	DBG1(vgenp, ldcp, "exit\n");
2732 }
2733 
2734 /*
2735  * Create a descriptor ring, that will be exported to the peer for mapping.
2736  */
2737 static int
2738 vgen_create_dring(vgen_ldc_t *ldcp)
2739 {
2740 	vgen_hparams_t	*lp = &ldcp->local_hparams;
2741 	int		rv;
2742 
2743 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
2744 		rv = vgen_create_rx_dring(ldcp);
2745 	} else {
2746 		rv = vgen_create_tx_dring(ldcp);
2747 	}
2748 
2749 	return (rv);
2750 }
2751 
2752 /*
2753  * Destroy the descriptor ring.
2754  */
2755 static void
2756 vgen_destroy_dring(vgen_ldc_t *ldcp)
2757 {
2758 	vgen_hparams_t	*lp = &ldcp->local_hparams;
2759 
2760 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
2761 		vgen_destroy_rx_dring(ldcp);
2762 	} else {
2763 		vgen_destroy_tx_dring(ldcp);
2764 	}
2765 }
2766 
2767 /*
2768  * Map the descriptor ring exported by the peer.
2769  */
2770 static int
2771 vgen_map_dring(vgen_ldc_t *ldcp, void *pkt)
2772 {
2773 	int		rv;
2774 	vgen_hparams_t	*lp = &ldcp->local_hparams;
2775 
2776 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
2777 		/*
2778 		 * In RxDringData mode, dring that we map in
2779 		 * becomes our transmit descriptor ring.
2780 		 */
2781 		rv = vgen_map_tx_dring(ldcp, pkt);
2782 	} else {
2783 
2784 		/*
2785 		 * In TxDring mode, dring that we map in
2786 		 * becomes our receive descriptor ring.
2787 		 */
2788 		rv = vgen_map_rx_dring(ldcp, pkt);
2789 	}
2790 
2791 	return (rv);
2792 }
2793 
2794 /*
2795  * Unmap the descriptor ring exported by the peer.
2796  */
2797 static void
2798 vgen_unmap_dring(vgen_ldc_t *ldcp)
2799 {
2800 	vgen_hparams_t	*lp = &ldcp->local_hparams;
2801 
2802 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
2803 		vgen_unmap_tx_dring(ldcp);
2804 	} else {
2805 		vgen_unmap_rx_dring(ldcp);
2806 	}
2807 }
2808 
2809 void
2810 vgen_destroy_rxpools(void *arg)
2811 {
2812 	vio_mblk_pool_t	*poolp = (vio_mblk_pool_t *)arg;
2813 	vio_mblk_pool_t	*npoolp;
2814 
2815 	while (poolp != NULL) {
2816 		npoolp =  poolp->nextp;
2817 		while (vio_destroy_mblks(poolp) != 0) {
2818 			delay(drv_usectohz(vgen_rxpool_cleanup_delay));
2819 		}
2820 		poolp = npoolp;
2821 	}
2822 }
2823 
2824 /* get channel statistics */
2825 static uint64_t
2826 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
2827 {
2828 	vgen_stats_t	*statsp;
2829 	uint64_t	val;
2830 
2831 	val = 0;
2832 	statsp = &ldcp->stats;
2833 	switch (stat) {
2834 
2835 	case MAC_STAT_MULTIRCV:
2836 		val = statsp->multircv;
2837 		break;
2838 
2839 	case MAC_STAT_BRDCSTRCV:
2840 		val = statsp->brdcstrcv;
2841 		break;
2842 
2843 	case MAC_STAT_MULTIXMT:
2844 		val = statsp->multixmt;
2845 		break;
2846 
2847 	case MAC_STAT_BRDCSTXMT:
2848 		val = statsp->brdcstxmt;
2849 		break;
2850 
2851 	case MAC_STAT_NORCVBUF:
2852 		val = statsp->norcvbuf;
2853 		break;
2854 
2855 	case MAC_STAT_IERRORS:
2856 		val = statsp->ierrors;
2857 		break;
2858 
2859 	case MAC_STAT_NOXMTBUF:
2860 		val = statsp->noxmtbuf;
2861 		break;
2862 
2863 	case MAC_STAT_OERRORS:
2864 		val = statsp->oerrors;
2865 		break;
2866 
2867 	case MAC_STAT_COLLISIONS:
2868 		break;
2869 
2870 	case MAC_STAT_RBYTES:
2871 		val = statsp->rbytes;
2872 		break;
2873 
2874 	case MAC_STAT_IPACKETS:
2875 		val = statsp->ipackets;
2876 		break;
2877 
2878 	case MAC_STAT_OBYTES:
2879 		val = statsp->obytes;
2880 		break;
2881 
2882 	case MAC_STAT_OPACKETS:
2883 		val = statsp->opackets;
2884 		break;
2885 
2886 	/* stats not relevant to ldc, return 0 */
2887 	case MAC_STAT_IFSPEED:
2888 	case ETHER_STAT_ALIGN_ERRORS:
2889 	case ETHER_STAT_FCS_ERRORS:
2890 	case ETHER_STAT_FIRST_COLLISIONS:
2891 	case ETHER_STAT_MULTI_COLLISIONS:
2892 	case ETHER_STAT_DEFER_XMTS:
2893 	case ETHER_STAT_TX_LATE_COLLISIONS:
2894 	case ETHER_STAT_EX_COLLISIONS:
2895 	case ETHER_STAT_MACXMT_ERRORS:
2896 	case ETHER_STAT_CARRIER_ERRORS:
2897 	case ETHER_STAT_TOOLONG_ERRORS:
2898 	case ETHER_STAT_XCVR_ADDR:
2899 	case ETHER_STAT_XCVR_ID:
2900 	case ETHER_STAT_XCVR_INUSE:
2901 	case ETHER_STAT_CAP_1000FDX:
2902 	case ETHER_STAT_CAP_1000HDX:
2903 	case ETHER_STAT_CAP_100FDX:
2904 	case ETHER_STAT_CAP_100HDX:
2905 	case ETHER_STAT_CAP_10FDX:
2906 	case ETHER_STAT_CAP_10HDX:
2907 	case ETHER_STAT_CAP_ASMPAUSE:
2908 	case ETHER_STAT_CAP_PAUSE:
2909 	case ETHER_STAT_CAP_AUTONEG:
2910 	case ETHER_STAT_ADV_CAP_1000FDX:
2911 	case ETHER_STAT_ADV_CAP_1000HDX:
2912 	case ETHER_STAT_ADV_CAP_100FDX:
2913 	case ETHER_STAT_ADV_CAP_100HDX:
2914 	case ETHER_STAT_ADV_CAP_10FDX:
2915 	case ETHER_STAT_ADV_CAP_10HDX:
2916 	case ETHER_STAT_ADV_CAP_ASMPAUSE:
2917 	case ETHER_STAT_ADV_CAP_PAUSE:
2918 	case ETHER_STAT_ADV_CAP_AUTONEG:
2919 	case ETHER_STAT_LP_CAP_1000FDX:
2920 	case ETHER_STAT_LP_CAP_1000HDX:
2921 	case ETHER_STAT_LP_CAP_100FDX:
2922 	case ETHER_STAT_LP_CAP_100HDX:
2923 	case ETHER_STAT_LP_CAP_10FDX:
2924 	case ETHER_STAT_LP_CAP_10HDX:
2925 	case ETHER_STAT_LP_CAP_ASMPAUSE:
2926 	case ETHER_STAT_LP_CAP_PAUSE:
2927 	case ETHER_STAT_LP_CAP_AUTONEG:
2928 	case ETHER_STAT_LINK_ASMPAUSE:
2929 	case ETHER_STAT_LINK_PAUSE:
2930 	case ETHER_STAT_LINK_AUTONEG:
2931 	case ETHER_STAT_LINK_DUPLEX:
2932 	default:
2933 		val = 0;
2934 		break;
2935 
2936 	}
2937 	return (val);
2938 }
2939 
2940 /*
2941  * LDC channel is UP, start handshake process with peer.
2942  */
2943 static void
2944 vgen_handle_evt_up(vgen_ldc_t *ldcp)
2945 {
2946 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
2947 
2948 	DBG1(vgenp, ldcp, "enter\n");
2949 
2950 	ASSERT(MUTEX_HELD(&ldcp->cblock));
2951 
2952 	if (ldcp->portp != vgenp->vsw_portp) {
2953 		/*
2954 		 * As the channel is up, use this port from now on.
2955 		 */
2956 		(void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
2957 	}
2958 
2959 	/* Initialize local session id */
2960 	ldcp->local_sid = ddi_get_lbolt();
2961 
2962 	/* clear peer session id */
2963 	ldcp->peer_sid = 0;
2964 
2965 	/* Initiate Handshake process with peer ldc endpoint */
2966 	(void) vgen_handshake(vh_nextphase(ldcp));
2967 
2968 	DBG1(vgenp, ldcp, "exit\n");
2969 }
2970 
2971 /*
2972  * LDC channel is Reset, terminate connection with peer and try to
2973  * bring the channel up again.
2974  */
2975 int
2976 vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller)
2977 {
2978 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2979 		ASSERT(MUTEX_HELD(&ldcp->cblock));
2980 	}
2981 
2982 	/* Set the flag to indicate reset is in progress */
2983 	if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
2984 		/* another thread is already in the process of resetting */
2985 		return (EBUSY);
2986 	}
2987 
2988 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2989 		mutex_exit(&ldcp->cblock);
2990 	}
2991 
2992 	(void) vgen_process_reset(ldcp, VGEN_FLAG_EVT_RESET);
2993 
2994 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2995 		mutex_enter(&ldcp->cblock);
2996 	}
2997 
2998 	return (0);
2999 }
3000 
3001 /* Interrupt handler for the channel */
3002 static uint_t
3003 vgen_ldc_cb(uint64_t event, caddr_t arg)
3004 {
3005 	_NOTE(ARGUNUSED(event))
3006 	vgen_ldc_t	*ldcp;
3007 	vgen_t		*vgenp;
3008 	ldc_status_t 	istatus;
3009 	vgen_stats_t	*statsp;
3010 	uint_t		ret = LDC_SUCCESS;
3011 
3012 	ldcp = (vgen_ldc_t *)arg;
3013 	vgenp = LDC_TO_VGEN(ldcp);
3014 	statsp = &ldcp->stats;
3015 
3016 	DBG1(vgenp, ldcp, "enter\n");
3017 
3018 	mutex_enter(&ldcp->cblock);
3019 	statsp->callbacks++;
3020 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
3021 		DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
3022 		    ldcp->ldc_status);
3023 		mutex_exit(&ldcp->cblock);
3024 		return (LDC_SUCCESS);
3025 	}
3026 
3027 	/*
3028 	 * NOTE: not using switch() as event could be triggered by
3029 	 * a state change and a read request. Also the ordering	of the
3030 	 * check for the event types is deliberate.
3031 	 */
3032 	if (event & LDC_EVT_UP) {
3033 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3034 			DWARN(vgenp, ldcp, "ldc_status err\n");
3035 			/* status couldn't be determined */
3036 			ret = LDC_FAILURE;
3037 			goto ldc_cb_ret;
3038 		}
3039 		ldcp->ldc_status = istatus;
3040 		if (ldcp->ldc_status != LDC_UP) {
3041 			DWARN(vgenp, ldcp, "LDC_EVT_UP received "
3042 			    " but ldc status is not UP(0x%x)\n",
3043 			    ldcp->ldc_status);
3044 			/* spurious interrupt, return success */
3045 			goto ldc_cb_ret;
3046 		}
3047 		DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
3048 		    event, ldcp->ldc_status);
3049 
3050 		vgen_handle_evt_up(ldcp);
3051 
3052 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3053 	}
3054 
3055 	/* Handle RESET/DOWN before READ event */
3056 	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
3057 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3058 			DWARN(vgenp, ldcp, "ldc_status error\n");
3059 			/* status couldn't be determined */
3060 			ret = LDC_FAILURE;
3061 			goto ldc_cb_ret;
3062 		}
3063 		ldcp->ldc_status = istatus;
3064 		DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
3065 		    event, ldcp->ldc_status);
3066 
3067 		(void) vgen_handle_evt_reset(ldcp, VGEN_LDC_CB);
3068 
3069 		/*
3070 		 * As the channel is down/reset, ignore READ event
3071 		 * but print a debug warning message.
3072 		 */
3073 		if (event & LDC_EVT_READ) {
3074 			DWARN(vgenp, ldcp,
3075 			    "LDC_EVT_READ set along with RESET/DOWN\n");
3076 			event &= ~LDC_EVT_READ;
3077 		}
3078 	}
3079 
3080 	if (event & LDC_EVT_READ) {
3081 		DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
3082 		    event, ldcp->ldc_status);
3083 
3084 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3085 
3086 		if (ldcp->msg_thread != NULL) {
3087 			/*
3088 			 * If the receive thread is enabled, then
3089 			 * wakeup the receive thread to process the
3090 			 * LDC messages.
3091 			 */
3092 			mutex_exit(&ldcp->cblock);
3093 			mutex_enter(&ldcp->msg_thr_lock);
3094 			if (!(ldcp->msg_thr_flags & VGEN_WTHR_DATARCVD)) {
3095 				ldcp->msg_thr_flags |= VGEN_WTHR_DATARCVD;
3096 				cv_signal(&ldcp->msg_thr_cv);
3097 			}
3098 			mutex_exit(&ldcp->msg_thr_lock);
3099 			mutex_enter(&ldcp->cblock);
3100 		} else  {
3101 			(void) vgen_handle_evt_read(ldcp, VGEN_LDC_CB);
3102 		}
3103 	}
3104 
3105 ldc_cb_ret:
3106 	mutex_exit(&ldcp->cblock);
3107 	DBG1(vgenp, ldcp, "exit\n");
3108 	return (ret);
3109 }
3110 
3111 int
3112 vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller)
3113 {
3114 	int		rv;
3115 	uint64_t	*ldcmsg;
3116 	size_t		msglen;
3117 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3118 	vio_msg_tag_t	*tagp;
3119 	ldc_status_t 	istatus;
3120 	boolean_t 	has_data;
3121 
3122 	DBG1(vgenp, ldcp, "enter\n");
3123 
3124 	if (caller == VGEN_LDC_CB) {
3125 		ASSERT(MUTEX_HELD(&ldcp->cblock));
3126 	} else if (caller == VGEN_MSG_THR) {
3127 		mutex_enter(&ldcp->cblock);
3128 	} else {
3129 		return (EINVAL);
3130 	}
3131 
3132 	ldcmsg = ldcp->ldcmsg;
3133 
3134 vgen_evtread:
3135 	do {
3136 		msglen = ldcp->msglen;
3137 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
3138 
3139 		if (rv != 0) {
3140 			DWARN(vgenp, ldcp, "ldc_read() failed "
3141 			    "rv(%d) len(%d)\n", rv, msglen);
3142 			if (rv == ECONNRESET)
3143 				goto vgen_evtread_error;
3144 			break;
3145 		}
3146 		if (msglen == 0) {
3147 			DBG2(vgenp, ldcp, "ldc_read NODATA");
3148 			break;
3149 		}
3150 		DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
3151 
3152 		tagp = (vio_msg_tag_t *)ldcmsg;
3153 
3154 		if (ldcp->peer_sid) {
3155 			/*
3156 			 * check sid only after we have received peer's sid
3157 			 * in the version negotiate msg.
3158 			 */
3159 #ifdef DEBUG
3160 			if (vgen_inject_error(ldcp, VGEN_ERR_HSID)) {
3161 				/* simulate bad sid condition */
3162 				tagp->vio_sid = 0;
3163 				vgen_inject_err_flag &= ~(VGEN_ERR_HSID);
3164 			}
3165 #endif
3166 			rv = vgen_check_sid(ldcp, tagp);
3167 			if (rv != VGEN_SUCCESS) {
3168 				/*
3169 				 * If sid mismatch is detected,
3170 				 * reset the channel.
3171 				 */
3172 				DWARN(vgenp, ldcp, "vgen_check_sid() failed\n");
3173 				goto vgen_evtread_error;
3174 			}
3175 		}
3176 
3177 		switch (tagp->vio_msgtype) {
3178 		case VIO_TYPE_CTRL:
3179 			rv = vgen_handle_ctrlmsg(ldcp, tagp);
3180 			if (rv != 0) {
3181 				DWARN(vgenp, ldcp, "vgen_handle_ctrlmsg()"
3182 				    " failed rv(%d)\n", rv);
3183 			}
3184 			break;
3185 
3186 		case VIO_TYPE_DATA:
3187 			rv = vgen_handle_datamsg(ldcp, tagp, msglen);
3188 			if (rv != 0) {
3189 				DWARN(vgenp, ldcp, "vgen_handle_datamsg()"
3190 				    " failed rv(%d)\n", rv);
3191 			}
3192 			break;
3193 
3194 		case VIO_TYPE_ERR:
3195 			vgen_handle_errmsg(ldcp, tagp);
3196 			break;
3197 
3198 		default:
3199 			DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
3200 			    tagp->vio_msgtype);
3201 			break;
3202 		}
3203 
3204 		/*
3205 		 * If an error is encountered, stop processing and
3206 		 * handle the error.
3207 		 */
3208 		if (rv != 0) {
3209 			goto vgen_evtread_error;
3210 		}
3211 
3212 	} while (msglen);
3213 
3214 	/* check once more before exiting */
3215 	rv = ldc_chkq(ldcp->ldc_handle, &has_data);
3216 	if ((rv == 0) && (has_data == B_TRUE)) {
3217 		DTRACE_PROBE1(vgen_chkq, vgen_ldc_t *, ldcp);
3218 		goto vgen_evtread;
3219 	}
3220 
3221 vgen_evtread_error:
3222 	if (rv != 0) {
3223 		/*
3224 		 * We handle the error and then return the error value. If we
3225 		 * are running in the context of the msg worker, the error
3226 		 * tells the worker thread to exit, as the channel would have
3227 		 * been reset.
3228 		 */
3229 		if (rv == ECONNRESET) {
3230 			if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3231 				DWARN(vgenp, ldcp, "ldc_status err\n");
3232 			} else {
3233 				ldcp->ldc_status = istatus;
3234 			}
3235 			(void) vgen_handle_evt_reset(ldcp, caller);
3236 		} else {
3237 			DWARN(vgenp, ldcp, "Calling vgen_ldc_reset()...\n");
3238 			(void) vgen_ldc_reset(ldcp, caller);
3239 		}
3240 	}
3241 
3242 	if (caller == VGEN_MSG_THR) {
3243 		mutex_exit(&ldcp->cblock);
3244 	}
3245 
3246 	DBG1(vgenp, ldcp, "exit\n");
3247 	return (rv);
3248 }
3249 
3250 /* vgen handshake functions */
3251 
3252 /* change the hphase for the channel to the next phase */
3253 static vgen_ldc_t *
3254 vh_nextphase(vgen_ldc_t *ldcp)
3255 {
3256 	if (ldcp->hphase == VH_PHASE4) {
3257 		ldcp->hphase = VH_DONE;
3258 	} else {
3259 		ldcp->hphase++;
3260 	}
3261 	return (ldcp);
3262 }
3263 
3264 /* send version negotiate message to the peer over ldc */
3265 static int
3266 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
3267 {
3268 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3269 	vio_ver_msg_t	vermsg;
3270 	vio_msg_tag_t	*tagp = &vermsg.tag;
3271 	int		rv;
3272 
3273 	bzero(&vermsg, sizeof (vermsg));
3274 
3275 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3276 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3277 	tagp->vio_subtype_env = VIO_VER_INFO;
3278 	tagp->vio_sid = ldcp->local_sid;
3279 
3280 	/* get version msg payload from ldcp->local */
3281 	vermsg.ver_major = ldcp->local_hparams.ver_major;
3282 	vermsg.ver_minor = ldcp->local_hparams.ver_minor;
3283 	vermsg.dev_class = ldcp->local_hparams.dev_class;
3284 
3285 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
3286 	if (rv != VGEN_SUCCESS) {
3287 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3288 		return (rv);
3289 	}
3290 
3291 	ldcp->hstate |= VER_INFO_SENT;
3292 	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
3293 	    vermsg.ver_major, vermsg.ver_minor);
3294 
3295 	return (VGEN_SUCCESS);
3296 }
3297 
3298 /* send attr info message to the peer over ldc */
3299 static int
3300 vgen_send_attr_info(vgen_ldc_t *ldcp)
3301 {
3302 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3303 	vnet_attr_msg_t	attrmsg;
3304 	vio_msg_tag_t	*tagp = &attrmsg.tag;
3305 	int		rv;
3306 
3307 	bzero(&attrmsg, sizeof (attrmsg));
3308 
3309 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3310 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3311 	tagp->vio_subtype_env = VIO_ATTR_INFO;
3312 	tagp->vio_sid = ldcp->local_sid;
3313 
3314 	/* get attr msg payload from ldcp->local */
3315 	attrmsg.mtu = ldcp->local_hparams.mtu;
3316 	attrmsg.addr = ldcp->local_hparams.addr;
3317 	attrmsg.addr_type = ldcp->local_hparams.addr_type;
3318 	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
3319 	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
3320 	attrmsg.physlink_update = ldcp->local_hparams.physlink_update;
3321 	attrmsg.options = ldcp->local_hparams.dring_mode;
3322 
3323 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
3324 	if (rv != VGEN_SUCCESS) {
3325 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3326 		return (rv);
3327 	}
3328 
3329 	ldcp->hstate |= ATTR_INFO_SENT;
3330 	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
3331 
3332 	return (VGEN_SUCCESS);
3333 }
3334 
3335 /*
3336  * Send descriptor ring register message to the peer over ldc.
3337  * Invoked in RxDringData mode.
3338  */
3339 static int
3340 vgen_send_rx_dring_reg(vgen_ldc_t *ldcp)
3341 {
3342 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
3343 	vio_dring_reg_msg_t	*msg;
3344 	vio_dring_reg_ext_msg_t	*emsg;
3345 	int			rv;
3346 	uint8_t			*buf;
3347 	uint_t			msgsize;
3348 
3349 	msgsize = VNET_DRING_REG_EXT_MSG_SIZE(ldcp->rx_data_ncookies);
3350 	msg = kmem_zalloc(msgsize, KM_SLEEP);
3351 
3352 	/* Initialize the common part of dring reg msg */
3353 	vgen_init_dring_reg_msg(ldcp, msg, VIO_RX_DRING_DATA);
3354 
3355 	/* skip over dring cookies at the tail of common section */
3356 	buf = (uint8_t *)msg->cookie;
3357 	ASSERT(msg->ncookies == 1);
3358 	buf += (msg->ncookies * sizeof (ldc_mem_cookie_t));
3359 
3360 	/* Now setup the extended part, specific to RxDringData mode */
3361 	emsg = (vio_dring_reg_ext_msg_t *)buf;
3362 
3363 	/* copy data_ncookies in the msg */
3364 	emsg->data_ncookies = ldcp->rx_data_ncookies;
3365 
3366 	/* copy data area size in the msg */
3367 	emsg->data_area_size = ldcp->rx_data_sz;
3368 
3369 	/* copy data area cookies in the msg */
3370 	bcopy(ldcp->rx_data_cookie, (ldc_mem_cookie_t *)emsg->data_cookie,
3371 	    sizeof (ldc_mem_cookie_t) * ldcp->rx_data_ncookies);
3372 
3373 	rv = vgen_sendmsg(ldcp, (caddr_t)msg, msgsize, B_FALSE);
3374 	if (rv != VGEN_SUCCESS) {
3375 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3376 		kmem_free(msg, msgsize);
3377 		return (rv);
3378 	}
3379 
3380 	ldcp->hstate |= DRING_INFO_SENT;
3381 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
3382 
3383 	kmem_free(msg, msgsize);
3384 	return (VGEN_SUCCESS);
3385 }
3386 
3387 /*
3388  * Send descriptor ring register message to the peer over ldc.
3389  * Invoked in TxDring mode.
3390  */
3391 static int
3392 vgen_send_tx_dring_reg(vgen_ldc_t *ldcp)
3393 {
3394 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
3395 	vio_dring_reg_msg_t	msg;
3396 	int			rv;
3397 
3398 	bzero(&msg, sizeof (msg));
3399 
3400 	/*
3401 	 * Initialize only the common part of dring reg msg in TxDring mode.
3402 	 */
3403 	vgen_init_dring_reg_msg(ldcp, &msg, VIO_TX_DRING);
3404 
3405 	rv = vgen_sendmsg(ldcp, (caddr_t)&msg, sizeof (msg), B_FALSE);
3406 	if (rv != VGEN_SUCCESS) {
3407 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3408 		return (rv);
3409 	}
3410 
3411 	ldcp->hstate |= DRING_INFO_SENT;
3412 	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
3413 
3414 	return (VGEN_SUCCESS);
3415 }
3416 
3417 static int
3418 vgen_send_rdx_info(vgen_ldc_t *ldcp)
3419 {
3420 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3421 	vio_rdx_msg_t	rdxmsg;
3422 	vio_msg_tag_t	*tagp = &rdxmsg.tag;
3423 	int		rv;
3424 
3425 	bzero(&rdxmsg, sizeof (rdxmsg));
3426 
3427 	tagp->vio_msgtype = VIO_TYPE_CTRL;
3428 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
3429 	tagp->vio_subtype_env = VIO_RDX;
3430 	tagp->vio_sid = ldcp->local_sid;
3431 
3432 	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
3433 	if (rv != VGEN_SUCCESS) {
3434 		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3435 		return (rv);
3436 	}
3437 
3438 	ldcp->hstate |= RDX_INFO_SENT;
3439 	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
3440 
3441 	return (VGEN_SUCCESS);
3442 }
3443 
3444 /* send multicast addr info message to vsw */
3445 static int
3446 vgen_send_mcast_info(vgen_ldc_t *ldcp)
3447 {
3448 	vnet_mcast_msg_t	mcastmsg;
3449 	vnet_mcast_msg_t	*msgp;
3450 	vio_msg_tag_t		*tagp;
3451 	vgen_t			*vgenp;
3452 	struct ether_addr	*mca;
3453 	int			rv;
3454 	int			i;
3455 	uint32_t		size;
3456 	uint32_t		mccount;
3457 	uint32_t		n;
3458 
3459 	msgp = &mcastmsg;
3460 	tagp = &msgp->tag;
3461 	vgenp = LDC_TO_VGEN(ldcp);
3462 
3463 	mccount = vgenp->mccount;
3464 	i = 0;
3465 
3466 	do {
3467 		tagp->vio_msgtype = VIO_TYPE_CTRL;
3468 		tagp->vio_subtype = VIO_SUBTYPE_INFO;
3469 		tagp->vio_subtype_env = VNET_MCAST_INFO;
3470 		tagp->vio_sid = ldcp->local_sid;
3471 
3472 		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
3473 		size = n * sizeof (struct ether_addr);
3474 
3475 		mca = &(vgenp->mctab[i]);
3476 		bcopy(mca, (msgp->mca), size);
3477 		msgp->set = B_TRUE;
3478 		msgp->count = n;
3479 
3480 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
3481 		    B_FALSE);
3482 		if (rv != VGEN_SUCCESS) {
3483 			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
3484 			return (rv);
3485 		}
3486 
3487 		mccount -= n;
3488 		i += n;
3489 
3490 	} while (mccount);
3491 
3492 	return (VGEN_SUCCESS);
3493 }
3494 
3495 /*
3496  * vgen_dds_rx -- post DDS messages to vnet.
3497  */
3498 static int
3499 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3500 {
3501 	vio_dds_msg_t	*dmsg = (vio_dds_msg_t *)tagp;
3502 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3503 
3504 	if (dmsg->dds_class != DDS_VNET_NIU) {
3505 		DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
3506 		return (EBADMSG);
3507 	}
3508 	vnet_dds_rx(vgenp->vnetp, dmsg);
3509 	return (0);
3510 }
3511 
3512 /*
3513  * vgen_dds_tx -- an interface called by vnet to send DDS messages.
3514  */
3515 int
3516 vgen_dds_tx(void *arg, void *msg)
3517 {
3518 	vgen_t		*vgenp = arg;
3519 	vio_dds_msg_t	*dmsg = msg;
3520 	vgen_portlist_t	*plistp = &vgenp->vgenports;
3521 	vgen_ldc_t	*ldcp;
3522 	int		rv = EIO;
3523 
3524 	READ_ENTER(&plistp->rwlock);
3525 	ldcp = vgenp->vsw_portp->ldcp;
3526 	if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
3527 		goto vgen_dsend_exit;
3528 	}
3529 
3530 	dmsg->tag.vio_sid = ldcp->local_sid;
3531 	rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
3532 	if (rv != VGEN_SUCCESS) {
3533 		rv = EIO;
3534 	} else {
3535 		rv = 0;
3536 	}
3537 
3538 vgen_dsend_exit:
3539 	RW_EXIT(&plistp->rwlock);
3540 	return (rv);
3541 
3542 }
3543 
3544 /* Initiate Phase 2 of handshake */
3545 static int
3546 vgen_handshake_phase2(vgen_ldc_t *ldcp)
3547 {
3548 	int	rv;
3549 
3550 #ifdef DEBUG
3551 	if (vgen_inject_error(ldcp, VGEN_ERR_HSTATE)) {
3552 		/* simulate out of state condition */
3553 		vgen_inject_err_flag &= ~(VGEN_ERR_HSTATE);
3554 		rv = vgen_send_rdx_info(ldcp);
3555 		return (rv);
3556 	}
3557 	if (vgen_inject_error(ldcp, VGEN_ERR_HTIMEOUT)) {
3558 		/* simulate timeout condition */
3559 		vgen_inject_err_flag &= ~(VGEN_ERR_HTIMEOUT);
3560 		return (VGEN_SUCCESS);
3561 	}
3562 #endif
3563 	rv = vgen_send_attr_info(ldcp);
3564 	if (rv != VGEN_SUCCESS) {
3565 		return (rv);
3566 	}
3567 
3568 	return (VGEN_SUCCESS);
3569 }
3570 
3571 static int
3572 vgen_handshake_phase3(vgen_ldc_t *ldcp)
3573 {
3574 	int		rv;
3575 	vgen_hparams_t	*lp = &ldcp->local_hparams;
3576 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3577 	vgen_stats_t	*statsp = &ldcp->stats;
3578 
3579 	/* dring mode has been negotiated in attr phase; save in stats */
3580 	statsp->dring_mode = lp->dring_mode;
3581 
3582 	if (lp->dring_mode == VIO_RX_DRING_DATA) {	/* RxDringData mode */
3583 		ldcp->rx_dringdata = vgen_handle_dringdata_shm;
3584 		ldcp->tx_dringdata = vgen_dringsend_shm;
3585 		if (!VGEN_PRI_ETH_DEFINED(vgenp)) {
3586 			/*
3587 			 * If priority frames are not in use, we don't need a
3588 			 * separate wrapper function for 'tx', so we set it to
3589 			 * 'tx_dringdata'. If priority frames are configured,
3590 			 * we leave the 'tx' pointer as is (initialized in
3591 			 * vgen_set_vnet_proto_ops()).
3592 			 */
3593 			ldcp->tx = ldcp->tx_dringdata;
3594 		}
3595 	} else {					/* TxDring mode */
3596 		ldcp->msg_thread = thread_create(NULL,
3597 		    2 * DEFAULTSTKSZ, vgen_ldc_msg_worker, ldcp, 0,
3598 		    &p0, TS_RUN, maxclsyspri);
3599 	}
3600 
3601 	rv = vgen_create_dring(ldcp);
3602 	if (rv != VGEN_SUCCESS) {
3603 		return (rv);
3604 	}
3605 
3606 	/* update local dring_info params */
3607 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
3608 		bcopy(&(ldcp->rx_dring_cookie),
3609 		    &(ldcp->local_hparams.dring_cookie),
3610 		    sizeof (ldc_mem_cookie_t));
3611 		ldcp->local_hparams.dring_ncookies = ldcp->rx_dring_ncookies;
3612 		ldcp->local_hparams.num_desc = ldcp->num_rxds;
3613 		ldcp->local_hparams.desc_size =
3614 		    sizeof (vnet_rx_dringdata_desc_t);
3615 		rv = vgen_send_rx_dring_reg(ldcp);
3616 	} else {
3617 		bcopy(&(ldcp->tx_dring_cookie),
3618 		    &(ldcp->local_hparams.dring_cookie),
3619 		    sizeof (ldc_mem_cookie_t));
3620 		ldcp->local_hparams.dring_ncookies = ldcp->tx_dring_ncookies;
3621 		ldcp->local_hparams.num_desc = ldcp->num_txds;
3622 		ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
3623 		rv = vgen_send_tx_dring_reg(ldcp);
3624 	}
3625 
3626 	if (rv != VGEN_SUCCESS) {
3627 		return (rv);
3628 	}
3629 
3630 	return (VGEN_SUCCESS);
3631 }
3632 
3633 /*
3634  * Set vnet-protocol-version dependent functions based on version.
3635  */
3636 static void
3637 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
3638 {
3639 	vgen_hparams_t	*lp = &ldcp->local_hparams;
3640 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3641 
3642 	/*
3643 	 * Setup the appropriate dring data processing routine and any
3644 	 * associated thread based on the version.
3645 	 *
3646 	 * In versions < 1.6, we only support TxDring mode. In this mode, the
3647 	 * msg worker thread processes all types of VIO msgs (ctrl and data).
3648 	 *
3649 	 * In versions >= 1.6, we also support RxDringData mode. In this mode,
3650 	 * all msgs including dring data messages are handled directly by the
3651 	 * callback (intr) thread. The dring data msgs (msgtype: VIO_TYPE_DATA,
3652 	 * subtype: VIO_SUBTYPE_INFO, subtype_env: VIO_DRING_DATA) can also be
3653 	 * disabled while the polling thread is active, in which case the
3654 	 * polling thread processes the rcv descriptor ring.
3655 	 *
3656 	 * However, for versions >= 1.6, we can force to only use TxDring mode.
3657 	 * This could happen if RxDringData mode has been disabled (see
3658 	 * vgen_dring_mode) on this guest or on the peer guest. This info is
3659 	 * determined as part of attr exchange phase of handshake. Hence, we
3660 	 * setup these pointers for v1.6 after attr msg phase completes during
3661 	 * handshake.
3662 	 */
3663 	if (VGEN_VER_GTEQ(ldcp, 1, 6)) {	/* Ver >= 1.6 */
3664 		/*
3665 		 * Set data dring mode for vgen_send_attr_info().
3666 		 */
3667 		if (vgen_dring_mode == VIO_RX_DRING_DATA) {
3668 			lp->dring_mode = (VIO_RX_DRING_DATA | VIO_TX_DRING);
3669 		} else {
3670 			lp->dring_mode = VIO_TX_DRING;
3671 		}
3672 	} else {				/* Ver <= 1.5 */
3673 		lp->dring_mode = VIO_TX_DRING;
3674 	}
3675 
3676 	if (VGEN_VER_GTEQ(ldcp, 1, 5)) {
3677 		vgen_port_t	*portp = ldcp->portp;
3678 		vnet_t		*vnetp = vgenp->vnetp;
3679 		/*
3680 		 * If the version negotiated with vswitch is >= 1.5 (link
3681 		 * status update support), set the required bits in our
3682 		 * attributes if this vnet device has been configured to get
3683 		 * physical link state updates.
3684 		 */
3685 		if (portp == vgenp->vsw_portp && vnetp->pls_update == B_TRUE) {
3686 			lp->physlink_update = PHYSLINK_UPDATE_STATE;
3687 		} else {
3688 			lp->physlink_update = PHYSLINK_UPDATE_NONE;
3689 		}
3690 	}
3691 
3692 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
3693 		/*
3694 		 * If the version negotiated with peer is >= 1.4(Jumbo Frame
3695 		 * Support), set the mtu in our attributes to max_frame_size.
3696 		 */
3697 		lp->mtu = vgenp->max_frame_size;
3698 	} else  if (VGEN_VER_EQ(ldcp, 1, 3)) {
3699 		/*
3700 		 * If the version negotiated with peer is == 1.3 (Vlan Tag
3701 		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
3702 		 */
3703 		lp->mtu = ETHERMAX + VLAN_TAGSZ;
3704 	} else {
3705 		vgen_port_t	*portp = ldcp->portp;
3706 		vnet_t		*vnetp = vgenp->vnetp;
3707 		/*
3708 		 * Pre-1.3 peers expect max frame size of ETHERMAX.
3709 		 * We can negotiate that size with those peers provided the
3710 		 * following conditions are true:
3711 		 * - Only pvid is defined for our peer and there are no vids.
3712 		 * - pvids are equal.
3713 		 * If the above conditions are true, then we can send/recv only
3714 		 * untagged frames of max size ETHERMAX.
3715 		 */
3716 		if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
3717 			lp->mtu = ETHERMAX;
3718 		}
3719 	}
3720 
3721 	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {	/* Versions >= 1.2 */
3722 		/*
3723 		 * Starting v1.2 we support priority frames; so set the
3724 		 * dring processing routines and xfer modes based on the
3725 		 * version. Note that the dring routines could be changed after
3726 		 * attribute handshake phase for versions >= 1.6 (See
3727 		 * vgen_handshake_phase3())
3728 		 */
3729 		ldcp->tx_dringdata = vgen_dringsend;
3730 		ldcp->rx_dringdata = vgen_handle_dringdata;
3731 
3732 		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
3733 			/*
3734 			 * Enable priority routines and pkt mode only if
3735 			 * at least one pri-eth-type is specified in MD.
3736 			 */
3737 			ldcp->tx = vgen_ldcsend;
3738 			ldcp->rx_pktdata = vgen_handle_pkt_data;
3739 
3740 			/* set xfer mode for vgen_send_attr_info() */
3741 			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
3742 		} else {
3743 			/* No priority eth types defined in MD */
3744 			ldcp->tx = ldcp->tx_dringdata;
3745 			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
3746 
3747 			/* Set xfer mode for vgen_send_attr_info() */
3748 			lp->xfer_mode = VIO_DRING_MODE_V1_2;
3749 		}
3750 	} else { /* Versions prior to 1.2  */
3751 		vgen_reset_vnet_proto_ops(ldcp);
3752 	}
3753 }
3754 
3755 /*
3756  * Reset vnet-protocol-version dependent functions to pre-v1.2.
3757  */
3758 static void
3759 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
3760 {
3761 	vgen_hparams_t	*lp = &ldcp->local_hparams;
3762 
3763 	ldcp->tx = ldcp->tx_dringdata = vgen_dringsend;
3764 	ldcp->rx_dringdata = vgen_handle_dringdata;
3765 	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
3766 
3767 	/* set xfer mode for vgen_send_attr_info() */
3768 	lp->xfer_mode = VIO_DRING_MODE_V1_0;
3769 }
3770 
3771 static void
3772 vgen_vlan_unaware_port_reset(vgen_port_t *portp)
3773 {
3774 	vgen_ldc_t	*ldcp = portp->ldcp;
3775 	vgen_t		*vgenp = portp->vgenp;
3776 	vnet_t		*vnetp = vgenp->vnetp;
3777 	boolean_t	need_reset = B_FALSE;
3778 
3779 	mutex_enter(&ldcp->cblock);
3780 
3781 	/*
3782 	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
3783 	 * the connection. See comments in vgen_set_vnet_proto_ops().
3784 	 */
3785 	if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
3786 	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
3787 		need_reset = B_TRUE;
3788 	}
3789 	mutex_exit(&ldcp->cblock);
3790 
3791 	if (need_reset == B_TRUE) {
3792 		(void) vgen_ldc_reset(ldcp, VGEN_OTHER);
3793 	}
3794 }
3795 
/*
 * Reset the channel that belongs to the given port by calling
 * vgen_ldc_reset() with the VGEN_OTHER caller type. The result of the
 * reset is ignored.
 */
static void
vgen_port_reset(vgen_port_t *portp)
{
	(void) vgen_ldc_reset(portp->ldcp, VGEN_OTHER);
}
3801 
3802 static void
3803 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
3804 {
3805 	vgen_port_t	*portp;
3806 	vgen_portlist_t	*plistp;
3807 
3808 	plistp = &(vgenp->vgenports);
3809 	READ_ENTER(&plistp->rwlock);
3810 
3811 	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
3812 
3813 		vgen_vlan_unaware_port_reset(portp);
3814 
3815 	}
3816 
3817 	RW_EXIT(&plistp->rwlock);
3818 }
3819 
3820 static void
3821 vgen_reset_vsw_port(vgen_t *vgenp)
3822 {
3823 	vgen_port_t	*portp;
3824 
3825 	if ((portp = vgenp->vsw_portp) != NULL) {
3826 		vgen_port_reset(portp);
3827 	}
3828 }
3829 
3830 static void
3831 vgen_setup_handshake_params(vgen_ldc_t *ldcp)
3832 {
3833 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
3834 
3835 	/*
3836 	 * clear local handshake params and initialize.
3837 	 */
3838 	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
3839 
3840 	/* set version to the highest version supported */
3841 	ldcp->local_hparams.ver_major =
3842 	    ldcp->vgen_versions[0].ver_major;
3843 	ldcp->local_hparams.ver_minor =
3844 	    ldcp->vgen_versions[0].ver_minor;
3845 	ldcp->local_hparams.dev_class = VDEV_NETWORK;
3846 
3847 	/* set attr_info params */
3848 	ldcp->local_hparams.mtu = vgenp->max_frame_size;
3849 	ldcp->local_hparams.addr =
3850 	    vnet_macaddr_strtoul(vgenp->macaddr);
3851 	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
3852 	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
3853 	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */
3854 	ldcp->local_hparams.physlink_update = PHYSLINK_UPDATE_NONE;
3855 
3856 	/* reset protocol version specific function pointers */
3857 	vgen_reset_vnet_proto_ops(ldcp);
3858 	ldcp->local_hparams.dring_ident = 0;
3859 	ldcp->local_hparams.dring_ready = B_FALSE;
3860 
3861 	/* clear peer_hparams */
3862 	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
3863 	ldcp->peer_hparams.dring_ready = B_FALSE;
3864 }
3865 
3866 /*
3867  * Process Channel Reset. We tear down the resources (timers, threads,
3868  * descriptor rings etc) associated with the channel and reinitialize the
3869  * channel based on the flags.
3870  *
3871  * Arguments:
3872  *    ldcp:	The channel being processed.
3873  *
3874  *    flags:
3875  *	VGEN_FLAG_EVT_RESET:
3876  *		A ECONNRESET error occured while doing ldc operations such as
3877  *		ldc_read() or ldc_write(); the channel is already reset and it
3878  *		needs to be handled.
3879  *	VGEN_FLAG_NEED_LDCRESET:
3880  *		Some other errors occured and the error handling code needs to
3881  *		explicitly reset the channel and restart handshake with the
3882  *		peer. The error could be either in ldc operations or other
3883  *		parts of the code such as timeouts or mdeg events etc.
3884  *	VGEN_FLAG_UNINIT:
3885  *		The channel is being torn down; no need to bring up the channel
3886  *		after resetting.
3887  */
3888 static int
3889 vgen_process_reset(vgen_ldc_t *ldcp, int flags)
3890 {
3891 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
3892 	vgen_port_t	*portp = ldcp->portp;
3893 	vgen_hparams_t  *lp = &ldcp->local_hparams;
3894 	boolean_t	is_vsw_port = B_FALSE;
3895 	boolean_t	link_update = B_FALSE;
3896 	ldc_status_t	istatus;
3897 	int		rv;
3898 	uint_t		retries = 0;
3899 	timeout_id_t	htid = 0;
3900 	timeout_id_t	wd_tid = 0;
3901 
3902 	if (portp == vgenp->vsw_portp) { /* vswitch port ? */
3903 		is_vsw_port = B_TRUE;
3904 	}
3905 
3906 	/*
3907 	 * Report that the channel is being reset; it ensures that any HybridIO
3908 	 * configuration is torn down before we reset the channel if it is not
3909 	 * already reset (flags == VGEN_FLAG_NEED_LDCRESET).
3910 	 */
3911 	if (is_vsw_port == B_TRUE) {
3912 		vio_net_report_err_t rep_err = portp->vcb.vio_net_report_err;
3913 		rep_err(portp->vhp, VIO_NET_RES_DOWN);
3914 	}
3915 
3916 again:
3917 	mutex_enter(&ldcp->cblock);
3918 
3919 	/* Clear hstate and hphase */
3920 	ldcp->hstate = 0;
3921 	ldcp->hphase = VH_PHASE0;
3922 	if (flags == VGEN_FLAG_NEED_LDCRESET || flags == VGEN_FLAG_UNINIT) {
3923 		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
3924 		(void) ldc_down(ldcp->ldc_handle);
3925 		(void) ldc_status(ldcp->ldc_handle, &istatus);
3926 		DWARN(vgenp, ldcp, "Reset Done, ldc_status(%d)\n", istatus);
3927 		ldcp->ldc_status = istatus;
3928 
3929 		if (flags == VGEN_FLAG_UNINIT) {
3930 			/* disable further callbacks */
3931 			rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3932 			if (rv != 0) {
3933 				DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
3934 			}
3935 		}
3936 
3937 	} else {
3938 		/* flags == VGEN_FLAG_EVT_RESET */
3939 		DWARN(vgenp, ldcp, "ldc status(%d)\n", ldcp->ldc_status);
3940 	}
3941 
3942 	/*
3943 	 * As the connection is now reset, mark the channel
3944 	 * link_state as 'down' and notify the stack if needed.
3945 	 */
3946 	if (ldcp->link_state != LINK_STATE_DOWN) {
3947 		ldcp->link_state = LINK_STATE_DOWN;
3948 
3949 		if (is_vsw_port == B_TRUE) { /* vswitch port ? */
3950 			/*
3951 			 * As the channel link is down, mark physical link also
3952 			 * as down. After the channel comes back up and
3953 			 * handshake completes, we will get an update on the
3954 			 * physlink state from vswitch (if this device has been
3955 			 * configured to get phys link updates).
3956 			 */
3957 			vgenp->phys_link_state = LINK_STATE_DOWN;
3958 			link_update = B_TRUE;
3959 
3960 		}
3961 	}
3962 
3963 	if (ldcp->htid != 0) {
3964 		htid = ldcp->htid;
3965 		ldcp->htid = 0;
3966 	}
3967 
3968 	if (ldcp->wd_tid != 0) {
3969 		wd_tid = ldcp->wd_tid;
3970 		ldcp->wd_tid = 0;
3971 	}
3972 
3973 	mutex_exit(&ldcp->cblock);
3974 
3975 	/* Update link state to the stack */
3976 	if (link_update == B_TRUE) {
3977 		vgen_link_update(vgenp, ldcp->link_state);
3978 	}
3979 
3980 	/*
3981 	 * As the channel is being reset, redirect traffic to the peer through
3982 	 * vswitch, until the channel becomes ready to be used again.
3983 	 */
3984 	if (is_vsw_port == B_FALSE && vgenp->vsw_portp != NULL) {
3985 		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
3986 	}
3987 
3988 	/* Cancel handshake watchdog timeout */
3989 	if (htid) {
3990 		(void) untimeout(htid);
3991 	}
3992 
3993 	/* Cancel transmit watchdog timeout */
3994 	if (wd_tid) {
3995 		(void) untimeout(wd_tid);
3996 	}
3997 
3998 	/* Stop the msg worker thread */
3999 	if (lp->dring_mode == VIO_TX_DRING && curthread != ldcp->msg_thread) {
4000 		vgen_stop_msg_thread(ldcp);
4001 	}
4002 
4003 	/* Grab all locks while we tear down tx/rx resources */
4004 	LDC_LOCK(ldcp);
4005 
4006 	/* Destroy the local dring which is exported to the peer */
4007 	vgen_destroy_dring(ldcp);
4008 
4009 	/* Unmap the remote dring which is imported from the peer */
4010 	vgen_unmap_dring(ldcp);
4011 
4012 	/*
4013 	 * Bring up the channel and restart handshake
4014 	 * only if the channel is not being torn down.
4015 	 */
4016 	if (flags != VGEN_FLAG_UNINIT) {
4017 
4018 		/* Setup handshake parameters to restart a new handshake */
4019 		vgen_setup_handshake_params(ldcp);
4020 
4021 		/* Bring the channel up */
4022 		vgen_ldc_up(ldcp);
4023 
4024 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4025 			DWARN(vgenp, ldcp, "ldc_status err\n");
4026 		} else {
4027 			ldcp->ldc_status = istatus;
4028 		}
4029 
4030 		/* If the channel is UP, start handshake */
4031 		if (ldcp->ldc_status == LDC_UP) {
4032 
4033 			if (is_vsw_port == B_FALSE) {
4034 				/*
4035 				 * Channel is up; use this port from now on.
4036 				 */
4037 				(void) atomic_swap_32(&portp->use_vsw_port,
4038 				    B_FALSE);
4039 			}
4040 
4041 			/* Initialize local session id */
4042 			ldcp->local_sid = ddi_get_lbolt();
4043 
4044 			/* clear peer session id */
4045 			ldcp->peer_sid = 0;
4046 
4047 			/*
4048 			 * Initiate Handshake process with peer ldc endpoint by
4049 			 * sending version info vio message. If that fails we
4050 			 * go back to the top of this function to process the
4051 			 * error again. Note that we can be in this loop for
4052 			 * 'vgen_ldc_max_resets' times, after which the channel
4053 			 * is not brought up.
4054 			 */
4055 			mutex_exit(&ldcp->tclock);
4056 			mutex_exit(&ldcp->txlock);
4057 			mutex_exit(&ldcp->wrlock);
4058 			mutex_exit(&ldcp->rxlock);
4059 			rv = vgen_handshake(vh_nextphase(ldcp));
4060 			mutex_exit(&ldcp->cblock);
4061 			if (rv != 0) {
4062 				if (rv == ECONNRESET) {
4063 					flags = VGEN_FLAG_EVT_RESET;
4064 				} else {
4065 					flags = VGEN_FLAG_NEED_LDCRESET;
4066 				}
4067 
4068 				/*
4069 				 * We still hold 'reset_in_progress'; so we can
4070 				 * just loop back to the top to restart error
4071 				 * processing.
4072 				 */
4073 				goto again;
4074 			}
4075 		} else {
4076 			LDC_UNLOCK(ldcp);
4077 		}
4078 
4079 	} else {	/* flags == VGEN_FLAG_UNINIT */
4080 
4081 		/* Close the channel - retry on EAGAIN */
4082 		while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
4083 			if (++retries > vgen_ldccl_retries) {
4084 				break;
4085 			}
4086 			drv_usecwait(VGEN_LDC_CLOSE_DELAY);
4087 		}
4088 		if (rv != 0) {
4089 			cmn_err(CE_NOTE,
4090 			    "!vnet%d: Error(%d) closing the channel(0x%lx)\n",
4091 			    vgenp->instance, rv, ldcp->ldc_id);
4092 		}
4093 
4094 		ldcp->ldc_reset_count = 0;
4095 		ldcp->ldc_status = LDC_INIT;
4096 		ldcp->flags &= ~(CHANNEL_STARTED);
4097 
4098 		LDC_UNLOCK(ldcp);
4099 	}
4100 
4101 	/* Done processing channel reset; clear the atomic flag */
4102 	ldcp->reset_in_progress = 0;
4103 	return (0);
4104 }
4105 
4106 /*
4107  * Initiate handshake with the peer by sending various messages
4108  * based on the handshake-phase that the channel is currently in.
4109  */
4110 static int
4111 vgen_handshake(vgen_ldc_t *ldcp)
4112 {
4113 	uint32_t	hphase = ldcp->hphase;
4114 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4115 	int		rv = 0;
4116 	timeout_id_t	htid;
4117 
4118 	switch (hphase) {
4119 
4120 	case VH_PHASE1:
4121 
4122 		/*
4123 		 * start timer, for entire handshake process, turn this timer
4124 		 * off if all phases of handshake complete successfully and
4125 		 * hphase goes to VH_DONE(below) or channel is reset due to
4126 		 * errors or vgen_ldc_uninit() is invoked(vgen_stop).
4127 		 */
4128 		ASSERT(ldcp->htid == 0);
4129 		ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
4130 		    drv_usectohz(vgen_hwd_interval * MICROSEC));
4131 
4132 		/* Phase 1 involves negotiating the version */
4133 		rv = vgen_send_version_negotiate(ldcp);
4134 		break;
4135 
4136 	case VH_PHASE2:
4137 		rv = vgen_handshake_phase2(ldcp);
4138 		break;
4139 
4140 	case VH_PHASE3:
4141 		rv = vgen_handshake_phase3(ldcp);
4142 		break;
4143 
4144 	case VH_PHASE4:
4145 		rv = vgen_send_rdx_info(ldcp);
4146 		break;
4147 
4148 	case VH_DONE:
4149 
4150 		ldcp->ldc_reset_count = 0;
4151 
4152 		DBG1(vgenp, ldcp, "Handshake Done\n");
4153 
4154 		/*
4155 		 * The channel is up and handshake is done successfully. Now we
4156 		 * can mark the channel link_state as 'up'. We also notify the
4157 		 * stack if the channel is connected to vswitch.
4158 		 */
4159 		ldcp->link_state = LINK_STATE_UP;
4160 
4161 		if (ldcp->portp == vgenp->vsw_portp) {
4162 			/*
4163 			 * If this channel(port) is connected to vsw,
4164 			 * need to sync multicast table with vsw.
4165 			 */
4166 			rv = vgen_send_mcast_info(ldcp);
4167 			if (rv != VGEN_SUCCESS)
4168 				break;
4169 
4170 			if (vgenp->pls_negotiated == B_FALSE) {
4171 				/*
4172 				 * We haven't negotiated with vswitch to get
4173 				 * physical link state updates. We can update
4174 				 * update the stack at this point as the
4175 				 * channel to vswitch is up and the handshake
4176 				 * is done successfully.
4177 				 *
4178 				 * If we have negotiated to get physical link
4179 				 * state updates, then we won't notify the
4180 				 * the stack here; we do that as soon as
4181 				 * vswitch sends us the initial phys link state
4182 				 * (see vgen_handle_physlink_info()).
4183 				 */
4184 				mutex_exit(&ldcp->cblock);
4185 				vgen_link_update(vgenp, ldcp->link_state);
4186 				mutex_enter(&ldcp->cblock);
4187 			}
4188 		}
4189 
4190 		if (ldcp->htid != 0) {
4191 			htid = ldcp->htid;
4192 			ldcp->htid = 0;
4193 
4194 			mutex_exit(&ldcp->cblock);
4195 			(void) untimeout(htid);
4196 			mutex_enter(&ldcp->cblock);
4197 		}
4198 
4199 		/*
4200 		 * Check if mac layer should be notified to restart
4201 		 * transmissions. This can happen if the channel got
4202 		 * reset and while tx_blocked is set.
4203 		 */
4204 		mutex_enter(&ldcp->tclock);
4205 		if (ldcp->tx_blocked) {
4206 			vio_net_tx_update_t vtx_update =
4207 			    ldcp->portp->vcb.vio_net_tx_update;
4208 
4209 			ldcp->tx_blocked = B_FALSE;
4210 			vtx_update(ldcp->portp->vhp);
4211 		}
4212 		mutex_exit(&ldcp->tclock);
4213 
4214 		/* start transmit watchdog timer */
4215 		ldcp->wd_tid = timeout(vgen_tx_watchdog, (caddr_t)ldcp,
4216 		    drv_usectohz(vgen_txwd_interval * 1000));
4217 
4218 		break;
4219 
4220 	default:
4221 		break;
4222 	}
4223 
4224 	return (rv);
4225 }
4226 
4227 /*
4228  * Check if the current handshake phase has completed successfully and
4229  * return the status.
4230  */
4231 static int
4232 vgen_handshake_done(vgen_ldc_t *ldcp)
4233 {
4234 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4235 	uint32_t	hphase = ldcp->hphase;
4236 	int 		status = 0;
4237 
4238 	switch (hphase) {
4239 
4240 	case VH_PHASE1:
4241 		/*
4242 		 * Phase1 is done, if version negotiation
4243 		 * completed successfully.
4244 		 */
4245 		status = ((ldcp->hstate & VER_NEGOTIATED) ==
4246 		    VER_NEGOTIATED);
4247 		break;
4248 
4249 	case VH_PHASE2:
4250 		/*
4251 		 * Phase 2 is done, if attr info
4252 		 * has been exchanged successfully.
4253 		 */
4254 		status = ((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
4255 		    ATTR_INFO_EXCHANGED);
4256 		break;
4257 
4258 	case VH_PHASE3:
4259 		/*
4260 		 * Phase 3 is done, if dring registration
4261 		 * has been exchanged successfully.
4262 		 */
4263 		status = ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
4264 		    DRING_INFO_EXCHANGED);
4265 		break;
4266 
4267 	case VH_PHASE4:
4268 		/* Phase 4 is done, if rdx msg has been exchanged */
4269 		status = ((ldcp->hstate & RDX_EXCHANGED) ==
4270 		    RDX_EXCHANGED);
4271 		break;
4272 
4273 	default:
4274 		break;
4275 	}
4276 
4277 	if (status == 0) {
4278 		return (VGEN_FAILURE);
4279 	}
4280 	DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
4281 	return (VGEN_SUCCESS);
4282 }
4283 
/*
 * Link State Update Notes:
 * By default, the link state of the channel connected to the vswitch is
 * reported as the link state of the vnet device. If the channel is down or
 * reset, the link state is marked 'down'. If the channel is 'up' *and* the
 * handshake between the vnet and the vswitch has succeeded, the link state is
 * marked 'up'.
 *
 * If the physical network link state is desired instead, the vnet device must
 * be configured to get physical link updates; the 'linkprop' property in the
 * virtual-device MD node indicates this. As part of attribute exchange the
 * vnet device negotiates with the vswitch to obtain physical link state
 * updates. If the negotiation succeeds, the vswitch sends an initial physlink
 * msg once the handshake is done, and another one whenever the physical link
 * state changes.
 *
 * Currently there are no mac layer interfaces to report two distinct link
 * states - virtual and physical. Thus, if the vnet has been configured to get
 * physical link updates, the link status is reported as 'up' only when both
 * the virtual and the physical links are up.
 *
 * vgen_link_update() itself simply forwards the given link state to the vnet
 * layer through vnet_link_update(), using the vnet handle stored in vgenp.
 */
static void
vgen_link_update(vgen_t *vgenp, link_state_t link_state)
{
	vnet_link_update(vgenp->vnetp, link_state);
}
4306 
4307 /*
4308  * Handle a version info msg from the peer or an ACK/NACK from the peer
4309  * to a version info msg that we sent.
4310  */
4311 static int
4312 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4313 {
4314 	vgen_t		*vgenp;
4315 	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
4316 	int		ack = 0;
4317 	int		failed = 0;
4318 	int		idx;
4319 	vgen_ver_t	*versions = ldcp->vgen_versions;
4320 	int		rv = 0;
4321 
4322 	vgenp = LDC_TO_VGEN(ldcp);
4323 	DBG1(vgenp, ldcp, "enter\n");
4324 	switch (tagp->vio_subtype) {
4325 	case VIO_SUBTYPE_INFO:
4326 
4327 		/*  Cache sid of peer if this is the first time */
4328 		if (ldcp->peer_sid == 0) {
4329 			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
4330 			    tagp->vio_sid);
4331 			ldcp->peer_sid = tagp->vio_sid;
4332 		}
4333 
4334 		if (ldcp->hphase != VH_PHASE1) {
4335 			/*
4336 			 * If we are not already in VH_PHASE1, reset to
4337 			 * pre-handshake state, and initiate handshake
4338 			 * to the peer too.
4339 			 */
4340 			return (EINVAL);
4341 		}
4342 
4343 		ldcp->hstate |= VER_INFO_RCVD;
4344 
4345 		/* save peer's requested values */
4346 		ldcp->peer_hparams.ver_major = vermsg->ver_major;
4347 		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
4348 		ldcp->peer_hparams.dev_class = vermsg->dev_class;
4349 
4350 		if ((vermsg->dev_class != VDEV_NETWORK) &&
4351 		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
4352 			/* unsupported dev_class, send NACK */
4353 
4354 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4355 
4356 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4357 			tagp->vio_sid = ldcp->local_sid;
4358 			/* send reply msg back to peer */
4359 			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4360 			    sizeof (*vermsg), B_FALSE);
4361 			if (rv != VGEN_SUCCESS) {
4362 				return (rv);
4363 			}
4364 			return (VGEN_FAILURE);
4365 		}
4366 
4367 		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
4368 		    vermsg->ver_major,  vermsg->ver_minor);
4369 
4370 		idx = 0;
4371 
4372 		for (;;) {
4373 
4374 			if (vermsg->ver_major > versions[idx].ver_major) {
4375 
4376 				/* nack with next lower version */
4377 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4378 				vermsg->ver_major = versions[idx].ver_major;
4379 				vermsg->ver_minor = versions[idx].ver_minor;
4380 				break;
4381 			}
4382 
4383 			if (vermsg->ver_major == versions[idx].ver_major) {
4384 
4385 				/* major version match - ACK version */
4386 				tagp->vio_subtype = VIO_SUBTYPE_ACK;
4387 				ack = 1;
4388 
4389 				/*
4390 				 * lower minor version to the one this endpt
4391 				 * supports, if necessary
4392 				 */
4393 				if (vermsg->ver_minor >
4394 				    versions[idx].ver_minor) {
4395 					vermsg->ver_minor =
4396 					    versions[idx].ver_minor;
4397 					ldcp->peer_hparams.ver_minor =
4398 					    versions[idx].ver_minor;
4399 				}
4400 				break;
4401 			}
4402 
4403 			idx++;
4404 
4405 			if (idx == VGEN_NUM_VER) {
4406 
4407 				/* no version match - send NACK */
4408 				tagp->vio_subtype = VIO_SUBTYPE_NACK;
4409 				vermsg->ver_major = 0;
4410 				vermsg->ver_minor = 0;
4411 				failed = 1;
4412 				break;
4413 			}
4414 
4415 		}
4416 
4417 		tagp->vio_sid = ldcp->local_sid;
4418 
4419 		/* send reply msg back to peer */
4420 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
4421 		    B_FALSE);
4422 		if (rv != VGEN_SUCCESS) {
4423 			return (rv);
4424 		}
4425 
4426 		if (ack) {
4427 			ldcp->hstate |= VER_ACK_SENT;
4428 			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
4429 			    vermsg->ver_major, vermsg->ver_minor);
4430 		}
4431 		if (failed) {
4432 			DWARN(vgenp, ldcp, "Negotiation Failed\n");
4433 			return (VGEN_FAILURE);
4434 		}
4435 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4436 
4437 			/*  VER_ACK_SENT and VER_ACK_RCVD */
4438 
4439 			/* local and peer versions match? */
4440 			ASSERT((ldcp->local_hparams.ver_major ==
4441 			    ldcp->peer_hparams.ver_major) &&
4442 			    (ldcp->local_hparams.ver_minor ==
4443 			    ldcp->peer_hparams.ver_minor));
4444 
4445 			vgen_set_vnet_proto_ops(ldcp);
4446 
4447 			/* move to the next phase */
4448 			rv = vgen_handshake(vh_nextphase(ldcp));
4449 			if (rv != 0) {
4450 				return (rv);
4451 			}
4452 		}
4453 
4454 		break;
4455 
4456 	case VIO_SUBTYPE_ACK:
4457 
4458 		if (ldcp->hphase != VH_PHASE1) {
4459 			/*  This should not happen. */
4460 			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
4461 			return (VGEN_FAILURE);
4462 		}
4463 
4464 		/* SUCCESS - we have agreed on a version */
4465 		ldcp->local_hparams.ver_major = vermsg->ver_major;
4466 		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
4467 		ldcp->hstate |= VER_ACK_RCVD;
4468 
4469 		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
4470 		    vermsg->ver_major,  vermsg->ver_minor);
4471 
4472 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4473 
4474 			/*  VER_ACK_SENT and VER_ACK_RCVD */
4475 
4476 			/* local and peer versions match? */
4477 			ASSERT((ldcp->local_hparams.ver_major ==
4478 			    ldcp->peer_hparams.ver_major) &&
4479 			    (ldcp->local_hparams.ver_minor ==
4480 			    ldcp->peer_hparams.ver_minor));
4481 
4482 			vgen_set_vnet_proto_ops(ldcp);
4483 
4484 			/* move to the next phase */
4485 			rv = vgen_handshake(vh_nextphase(ldcp));
4486 			if (rv != 0) {
4487 				return (rv);
4488 			}
4489 		}
4490 		break;
4491 
4492 	case VIO_SUBTYPE_NACK:
4493 
4494 		if (ldcp->hphase != VH_PHASE1) {
4495 			/*  This should not happen.  */
4496 			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
4497 			"Phase(%u)\n", ldcp->hphase);
4498 			return (VGEN_FAILURE);
4499 		}
4500 
4501 		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
4502 		    vermsg->ver_major, vermsg->ver_minor);
4503 
4504 		/* check if version in NACK is zero */
4505 		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
4506 			/*
4507 			 * Version Negotiation has failed.
4508 			 */
4509 			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4510 			return (VGEN_FAILURE);
4511 		}
4512 
4513 		idx = 0;
4514 
4515 		for (;;) {
4516 
4517 			if (vermsg->ver_major > versions[idx].ver_major) {
4518 				/* select next lower version */
4519 
4520 				ldcp->local_hparams.ver_major =
4521 				    versions[idx].ver_major;
4522 				ldcp->local_hparams.ver_minor =
4523 				    versions[idx].ver_minor;
4524 				break;
4525 			}
4526 
4527 			if (vermsg->ver_major == versions[idx].ver_major) {
4528 				/* major version match */
4529 
4530 				ldcp->local_hparams.ver_major =
4531 				    versions[idx].ver_major;
4532 
4533 				ldcp->local_hparams.ver_minor =
4534 				    versions[idx].ver_minor;
4535 				break;
4536 			}
4537 
4538 			idx++;
4539 
4540 			if (idx == VGEN_NUM_VER) {
4541 				/*
4542 				 * no version match.
4543 				 * Version Negotiation has failed.
4544 				 */
4545 				DWARN(vgenp, ldcp,
4546 				    "Version Negotiation Failed\n");
4547 				return (VGEN_FAILURE);
4548 			}
4549 
4550 		}
4551 
4552 		rv = vgen_send_version_negotiate(ldcp);
4553 		if (rv != VGEN_SUCCESS) {
4554 			return (rv);
4555 		}
4556 
4557 		break;
4558 	}
4559 
4560 	DBG1(vgenp, ldcp, "exit\n");
4561 	return (VGEN_SUCCESS);
4562 }
4563 
4564 static int
4565 vgen_handle_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
4566 {
4567 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4568 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4569 	vgen_hparams_t	*rp = &ldcp->peer_hparams;
4570 	uint32_t	mtu;
4571 	uint8_t		dring_mode;
4572 
4573 	ldcp->hstate |= ATTR_INFO_RCVD;
4574 
4575 	/* save peer's values */
4576 	rp->mtu = msg->mtu;
4577 	rp->addr = msg->addr;
4578 	rp->addr_type = msg->addr_type;
4579 	rp->xfer_mode = msg->xfer_mode;
4580 	rp->ack_freq = msg->ack_freq;
4581 	rp->dring_mode = msg->options;
4582 
4583 	/*
4584 	 * Process address type, ack frequency and transfer mode attributes.
4585 	 */
4586 	if ((msg->addr_type != ADDR_TYPE_MAC) ||
4587 	    (msg->ack_freq > 64) ||
4588 	    (msg->xfer_mode != lp->xfer_mode)) {
4589 		return (VGEN_FAILURE);
4590 	}
4591 
4592 	/*
4593 	 * Process dring mode attribute.
4594 	 */
4595 	if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
4596 		/*
4597 		 * Versions >= 1.6:
4598 		 * Though we are operating in v1.6 mode, it is possible that
4599 		 * RxDringData mode has been disabled either on this guest or
4600 		 * on the peer guest. If so, we revert to pre v1.6 behavior of
4601 		 * TxDring mode. But this must be agreed upon in both
4602 		 * directions of attr exchange. We first determine the mode
4603 		 * that can be negotiated.
4604 		 */
4605 		if ((msg->options & VIO_RX_DRING_DATA) != 0 &&
4606 		    vgen_dring_mode == VIO_RX_DRING_DATA) {
4607 			/*
4608 			 * We are capable of handling RxDringData AND the peer
4609 			 * is also capable of it; we enable RxDringData mode on
4610 			 * this channel.
4611 			 */
4612 			dring_mode = VIO_RX_DRING_DATA;
4613 		} else if ((msg->options & VIO_TX_DRING) != 0) {
4614 			/*
4615 			 * If the peer is capable of TxDring mode, we
4616 			 * negotiate TxDring mode on this channel.
4617 			 */
4618 			dring_mode = VIO_TX_DRING;
4619 		} else {
4620 			/*
4621 			 * We support only VIO_TX_DRING and VIO_RX_DRING_DATA
4622 			 * modes. We don't support VIO_RX_DRING mode.
4623 			 */
4624 			return (VGEN_FAILURE);
4625 		}
4626 
4627 		/*
4628 		 * If we have received an ack for the attr info that we sent,
4629 		 * then check if the dring mode matches what the peer had ack'd
4630 		 * (saved in local hparams). If they don't match, we fail the
4631 		 * handshake.
4632 		 */
4633 		if (ldcp->hstate & ATTR_ACK_RCVD) {
4634 			if (msg->options != lp->dring_mode) {
4635 				/* send NACK */
4636 				return (VGEN_FAILURE);
4637 			}
4638 		} else {
4639 			/*
4640 			 * Save the negotiated dring mode in our attr
4641 			 * parameters, so it gets sent in the attr info from us
4642 			 * to the peer.
4643 			 */
4644 			lp->dring_mode = dring_mode;
4645 		}
4646 
4647 		/* save the negotiated dring mode in the msg to be replied */
4648 		msg->options = dring_mode;
4649 	}
4650 
4651 	/*
4652 	 * Process MTU attribute.
4653 	 */
4654 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4655 		/*
4656 		 * Versions >= 1.4:
4657 		 * Validate mtu of the peer is at least ETHERMAX. Then, the mtu
4658 		 * is negotiated down to the minimum of our mtu and peer's mtu.
4659 		 */
4660 		if (msg->mtu < ETHERMAX) {
4661 			return (VGEN_FAILURE);
4662 		}
4663 
4664 		mtu = MIN(msg->mtu, vgenp->max_frame_size);
4665 
4666 		/*
4667 		 * If we have received an ack for the attr info
4668 		 * that we sent, then check if the mtu computed
4669 		 * above matches the mtu that the peer had ack'd
4670 		 * (saved in local hparams). If they don't
4671 		 * match, we fail the handshake.
4672 		 */
4673 		if (ldcp->hstate & ATTR_ACK_RCVD) {
4674 			if (mtu != lp->mtu) {
4675 				/* send NACK */
4676 				return (VGEN_FAILURE);
4677 			}
4678 		} else {
4679 			/*
4680 			 * Save the mtu computed above in our
4681 			 * attr parameters, so it gets sent in
4682 			 * the attr info from us to the peer.
4683 			 */
4684 			lp->mtu = mtu;
4685 		}
4686 
4687 		/* save the MIN mtu in the msg to be replied */
4688 		msg->mtu = mtu;
4689 
4690 	} else {
4691 		/* versions < 1.4, mtu must match */
4692 		if (msg->mtu != lp->mtu) {
4693 			return (VGEN_FAILURE);
4694 		}
4695 	}
4696 
4697 	return (VGEN_SUCCESS);
4698 }
4699 
4700 static int
4701 vgen_handle_attr_ack(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
4702 {
4703 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4704 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4705 
4706 	/*
4707 	 * Process dring mode attribute.
4708 	 */
4709 	if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
4710 		/*
4711 		 * Versions >= 1.6:
4712 		 * The ack msg sent by the peer contains the negotiated dring
4713 		 * mode between our capability (that we had sent in our attr
4714 		 * info) and the peer's capability.
4715 		 */
4716 		if (ldcp->hstate & ATTR_ACK_SENT) {
4717 			/*
4718 			 * If we have sent an ack for the attr info msg from
4719 			 * the peer, check if the dring mode that was
4720 			 * negotiated then (saved in local hparams) matches the
4721 			 * mode that the peer has ack'd. If they don't match,
4722 			 * we fail the handshake.
4723 			 */
4724 			if (lp->dring_mode != msg->options) {
4725 				return (VGEN_FAILURE);
4726 			}
4727 		} else {
4728 			if ((msg->options & lp->dring_mode) == 0) {
4729 				/*
4730 				 * Peer ack'd with a mode that we don't
4731 				 * support; we fail the handshake.
4732 				 */
4733 				return (VGEN_FAILURE);
4734 			}
4735 			if ((msg->options & (VIO_TX_DRING|VIO_RX_DRING_DATA))
4736 			    == (VIO_TX_DRING|VIO_RX_DRING_DATA)) {
4737 				/*
4738 				 * Peer must ack with only one negotiated mode.
4739 				 * Otherwise fail handshake.
4740 				 */
4741 				return (VGEN_FAILURE);
4742 			}
4743 
4744 			/*
4745 			 * Save the negotiated mode, so we can validate it when
4746 			 * we receive attr info from the peer.
4747 			 */
4748 			lp->dring_mode = msg->options;
4749 		}
4750 	}
4751 
4752 	/*
4753 	 * Process Physical Link Update attribute.
4754 	 */
4755 	if (VGEN_VER_GTEQ(ldcp, 1, 5) &&
4756 	    ldcp->portp == vgenp->vsw_portp) {
4757 		/*
4758 		 * Versions >= 1.5:
4759 		 * If the vnet device has been configured to get
4760 		 * physical link state updates, check the corresponding
4761 		 * bits in the ack msg, if the peer is vswitch.
4762 		 */
4763 		if (((lp->physlink_update & PHYSLINK_UPDATE_STATE_MASK) ==
4764 		    PHYSLINK_UPDATE_STATE) &&
4765 		    ((msg->physlink_update & PHYSLINK_UPDATE_STATE_MASK) ==
4766 		    PHYSLINK_UPDATE_STATE_ACK)) {
4767 			vgenp->pls_negotiated = B_TRUE;
4768 		} else {
4769 			vgenp->pls_negotiated = B_FALSE;
4770 		}
4771 	}
4772 
4773 	/*
4774 	 * Process MTU attribute.
4775 	 */
4776 	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4777 		/*
4778 		 * Versions >= 1.4:
4779 		 * The ack msg sent by the peer contains the minimum of
4780 		 * our mtu (that we had sent in our attr info) and the
4781 		 * peer's mtu.
4782 		 *
4783 		 * If we have sent an ack for the attr info msg from
4784 		 * the peer, check if the mtu that was computed then
4785 		 * (saved in local hparams) matches the mtu that the
4786 		 * peer has ack'd. If they don't match, we fail the
4787 		 * handshake.
4788 		 */
4789 		if (ldcp->hstate & ATTR_ACK_SENT) {
4790 			if (lp->mtu != msg->mtu) {
4791 				return (VGEN_FAILURE);
4792 			}
4793 		} else {
4794 			/*
4795 			 * If the mtu ack'd by the peer is > our mtu
4796 			 * fail handshake. Otherwise, save the mtu, so
4797 			 * we can validate it when we receive attr info
4798 			 * from our peer.
4799 			 */
4800 			if (msg->mtu > lp->mtu) {
4801 				return (VGEN_FAILURE);
4802 			}
4803 			if (msg->mtu <= lp->mtu) {
4804 				lp->mtu = msg->mtu;
4805 			}
4806 		}
4807 	}
4808 
4809 	return (VGEN_SUCCESS);
4810 }
4811 
4812 
4813 /*
4814  * Handle an attribute info msg from the peer or an ACK/NACK from the peer
4815  * to an attr info msg that we sent.
4816  */
4817 static int
4818 vgen_handle_attr_msg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4819 {
4820 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4821 	vnet_attr_msg_t	*msg = (vnet_attr_msg_t *)tagp;
4822 	int		rv = 0;
4823 
4824 	DBG1(vgenp, ldcp, "enter\n");
4825 	if (ldcp->hphase != VH_PHASE2) {
4826 		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
4827 		" Invalid Phase(%u)\n",
4828 		    tagp->vio_subtype, ldcp->hphase);
4829 		return (VGEN_FAILURE);
4830 	}
4831 	switch (tagp->vio_subtype) {
4832 	case VIO_SUBTYPE_INFO:
4833 
4834 		rv = vgen_handle_attr_info(ldcp, msg);
4835 		if (rv == VGEN_SUCCESS) {
4836 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
4837 		} else {
4838 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4839 		}
4840 		tagp->vio_sid = ldcp->local_sid;
4841 
4842 		/* send reply msg back to peer */
4843 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
4844 		    B_FALSE);
4845 		if (rv != VGEN_SUCCESS) {
4846 			return (rv);
4847 		}
4848 
4849 		if (tagp->vio_subtype == VIO_SUBTYPE_NACK) {
4850 			DWARN(vgenp, ldcp, "ATTR_NACK_SENT");
4851 			break;
4852 		}
4853 
4854 		ldcp->hstate |= ATTR_ACK_SENT;
4855 		DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
4856 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4857 			rv = vgen_handshake(vh_nextphase(ldcp));
4858 			if (rv != 0) {
4859 				return (rv);
4860 			}
4861 		}
4862 
4863 		break;
4864 
4865 	case VIO_SUBTYPE_ACK:
4866 
4867 		rv = vgen_handle_attr_ack(ldcp, msg);
4868 		if (rv == VGEN_FAILURE) {
4869 			break;
4870 		}
4871 
4872 		ldcp->hstate |= ATTR_ACK_RCVD;
4873 		DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
4874 
4875 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4876 			rv = vgen_handshake(vh_nextphase(ldcp));
4877 			if (rv != 0) {
4878 				return (rv);
4879 			}
4880 		}
4881 		break;
4882 
4883 	case VIO_SUBTYPE_NACK:
4884 
4885 		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
4886 		return (VGEN_FAILURE);
4887 	}
4888 	DBG1(vgenp, ldcp, "exit\n");
4889 	return (VGEN_SUCCESS);
4890 }
4891 
4892 static int
4893 vgen_handle_dring_reg_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4894 {
4895 	int		rv = 0;
4896 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4897 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4898 
4899 	DBG2(vgenp, ldcp, "DRING_INFO_RCVD");
4900 	ldcp->hstate |= DRING_INFO_RCVD;
4901 
4902 	if (VGEN_VER_GTEQ(ldcp, 1, 6) &&
4903 	    (lp->dring_mode != ((vio_dring_reg_msg_t *)tagp)->options)) {
4904 		/*
4905 		 * The earlier version of Solaris vnet driver doesn't set the
4906 		 * option (VIO_TX_DRING in its case) correctly in its dring reg
4907 		 * message. We workaround that here by doing the check only
4908 		 * for versions >= v1.6.
4909 		 */
4910 		DWARN(vgenp, ldcp,
4911 		    "Rcvd dring reg option (%d), negotiated mode (%d)\n",
4912 		    ((vio_dring_reg_msg_t *)tagp)->options, lp->dring_mode);
4913 		return (VGEN_FAILURE);
4914 	}
4915 
4916 	/*
4917 	 * Map dring exported by the peer.
4918 	 */
4919 	rv = vgen_map_dring(ldcp, (void *)tagp);
4920 	if (rv != VGEN_SUCCESS) {
4921 		return (rv);
4922 	}
4923 
4924 	/*
4925 	 * Map data buffers exported by the peer if we are in RxDringData mode.
4926 	 */
4927 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
4928 		rv = vgen_map_data(ldcp, (void *)tagp);
4929 		if (rv != VGEN_SUCCESS) {
4930 			vgen_unmap_dring(ldcp);
4931 			return (rv);
4932 		}
4933 	}
4934 
4935 	if (ldcp->peer_hparams.dring_ready == B_FALSE) {
4936 		ldcp->peer_hparams.dring_ready = B_TRUE;
4937 	}
4938 
4939 	return (VGEN_SUCCESS);
4940 }
4941 
4942 static int
4943 vgen_handle_dring_reg_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4944 {
4945 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4946 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4947 
4948 	DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
4949 	ldcp->hstate |= DRING_ACK_RCVD;
4950 
4951 	if (lp->dring_ready) {
4952 		return (VGEN_SUCCESS);
4953 	}
4954 
4955 	/* save dring_ident acked by peer */
4956 	lp->dring_ident = ((vio_dring_reg_msg_t *)tagp)->dring_ident;
4957 
4958 	/* local dring is now ready */
4959 	lp->dring_ready = B_TRUE;
4960 
4961 	return (VGEN_SUCCESS);
4962 }
4963 
4964 /*
4965  * Handle a descriptor ring register msg from the peer or an ACK/NACK from
4966  * the peer to a dring register msg that we sent.
4967  */
4968 static int
4969 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4970 {
4971 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
4972 	int		rv = 0;
4973 	int		msgsize;
4974 	vgen_hparams_t	*lp = &ldcp->local_hparams;
4975 
4976 	DBG1(vgenp, ldcp, "enter\n");
4977 	if (ldcp->hphase < VH_PHASE2) {
4978 		/* dring_info can be rcvd in any of the phases after Phase1 */
4979 		DWARN(vgenp, ldcp,
4980 		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
4981 		    tagp->vio_subtype, ldcp->hphase);
4982 		return (VGEN_FAILURE);
4983 	}
4984 
4985 	switch (tagp->vio_subtype) {
4986 	case VIO_SUBTYPE_INFO:
4987 
4988 		rv = vgen_handle_dring_reg_info(ldcp, tagp);
4989 		if (rv == VGEN_SUCCESS) {
4990 			tagp->vio_subtype = VIO_SUBTYPE_ACK;
4991 		} else {
4992 			tagp->vio_subtype = VIO_SUBTYPE_NACK;
4993 		}
4994 
4995 		tagp->vio_sid = ldcp->local_sid;
4996 
4997 		if (lp->dring_mode == VIO_RX_DRING_DATA) {
4998 			msgsize =
4999 			    VNET_DRING_REG_EXT_MSG_SIZE(ldcp->tx_data_ncookies);
5000 		} else {
5001 			msgsize = sizeof (vio_dring_reg_msg_t);
5002 		}
5003 
5004 		/* send reply msg back to peer */
5005 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, msgsize,
5006 		    B_FALSE);
5007 		if (rv != VGEN_SUCCESS) {
5008 			return (rv);
5009 		}
5010 
5011 		if (tagp->vio_subtype == VIO_SUBTYPE_NACK) {
5012 			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5013 			return (VGEN_FAILURE);
5014 		}
5015 
5016 		ldcp->hstate |= DRING_ACK_SENT;
5017 		DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5018 
5019 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5020 			rv = vgen_handshake(vh_nextphase(ldcp));
5021 			if (rv != 0) {
5022 				return (rv);
5023 			}
5024 		}
5025 		break;
5026 
5027 	case VIO_SUBTYPE_ACK:
5028 
5029 		rv = vgen_handle_dring_reg_ack(ldcp, tagp);
5030 		if (rv == VGEN_FAILURE) {
5031 			return (rv);
5032 		}
5033 
5034 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5035 			rv = vgen_handshake(vh_nextphase(ldcp));
5036 			if (rv != 0) {
5037 				return (rv);
5038 			}
5039 		}
5040 
5041 		break;
5042 
5043 	case VIO_SUBTYPE_NACK:
5044 
5045 		DWARN(vgenp, ldcp, "DRING_NACK_RCVD");
5046 		return (VGEN_FAILURE);
5047 	}
5048 	DBG1(vgenp, ldcp, "exit\n");
5049 	return (VGEN_SUCCESS);
5050 }
5051 
5052 /*
5053  * Handle a rdx info msg from the peer or an ACK/NACK
5054  * from the peer to a rdx info msg that we sent.
5055  */
5056 static int
5057 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5058 {
5059 	int	rv = 0;
5060 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5061 
5062 	DBG1(vgenp, ldcp, "enter\n");
5063 	if (ldcp->hphase != VH_PHASE4) {
5064 		DWARN(vgenp, ldcp,
5065 		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5066 		    tagp->vio_subtype, ldcp->hphase);
5067 		return (VGEN_FAILURE);
5068 	}
5069 	switch (tagp->vio_subtype) {
5070 	case VIO_SUBTYPE_INFO:
5071 
5072 		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5073 		ldcp->hstate |= RDX_INFO_RCVD;
5074 
5075 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5076 		tagp->vio_sid = ldcp->local_sid;
5077 		/* send reply msg back to peer */
5078 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5079 		    B_FALSE);
5080 		if (rv != VGEN_SUCCESS) {
5081 			return (rv);
5082 		}
5083 
5084 		ldcp->hstate |= RDX_ACK_SENT;
5085 		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5086 
5087 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5088 			rv = vgen_handshake(vh_nextphase(ldcp));
5089 			if (rv != 0) {
5090 				return (rv);
5091 			}
5092 		}
5093 
5094 		break;
5095 
5096 	case VIO_SUBTYPE_ACK:
5097 
5098 		ldcp->hstate |= RDX_ACK_RCVD;
5099 
5100 		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5101 
5102 		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5103 			rv = vgen_handshake(vh_nextphase(ldcp));
5104 			if (rv != 0) {
5105 				return (rv);
5106 			}
5107 		}
5108 		break;
5109 
5110 	case VIO_SUBTYPE_NACK:
5111 
5112 		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5113 		return (VGEN_FAILURE);
5114 	}
5115 	DBG1(vgenp, ldcp, "exit\n");
5116 	return (VGEN_SUCCESS);
5117 }
5118 
5119 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5120 static int
5121 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5122 {
5123 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5124 	vnet_mcast_msg_t	*msgp = (vnet_mcast_msg_t *)tagp;
5125 	struct ether_addr	*addrp;
5126 	int			count;
5127 	int			i;
5128 
5129 	DBG1(vgenp, ldcp, "enter\n");
5130 	switch (tagp->vio_subtype) {
5131 
5132 	case VIO_SUBTYPE_INFO:
5133 
5134 		/* vnet shouldn't recv set mcast msg, only vsw handles it */
5135 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5136 		break;
5137 
5138 	case VIO_SUBTYPE_ACK:
5139 
5140 		/* success adding/removing multicast addr */
5141 		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5142 		break;
5143 
5144 	case VIO_SUBTYPE_NACK:
5145 
5146 		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5147 		if (!(msgp->set)) {
5148 			/* multicast remove request failed */
5149 			break;
5150 		}
5151 
5152 		/* multicast add request failed */
5153 		for (count = 0; count < msgp->count; count++) {
5154 			addrp = &(msgp->mca[count]);
5155 
5156 			/* delete address from the table */
5157 			for (i = 0; i < vgenp->mccount; i++) {
5158 				if (ether_cmp(addrp,
5159 				    &(vgenp->mctab[i])) == 0) {
5160 					if (vgenp->mccount > 1) {
5161 						int t = vgenp->mccount - 1;
5162 						vgenp->mctab[i] =
5163 						    vgenp->mctab[t];
5164 					}
5165 					vgenp->mccount--;
5166 					break;
5167 				}
5168 			}
5169 		}
5170 		break;
5171 
5172 	}
5173 	DBG1(vgenp, ldcp, "exit\n");
5174 
5175 	return (VGEN_SUCCESS);
5176 }
5177 
5178 /*
5179  * Physical link information message from the peer. Only vswitch should send
5180  * us this message; if the vnet device has been configured to get physical link
5181  * state updates. Note that we must have already negotiated this with the
5182  * vswitch during attribute exchange phase of handshake.
5183  */
5184 static int
5185 vgen_handle_physlink_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5186 {
5187 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5188 	vnet_physlink_msg_t	*msgp = (vnet_physlink_msg_t *)tagp;
5189 	link_state_t		link_state;
5190 	int			rv;
5191 
5192 	if (ldcp->portp != vgenp->vsw_portp) {
5193 		/*
5194 		 * drop the message and don't process; as we should
5195 		 * receive physlink_info message from only vswitch.
5196 		 */
5197 		return (VGEN_SUCCESS);
5198 	}
5199 
5200 	if (vgenp->pls_negotiated == B_FALSE) {
5201 		/*
5202 		 * drop the message and don't process; as we should receive
5203 		 * physlink_info message only if physlink update is enabled for
5204 		 * the device and negotiated with vswitch.
5205 		 */
5206 		return (VGEN_SUCCESS);
5207 	}
5208 
5209 	switch (tagp->vio_subtype) {
5210 
5211 	case VIO_SUBTYPE_INFO:
5212 
5213 		if ((msgp->physlink_info & VNET_PHYSLINK_STATE_MASK) ==
5214 		    VNET_PHYSLINK_STATE_UP) {
5215 			link_state = LINK_STATE_UP;
5216 		} else {
5217 			link_state = LINK_STATE_DOWN;
5218 		}
5219 
5220 		if (vgenp->phys_link_state != link_state) {
5221 			vgenp->phys_link_state = link_state;
5222 			mutex_exit(&ldcp->cblock);
5223 
5224 			/* Now update the stack */
5225 			vgen_link_update(vgenp, link_state);
5226 
5227 			mutex_enter(&ldcp->cblock);
5228 		}
5229 
5230 		tagp->vio_subtype = VIO_SUBTYPE_ACK;
5231 		tagp->vio_sid = ldcp->local_sid;
5232 
5233 		/* send reply msg back to peer */
5234 		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5235 		    sizeof (vnet_physlink_msg_t), B_FALSE);
5236 		if (rv != VGEN_SUCCESS) {
5237 			return (rv);
5238 		}
5239 		break;
5240 
5241 	case VIO_SUBTYPE_ACK:
5242 
5243 		/* vnet shouldn't recv physlink acks */
5244 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_ACK \n");
5245 		break;
5246 
5247 	case VIO_SUBTYPE_NACK:
5248 
5249 		/* vnet shouldn't recv physlink nacks */
5250 		DWARN(vgenp, ldcp, "rcvd PHYSLINK_NACK \n");
5251 		break;
5252 
5253 	}
5254 	DBG1(vgenp, ldcp, "exit\n");
5255 
5256 	return (VGEN_SUCCESS);
5257 }
5258 
5259 /* handler for control messages received from the peer ldc end-point */
5260 static int
5261 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5262 {
5263 	int	rv = 0;
5264 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5265 
5266 	DBG1(vgenp, ldcp, "enter\n");
5267 	switch (tagp->vio_subtype_env) {
5268 
5269 	case VIO_VER_INFO:
5270 		rv = vgen_handle_version_negotiate(ldcp, tagp);
5271 		break;
5272 
5273 	case VIO_ATTR_INFO:
5274 		rv = vgen_handle_attr_msg(ldcp, tagp);
5275 		break;
5276 
5277 	case VIO_DRING_REG:
5278 		rv = vgen_handle_dring_reg(ldcp, tagp);
5279 		break;
5280 
5281 	case VIO_RDX:
5282 		rv = vgen_handle_rdx_info(ldcp, tagp);
5283 		break;
5284 
5285 	case VNET_MCAST_INFO:
5286 		rv = vgen_handle_mcast_info(ldcp, tagp);
5287 		break;
5288 
5289 	case VIO_DDS_INFO:
5290 		/*
5291 		 * If we are in the process of resetting the vswitch channel,
5292 		 * drop the dds message. A new handshake will be initiated
5293 		 * when the channel comes back up after the reset and dds
5294 		 * negotiation can then continue.
5295 		 */
5296 		if (ldcp->reset_in_progress == 1) {
5297 			break;
5298 		}
5299 		rv = vgen_dds_rx(ldcp, tagp);
5300 		break;
5301 
5302 	case VNET_PHYSLINK_INFO:
5303 		rv = vgen_handle_physlink_info(ldcp, tagp);
5304 		break;
5305 	}
5306 
5307 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5308 	return (rv);
5309 }
5310 
5311 /* handler for error messages received from the peer ldc end-point */
5312 static void
5313 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5314 {
5315 	_NOTE(ARGUNUSED(ldcp, tagp))
5316 }
5317 
5318 /*
5319  * This function handles raw pkt data messages received over the channel.
5320  * Currently, only priority-eth-type frames are received through this mechanism.
5321  * In this case, the frame(data) is present within the message itself which
5322  * is copied into an mblk before sending it up the stack.
5323  */
5324 void
5325 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
5326 {
5327 	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
5328 	vio_raw_data_msg_t	*pkt	= (vio_raw_data_msg_t *)arg2;
5329 	uint32_t		size;
5330 	mblk_t			*mp;
5331 	vio_mblk_t		*vmp;
5332 	vio_net_rx_cb_t		vrx_cb = NULL;
5333 	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
5334 	vgen_stats_t		*statsp = &ldcp->stats;
5335 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5336 	uint_t			dring_mode = lp->dring_mode;
5337 
5338 	ASSERT(MUTEX_HELD(&ldcp->cblock));
5339 
5340 	mutex_exit(&ldcp->cblock);
5341 
5342 	size = msglen - VIO_PKT_DATA_HDRSIZE;
5343 	if (size < ETHERMIN || size > lp->mtu) {
5344 		(void) atomic_inc_32(&statsp->rx_pri_fail);
5345 		mutex_enter(&ldcp->cblock);
5346 		return;
5347 	}
5348 
5349 	vmp = vio_multipool_allocb(&ldcp->vmp, size);
5350 	if (vmp == NULL) {
5351 		mp = allocb(size, BPRI_MED);
5352 		if (mp == NULL) {
5353 			(void) atomic_inc_32(&statsp->rx_pri_fail);
5354 			DWARN(vgenp, ldcp, "allocb failure, "
5355 			    "unable to process priority frame\n");
5356 			mutex_enter(&ldcp->cblock);
5357 			return;
5358 		}
5359 	} else {
5360 		mp = vmp->mp;
5361 	}
5362 
5363 	/* copy the frame from the payload of raw data msg into the mblk */
5364 	bcopy(pkt->data, mp->b_rptr, size);
5365 	mp->b_wptr = mp->b_rptr + size;
5366 
5367 	if (vmp != NULL) {
5368 		vmp->state = VIO_MBLK_HAS_DATA;
5369 	}
5370 
5371 	/* update stats */
5372 	(void) atomic_inc_64(&statsp->rx_pri_packets);
5373 	(void) atomic_add_64(&statsp->rx_pri_bytes, size);
5374 
5375 	/*
5376 	 * If polling is currently enabled, add the packet to the priority
5377 	 * packets list and return. It will be picked up by the polling thread.
5378 	 */
5379 	if (dring_mode == VIO_RX_DRING_DATA) {
5380 		mutex_enter(&ldcp->rxlock);
5381 	} else {
5382 		mutex_enter(&ldcp->pollq_lock);
5383 	}
5384 
5385 	if (ldcp->polling_on == B_TRUE) {
5386 		if (ldcp->rx_pri_tail != NULL) {
5387 			ldcp->rx_pri_tail->b_next = mp;
5388 		} else {
5389 			ldcp->rx_pri_head = ldcp->rx_pri_tail = mp;
5390 		}
5391 	} else {
5392 		vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
5393 	}
5394 
5395 	if (dring_mode == VIO_RX_DRING_DATA) {
5396 		mutex_exit(&ldcp->rxlock);
5397 	} else {
5398 		mutex_exit(&ldcp->pollq_lock);
5399 	}
5400 
5401 	if (vrx_cb != NULL) {
5402 		vrx_cb(ldcp->portp->vhp, mp);
5403 	}
5404 
5405 	mutex_enter(&ldcp->cblock);
5406 }
5407 
5408 /*
5409  * dummy pkt data handler function for vnet protocol version 1.0
5410  */
5411 static void
5412 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
5413 {
5414 	_NOTE(ARGUNUSED(arg1, arg2, msglen))
5415 }
5416 
5417 /* handler for data messages received from the peer ldc end-point */
5418 static int
5419 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5420 {
5421 	int		rv = 0;
5422 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5423 	vgen_hparams_t	*lp = &ldcp->local_hparams;
5424 
5425 	DBG1(vgenp, ldcp, "enter\n");
5426 
5427 	if (ldcp->hphase != VH_DONE) {
5428 		return (0);
5429 	}
5430 
5431 	/*
5432 	 * We check the data msg seqnum. This is needed only in TxDring mode.
5433 	 */
5434 	if (lp->dring_mode == VIO_TX_DRING &&
5435 	    tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5436 		rv = vgen_check_datamsg_seq(ldcp, tagp);
5437 		if (rv != 0) {
5438 			return (rv);
5439 		}
5440 	}
5441 
5442 	switch (tagp->vio_subtype_env) {
5443 	case VIO_DRING_DATA:
5444 		rv = ldcp->rx_dringdata((void *)ldcp, (void *)tagp);
5445 		break;
5446 
5447 	case VIO_PKT_DATA:
5448 		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
5449 		break;
5450 	default:
5451 		break;
5452 	}
5453 
5454 	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5455 	return (rv);
5456 }
5457 
5458 
5459 static int
5460 vgen_ldc_reset(vgen_ldc_t *ldcp, vgen_caller_t caller)
5461 {
5462 	int	rv;
5463 
5464 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5465 		ASSERT(MUTEX_HELD(&ldcp->cblock));
5466 	}
5467 
5468 	/* Set the flag to indicate reset is in progress */
5469 	if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
5470 		/* another thread is already in the process of resetting */
5471 		return (EBUSY);
5472 	}
5473 
5474 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5475 		mutex_exit(&ldcp->cblock);
5476 	}
5477 
5478 	rv = vgen_process_reset(ldcp, VGEN_FLAG_NEED_LDCRESET);
5479 
5480 	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5481 		mutex_enter(&ldcp->cblock);
5482 	}
5483 
5484 	return (rv);
5485 }
5486 
5487 static void
5488 vgen_ldc_up(vgen_ldc_t *ldcp)
5489 {
5490 	int		rv;
5491 	uint32_t	retries = 0;
5492 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5493 
5494 	ASSERT(MUTEX_HELD(&ldcp->cblock));
5495 
5496 	/*
5497 	 * If the channel has been reset max # of times, without successfully
5498 	 * completing handshake, stop and do not bring the channel up.
5499 	 */
5500 	if (ldcp->ldc_reset_count == vgen_ldc_max_resets) {
5501 		cmn_err(CE_WARN, "!vnet%d: exceeded number of permitted"
5502 		    " handshake attempts (%d) on channel %ld",
5503 		    vgenp->instance, vgen_ldc_max_resets, ldcp->ldc_id);
5504 		return;
5505 	}
5506 	ldcp->ldc_reset_count++;
5507 
5508 	do {
5509 		rv = ldc_up(ldcp->ldc_handle);
5510 		if ((rv != 0) && (rv == EWOULDBLOCK)) {
5511 			drv_usecwait(VGEN_LDC_UP_DELAY);
5512 		}
5513 		if (retries++ >= vgen_ldcup_retries)
5514 			break;
5515 	} while (rv == EWOULDBLOCK);
5516 
5517 	if (rv != 0) {
5518 		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
5519 	}
5520 }
5521 
5522 int
5523 vgen_enable_intr(void *arg)
5524 {
5525 	uint32_t		end_ix;
5526 	vio_dring_msg_t		msg;
5527 	vgen_port_t		*portp = (vgen_port_t *)arg;
5528 	vgen_ldc_t		*ldcp = portp->ldcp;
5529 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5530 
5531 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
5532 		mutex_enter(&ldcp->rxlock);
5533 
5534 		ldcp->polling_on = B_FALSE;
5535 		/*
5536 		 * We send a stopped message to peer (sender) as we are turning
5537 		 * off polled mode. This effectively restarts data interrupts
5538 		 * by allowing the peer to send further dring data msgs to us.
5539 		 */
5540 		end_ix = ldcp->next_rxi;
5541 		DECR_RXI(end_ix, ldcp);
5542 		msg.dring_ident = ldcp->peer_hparams.dring_ident;
5543 		(void) vgen_send_dringack_shm(ldcp, (vio_msg_tag_t *)&msg,
5544 		    VNET_START_IDX_UNSPEC, end_ix, VIO_DP_STOPPED);
5545 
5546 		mutex_exit(&ldcp->rxlock);
5547 	} else {
5548 		mutex_enter(&ldcp->pollq_lock);
5549 		ldcp->polling_on = B_FALSE;
5550 		mutex_exit(&ldcp->pollq_lock);
5551 	}
5552 
5553 	return (0);
5554 }
5555 
5556 int
5557 vgen_disable_intr(void *arg)
5558 {
5559 	vgen_port_t		*portp = (vgen_port_t *)arg;
5560 	vgen_ldc_t		*ldcp = portp->ldcp;
5561 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5562 
5563 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
5564 		mutex_enter(&ldcp->rxlock);
5565 		ldcp->polling_on = B_TRUE;
5566 		mutex_exit(&ldcp->rxlock);
5567 	} else {
5568 		mutex_enter(&ldcp->pollq_lock);
5569 		ldcp->polling_on = B_TRUE;
5570 		mutex_exit(&ldcp->pollq_lock);
5571 	}
5572 
5573 	return (0);
5574 }
5575 
5576 mblk_t *
5577 vgen_rx_poll(void *arg, int bytes_to_pickup)
5578 {
5579 	vgen_port_t		*portp = (vgen_port_t *)arg;
5580 	vgen_ldc_t		*ldcp = portp->ldcp;
5581 	vgen_hparams_t		*lp = &ldcp->local_hparams;
5582 	mblk_t			*mp = NULL;
5583 
5584 	if (lp->dring_mode == VIO_RX_DRING_DATA) {
5585 		mp = vgen_poll_rcv_shm(ldcp, bytes_to_pickup);
5586 	} else {
5587 		mp = vgen_poll_rcv(ldcp, bytes_to_pickup);
5588 	}
5589 
5590 	return (mp);
5591 }
5592 
5593 /* transmit watchdog timeout handler */
5594 static void
5595 vgen_tx_watchdog(void *arg)
5596 {
5597 	vgen_ldc_t	*ldcp;
5598 	vgen_t		*vgenp;
5599 	int		rv;
5600 	boolean_t	tx_blocked;
5601 	clock_t		tx_blocked_lbolt;
5602 
5603 	ldcp = (vgen_ldc_t *)arg;
5604 	vgenp = LDC_TO_VGEN(ldcp);
5605 
5606 	tx_blocked = ldcp->tx_blocked;
5607 	tx_blocked_lbolt = ldcp->tx_blocked_lbolt;
5608 
5609 	if (vgen_txwd_timeout &&
5610 	    (tx_blocked == B_TRUE) &&
5611 	    ((ddi_get_lbolt() - tx_blocked_lbolt) >
5612 	    drv_usectohz(vgen_txwd_timeout * 1000))) {
5613 		/*
5614 		 * Something is wrong; the peer is not picking up the packets
5615 		 * in the transmit dring. We now go ahead and reset the channel
5616 		 * to break out of this condition.
5617 		 */
5618 		DWARN(vgenp, ldcp, "transmit timeout lbolt(%lx), "
5619 		    "tx_blocked_lbolt(%lx)\n",
5620 		    ddi_get_lbolt(), tx_blocked_lbolt);
5621 
5622 #ifdef DEBUG
5623 		if (vgen_inject_error(ldcp, VGEN_ERR_TXTIMEOUT)) {
5624 			/* tx timeout triggered for debugging */
5625 			vgen_inject_err_flag &= ~(VGEN_ERR_TXTIMEOUT);
5626 		}
5627 #endif
5628 
5629 		/*
5630 		 * Clear tid before invoking vgen_ldc_reset(). Otherwise,
5631 		 * it will result in a deadlock when vgen_process_reset() tries
5632 		 * to untimeout() on seeing a non-zero tid, but it is being
5633 		 * invoked by the timer itself in this case.
5634 		 */
5635 		mutex_enter(&ldcp->cblock);
5636 		if (ldcp->wd_tid == 0) {
5637 			/* Cancelled by vgen_process_reset() */
5638 			mutex_exit(&ldcp->cblock);
5639 			return;
5640 		}
5641 		ldcp->wd_tid = 0;
5642 		mutex_exit(&ldcp->cblock);
5643 
5644 		/*
5645 		 * Now reset the channel.
5646 		 */
5647 		rv = vgen_ldc_reset(ldcp, VGEN_OTHER);
5648 		if (rv == 0) {
5649 			/*
5650 			 * We have successfully reset the channel. If we are
5651 			 * in tx flow controlled state, clear it now and enable
5652 			 * transmit in the upper layer.
5653 			 */
5654 			if (ldcp->tx_blocked) {
5655 				vio_net_tx_update_t vtx_update =
5656 				    ldcp->portp->vcb.vio_net_tx_update;
5657 
5658 				ldcp->tx_blocked = B_FALSE;
5659 				vtx_update(ldcp->portp->vhp);
5660 			}
5661 		}
5662 
5663 		/*
5664 		 * Channel has been reset by us or some other thread is already
5665 		 * in the process of resetting. In either case, we return
5666 		 * without restarting the timer. When handshake completes and
5667 		 * the channel is ready for data transmit/receive we start a
5668 		 * new watchdog timer.
5669 		 */
5670 		return;
5671 	}
5672 
5673 restart_timer:
5674 	/* Restart the timer */
5675 	mutex_enter(&ldcp->cblock);
5676 	if (ldcp->wd_tid == 0) {
5677 		/* Cancelled by vgen_process_reset() */
5678 		mutex_exit(&ldcp->cblock);
5679 		return;
5680 	}
5681 	ldcp->wd_tid = timeout(vgen_tx_watchdog, (caddr_t)ldcp,
5682 	    drv_usectohz(vgen_txwd_interval * 1000));
5683 	mutex_exit(&ldcp->cblock);
5684 }
5685 
5686 /* Handshake watchdog timeout handler */
5687 static void
5688 vgen_hwatchdog(void *arg)
5689 {
5690 	vgen_ldc_t	*ldcp = (vgen_ldc_t *)arg;
5691 	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
5692 
5693 	DWARN(vgenp, ldcp, "handshake timeout phase(%x) state(%x)\n",
5694 	    ldcp->hphase, ldcp->hstate);
5695 
5696 	mutex_enter(&ldcp->cblock);
5697 	if (ldcp->htid == 0) {
5698 		/* Cancelled by vgen_process_reset() */
5699 		mutex_exit(&ldcp->cblock);
5700 		return;
5701 	}
5702 	ldcp->htid = 0;
5703 	mutex_exit(&ldcp->cblock);
5704 
5705 	/*
5706 	 * Something is wrong; handshake with the peer seems to be hung. We now
5707 	 * go ahead and reset the channel to break out of this condition.
5708 	 */
5709 	(void) vgen_ldc_reset(ldcp, VGEN_OTHER);
5710 }
5711 
5712 /* Check if the session id in the received message is valid */
5713 static int
5714 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5715 {
5716 	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
5717 
5718 	if (tagp->vio_sid != ldcp->peer_sid) {
5719 		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
5720 		    ldcp->peer_sid, tagp->vio_sid);
5721 		return (VGEN_FAILURE);
5722 	}
5723 	else
5724 		return (VGEN_SUCCESS);
5725 }
5726 
5727 /*
5728  * Initialize the common part of dring registration
5729  * message; used in both TxDring and RxDringData modes.
5730  */
5731 static void
5732 vgen_init_dring_reg_msg(vgen_ldc_t *ldcp, vio_dring_reg_msg_t *msg,
5733 	uint8_t option)
5734 {
5735 	vio_msg_tag_t		*tagp;
5736 
5737 	tagp = &msg->tag;
5738 	tagp->vio_msgtype = VIO_TYPE_CTRL;
5739 	tagp->vio_subtype = VIO_SUBTYPE_INFO;
5740 	tagp->vio_subtype_env = VIO_DRING_REG;
5741 	tagp->vio_sid = ldcp->local_sid;
5742 
5743 	/* get dring info msg payload from ldcp->local */
5744 	bcopy(&(ldcp->local_hparams.dring_cookie), &(msg->cookie[0]),
5745 	    sizeof (ldc_mem_cookie_t));
5746 	msg->ncookies = ldcp->local_hparams.dring_ncookies;
5747 	msg->num_descriptors = ldcp->local_hparams.num_desc;
5748 	msg->descriptor_size = ldcp->local_hparams.desc_size;
5749 
5750 	msg->options = option;
5751 
5752 	/*
5753 	 * dring_ident is set to 0. After mapping the dring, peer sets this
5754 	 * value and sends it in the ack, which is saved in
5755 	 * vgen_handle_dring_reg().
5756 	 */
5757 	msg->dring_ident = 0;
5758 }
5759 
5760 #if DEBUG
5761 
5762 /*
5763  * Print debug messages - set to 0xf to enable all msgs
5764  */
5765 void
5766 vgen_debug_printf(const char *fname, vgen_t *vgenp,
5767     vgen_ldc_t *ldcp, const char *fmt, ...)
5768 {
5769 	char	buf[256];
5770 	char	*bufp = buf;
5771 	va_list	ap;
5772 
5773 	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
5774 		(void) sprintf(bufp, "vnet%d:",
5775 		    ((vnet_t *)(vgenp->vnetp))->instance);
5776 		bufp += strlen(bufp);
5777 	}
5778 	if (ldcp != NULL) {
5779 		(void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
5780 		bufp += strlen(bufp);
5781 	}
5782 	(void) sprintf(bufp, "%s: ", fname);
5783 	bufp += strlen(bufp);
5784 
5785 	va_start(ap, fmt);
5786 	(void) vsprintf(bufp, fmt, ap);
5787 	va_end(ap);
5788 
5789 	if ((ldcp == NULL) ||(vgendbg_ldcid == -1) ||
5790 	    (vgendbg_ldcid == ldcp->ldc_id)) {
5791 		cmn_err(CE_CONT, "%s\n", buf);
5792 	}
5793 }
5794 #endif
5795 
5796 #ifdef	VNET_IOC_DEBUG
5797 
5798 static void
5799 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
5800 {
5801 	struct iocblk	*iocp;
5802 	vgen_port_t	*portp;
5803 	enum		ioc_reply {
5804 			IOC_INVAL = -1,		/* bad, NAK with EINVAL */
5805 			IOC_ACK			/* OK, just send ACK    */
5806 	}		status;
5807 	int		rv;
5808 
5809 	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
5810 	iocp->ioc_error = 0;
5811 	portp = (vgen_port_t *)arg;
5812 
5813 	if (portp == NULL) {
5814 		status = IOC_INVAL;
5815 		goto vgen_ioc_exit;
5816 	}
5817 
5818 	mutex_enter(&portp->lock);
5819 
5820 	switch (iocp->ioc_cmd) {
5821 
5822 	case VNET_FORCE_LINK_DOWN:
5823 	case VNET_FORCE_LINK_UP:
5824 		rv = vgen_force_link_state(portp, iocp->ioc_cmd);
5825 		(rv == 0) ? (status = IOC_ACK) : (status = IOC_INVAL);
5826 		break;
5827 
5828 	default:
5829 		status = IOC_INVAL;
5830 		break;
5831 
5832 	}
5833 
5834 	mutex_exit(&portp->lock);
5835 
5836 vgen_ioc_exit:
5837 
5838 	switch (status) {
5839 	default:
5840 	case IOC_INVAL:
5841 		/* Error, reply with a NAK and EINVAL error */
5842 		miocnak(q, mp, 0, EINVAL);
5843 		break;
5844 	case IOC_ACK:
5845 		/* OK, reply with an ACK */
5846 		miocack(q, mp, 0, 0);
5847 		break;
5848 	}
5849 }
5850 
5851 static int
5852 vgen_force_link_state(vgen_port_t *portp, int cmd)
5853 {
5854 	ldc_status_t	istatus;
5855 	int		rv;
5856 	vgen_ldc_t	*ldcp = portp->ldcp;
5857 	vgen_t		*vgenp = portp->vgenp;
5858 
5859 	mutex_enter(&ldcp->cblock);
5860 
5861 	switch (cmd) {
5862 
5863 	case VNET_FORCE_LINK_DOWN:
5864 		(void) ldc_down(ldcp->ldc_handle);
5865 		ldcp->link_down_forced = B_TRUE;
5866 		break;
5867 
5868 	case VNET_FORCE_LINK_UP:
5869 		vgen_ldc_up(ldcp);
5870 		ldcp->link_down_forced = B_FALSE;
5871 
5872 		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
5873 			DWARN(vgenp, ldcp, "ldc_status err\n");
5874 		} else {
5875 			ldcp->ldc_status = istatus;
5876 		}
5877 
5878 		/* if channel is already UP - restart handshake */
5879 		if (ldcp->ldc_status == LDC_UP) {
5880 			vgen_handle_evt_up(ldcp);
5881 		}
5882 		break;
5883 
5884 	}
5885 
5886 	mutex_exit(&ldcp->cblock);
5887 
5888 	return (0);
5889 }
5890 
5891 #else
5892 
5893 static void
5894 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
5895 {
5896 	vgen_port_t	*portp;
5897 
5898 	portp = (vgen_port_t *)arg;
5899 
5900 	if (portp == NULL) {
5901 		miocnak(q, mp, 0, EINVAL);
5902 		return;
5903 	}
5904 
5905 	miocnak(q, mp, 0, ENOTSUP);
5906 }
5907 
5908 #endif
5909