xref: /illumos-gate/usr/src/uts/common/io/aggr/aggr_grp.c (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
28  *
29  * An instance of the structure aggr_grp_t is allocated for each
30  * link aggregation group. When created, aggr_grp_t objects are
31  * entered into the aggr_grp_hash hash table maintained by the modhash
32  * module. The hash key is the linkid associated with the link
33  * aggregation group.
34  *
35  * A set of MAC ports is associated with each link aggregation group.
36  */
37 
38 #include <sys/types.h>
39 #include <sys/sysmacros.h>
40 #include <sys/conf.h>
41 #include <sys/cmn_err.h>
42 #include <sys/disp.h>
43 #include <sys/list.h>
44 #include <sys/ksynch.h>
45 #include <sys/kmem.h>
46 #include <sys/stream.h>
47 #include <sys/modctl.h>
48 #include <sys/ddi.h>
49 #include <sys/sunddi.h>
50 #include <sys/atomic.h>
51 #include <sys/stat.h>
52 #include <sys/modhash.h>
53 #include <sys/id_space.h>
54 #include <sys/strsun.h>
55 #include <sys/dlpi.h>
56 #include <sys/mac_provider.h>
57 #include <sys/dls.h>
58 #include <sys/vlan.h>
59 #include <sys/aggr.h>
60 #include <sys/aggr_impl.h>
61 
/*
 * MAC entry points implemented by the aggr. Most of them are installed
 * in the aggr_m_callbacks table defined further down in this file.
 */
62 static int aggr_m_start(void *);
63 static void aggr_m_stop(void *);
64 static int aggr_m_promisc(void *, boolean_t);
65 static int aggr_m_multicst(void *, boolean_t, const uint8_t *);
66 static int aggr_m_unicst(void *, const uint8_t *);
67 static int aggr_m_stat(void *, uint_t, uint64_t *);
68 static void aggr_m_ioctl(void *, queue_t *, mblk_t *);
69 static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *);
70 static int aggr_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
71     const void *);
72 static int aggr_m_getprop(void *, const char *, mac_prop_id_t, uint_t,
73     uint_t, void *, uint_t *);
74 
75 
/* Lookup and removal of a constituent port of a group. */
76 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t);
77 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
78     boolean_t *);
79 
/* Group capability, SDU and margin helpers. */
80 static void aggr_grp_capab_set(aggr_grp_t *);
81 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
82 static uint_t aggr_grp_max_sdu(aggr_grp_t *);
83 static uint32_t aggr_grp_max_margin(aggr_grp_t *);
84 static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
85 static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *);
86 
/* Pseudo RX group and ring support, plus the ring/group fill routines. */
87 static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
88 static void aggr_rem_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
89 static int aggr_pseudo_disable_intr(mac_intr_handle_t);
90 static int aggr_pseudo_enable_intr(mac_intr_handle_t);
91 static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t);
92 static void aggr_pseudo_stop_ring(mac_ring_driver_t);
93 static int aggr_addmac(void *, const uint8_t *);
94 static int aggr_remmac(void *, const uint8_t *);
95 static mblk_t *aggr_rx_poll(void *, int);
96 static void aggr_fill_ring(void *, mac_ring_type_t, const int,
97     const int, mac_ring_info_t *, mac_ring_handle_t);
98 static void aggr_fill_group(void *, mac_ring_type_t, const int,
99     mac_group_info_t *, mac_group_handle_t);
100 
/*
 * Module-wide state:
 *  aggr_grp_cache - kmem cache from which aggr_grp_t objects are allocated.
 *  aggr_grp_hash  - id hash of the existing groups, keyed by linkid.
 *  aggr_grp_lock  - rwlock held around lookups in aggr_grp_hash and reads
 *                   of aggr_grp_cnt.
 *  aggr_grp_cnt   - value reported by aggr_grp_count().
 *  key_ids        - id space used to allocate a key when the user did not
 *                   specify one.
 */
101 static kmem_cache_t	*aggr_grp_cache;
102 static mod_hash_t	*aggr_grp_hash;
103 static krwlock_t	aggr_grp_lock;
104 static uint_t		aggr_grp_cnt;
105 static id_space_t	*key_ids;
106 
/* Size passed to mod_hash_create_idhash() and linkid -> hash key conversion. */
107 #define	GRP_HASHSZ		64
108 #define	GRP_HASH_KEY(linkid)	((mod_hash_key_t)(uintptr_t)linkid)
109 #define	AGGR_PORT_NAME_DELIMIT '-'
110 
/* All-zero MAC address, compared against to reject an unset fixed address. */
111 static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};
112 
/* Optional callbacks advertised in the first member of aggr_m_callbacks. */
113 #define	AGGR_M_CALLBACK_FLAGS	\
114 	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP)
115 
/*
 * Callback table registered with the MAC layer for every aggregation
 * (see m_callbacks in aggr_grp_create()). The initializer is positional,
 * so the order of the entries must follow the layout of mac_callbacks_t;
 * NULL marks a slot for which no callback is supplied here.
 */
116 static mac_callbacks_t aggr_m_callbacks = {
117 	AGGR_M_CALLBACK_FLAGS,
118 	aggr_m_stat,
119 	aggr_m_start,
120 	aggr_m_stop,
121 	aggr_m_promisc,
122 	aggr_m_multicst,
123 	NULL,
124 	aggr_m_tx,
125 	aggr_m_ioctl,
126 	aggr_m_capab_get,
127 	NULL,
128 	NULL,
129 	aggr_m_setprop,
130 	aggr_m_getprop
131 };
132 
133 /*ARGSUSED*/
134 static int
135 aggr_grp_constructor(void *buf, void *arg, int kmflag)
136 {
137 	aggr_grp_t *grp = buf;
138 
139 	bzero(grp, sizeof (*grp));
140 	mutex_init(&grp->lg_lacp_lock, NULL, MUTEX_DEFAULT, NULL);
141 	cv_init(&grp->lg_lacp_cv, NULL, CV_DEFAULT, NULL);
142 	rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL);
143 	mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL);
144 	cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL);
145 	grp->lg_link_state = LINK_STATE_UNKNOWN;
146 	return (0);
147 }
148 
149 /*ARGSUSED*/
150 static void
151 aggr_grp_destructor(void *buf, void *arg)
152 {
153 	aggr_grp_t *grp = buf;
154 
155 	if (grp->lg_tx_ports != NULL) {
156 		kmem_free(grp->lg_tx_ports,
157 		    grp->lg_tx_ports_size * sizeof (aggr_port_t *));
158 	}
159 
160 	mutex_destroy(&grp->lg_lacp_lock);
161 	cv_destroy(&grp->lg_lacp_cv);
162 	mutex_destroy(&grp->lg_port_lock);
163 	cv_destroy(&grp->lg_port_cv);
164 	rw_destroy(&grp->lg_tx_lock);
165 }
166 
167 void
168 aggr_grp_init(void)
169 {
170 	aggr_grp_cache = kmem_cache_create("aggr_grp_cache",
171 	    sizeof (aggr_grp_t), 0, aggr_grp_constructor,
172 	    aggr_grp_destructor, NULL, NULL, NULL, 0);
173 
174 	aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash",
175 	    GRP_HASHSZ, mod_hash_null_valdtor);
176 	rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL);
177 	aggr_grp_cnt = 0;
178 
179 	/*
180 	 * Allocate an id space to manage key values (when key is not
181 	 * specified). The range of the id space will be from
182 	 * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol
183 	 * uses a 16-bit key.
184 	 */
185 	key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX);
186 	ASSERT(key_ids != NULL);
187 }
188 
189 void
190 aggr_grp_fini(void)
191 {
192 	id_space_destroy(key_ids);
193 	rw_destroy(&aggr_grp_lock);
194 	mod_hash_destroy_idhash(aggr_grp_hash);
195 	kmem_cache_destroy(aggr_grp_cache);
196 }
197 
198 uint_t
199 aggr_grp_count(void)
200 {
201 	uint_t	count;
202 
203 	rw_enter(&aggr_grp_lock, RW_READER);
204 	count = aggr_grp_cnt;
205 	rw_exit(&aggr_grp_lock);
206 	return (count);
207 }
208 
209 /*
210  * Since both aggr_port_notify_cb() and aggr_port_timer_thread() functions
211  * requires the mac perimeter, this function holds a reference of the aggr
212  * and aggr won't call mac_unregister() until this reference drops to 0.
213  */
214 void
215 aggr_grp_port_hold(aggr_port_t *port)
216 {
217 	aggr_grp_t	*grp = port->lp_grp;
218 
219 	AGGR_PORT_REFHOLD(port);
220 	mutex_enter(&grp->lg_port_lock);
221 	grp->lg_port_ref++;
222 	mutex_exit(&grp->lg_port_lock);
223 }
224 
225 /*
226  * Release the reference of the grp and inform aggr_grp_delete() calling
227  * mac_unregister() is now safe.
228  */
229 void
230 aggr_grp_port_rele(aggr_port_t *port)
231 {
232 	aggr_grp_t	*grp = port->lp_grp;
233 
234 	mutex_enter(&grp->lg_port_lock);
235 	if (--grp->lg_port_ref == 0)
236 		cv_signal(&grp->lg_port_cv);
237 	mutex_exit(&grp->lg_port_lock);
238 	AGGR_PORT_REFRELE(port);
239 }
240 
241 /*
242  * Wait for the port's lacp timer thread and the port's notification callback
243  * to exit.
244  */
245 void
246 aggr_grp_port_wait(aggr_grp_t *grp)
247 {
248 	mutex_enter(&grp->lg_port_lock);
249 	if (grp->lg_port_ref != 0)
250 		cv_wait(&grp->lg_port_cv, &grp->lg_port_lock);
251 	mutex_exit(&grp->lg_port_lock);
252 }
253 
254 /*
255  * Attach a port to a link aggregation group.
256  *
257  * A port is attached to a link aggregation group once its speed
258  * and link state have been verified.
259  *
260  * Returns B_TRUE if the group link state or speed has changed. If
261  * it's the case, the caller must notify the MAC layer via a call
262  * to mac_link().
263  */
264 boolean_t
265 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
266 {
267 	boolean_t link_state_changed = B_FALSE;
268 
269 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
270 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
271 
272 	if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
273 		return (B_FALSE);
274 
275 	/*
276 	 * Validate the MAC port link speed and update the group
277 	 * link speed if needed.
278 	 */
279 	if (port->lp_ifspeed == 0 ||
280 	    port->lp_link_state != LINK_STATE_UP ||
281 	    port->lp_link_duplex != LINK_DUPLEX_FULL) {
282 		/*
283 		 * Can't attach a MAC port with unknown link speed,
284 		 * down link, or not in full duplex mode.
285 		 */
286 		return (B_FALSE);
287 	}
288 
289 	if (grp->lg_ifspeed == 0) {
290 		/*
291 		 * The group inherits the speed of the first link being
292 		 * attached.
293 		 */
294 		grp->lg_ifspeed = port->lp_ifspeed;
295 		link_state_changed = B_TRUE;
296 	} else if (grp->lg_ifspeed != port->lp_ifspeed) {
297 		/*
298 		 * The link speed of the MAC port must be the same as
299 		 * the group link speed, as per 802.3ad. Since it is
300 		 * not, the attach is cancelled.
301 		 */
302 		return (B_FALSE);
303 	}
304 
305 	grp->lg_nattached_ports++;
306 
307 	/*
308 	 * Update the group link state.
309 	 */
310 	if (grp->lg_link_state != LINK_STATE_UP) {
311 		grp->lg_link_state = LINK_STATE_UP;
312 		grp->lg_link_duplex = LINK_DUPLEX_FULL;
313 		link_state_changed = B_TRUE;
314 	}
315 
316 	/*
317 	 * Update port's state.
318 	 */
319 	port->lp_state = AGGR_PORT_STATE_ATTACHED;
320 
321 	aggr_grp_multicst_port(port, B_TRUE);
322 
323 	/*
324 	 * Set port's receive callback
325 	 */
326 	mac_rx_set(port->lp_mch, aggr_recv_cb, port);
327 
328 	/*
329 	 * If LACP is OFF, the port can be used to send data as soon
330 	 * as its link is up and verified to be compatible with the
331 	 * aggregation.
332 	 *
333 	 * If LACP is active or passive, notify the LACP subsystem, which
334 	 * will enable sending on the port following the LACP protocol.
335 	 */
336 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
337 		aggr_send_port_enable(port);
338 	else
339 		aggr_lacp_port_attached(port);
340 
341 	return (link_state_changed);
342 }
343 
344 boolean_t
345 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
346 {
347 	boolean_t link_state_changed = B_FALSE;
348 
349 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
350 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
351 
352 	/* update state */
353 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
354 		return (B_FALSE);
355 
356 	mac_rx_clear(port->lp_mch);
357 
358 	aggr_grp_multicst_port(port, B_FALSE);
359 
360 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
361 		aggr_send_port_disable(port);
362 	else
363 		aggr_lacp_port_detached(port);
364 
365 	port->lp_state = AGGR_PORT_STATE_STANDBY;
366 
367 	grp->lg_nattached_ports--;
368 	if (grp->lg_nattached_ports == 0) {
369 		/* the last attached MAC port of the group is being detached */
370 		grp->lg_ifspeed = 0;
371 		grp->lg_link_state = LINK_STATE_DOWN;
372 		grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
373 		link_state_changed = B_TRUE;
374 	}
375 
376 	return (link_state_changed);
377 }
378 
379 /*
380  * Update the MAC addresses of the constituent ports of the specified
381  * group. This function is invoked:
382  * - after creating a new aggregation group.
383  * - after adding new ports to an aggregation group.
384  * - after removing a port from a group when the MAC address of
385  *   that port was used for the MAC address of the group.
386  * - after the MAC address of a port changed when the MAC address
387  *   of that port was used for the MAC address of the group.
388  *
389  * Return true if the link state of the aggregation changed, for example
390  * as a result of a failure changing the MAC address of one of the
391  * constituent ports.
392  */
393 boolean_t
394 aggr_grp_update_ports_mac(aggr_grp_t *grp)
395 {
396 	aggr_port_t *cport;
397 	boolean_t link_state_changed = B_FALSE;
398 	mac_perim_handle_t mph;
399 
400 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
401 
402 	for (cport = grp->lg_ports; cport != NULL;
403 	    cport = cport->lp_next) {
404 		mac_perim_enter_by_mh(cport->lp_mh, &mph);
405 		if (aggr_port_unicst(cport) != 0) {
406 			if (aggr_grp_detach_port(grp, cport))
407 				link_state_changed = B_TRUE;
408 		} else {
409 			/*
410 			 * If a port was detached because of a previous
411 			 * failure changing the MAC address, the port is
412 			 * reattached when it successfully changes the MAC
413 			 * address now, and this might cause the link state
414 			 * of the aggregation to change.
415 			 */
416 			if (aggr_grp_attach_port(grp, cport))
417 				link_state_changed = B_TRUE;
418 		}
419 		mac_perim_exit(mph);
420 	}
421 	return (link_state_changed);
422 }
423 
424 /*
425  * Invoked when the MAC address of a port has changed. If the port's
426  * MAC address was used for the group MAC address, set mac_addr_changedp
427  * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST
428  * notification. If the link state changes due to detach/attach of
429  * the constituent port, set link_state_changedp to B_TRUE to indicate
430  * to the caller that it should send a MAC_NOTE_LINK notification. In both
431  * cases, it is the responsibility of the caller to invoke notification
432  * functions after releasing the the port lock.
433  */
434 void
435 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port,
436     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
437 {
438 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
439 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
440 	ASSERT(mac_addr_changedp != NULL);
441 	ASSERT(link_state_changedp != NULL);
442 
443 	*mac_addr_changedp = B_FALSE;
444 	*link_state_changedp = B_FALSE;
445 
446 	if (grp->lg_addr_fixed) {
447 		/*
448 		 * The group is using a fixed MAC address or an automatic
449 		 * MAC address has not been set.
450 		 */
451 		return;
452 	}
453 
454 	if (grp->lg_mac_addr_port == port) {
455 		/*
456 		 * The MAC address of the port was assigned to the group
457 		 * MAC address. Update the group MAC address.
458 		 */
459 		bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
460 		*mac_addr_changedp = B_TRUE;
461 	} else {
462 		/*
463 		 * Update the actual port MAC address to the MAC address
464 		 * of the group.
465 		 */
466 		if (aggr_port_unicst(port) != 0) {
467 			*link_state_changedp = aggr_grp_detach_port(grp, port);
468 		} else {
469 			/*
470 			 * If a port was detached because of a previous
471 			 * failure changing the MAC address, the port is
472 			 * reattached when it successfully changes the MAC
473 			 * address now, and this might cause the link state
474 			 * of the aggregation to change.
475 			 */
476 			*link_state_changedp = aggr_grp_attach_port(grp, port);
477 		}
478 	}
479 }
480 
481 /*
482  * Add a port to a link aggregation group.
483  */
484 static int
485 aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force,
486     aggr_port_t **pp)
487 {
488 	aggr_port_t *port, **cport;
489 	mac_perim_handle_t mph;
490 	int err;
491 
492 	/*
493 	 * lg_mh could be NULL when the function is called during the creation
494 	 * of the aggregation.
495 	 */
496 	ASSERT(grp->lg_mh == NULL || MAC_PERIM_HELD(grp->lg_mh));
497 
498 	/* create new port */
499 	err = aggr_port_create(grp, port_linkid, force, &port);
500 	if (err != 0)
501 		return (err);
502 
503 	mac_perim_enter_by_mh(port->lp_mh, &mph);
504 
505 	/* add port to list of group constituent ports */
506 	cport = &grp->lg_ports;
507 	while (*cport != NULL)
508 		cport = &((*cport)->lp_next);
509 	*cport = port;
510 
511 	/*
512 	 * Back reference to the group it is member of. A port always
513 	 * holds a reference to its group to ensure that the back
514 	 * reference is always valid.
515 	 */
516 	port->lp_grp = grp;
517 	AGGR_GRP_REFHOLD(grp);
518 	grp->lg_nports++;
519 
520 	aggr_lacp_init_port(port);
521 	mac_perim_exit(mph);
522 
523 	if (pp != NULL)
524 		*pp = port;
525 
526 	return (0);
527 }
528 
529 /*
530  * Add a pseudo Rx ring for the given HW ring handle.
531  */
532 static int
533 aggr_add_pseudo_rx_ring(aggr_port_t *port,
534     aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
535 {
536 	aggr_pseudo_rx_ring_t	*ring;
537 	int			err;
538 	int			j;
539 
540 	for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
541 		ring = rx_grp->arg_rings + j;
542 		if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE))
543 			break;
544 	}
545 
546 	/*
547 	 * No slot for this new Rx ring.
548 	 */
549 	if (j == MAX_RINGS_PER_GROUP)
550 		return (EIO);
551 
552 	ring->arr_flags |= MAC_PSEUDO_RING_INUSE;
553 	ring->arr_hw_rh = hw_rh;
554 	ring->arr_port = port;
555 	rx_grp->arg_ring_cnt++;
556 
557 	/*
558 	 * The group is already registered, dynamically add a new ring to the
559 	 * mac group.
560 	 */
561 	mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring);
562 	if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) {
563 		ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
564 		ring->arr_hw_rh = NULL;
565 		ring->arr_port = NULL;
566 		rx_grp->arg_ring_cnt--;
567 		mac_hwring_teardown(hw_rh);
568 	}
569 	return (err);
570 }
571 
572 /*
573  * Remove the pseudo Rx ring of the given HW ring handle.
574  */
575 static void
576 aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
577 {
578 	aggr_pseudo_rx_ring_t	*ring;
579 	int			j;
580 
581 	for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
582 		ring = rx_grp->arg_rings + j;
583 		if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) ||
584 		    ring->arr_hw_rh != hw_rh) {
585 			continue;
586 		}
587 
588 		mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh);
589 
590 		ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
591 		ring->arr_hw_rh = NULL;
592 		ring->arr_port = NULL;
593 		rx_grp->arg_ring_cnt--;
594 		mac_hwring_teardown(hw_rh);
595 		break;
596 	}
597 }
598 
599 /*
600  * This function is called to create pseudo rings over the hardware rings of
601  * the underlying device. Note that there is a 1:1 mapping between the pseudo
602  * RX rings of the aggr and the hardware rings of the underlying port.
603  */
604 static int
605 aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
606 {
607 	aggr_grp_t		*grp = port->lp_grp;
608 	mac_ring_handle_t	hw_rh[MAX_RINGS_PER_GROUP];
609 	aggr_unicst_addr_t	*addr, *a;
610 	mac_perim_handle_t	pmph;
611 	int			hw_rh_cnt, i = 0, j;
612 	int			err = 0;
613 
614 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
615 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
616 
617 	/*
618 	 * This function must be called after the aggr registers its mac
619 	 * and its RX group has been initialized.
620 	 */
621 	ASSERT(rx_grp->arg_gh != NULL);
622 
623 	/*
624 	 * Get the list the the underlying HW rings.
625 	 */
626 	hw_rh_cnt = mac_hwrings_get(port->lp_mch, &port->lp_hwgh, hw_rh,
627 	    MAC_RING_TYPE_RX);
628 
629 	if (port->lp_hwgh != NULL) {
630 		/*
631 		 * Quiesce the HW ring and the mac srs on the ring. Note
632 		 * that the HW ring will be restarted when the pseudo ring
633 		 * is started. At that time all the packets will be
634 		 * directly passed up to the pseudo RX ring and handled
635 		 * by mac srs created over the pseudo RX ring.
636 		 */
637 		mac_rx_client_quiesce(port->lp_mch);
638 		mac_srs_perm_quiesce(port->lp_mch, B_TRUE);
639 	}
640 
641 	/*
642 	 * Add all the unicast addresses to the newly added port.
643 	 */
644 	for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) {
645 		if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0)
646 			break;
647 	}
648 
649 	for (i = 0; err == 0 && i < hw_rh_cnt; i++)
650 		err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]);
651 
652 	if (err != 0) {
653 		for (j = 0; j < i; j++)
654 			aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]);
655 
656 		for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next)
657 			aggr_port_remmac(port, a->aua_addr);
658 
659 		if (port->lp_hwgh != NULL) {
660 			mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
661 			mac_rx_client_restart(port->lp_mch);
662 			port->lp_hwgh = NULL;
663 		}
664 	} else {
665 		port->lp_grp_added = B_TRUE;
666 	}
667 done:
668 	mac_perim_exit(pmph);
669 	return (err);
670 }
671 
672 /*
673  * This function is called by aggr to remove pseudo RX rings over the
674  * HW rings of the underlying port.
675  */
676 static void
677 aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
678 {
679 	aggr_grp_t		*grp = port->lp_grp;
680 	mac_ring_handle_t	hw_rh[MAX_RINGS_PER_GROUP];
681 	aggr_unicst_addr_t	*addr;
682 	mac_group_handle_t	hwgh;
683 	mac_perim_handle_t	pmph;
684 	int			hw_rh_cnt, i;
685 
686 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
687 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
688 
689 	if (!port->lp_grp_added)
690 		goto done;
691 
692 	ASSERT(rx_grp->arg_gh != NULL);
693 	hw_rh_cnt = mac_hwrings_get(port->lp_mch, &hwgh, hw_rh,
694 	    MAC_RING_TYPE_RX);
695 
696 	/*
697 	 * If hw_rh_cnt is 0, it means that the underlying port does not
698 	 * support RX rings. Directly return in this case.
699 	 */
700 	for (i = 0; i < hw_rh_cnt; i++)
701 		aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]);
702 
703 	for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next)
704 		aggr_port_remmac(port, addr->aua_addr);
705 
706 	if (port->lp_hwgh != NULL) {
707 		port->lp_hwgh = NULL;
708 
709 		/*
710 		 * First clear the permanent-quiesced flag of the RX srs then
711 		 * restart the HW ring and the mac srs on the ring. Note that
712 		 * the HW ring and associated SRS will soon been removed when
713 		 * the port is removed from the aggr.
714 		 */
715 		mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
716 		mac_rx_client_restart(port->lp_mch);
717 	}
718 
719 	port->lp_grp_added = B_FALSE;
720 done:
721 	mac_perim_exit(pmph);
722 }
723 
724 static int
725 aggr_pseudo_disable_intr(mac_intr_handle_t ih)
726 {
727 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
728 	return (mac_hwring_disable_intr(rr_ring->arr_hw_rh));
729 }
730 
731 static int
732 aggr_pseudo_enable_intr(mac_intr_handle_t ih)
733 {
734 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
735 	return (mac_hwring_enable_intr(rr_ring->arr_hw_rh));
736 }
737 
738 static int
739 aggr_pseudo_start_ring(mac_ring_driver_t arg, uint64_t mr_gen)
740 {
741 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
742 	int err;
743 
744 	err = mac_hwring_start(rr_ring->arr_hw_rh);
745 	if (err == 0)
746 		rr_ring->arr_gen = mr_gen;
747 	return (err);
748 }
749 
750 static void
751 aggr_pseudo_stop_ring(mac_ring_driver_t arg)
752 {
753 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
754 	mac_hwring_stop(rr_ring->arr_hw_rh);
755 }
756 
757 /*
758  * Add one or more ports to an existing link aggregation group.
759  */
760 int
761 aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force,
762     laioc_port_t *ports)
763 {
764 	int rc, i, nadded = 0;
765 	aggr_grp_t *grp = NULL;
766 	aggr_port_t *port;
767 	boolean_t link_state_changed = B_FALSE;
768 	mac_perim_handle_t mph, pmph;
769 
770 	/* get group corresponding to linkid */
771 	rw_enter(&aggr_grp_lock, RW_READER);
772 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
773 	    (mod_hash_val_t *)&grp) != 0) {
774 		rw_exit(&aggr_grp_lock);
775 		return (ENOENT);
776 	}
777 	AGGR_GRP_REFHOLD(grp);
778 
779 	/*
780 	 * Hold the perimeter so that the aggregation won't be destroyed.
781 	 */
782 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
783 	rw_exit(&aggr_grp_lock);
784 
785 	/* add the specified ports to group */
786 	for (i = 0; i < nports; i++) {
787 		/* add port to group */
788 		if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid,
789 		    force, &port)) != 0) {
790 			goto bail;
791 		}
792 		ASSERT(port != NULL);
793 		nadded++;
794 
795 		/* check capabilities */
796 		if (!aggr_grp_capab_check(grp, port) ||
797 		    !aggr_grp_sdu_check(grp, port) ||
798 		    !aggr_grp_margin_check(grp, port)) {
799 			rc = ENOTSUP;
800 			goto bail;
801 		}
802 
803 		/*
804 		 * Create the pseudo ring for each HW ring of the underlying
805 		 * port.
806 		 */
807 		rc = aggr_add_pseudo_rx_group(port, &grp->lg_rx_group);
808 		if (rc != 0)
809 			goto bail;
810 
811 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
812 
813 		/* set LACP mode */
814 		aggr_port_lacp_set_mode(grp, port);
815 
816 		/* start port if group has already been started */
817 		if (grp->lg_started) {
818 			rc = aggr_port_start(port);
819 			if (rc != 0) {
820 				mac_perim_exit(pmph);
821 				goto bail;
822 			}
823 
824 			/*
825 			 * Turn on the promiscuous mode over the port when it
826 			 * is requested to be turned on to receive the
827 			 * non-primary address over a port, or the promiscous
828 			 * mode is enabled over the aggr.
829 			 */
830 			if (grp->lg_promisc || port->lp_prom_addr != NULL) {
831 				rc = aggr_port_promisc(port, B_TRUE);
832 				if (rc != 0) {
833 					mac_perim_exit(pmph);
834 					goto bail;
835 				}
836 			}
837 		}
838 		mac_perim_exit(pmph);
839 
840 		/*
841 		 * Attach each port if necessary.
842 		 */
843 		if (aggr_port_notify_link(grp, port))
844 			link_state_changed = B_TRUE;
845 
846 		/*
847 		 * Initialize the callback functions for this port.
848 		 */
849 		aggr_port_init_callbacks(port);
850 	}
851 
852 	/* update the MAC address of the constituent ports */
853 	if (aggr_grp_update_ports_mac(grp))
854 		link_state_changed = B_TRUE;
855 
856 	if (link_state_changed)
857 		mac_link_update(grp->lg_mh, grp->lg_link_state);
858 
859 bail:
860 	if (rc != 0) {
861 		/* stop and remove ports that have been added */
862 		for (i = 0; i < nadded; i++) {
863 			port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
864 			ASSERT(port != NULL);
865 			if (grp->lg_started) {
866 				mac_perim_enter_by_mh(port->lp_mh, &pmph);
867 				(void) aggr_port_promisc(port, B_FALSE);
868 				aggr_port_stop(port);
869 				mac_perim_exit(pmph);
870 			}
871 			aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
872 			(void) aggr_grp_rem_port(grp, port, NULL, NULL);
873 		}
874 	}
875 
876 	mac_perim_exit(mph);
877 	AGGR_GRP_REFRELE(grp);
878 	return (rc);
879 }
880 
881 static int
882 aggr_grp_modify_common(aggr_grp_t *grp, uint8_t update_mask, uint32_t policy,
883     boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
884     aggr_lacp_timer_t lacp_timer)
885 {
886 	boolean_t mac_addr_changed = B_FALSE;
887 	boolean_t link_state_changed = B_FALSE;
888 	mac_perim_handle_t pmph;
889 
890 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
891 
892 	/* validate fixed address if specified */
893 	if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed &&
894 	    ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) ||
895 	    (mac_addr[0] & 0x01))) {
896 		return (EINVAL);
897 	}
898 
899 	/* update policy if requested */
900 	if (update_mask & AGGR_MODIFY_POLICY)
901 		aggr_send_update_policy(grp, policy);
902 
903 	/* update unicast MAC address if requested */
904 	if (update_mask & AGGR_MODIFY_MAC) {
905 		if (mac_fixed) {
906 			/* user-supplied MAC address */
907 			grp->lg_mac_addr_port = NULL;
908 			if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) {
909 				bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
910 				mac_addr_changed = B_TRUE;
911 			}
912 		} else if (grp->lg_addr_fixed) {
913 			/* switch from user-supplied to automatic */
914 			aggr_port_t *port = grp->lg_ports;
915 
916 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
917 			bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
918 			grp->lg_mac_addr_port = port;
919 			mac_addr_changed = B_TRUE;
920 			mac_perim_exit(pmph);
921 		}
922 		grp->lg_addr_fixed = mac_fixed;
923 	}
924 
925 	if (mac_addr_changed)
926 		link_state_changed = aggr_grp_update_ports_mac(grp);
927 
928 	if (update_mask & AGGR_MODIFY_LACP_MODE)
929 		aggr_lacp_update_mode(grp, lacp_mode);
930 
931 	if (update_mask & AGGR_MODIFY_LACP_TIMER)
932 		aggr_lacp_update_timer(grp, lacp_timer);
933 
934 	if (link_state_changed)
935 		mac_link_update(grp->lg_mh, grp->lg_link_state);
936 
937 	if (mac_addr_changed)
938 		mac_unicst_update(grp->lg_mh, grp->lg_addr);
939 
940 	return (0);
941 }
942 
943 /*
944  * Update properties of an existing link aggregation group.
945  */
946 int
947 aggr_grp_modify(datalink_id_t linkid, uint8_t update_mask, uint32_t policy,
948     boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
949     aggr_lacp_timer_t lacp_timer)
950 {
951 	aggr_grp_t *grp = NULL;
952 	mac_perim_handle_t mph;
953 	int err;
954 
955 	/* get group corresponding to linkid */
956 	rw_enter(&aggr_grp_lock, RW_READER);
957 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
958 	    (mod_hash_val_t *)&grp) != 0) {
959 		rw_exit(&aggr_grp_lock);
960 		return (ENOENT);
961 	}
962 	AGGR_GRP_REFHOLD(grp);
963 
964 	/*
965 	 * Hold the perimeter so that the aggregation won't be destroyed.
966 	 */
967 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
968 	rw_exit(&aggr_grp_lock);
969 
970 	err = aggr_grp_modify_common(grp, update_mask, policy, mac_fixed,
971 	    mac_addr, lacp_mode, lacp_timer);
972 
973 	mac_perim_exit(mph);
974 	AGGR_GRP_REFRELE(grp);
975 	return (err);
976 }
977 
978 /*
979  * Create a new link aggregation group upon request from administrator.
980  * Returns 0 on success, an errno on failure.
981  */
int
aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports,
    laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force,
    uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer)
{
	aggr_grp_t *grp = NULL;
	aggr_port_t *port;
	mac_register_t *mac;
	boolean_t link_state_changed;
	mac_perim_handle_t mph;
	int err;
	int i;

	/* need at least one port */
	if (nports == 0)
		return (EINVAL);

	/*
	 * aggr_grp_lock is held as writer for the whole creation, up to and
	 * including the insertion of the new group into aggr_grp_hash.
	 */
	rw_enter(&aggr_grp_lock, RW_WRITER);

	/* does a group with the same linkid already exist? */
	err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
	    (mod_hash_val_t *)&grp);
	if (err == 0) {
		rw_exit(&aggr_grp_lock);
		return (EEXIST);
	}

	grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP);

	/*
	 * Initial reference; it is dropped by AGGR_GRP_REFRELE() on the
	 * bail path below if creation fails.
	 */
	grp->lg_refs = 1;
	grp->lg_closing = B_FALSE;
	grp->lg_force = force;
	grp->lg_linkid = linkid;
	grp->lg_ifspeed = 0;
	grp->lg_link_state = LINK_STATE_UNKNOWN;
	grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
	grp->lg_started = B_FALSE;
	grp->lg_promisc = B_FALSE;
	grp->lg_lacp_done = B_FALSE;
	grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
	/*
	 * The LACP receive thread is created before any port is added; the
	 * bail path tells it to exit and waits for it.
	 */
	grp->lg_lacp_rx_thread = thread_create(NULL, 0,
	    aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri);
	bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t));
	aggr_lacp_init_grp(grp);

	/* add MAC ports to group */
	grp->lg_ports = NULL;
	grp->lg_nports = 0;
	grp->lg_nattached_ports = 0;
	grp->lg_ntx_ports = 0;

	/*
	 * If key is not specified by the user, allocate the key.
	 */
	if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) {
		err = ENOMEM;
		goto bail;
	}
	grp->lg_key = key;

	/* any port that fails to be added aborts the whole creation */
	for (i = 0; i < nports; i++) {
		err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL);
		if (err != 0)
			goto bail;
	}

	/*
	 * If no explicit MAC address was specified by the administrator,
	 * set it to the MAC address of the first port.
	 */
	grp->lg_addr_fixed = mac_fixed;
	if (grp->lg_addr_fixed) {
		/* validate specified address */
		if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
			err = EINVAL;
			goto bail;
		}
		bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
	} else {
		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
		grp->lg_mac_addr_port = grp->lg_ports;
	}

	/* set the initial group capabilities */
	aggr_grp_capab_set(grp);

	if ((mac = mac_alloc(MAC_VERSION)) == NULL) {
		err = ENOMEM;
		goto bail;
	}
	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	mac->m_driver = grp;
	mac->m_dip = aggr_dip;
	/* keys above AGGR_MAX_KEY are registered with instance (uint_t)-1 */
	mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? (uint_t)-1 : grp->lg_key;
	mac->m_src_addr = grp->lg_addr;
	mac->m_callbacks = &aggr_m_callbacks;
	mac->m_min_sdu = 0;
	mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp);
	mac->m_margin = aggr_grp_max_margin(grp);
	mac->m_v12n = MAC_VIRT_LEVEL1;
	err = mac_register(mac, &grp->lg_mh);
	/* the registration template is freed whether or not it succeeded */
	mac_free(mac);
	if (err != 0)
		goto bail;

	if ((err = dls_devnet_create(grp->lg_mh, grp->lg_linkid)) != 0) {
		/* undo the MAC registration before the common unwind */
		(void) mac_unregister(grp->lg_mh);
		grp->lg_mh = NULL;
		goto bail;
	}

	/* no "goto bail" occurs past this point, so bail never holds mph */
	mac_perim_enter_by_mh(grp->lg_mh, &mph);

	/*
	 * Update the MAC address of the constituent ports.
	 * None of the port is attached at this time, the link state of the
	 * aggregation will not change.
	 */
	link_state_changed = aggr_grp_update_ports_mac(grp);
	ASSERT(!link_state_changed);

	/* update outbound load balancing policy */
	aggr_send_update_policy(grp, policy);

	/* set LACP mode */
	aggr_lacp_set_mode(grp, lacp_mode, lacp_timer);

	/*
	 * Attach each port if necessary.
	 */
	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
		/*
		 * Create the pseudo ring for each HW ring of the underlying
		 * port. Note that this is done after the aggr registers the
		 * mac.
		 */
		VERIFY(aggr_add_pseudo_rx_group(port, &grp->lg_rx_group) == 0);
		if (aggr_port_notify_link(grp, port))
			link_state_changed = B_TRUE;

		/*
		 * Initialize the callback functions for this port.
		 */
		aggr_port_init_callbacks(port);
	}

	if (link_state_changed)
		mac_link_update(grp->lg_mh, grp->lg_link_state);

	/* add new group to hash table */
	err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid),
	    (mod_hash_val_t)grp);
	ASSERT(err == 0);
	aggr_grp_cnt++;

	mac_perim_exit(mph);
	rw_exit(&aggr_grp_lock);
	return (0);

bail:
	/*
	 * Error unwind: delete every port added so far, stop the LACP
	 * receive thread, release aggr_grp_lock and drop the initial
	 * reference taken above. err holds the errno to return.
	 */

	grp->lg_closing = B_TRUE;

	port = grp->lg_ports;
	while (port != NULL) {
		aggr_port_t *cport;

		/* fetch the successor before the port is deleted */
		cport = port->lp_next;
		aggr_port_delete(port);
		port = cport;
	}

	/*
	 * Inform the lacp_rx thread to exit.
	 */
	mutex_enter(&grp->lg_lacp_lock);
	grp->lg_lacp_done = B_TRUE;
	cv_signal(&grp->lg_lacp_cv);
	/* wait until lg_lacp_rx_thread has been reset to NULL */
	while (grp->lg_lacp_rx_thread != NULL)
		cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
	mutex_exit(&grp->lg_lacp_lock);

	rw_exit(&aggr_grp_lock);
	AGGR_GRP_REFRELE(grp);
	return (err);
}
1168 
1169 /*
1170  * Return a pointer to the member of a group with specified linkid.
1171  */
1172 static aggr_port_t *
1173 aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid)
1174 {
1175 	aggr_port_t *port;
1176 
1177 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1178 
1179 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1180 		if (port->lp_linkid == linkid)
1181 			break;
1182 	}
1183 
1184 	return (port);
1185 }
1186 
/*
 * Stop, detach and remove a port from a link aggregation group.
 *
 * The caller must hold the group's MAC perimeter, the group must have
 * more than one port, and it must not be closing (all three are asserted).
 *
 * If non-NULL, *mac_addr_changedp is set to B_TRUE when the group MAC
 * address was switched to that of another port, and *link_state_changedp
 * is set to B_TRUE when detaching the port or updating the remaining
 * ports' MAC address reported a link state change.
 *
 * Returns 0 on success, or ENOENT if the port is not on the group's
 * port list, in which case both flags are reported as B_FALSE.
 */
static int
aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port,
    boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
{
	int rc = 0;
	aggr_port_t **pport;
	boolean_t mac_addr_changed = B_FALSE;
	boolean_t link_state_changed = B_FALSE;
	mac_perim_handle_t mph;
	uint64_t val;
	uint_t i;
	uint_t stat;

	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
	ASSERT(grp->lg_nports > 1);
	ASSERT(!grp->lg_closing);

	/* unlink port */
	for (pport = &grp->lg_ports; *pport != port;
	    pport = &(*pport)->lp_next) {
		if (*pport == NULL) {
			rc = ENOENT;
			goto done;
		}
	}
	*pport = port->lp_next;

	/* the port's own perimeter is held until lg_nports is decremented */
	mac_perim_enter_by_mh(port->lp_mh, &mph);

	/*
	 * If the MAC address of the port being removed was assigned
	 * to the group, update the group MAC address
	 * using the MAC address of a different port.
	 */
	if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) {
		/*
		 * Set the MAC address of the group to the
		 * MAC address of its first port.
		 */
		/* the removed port is already unlinked from lg_ports here */
		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
		grp->lg_mac_addr_port = grp->lg_ports;
		mac_addr_changed = B_TRUE;
	}

	link_state_changed = aggr_grp_detach_port(grp, port);

	/*
	 * Add the counter statistics of the ports while it was aggregated
	 * to the group's residual statistics.  This is done by obtaining
	 * the current counter from the underlying MAC then subtracting the
	 * value of the counter at the moment it was added to the
	 * aggregation.
	 */
	for (i = 0; i < MAC_NSTAT; i++) {
		stat = i + MAC_STAT_MIN;
		if (!MAC_STAT_ISACOUNTER(stat))
			continue;
		val = aggr_port_stat(port, stat);
		val -= port->lp_stat[i];
		grp->lg_stat[i] += val;
	}
	for (i = 0; i < ETHER_NSTAT; i++) {
		stat = i + MACTYPE_STAT_MIN;
		if (!ETHER_STAT_ISACOUNTER(stat))
			continue;
		val = aggr_port_stat(port, stat);
		val -= port->lp_ether_stat[i];
		grp->lg_ether_stat[i] += val;
	}

	grp->lg_nports--;
	mac_perim_exit(mph);

	/* the port perimeter is released before the port is deleted */
	aggr_port_delete(port);

	/*
	 * If the group MAC address has changed, update the MAC address of
	 * the remaining constituent ports according to the new MAC
	 * address of the group.
	 */
	if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
		link_state_changed = B_TRUE;

done:
	/* both out parameters are optional */
	if (mac_addr_changedp != NULL)
		*mac_addr_changedp = mac_addr_changed;
	if (link_state_changedp != NULL)
		*link_state_changedp = link_state_changed;

	return (rc);
}
1281 
1282 /*
1283  * Remove one or more ports from an existing link aggregation group.
1284  */
1285 int
1286 aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports)
1287 {
1288 	int rc = 0, i;
1289 	aggr_grp_t *grp = NULL;
1290 	aggr_port_t *port;
1291 	boolean_t mac_addr_update = B_FALSE, mac_addr_changed;
1292 	boolean_t link_state_update = B_FALSE, link_state_changed;
1293 	mac_perim_handle_t mph, pmph;
1294 
1295 	/* get group corresponding to linkid */
1296 	rw_enter(&aggr_grp_lock, RW_READER);
1297 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1298 	    (mod_hash_val_t *)&grp) != 0) {
1299 		rw_exit(&aggr_grp_lock);
1300 		return (ENOENT);
1301 	}
1302 	AGGR_GRP_REFHOLD(grp);
1303 
1304 	/*
1305 	 * Hold the perimeter so that the aggregation won't be destroyed.
1306 	 */
1307 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1308 	rw_exit(&aggr_grp_lock);
1309 
1310 	/* we need to keep at least one port per group */
1311 	if (nports >= grp->lg_nports) {
1312 		rc = EINVAL;
1313 		goto bail;
1314 	}
1315 
1316 	/* first verify that all the groups are valid */
1317 	for (i = 0; i < nports; i++) {
1318 		if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) {
1319 			/* port not found */
1320 			rc = ENOENT;
1321 			goto bail;
1322 		}
1323 	}
1324 
1325 	/* clear the promiscous mode for the specified ports */
1326 	for (i = 0; i < nports && rc == 0; i++) {
1327 		/* lookup port */
1328 		port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1329 		ASSERT(port != NULL);
1330 
1331 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1332 		rc = aggr_port_promisc(port, B_FALSE);
1333 		mac_perim_exit(pmph);
1334 	}
1335 	if (rc != 0) {
1336 		for (i = 0; i < nports; i++) {
1337 			port = aggr_grp_port_lookup(grp,
1338 			    ports[i].lp_linkid);
1339 			ASSERT(port != NULL);
1340 
1341 			/*
1342 			 * Turn the promiscuous mode back on if it is required
1343 			 * to receive the non-primary address over a port, or
1344 			 * the promiscous mode is enabled over the aggr.
1345 			 */
1346 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
1347 			if (port->lp_started && (grp->lg_promisc ||
1348 			    port->lp_prom_addr != NULL)) {
1349 				(void) aggr_port_promisc(port, B_TRUE);
1350 			}
1351 			mac_perim_exit(pmph);
1352 		}
1353 		goto bail;
1354 	}
1355 
1356 	/* remove the specified ports from group */
1357 	for (i = 0; i < nports; i++) {
1358 		/* lookup port */
1359 		port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1360 		ASSERT(port != NULL);
1361 
1362 		/* stop port if group has already been started */
1363 		if (grp->lg_started) {
1364 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
1365 			aggr_port_stop(port);
1366 			mac_perim_exit(pmph);
1367 		}
1368 
1369 		aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1370 		/* remove port from group */
1371 		rc = aggr_grp_rem_port(grp, port, &mac_addr_changed,
1372 		    &link_state_changed);
1373 		ASSERT(rc == 0);
1374 		mac_addr_update = mac_addr_update || mac_addr_changed;
1375 		link_state_update = link_state_update || link_state_changed;
1376 	}
1377 
1378 bail:
1379 	if (mac_addr_update)
1380 		mac_unicst_update(grp->lg_mh, grp->lg_addr);
1381 	if (link_state_update)
1382 		mac_link_update(grp->lg_mh, grp->lg_link_state);
1383 
1384 	mac_perim_exit(mph);
1385 	AGGR_GRP_REFRELE(grp);
1386 
1387 	return (rc);
1388 }
1389 
/*
 * Delete the link aggregation group identified by linkid.
 *
 * Returns 0 on success, ENOENT if no group with this linkid is in
 * aggr_grp_hash, or the error returned by dls_devnet_destroy() or
 * mac_disable(); in the error cases the group is left in the hash table.
 */
int
aggr_grp_delete(datalink_id_t linkid)
{
	aggr_grp_t *grp = NULL;
	aggr_port_t *port, *cport;
	datalink_id_t tmpid;
	mod_hash_val_t val;
	mac_perim_handle_t mph, pmph;
	int err;

	rw_enter(&aggr_grp_lock, RW_WRITER);

	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
	    (mod_hash_val_t *)&grp) != 0) {
		rw_exit(&aggr_grp_lock);
		return (ENOENT);
	}

	/*
	 * Note that dls_devnet_destroy() must be called before lg_lock is
	 * held. Otherwise, it will deadlock if another thread is in
	 * aggr_m_stat() and thus has a kstat_hold() on the kstats that
	 * dls_devnet_destroy() needs to delete.
	 */
	if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid, B_TRUE)) != 0) {
		rw_exit(&aggr_grp_lock);
		return (err);
	}
	ASSERT(linkid == tmpid);

	/*
	 * Unregister from the MAC service module. Since this can
	 * fail if a client hasn't closed the MAC port, we gracefully
	 * fail the operation.
	 */
	if ((err = mac_disable(grp->lg_mh)) != 0) {
		/* re-create the devnet destroyed above; its result is ignored */
		(void) dls_devnet_create(grp->lg_mh, linkid);
		rw_exit(&aggr_grp_lock);
		return (err);
	}
	/* past this point the deletion can no longer fail */
	(void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val);
	ASSERT(grp == (aggr_grp_t *)val);

	ASSERT(aggr_grp_cnt > 0);
	aggr_grp_cnt--;
	rw_exit(&aggr_grp_lock);

	/*
	 * Inform the lacp_rx thread to exit.
	 */
	mutex_enter(&grp->lg_lacp_lock);
	grp->lg_lacp_done = B_TRUE;
	cv_signal(&grp->lg_lacp_cv);
	/* wait until lg_lacp_rx_thread has been reset to NULL */
	while (grp->lg_lacp_rx_thread != NULL)
		cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
	mutex_exit(&grp->lg_lacp_lock);

	mac_perim_enter_by_mh(grp->lg_mh, &mph);

	grp->lg_closing = B_TRUE;
	/* detach and free MAC ports associated with group */
	port = grp->lg_ports;
	while (port != NULL) {
		/* fetch the successor before the port is deleted */
		cport = port->lp_next;
		mac_perim_enter_by_mh(port->lp_mh, &pmph);
		if (grp->lg_started)
			aggr_port_stop(port);
		(void) aggr_grp_detach_port(grp, port);
		mac_perim_exit(pmph);
		aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
		aggr_port_delete(port);
		port = cport;
	}

	mac_perim_exit(mph);

	/*
	 * Wait for the port's lacp timer thread and its notification callback
	 * to exit before calling mac_unregister() since both needs to access
	 * the mac perimeter of the grp.
	 */
	aggr_grp_port_wait(grp);

	VERIFY(mac_unregister(grp->lg_mh) == 0);
	grp->lg_mh = NULL;

	/* drop a reference on the group now that it is out of the hash */
	AGGR_GRP_REFRELE(grp);
	return (0);
}
1479 
1480 void
1481 aggr_grp_free(aggr_grp_t *grp)
1482 {
1483 	ASSERT(grp->lg_refs == 0);
1484 	ASSERT(grp->lg_port_ref == 0);
1485 	if (grp->lg_key > AGGR_MAX_KEY) {
1486 		id_free(key_ids, grp->lg_key);
1487 		grp->lg_key = 0;
1488 	}
1489 	kmem_cache_free(aggr_grp_cache, grp);
1490 }
1491 
1492 int
1493 aggr_grp_info(datalink_id_t linkid, void *fn_arg,
1494     aggr_grp_info_new_grp_fn_t new_grp_fn,
1495     aggr_grp_info_new_port_fn_t new_port_fn)
1496 {
1497 	aggr_grp_t	*grp;
1498 	aggr_port_t	*port;
1499 	mac_perim_handle_t mph, pmph;
1500 	int		rc = 0;
1501 
1502 	rw_enter(&aggr_grp_lock, RW_READER);
1503 
1504 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1505 	    (mod_hash_val_t *)&grp) != 0) {
1506 		rw_exit(&aggr_grp_lock);
1507 		return (ENOENT);
1508 	}
1509 	AGGR_GRP_REFHOLD(grp);
1510 
1511 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1512 	rw_exit(&aggr_grp_lock);
1513 
1514 	rc = new_grp_fn(fn_arg, grp->lg_linkid,
1515 	    (grp->lg_key > AGGR_MAX_KEY) ? 0 : grp->lg_key, grp->lg_addr,
1516 	    grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy,
1517 	    grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer);
1518 
1519 	if (rc != 0)
1520 		goto bail;
1521 
1522 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1523 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1524 		rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr,
1525 		    port->lp_state, &port->lp_lacp.ActorOperPortState);
1526 		mac_perim_exit(pmph);
1527 
1528 		if (rc != 0)
1529 			goto bail;
1530 	}
1531 
1532 bail:
1533 	mac_perim_exit(mph);
1534 	AGGR_GRP_REFRELE(grp);
1535 	return (rc);
1536 }
1537 
/*
 * ioctl entry point of the aggregation: no ioctl is handled here, every
 * message is negatively acknowledged with ENOTSUP.
 */
/*ARGSUSED*/
static void
aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	miocnak(q, mp, 0, ENOTSUP);
}
1544 
1545 static int
1546 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
1547 {
1548 	aggr_port_t	*port;
1549 	uint_t		stat_index;
1550 
1551 	/* We only aggregate counter statistics. */
1552 	if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
1553 	    IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
1554 		return (ENOTSUP);
1555 	}
1556 
1557 	/*
1558 	 * Counter statistics for a group are computed by aggregating the
1559 	 * counters of the members MACs while they were aggregated, plus
1560 	 * the residual counter of the group itself, which is updated each
1561 	 * time a MAC is removed from the group.
1562 	 */
1563 	*val = 0;
1564 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1565 		/* actual port statistic */
1566 		*val += aggr_port_stat(port, stat);
1567 		/*
1568 		 * minus the port stat when it was added, plus any residual
1569 		 * amount for the group.
1570 		 */
1571 		if (IS_MAC_STAT(stat)) {
1572 			stat_index = stat - MAC_STAT_MIN;
1573 			*val -= port->lp_stat[stat_index];
1574 			*val += grp->lg_stat[stat_index];
1575 		} else if (IS_MACTYPE_STAT(stat)) {
1576 			stat_index = stat - MACTYPE_STAT_MIN;
1577 			*val -= port->lp_ether_stat[stat_index];
1578 			*val += grp->lg_ether_stat[stat_index];
1579 		}
1580 	}
1581 	return (0);
1582 }
1583 
1584 static int
1585 aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
1586 {
1587 	aggr_grp_t		*grp = arg;
1588 	mac_perim_handle_t	mph;
1589 	int			rval = 0;
1590 
1591 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1592 
1593 	switch (stat) {
1594 	case MAC_STAT_IFSPEED:
1595 		*val = grp->lg_ifspeed;
1596 		break;
1597 
1598 	case ETHER_STAT_LINK_DUPLEX:
1599 		*val = grp->lg_link_duplex;
1600 		break;
1601 
1602 	default:
1603 		/*
1604 		 * For all other statistics, we return the aggregated stat
1605 		 * from the underlying ports.  aggr_grp_stat() will set
1606 		 * rval appropriately if the statistic isn't a counter.
1607 		 */
1608 		rval = aggr_grp_stat(grp, stat, val);
1609 	}
1610 
1611 	mac_perim_exit(mph);
1612 	return (rval);
1613 }
1614 
1615 static int
1616 aggr_m_start(void *arg)
1617 {
1618 	aggr_grp_t *grp = arg;
1619 	aggr_port_t *port;
1620 	mac_perim_handle_t mph, pmph;
1621 
1622 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1623 
1624 	/*
1625 	 * Attempts to start all configured members of the group.
1626 	 * Group members will be attached when their link-up notification
1627 	 * is received.
1628 	 */
1629 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1630 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1631 		if (aggr_port_start(port) != 0) {
1632 			mac_perim_exit(pmph);
1633 			continue;
1634 		}
1635 
1636 		/*
1637 		 * Turn on the promiscuous mode if it is required to receive
1638 		 * the non-primary address over a port, or the promiscous
1639 		 * mode is enabled over the aggr.
1640 		 */
1641 		if (grp->lg_promisc || port->lp_prom_addr != NULL) {
1642 			if (aggr_port_promisc(port, B_TRUE) != 0)
1643 				aggr_port_stop(port);
1644 		}
1645 		mac_perim_exit(pmph);
1646 	}
1647 
1648 	grp->lg_started = B_TRUE;
1649 
1650 	mac_perim_exit(mph);
1651 	return (0);
1652 }
1653 
1654 static void
1655 aggr_m_stop(void *arg)
1656 {
1657 	aggr_grp_t *grp = arg;
1658 	aggr_port_t *port;
1659 	mac_perim_handle_t mph, pmph;
1660 
1661 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1662 
1663 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1664 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1665 
1666 		/* reset port promiscuous mode */
1667 		(void) aggr_port_promisc(port, B_FALSE);
1668 
1669 		aggr_port_stop(port);
1670 		mac_perim_exit(pmph);
1671 	}
1672 
1673 	grp->lg_started = B_FALSE;
1674 	mac_perim_exit(mph);
1675 }
1676 
/*
 * Promiscuous-mode entry point of the aggregation: propagate the
 * requested mode to the member ports and record it in lg_promisc.
 *
 * A port on which the mode change fails is detached from the group; a
 * port on which it succeeds (or needs no change) goes through
 * aggr_grp_attach_port(). The function always returns 0.
 */
static int
aggr_m_promisc(void *arg, boolean_t on)
{
	aggr_grp_t *grp = arg;
	aggr_port_t *port;
	boolean_t link_state_changed = B_FALSE;
	mac_perim_handle_t mph, pmph;

	AGGR_GRP_REFHOLD(grp);
	mac_perim_enter_by_mh(grp->lg_mh, &mph);

	ASSERT(!grp->lg_closing);

	/* nothing to do if the group is already in the requested mode */
	if (on == grp->lg_promisc)
		goto bail;

	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
		int	err = 0;

		mac_perim_enter_by_mh(port->lp_mh, &pmph);
		AGGR_PORT_REFHOLD(port);
		/*
		 * Turning off: only ports with no lp_prom_addr are changed.
		 * Turning on: only ports that are started are changed.
		 * Otherwise err stays 0 and the port is treated as a success.
		 */
		if (!on && (port->lp_prom_addr == NULL))
			err = aggr_port_promisc(port, B_FALSE);
		else if (on && port->lp_started)
			err = aggr_port_promisc(port, B_TRUE);

		if (err != 0) {
			if (aggr_grp_detach_port(grp, port))
				link_state_changed = B_TRUE;
		} else {
			/*
			 * If a port was detached because of a previous
			 * failure changing the promiscuity, the port
			 * is reattached when it successfully changes
			 * the promiscuity now, and this might cause
			 * the link state of the aggregation to change.
			 */
			if (aggr_grp_attach_port(grp, port))
				link_state_changed = B_TRUE;
		}
		mac_perim_exit(pmph);
		AGGR_PORT_REFRELE(port);
	}

	grp->lg_promisc = on;

	/* a single link update covers all the ports processed above */
	if (link_state_changed)
		mac_link_update(grp->lg_mh, grp->lg_link_state);

bail:
	mac_perim_exit(mph);
	AGGR_GRP_REFRELE(grp);

	return (0);
}
1732 
1733 static void
1734 aggr_grp_port_rename(const char *new_name, void *arg)
1735 {
1736 	/*
1737 	 * aggr port's mac client name is the format of "aggr link name" plus
1738 	 * AGGR_PORT_NAME_DELIMIT plus "underneath link name".
1739 	 */
1740 	int aggr_len, link_len, clnt_name_len, i;
1741 	char *str_end, *str_st, *str_del;
1742 	char aggr_name[MAXNAMELEN];
1743 	char link_name[MAXNAMELEN];
1744 	char *clnt_name;
1745 	aggr_grp_t *aggr_grp = arg;
1746 	aggr_port_t *aggr_port = aggr_grp->lg_ports;
1747 
1748 	for (i = 0; i < aggr_grp->lg_nports; i++) {
1749 		clnt_name = mac_client_name(aggr_port->lp_mch);
1750 		clnt_name_len = strlen(clnt_name);
1751 		str_st = clnt_name;
1752 		str_end = &(clnt_name[clnt_name_len]);
1753 		str_del = strchr(str_st, AGGR_PORT_NAME_DELIMIT);
1754 		ASSERT(str_del != NULL);
1755 		aggr_len = (intptr_t)((uintptr_t)str_del - (uintptr_t)str_st);
1756 		link_len = (intptr_t)((uintptr_t)str_end - (uintptr_t)str_del);
1757 		bzero(aggr_name, MAXNAMELEN);
1758 		bzero(link_name, MAXNAMELEN);
1759 		bcopy(clnt_name, aggr_name, aggr_len);
1760 		bcopy(str_del, link_name, link_len + 1);
1761 		bzero(clnt_name, MAXNAMELEN);
1762 		(void) snprintf(clnt_name, MAXNAMELEN, "%s%s", new_name,
1763 		    link_name);
1764 
1765 		(void) mac_rename_primary(aggr_port->lp_mh, NULL);
1766 		aggr_port = aggr_port->lp_next;
1767 	}
1768 }
1769 
1770 /*
1771  * Initialize the capabilities that are advertised for the group
1772  * according to the capabilities of the constituent ports.
1773  */
1774 static boolean_t
1775 aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
1776 {
1777 	aggr_grp_t *grp = arg;
1778 
1779 	switch (cap) {
1780 	case MAC_CAPAB_HCKSUM: {
1781 		uint32_t *hcksum_txflags = cap_data;
1782 		*hcksum_txflags = grp->lg_hcksum_txflags;
1783 		break;
1784 	}
1785 	case MAC_CAPAB_LSO: {
1786 		mac_capab_lso_t *cap_lso = cap_data;
1787 
1788 		if (grp->lg_lso) {
1789 			*cap_lso = grp->lg_cap_lso;
1790 			break;
1791 		} else {
1792 			return (B_FALSE);
1793 		}
1794 	}
1795 	case MAC_CAPAB_NO_NATIVEVLAN:
1796 		return (!grp->lg_vlan);
1797 	case MAC_CAPAB_NO_ZCOPY:
1798 		return (!grp->lg_zcopy);
1799 	case MAC_CAPAB_RINGS: {
1800 		mac_capab_rings_t *cap_rings = cap_data;
1801 
1802 		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
1803 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
1804 			cap_rings->mr_rnum = grp->lg_rx_group.arg_ring_cnt;
1805 			cap_rings->mr_rget = aggr_fill_ring;
1806 
1807 			/*
1808 			 * An aggregation advertises only one (pseudo) RX
1809 			 * group, which virtualizes the main/primary group of
1810 			 * the underlying devices.
1811 			 */
1812 			cap_rings->mr_gnum = 1;
1813 			cap_rings->mr_gget = aggr_fill_group;
1814 			cap_rings->mr_gaddring = NULL;
1815 			cap_rings->mr_gremring = NULL;
1816 		} else {
1817 			return (B_FALSE);
1818 		}
1819 		break;
1820 	}
1821 	case MAC_CAPAB_AGGR:
1822 	{
1823 		mac_capab_aggr_t *aggr_cap;
1824 
1825 		if (cap_data != NULL) {
1826 			aggr_cap = cap_data;
1827 			aggr_cap->mca_rename_fn = aggr_grp_port_rename;
1828 			aggr_cap->mca_unicst = aggr_m_unicst;
1829 		}
1830 		return (B_TRUE);
1831 	}
1832 	default:
1833 		return (B_FALSE);
1834 	}
1835 	return (B_TRUE);
1836 }
1837 
1838 /*
1839  * Callback funtion for MAC layer to register groups.
1840  */
1841 static void
1842 aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index,
1843     mac_group_info_t *infop, mac_group_handle_t gh)
1844 {
1845 	aggr_grp_t *grp = arg;
1846 	aggr_pseudo_rx_group_t *rx_group;
1847 
1848 	ASSERT(rtype == MAC_RING_TYPE_RX && index == 0);
1849 	rx_group = &grp->lg_rx_group;
1850 	rx_group->arg_gh = gh;
1851 	rx_group->arg_grp = grp;
1852 
1853 	infop->mgi_driver = (mac_group_driver_t)rx_group;
1854 	infop->mgi_start = NULL;
1855 	infop->mgi_stop = NULL;
1856 	infop->mgi_addmac = aggr_addmac;
1857 	infop->mgi_remmac = aggr_remmac;
1858 	infop->mgi_count = rx_group->arg_ring_cnt;
1859 }
1860 
1861 /*
1862  * Callback funtion for MAC layer to register all rings.
1863  */
1864 static void
1865 aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
1866     const int index, mac_ring_info_t *infop, mac_ring_handle_t rh)
1867 {
1868 	aggr_grp_t	*grp = arg;
1869 
1870 	switch (rtype) {
1871 	case MAC_RING_TYPE_RX: {
1872 		aggr_pseudo_rx_group_t	*rx_group = &grp->lg_rx_group;
1873 		aggr_pseudo_rx_ring_t	*rx_ring;
1874 		mac_intr_t		aggr_mac_intr;
1875 
1876 		ASSERT(rg_index == 0);
1877 
1878 		ASSERT((index >= 0) && (index < rx_group->arg_ring_cnt));
1879 		rx_ring = rx_group->arg_rings + index;
1880 		rx_ring->arr_rh = rh;
1881 
1882 		/*
1883 		 * Entrypoint to enable interrupt (disable poll) and
1884 		 * disable interrupt (enable poll).
1885 		 */
1886 		aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring;
1887 		aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr;
1888 		aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr;
1889 
1890 		infop->mri_driver = (mac_ring_driver_t)rx_ring;
1891 		infop->mri_start = aggr_pseudo_start_ring;
1892 		infop->mri_stop = aggr_pseudo_stop_ring;
1893 
1894 		infop->mri_intr = aggr_mac_intr;
1895 		infop->mri_poll = aggr_rx_poll;
1896 		break;
1897 	}
1898 	default:
1899 		break;
1900 	}
1901 }
1902 
1903 static mblk_t *
1904 aggr_rx_poll(void *arg, int bytes_to_pickup)
1905 {
1906 	aggr_pseudo_rx_ring_t *rr_ring = arg;
1907 	aggr_port_t *port = rr_ring->arr_port;
1908 	aggr_grp_t *grp = port->lp_grp;
1909 	mblk_t *mp_chain, *mp, **mpp;
1910 
1911 	mp_chain = mac_hwring_poll(rr_ring->arr_hw_rh, bytes_to_pickup);
1912 
1913 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1914 		return (mp_chain);
1915 
1916 	mpp = &mp_chain;
1917 	while ((mp = *mpp) != NULL) {
1918 		if (MBLKL(mp) >= sizeof (struct ether_header)) {
1919 			struct ether_header *ehp;
1920 
1921 			ehp = (struct ether_header *)mp->b_rptr;
1922 			if (ntohs(ehp->ether_type) == ETHERTYPE_SLOW) {
1923 				*mpp = mp->b_next;
1924 				mp->b_next = NULL;
1925 				aggr_recv_lacp(port,
1926 				    (mac_resource_handle_t)rr_ring, mp);
1927 				continue;
1928 			}
1929 		}
1930 
1931 		if (!port->lp_collector_enabled) {
1932 			*mpp = mp->b_next;
1933 			mp->b_next = NULL;
1934 			freemsg(mp);
1935 			continue;
1936 		}
1937 		mpp = &mp->b_next;
1938 	}
1939 	return (mp_chain);
1940 }
1941 
/*
 * Add-address callback of the pseudo RX group (arg is the
 * aggr_pseudo_rx_group_t): record mac_addr in the group's address list
 * and add it to every member port.
 *
 * Returns 0 on success, and also when mac_addr is the aggregation's own
 * address (nothing is done in that case); EEXIST if the address is
 * already in the list; otherwise the error from aggr_port_addmac(), after
 * the address has been removed again from the ports it was added to.
 */
static int
aggr_addmac(void *arg, const uint8_t *mac_addr)
{
	aggr_pseudo_rx_group_t	*rx_group = (aggr_pseudo_rx_group_t *)arg;
	aggr_unicst_addr_t	*addr, **pprev;
	aggr_grp_t		*grp = rx_group->arg_grp;
	aggr_port_t		*port, *p;
	mac_perim_handle_t	mph;
	int			err = 0;

	mac_perim_enter_by_mh(grp->lg_mh, &mph);

	if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
		mac_perim_exit(mph);
		return (0);
	}

	/*
	 * Insert this mac address into the list of mac addresses owned by
	 * the aggregation pseudo group.
	 */
	pprev = &rx_group->arg_macaddr;
	while ((addr = *pprev) != NULL) {
		if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) {
			mac_perim_exit(mph);
			return (EEXIST);
		}
		pprev = &addr->aua_next;
	}
	/* pprev now points at the NULL link terminating the list */
	addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP);
	bcopy(mac_addr, addr->aua_addr, ETHERADDRL);
	addr->aua_next = NULL;
	*pprev = addr;

	/* on failure, port is left pointing at the port that failed */
	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
		if ((err = aggr_port_addmac(port, mac_addr)) != 0)
			break;

	if (err != 0) {
		/* roll back the ports that precede the failing one */
		for (p = grp->lg_ports; p != port; p = p->lp_next)
			aggr_port_remmac(p, mac_addr);

		/* the new entry was appended last, so unlinking is a reset */
		*pprev = NULL;
		kmem_free(addr, sizeof (aggr_unicst_addr_t));
	}

	mac_perim_exit(mph);
	return (err);
}
1991 
/*
 * Remove-address callback of the pseudo RX group (arg is the
 * aggr_pseudo_rx_group_t): remove mac_addr from every member port and
 * from the group's address list.
 *
 * Returns 0 on success, and also when mac_addr is the aggregation's own
 * address (nothing is done in that case), or EINVAL if the address is
 * not in the group's list.
 */
static int
aggr_remmac(void *arg, const uint8_t *mac_addr)
{
	aggr_pseudo_rx_group_t	*rx_group = (aggr_pseudo_rx_group_t *)arg;
	aggr_unicst_addr_t	*addr, **pprev;
	aggr_grp_t		*grp = rx_group->arg_grp;
	aggr_port_t		*port;
	mac_perim_handle_t	mph;
	int			err = 0;

	mac_perim_enter_by_mh(grp->lg_mh, &mph);

	if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
		mac_perim_exit(mph);
		return (0);
	}

	/*
	 * Look this mac address up in the list of mac addresses owned by
	 * the aggregation pseudo group. When the loop ends, addr is the
	 * matching entry (or NULL) and pprev the link that points to it.
	 */
	pprev = &rx_group->arg_macaddr;
	while ((addr = *pprev) != NULL) {
		if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) {
			pprev = &addr->aua_next;
			continue;
		}
		break;
	}
	if (addr == NULL) {
		mac_perim_exit(mph);
		return (EINVAL);
	}

	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
		aggr_port_remmac(port, mac_addr);

	/* unlink the entry from the list and free it */
	*pprev = addr->aua_next;
	kmem_free(addr, sizeof (aggr_unicst_addr_t));

	mac_perim_exit(mph);
	/* err is never modified above, so this returns 0 */
	return (err);
}
2035 
2036 /*
2037  * Add or remove the multicast addresses that are defined for the group
2038  * to or from the specified port.
2039  *
2040  * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port
2041  * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is
2042  * called when the port is either stopped or detached.
2043  */
2044 void
2045 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
2046 {
2047 	aggr_grp_t *grp = port->lp_grp;
2048 
2049 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
2050 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2051 
2052 	if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED)
2053 		return;
2054 
2055 	mac_multicast_refresh(grp->lg_mh, aggr_port_multicst, port, add);
2056 }
2057 
2058 static int
2059 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
2060 {
2061 	aggr_grp_t *grp = arg;
2062 	aggr_port_t *port = NULL;
2063 	mac_perim_handle_t mph;
2064 	int err = 0, cerr;
2065 
2066 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2067 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2068 		if (port->lp_state != AGGR_PORT_STATE_ATTACHED ||
2069 		    !port->lp_started) {
2070 			continue;
2071 		}
2072 		cerr = aggr_port_multicst(port, add, addrp);
2073 		if (cerr != 0 && err == 0)
2074 			err = cerr;
2075 	}
2076 	mac_perim_exit(mph);
2077 	return (err);
2078 }
2079 
/*
 * Unicast-address entry point of the aggregation (also exported through
 * MAC_CAPAB_AGGR as mca_unicst): request a MAC address change to macaddr
 * by calling aggr_grp_modify_common() with AGGR_MODIFY_MAC, under the
 * group's MAC perimeter. Returns the result of that call.
 */
static int
aggr_m_unicst(void *arg, const uint8_t *macaddr)
{
	aggr_grp_t *grp = arg;
	mac_perim_handle_t mph;
	int err;

	mac_perim_enter_by_mh(grp->lg_mh, &mph);
	/*
	 * NOTE(review): the B_TRUE argument presumably marks the address as
	 * fixed -- confirm against aggr_grp_modify_common().
	 */
	err = aggr_grp_modify_common(grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr,
	    0, 0);
	mac_perim_exit(mph);
	return (err);
}
2093 
/*
 * Initialize the capabilities that are advertised for the group
 * according to the capabilities of the constituent ports.
 *
 * Every capability starts out fully enabled and is then narrowed down by
 * each port in turn, so the group ends up with what all of its ports have
 * in common. Must be called before the group is registered with the MAC
 * layer (lg_mh is asserted to be NULL) and with at least one port.
 */
static void
aggr_grp_capab_set(aggr_grp_t *grp)
{
	uint32_t cksum;
	aggr_port_t *port;
	mac_capab_lso_t cap_lso;

	ASSERT(grp->lg_mh == NULL);
	ASSERT(grp->lg_ports != NULL);

	/* start from "everything supported" */
	grp->lg_hcksum_txflags = (uint32_t)-1;
	grp->lg_zcopy = B_TRUE;
	grp->lg_vlan = B_TRUE;

	grp->lg_lso = B_TRUE;
	grp->lg_cap_lso.lso_flags = (t_uscalar_t)-1;
	grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = (t_uscalar_t)-1;

	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
		/* a port without HCKSUM contributes no checksum flags */
		if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum))
			cksum = 0;
		grp->lg_hcksum_txflags &= cksum;

		/* cleared as soon as one port reports NO_NATIVEVLAN */
		grp->lg_vlan &=
		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL);

		/* cleared as soon as one port reports NO_ZCOPY */
		grp->lg_zcopy &=
		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL);

		/* cleared as soon as one port lacks LSO */
		grp->lg_lso &=
		    mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso);
		if (grp->lg_lso) {
			/* keep the common flags and the smallest lso_max */
			grp->lg_cap_lso.lso_flags &= cap_lso.lso_flags;
			if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
			    cap_lso.lso_basic_tcp_ipv4.lso_max)
				grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max =
				    cap_lso.lso_basic_tcp_ipv4.lso_max;
		}
	}
}
2138 
2139 /*
2140  * Checks whether the capabilities of the port being added are compatible
2141  * with the current capabilities of the aggregation.
2142  */
2143 static boolean_t
2144 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port)
2145 {
2146 	uint32_t hcksum_txflags;
2147 
2148 	ASSERT(grp->lg_ports != NULL);
2149 
2150 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) &
2151 	    grp->lg_vlan) != grp->lg_vlan) {
2152 		return (B_FALSE);
2153 	}
2154 
2155 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) &
2156 	    grp->lg_zcopy) != grp->lg_zcopy) {
2157 		return (B_FALSE);
2158 	}
2159 
2160 	if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) {
2161 		if (grp->lg_hcksum_txflags != 0)
2162 			return (B_FALSE);
2163 	} else if ((hcksum_txflags & grp->lg_hcksum_txflags) !=
2164 	    grp->lg_hcksum_txflags) {
2165 		return (B_FALSE);
2166 	}
2167 
2168 	if (grp->lg_lso) {
2169 		mac_capab_lso_t cap_lso;
2170 
2171 		if (mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso)) {
2172 			if ((grp->lg_cap_lso.lso_flags & cap_lso.lso_flags) !=
2173 			    grp->lg_cap_lso.lso_flags)
2174 				return (B_FALSE);
2175 			if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
2176 			    cap_lso.lso_basic_tcp_ipv4.lso_max)
2177 				return (B_FALSE);
2178 		} else {
2179 			return (B_FALSE);
2180 		}
2181 	}
2182 
2183 	return (B_TRUE);
2184 }
2185 
2186 /*
2187  * Returns the maximum SDU according to the SDU of the constituent ports.
2188  */
2189 static uint_t
2190 aggr_grp_max_sdu(aggr_grp_t *grp)
2191 {
2192 	uint_t max_sdu = (uint_t)-1;
2193 	aggr_port_t *port;
2194 
2195 	ASSERT(grp->lg_ports != NULL);
2196 
2197 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2198 		uint_t port_sdu_max;
2199 
2200 		mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
2201 		if (max_sdu > port_sdu_max)
2202 			max_sdu = port_sdu_max;
2203 	}
2204 
2205 	return (max_sdu);
2206 }
2207 
2208 /*
2209  * Checks if the maximum SDU of the specified port is compatible
2210  * with the maximum SDU of the specified aggregation group, returns
2211  * B_TRUE if it is, B_FALSE otherwise.
2212  */
2213 static boolean_t
2214 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port)
2215 {
2216 	uint_t port_sdu_max;
2217 
2218 	mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
2219 	return (port_sdu_max >= grp->lg_max_sdu);
2220 }
2221 
2222 /*
2223  * Returns the maximum margin according to the margin of the constituent ports.
2224  */
2225 static uint32_t
2226 aggr_grp_max_margin(aggr_grp_t *grp)
2227 {
2228 	uint32_t margin = UINT32_MAX;
2229 	aggr_port_t *port;
2230 
2231 	ASSERT(grp->lg_mh == NULL);
2232 	ASSERT(grp->lg_ports != NULL);
2233 
2234 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2235 		if (margin > port->lp_margin)
2236 			margin = port->lp_margin;
2237 	}
2238 
2239 	grp->lg_margin = margin;
2240 	return (margin);
2241 }
2242 
2243 /*
2244  * Checks if the maximum margin of the specified port is compatible
2245  * with the maximum margin of the specified aggregation group, returns
2246  * B_TRUE if it is, B_FALSE otherwise.
2247  */
2248 static boolean_t
2249 aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port)
2250 {
2251 	if (port->lp_margin >= grp->lg_margin)
2252 		return (B_TRUE);
2253 
2254 	/*
2255 	 * See whether the current margin value is allowed to be changed to
2256 	 * the new value.
2257 	 */
2258 	if (!mac_margin_update(grp->lg_mh, port->lp_margin))
2259 		return (B_FALSE);
2260 
2261 	grp->lg_margin = port->lp_margin;
2262 	return (B_TRUE);
2263 }
2264 
2265 /*
2266  * Set MTU on individual ports of an aggregation group
2267  */
2268 static int
2269 aggr_set_port_sdu(aggr_grp_t *grp, aggr_port_t *port, uint32_t sdu,
2270     uint32_t *old_mtu)
2271 {
2272 	boolean_t 		removed = B_FALSE;
2273 	mac_perim_handle_t	mph;
2274 	mac_diag_t		diag;
2275 	int			err, rv, retry = 0;
2276 
2277 	if (port->lp_mah != NULL) {
2278 		(void) mac_unicast_remove(port->lp_mch, port->lp_mah);
2279 		port->lp_mah = NULL;
2280 		removed = B_TRUE;
2281 	}
2282 	err = mac_set_mtu(port->lp_mh, sdu, old_mtu);
2283 try_again:
2284 	if (removed && (rv = mac_unicast_add(port->lp_mch, NULL,
2285 	    MAC_UNICAST_PRIMARY | MAC_UNICAST_DISABLE_TX_VID_CHECK,
2286 	    &port->lp_mah, 0, &diag)) != 0) {
2287 		/*
2288 		 * following is a workaround for a bug in 'bge' driver.
2289 		 * See CR 6794654 for more information and this work around
2290 		 * will be removed once the CR is fixed.
2291 		 */
2292 		if (rv == EIO && retry++ < 3) {
2293 			delay(2 * hz);
2294 			goto try_again;
2295 		}
2296 		/*
2297 		 * if mac_unicast_add() failed while setting the MTU,
2298 		 * detach the port from the group.
2299 		 */
2300 		mac_perim_enter_by_mh(port->lp_mh, &mph);
2301 		(void) aggr_grp_detach_port(grp, port);
2302 		mac_perim_exit(mph);
2303 		cmn_err(CE_WARN, "Unable to restart the port %s while "
2304 		    "setting MTU. Detaching the port from the aggregation.",
2305 		    mac_client_name(port->lp_mch));
2306 	}
2307 	return (err);
2308 }
2309 
2310 static int
2311 aggr_sdu_update(aggr_grp_t *grp, uint32_t sdu)
2312 {
2313 	int			err = 0, i, rv;
2314 	aggr_port_t		*port;
2315 	uint32_t		*mtu;
2316 
2317 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2318 
2319 	/*
2320 	 * If the MTU being set is equal to aggr group's maximum
2321 	 * allowable value, then there is nothing to change
2322 	 */
2323 	if (sdu == grp->lg_max_sdu)
2324 		return (0);
2325 
2326 	/* 0 is aggr group's min sdu */
2327 	if (sdu == 0)
2328 		return (EINVAL);
2329 
2330 	mtu = kmem_alloc(sizeof (uint32_t) * grp->lg_nports, KM_SLEEP);
2331 	for (port = grp->lg_ports, i = 0; port != NULL && err == 0;
2332 	    port = port->lp_next, i++) {
2333 		err = aggr_set_port_sdu(grp, port, sdu, mtu + i);
2334 	}
2335 	if (err != 0) {
2336 		/* recover from error: reset the mtus of the ports */
2337 		aggr_port_t *tmp;
2338 
2339 		for (tmp = grp->lg_ports, i = 0; tmp != port;
2340 		    tmp = tmp->lp_next, i++) {
2341 			(void) aggr_set_port_sdu(grp, tmp, *(mtu + i), NULL);
2342 		}
2343 		goto bail;
2344 	}
2345 	grp->lg_max_sdu = aggr_grp_max_sdu(grp);
2346 	rv = mac_maxsdu_update(grp->lg_mh, grp->lg_max_sdu);
2347 	ASSERT(rv == 0);
2348 bail:
2349 	kmem_free(mtu, sizeof (uint32_t) * grp->lg_nports);
2350 	return (err);
2351 }
2352 
2353 /*
2354  * Callback functions for set/get of properties
2355  */
2356 /*ARGSUSED*/
2357 static int
2358 aggr_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
2359     uint_t pr_valsize, const void *pr_val)
2360 {
2361 	int 		err = ENOTSUP;
2362 	aggr_grp_t 	*grp = m_driver;
2363 
2364 	switch (pr_num) {
2365 	case MAC_PROP_MTU: {
2366 		uint32_t 	mtu;
2367 
2368 		if (pr_valsize < sizeof (mtu)) {
2369 			err = EINVAL;
2370 			break;
2371 		}
2372 		bcopy(pr_val, &mtu, sizeof (mtu));
2373 		err = aggr_sdu_update(grp, mtu);
2374 		break;
2375 	}
2376 	default:
2377 		break;
2378 	}
2379 	return (err);
2380 }
2381 
2382 int
2383 aggr_grp_possible_mtu_range(aggr_grp_t *grp, mac_propval_range_t *range)
2384 {
2385 	mac_propval_range_t		*vals;
2386 	mac_propval_uint32_range_t	*ur;
2387 	aggr_port_t			*port;
2388 	mac_perim_handle_t		mph;
2389 	mac_prop_t 			macprop;
2390 	uint_t 				perm, i;
2391 	uint32_t 			min = 0, max = (uint32_t)-1;
2392 	int 				err = 0;
2393 
2394 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2395 
2396 	vals = kmem_alloc(sizeof (mac_propval_range_t) * grp->lg_nports,
2397 	    KM_SLEEP);
2398 	macprop.mp_id = MAC_PROP_MTU;
2399 	macprop.mp_name = "mtu";
2400 	macprop.mp_flags = MAC_PROP_POSSIBLE;
2401 
2402 	for (port = grp->lg_ports, i = 0; port != NULL;
2403 	    port = port->lp_next, i++) {
2404 		mac_perim_enter_by_mh(port->lp_mh, &mph);
2405 		err = mac_get_prop(port->lp_mh, &macprop, vals + i,
2406 		    sizeof (mac_propval_range_t), &perm);
2407 		mac_perim_exit(mph);
2408 		if (err != 0)
2409 			break;
2410 	}
2411 	/*
2412 	 * if any of the underlying ports does not support changing MTU then
2413 	 * just return ENOTSUP
2414 	 */
2415 	if (port != NULL) {
2416 		ASSERT(err != 0);
2417 		goto done;
2418 	}
2419 	range->mpr_count = 1;
2420 	range->mpr_type = MAC_PROPVAL_UINT32;
2421 	for (i = 0; i < grp->lg_nports; i++) {
2422 		ur = &((vals + i)->range_uint32[0]);
2423 		/*
2424 		 * Take max of the min, for range_min; that is the minimum
2425 		 * MTU value for an aggregation is the maximum of the
2426 		 * minimum values of all the underlying ports
2427 		 */
2428 		if (ur->mpur_min > min)
2429 			min = ur->mpur_min;
2430 		/* Take min of the max, for range_max */
2431 		if (ur->mpur_max < max)
2432 			max = ur->mpur_max;
2433 	}
2434 	range->range_uint32[0].mpur_min = min;
2435 	range->range_uint32[0].mpur_max = max;
2436 done:
2437 	kmem_free(vals, sizeof (mac_propval_range_t) * grp->lg_nports);
2438 	return (err);
2439 }
2440 
2441 /*ARGSUSED*/
2442 static int
2443 aggr_m_getprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
2444     uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm)
2445 {
2446 	mac_propval_range_t 	range;
2447 	int 			err = ENOTSUP;
2448 	aggr_grp_t		*grp = m_driver;
2449 
2450 	switch (pr_num) {
2451 	case MAC_PROP_MTU:
2452 		if (!(pr_flags & MAC_PROP_POSSIBLE))
2453 			return (ENOTSUP);
2454 		if (pr_valsize < sizeof (mac_propval_range_t))
2455 			return (EINVAL);
2456 		if ((err = aggr_grp_possible_mtu_range(grp, &range)) != 0)
2457 			return (err);
2458 		bcopy(&range, pr_val, sizeof (range));
2459 		return (0);
2460 	}
2461 	return (err);
2462 }
2463