xref: /illumos-gate/usr/src/uts/common/io/e1000g/e1000g_rx.c (revision 8fd04b8338ed5093ec2d1e668fa620b7de44c177)
1 /*
2  * This file is provided under a CDDLv1 license.  When using or
3  * redistributing this file, you may do so under this license.
4  * In redistributing this file this license must be included
5  * and no other modification of this header file is permitted.
6  *
7  * CDDL LICENSE SUMMARY
8  *
9  * Copyright(c) 1999 - 2009 Intel Corporation. All rights reserved.
10  *
11  * The contents of this file are subject to the terms of Version
12  * 1.0 of the Common Development and Distribution License (the "License").
13  *
14  * You should have received a copy of the License with this software.
15  * You can obtain a copy of the License at
16  *	http://www.opensolaris.org/os/licensing.
17  * See the License for the specific language governing permissions
18  * and limitations under the License.
19  */
20 
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * **********************************************************************
28  *									*
29  * Module Name:								*
30  *   e1000g_rx.c							*
31  *									*
32  * Abstract:								*
33  *   This file contains some routines that take care of Receive		*
34  *   interrupt and also for the received packets it sends up to		*
35  *   upper layer.							*
36  *   It tries to do a zero copy if free buffers are available in	*
37  *   the pool.								*
38  *									*
39  * **********************************************************************
40  */
41 
42 #include "e1000g_sw.h"
43 #include "e1000g_debug.h"
44 
45 static p_rx_sw_packet_t e1000g_get_buf(e1000g_rx_data_t *rx_data);
46 #pragma	inline(e1000g_get_buf)
47 
48 /*
49  * e1000g_rxfree_func - the call-back function to reclaim rx buffer
50  *
51  * This function is called when an mp is freed by the user thru
52  * freeb call (Only for mp constructed through desballoc call)
53  * It returns back the freed buffer to the freelist
54  */
55 void
56 e1000g_rxfree_func(p_rx_sw_packet_t packet)
57 {
58 	e1000g_rx_data_t *rx_data;
59 	private_devi_list_t *devi_node;
60 	struct e1000g *Adapter;
61 	uint32_t ring_cnt;
62 	uint32_t ref_cnt;
63 	unsigned char *address;
64 
65 	if (packet->ref_cnt == 0) {
66 		/*
67 		 * This case only happens when rx buffers are being freed
68 		 * in e1000g_stop() and freemsg() is called.
69 		 */
70 		return;
71 	}
72 
73 	rx_data = (e1000g_rx_data_t *)(uintptr_t)packet->rx_data;
74 
75 	if (packet->mp == NULL) {
76 		/*
77 		 * Allocate a mblk that binds to the data buffer
78 		 */
79 		address = (unsigned char *)packet->rx_buf->address;
80 		if (address != NULL) {
81 			packet->mp = desballoc((unsigned char *)
82 			    address, packet->rx_buf->size,
83 			    BPRI_MED, &packet->free_rtn);
84 		}
85 	}
86 
87 	/*
88 	 * Enqueue the recycled packets in a recycle queue. When freelist
89 	 * dries up, move the entire chain of packets from recycle queue
90 	 * to freelist. This helps in avoiding per packet mutex contention
91 	 * around freelist.
92 	 */
93 	mutex_enter(&rx_data->recycle_lock);
94 	QUEUE_PUSH_TAIL(&rx_data->recycle_list, &packet->Link);
95 	rx_data->recycle_freepkt++;
96 	mutex_exit(&rx_data->recycle_lock);
97 
98 	ref_cnt = atomic_dec_32_nv(&packet->ref_cnt);
99 	if (ref_cnt == 0) {
100 		mutex_enter(&e1000g_rx_detach_lock);
101 		e1000g_free_rx_sw_packet(packet, B_FALSE);
102 
103 		atomic_dec_32(&rx_data->pending_count);
104 		atomic_dec_32(&e1000g_mblks_pending);
105 
106 		if ((rx_data->pending_count == 0) &&
107 		    (rx_data->flag & E1000G_RX_STOPPED)) {
108 			devi_node = rx_data->priv_devi_node;
109 
110 			if (devi_node != NULL) {
111 				ring_cnt = atomic_dec_32_nv(
112 				    &devi_node->pending_rx_count);
113 				if ((ring_cnt == 0) &&
114 				    (devi_node->flag &
115 				    E1000G_PRIV_DEVI_DETACH)) {
116 					e1000g_free_priv_devi_node(
117 					    devi_node);
118 				}
119 			} else {
120 				Adapter = rx_data->rx_ring->adapter;
121 				atomic_dec_32(
122 				    &Adapter->pending_rx_count);
123 			}
124 
125 			e1000g_free_rx_pending_buffers(rx_data);
126 			e1000g_free_rx_data(rx_data);
127 		}
128 		mutex_exit(&e1000g_rx_detach_lock);
129 	}
130 }
131 
132 /*
133  * e1000g_rx_setup - setup rx data structures
134  *
135  * This routine initializes all of the receive related
136  * structures. This includes the receive descriptors, the
137  * actual receive buffers, and the rx_sw_packet software
138  * structures.
139  */
140 void
141 e1000g_rx_setup(struct e1000g *Adapter)
142 {
143 	struct e1000_hw *hw;
144 	p_rx_sw_packet_t packet;
145 	struct e1000_rx_desc *descriptor;
146 	uint32_t buf_low;
147 	uint32_t buf_high;
148 	uint32_t reg_val;
149 	uint32_t rctl;
150 	uint32_t rxdctl;
151 	uint32_t ert;
152 	uint16_t phy_data;
153 	int i;
154 	int size;
155 	e1000g_rx_data_t *rx_data;
156 
157 	hw = &Adapter->shared;
158 	rx_data = Adapter->rx_ring->rx_data;
159 
160 	/*
161 	 * zero out all of the receive buffer descriptor memory
162 	 * assures any previous data or status is erased
163 	 */
164 	bzero(rx_data->rbd_area,
165 	    sizeof (struct e1000_rx_desc) * Adapter->rx_desc_num);
166 
167 	if (!Adapter->rx_buffer_setup) {
168 		/* Init the list of "Receive Buffer" */
169 		QUEUE_INIT_LIST(&rx_data->recv_list);
170 
171 		/* Init the list of "Free Receive Buffer" */
172 		QUEUE_INIT_LIST(&rx_data->free_list);
173 
174 		/* Init the list of "Free Receive Buffer" */
175 		QUEUE_INIT_LIST(&rx_data->recycle_list);
176 		/*
177 		 * Setup Receive list and the Free list. Note that
178 		 * the both were allocated in one packet area.
179 		 */
180 		packet = rx_data->packet_area;
181 		descriptor = rx_data->rbd_first;
182 
183 		for (i = 0; i < Adapter->rx_desc_num;
184 		    i++, packet = packet->next, descriptor++) {
185 			ASSERT(packet != NULL);
186 			ASSERT(descriptor != NULL);
187 			descriptor->buffer_addr =
188 			    packet->rx_buf->dma_address;
189 
190 			/* Add this rx_sw_packet to the receive list */
191 			QUEUE_PUSH_TAIL(&rx_data->recv_list,
192 			    &packet->Link);
193 		}
194 
195 		for (i = 0; i < Adapter->rx_freelist_num;
196 		    i++, packet = packet->next) {
197 			ASSERT(packet != NULL);
198 			/* Add this rx_sw_packet to the free list */
199 			QUEUE_PUSH_TAIL(&rx_data->free_list,
200 			    &packet->Link);
201 		}
202 		rx_data->avail_freepkt = Adapter->rx_freelist_num;
203 		rx_data->recycle_freepkt = 0;
204 
205 		Adapter->rx_buffer_setup = B_TRUE;
206 	} else {
207 		/* Setup the initial pointer to the first rx descriptor */
208 		packet = (p_rx_sw_packet_t)
209 		    QUEUE_GET_HEAD(&rx_data->recv_list);
210 		descriptor = rx_data->rbd_first;
211 
212 		for (i = 0; i < Adapter->rx_desc_num; i++) {
213 			ASSERT(packet != NULL);
214 			ASSERT(descriptor != NULL);
215 			descriptor->buffer_addr =
216 			    packet->rx_buf->dma_address;
217 
218 			/* Get next rx_sw_packet */
219 			packet = (p_rx_sw_packet_t)
220 			    QUEUE_GET_NEXT(&rx_data->recv_list, &packet->Link);
221 			descriptor++;
222 		}
223 	}
224 
225 	E1000_WRITE_REG(&Adapter->shared, E1000_RDTR, Adapter->rx_intr_delay);
226 	E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
227 	    "E1000_RDTR: 0x%x\n", Adapter->rx_intr_delay);
228 	if (hw->mac.type >= e1000_82540) {
229 		E1000_WRITE_REG(&Adapter->shared, E1000_RADV,
230 		    Adapter->rx_intr_abs_delay);
231 		E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
232 		    "E1000_RADV: 0x%x\n", Adapter->rx_intr_abs_delay);
233 	}
234 
235 	/*
236 	 * Setup our descriptor pointers
237 	 */
238 	rx_data->rbd_next = rx_data->rbd_first;
239 
240 	size = Adapter->rx_desc_num * sizeof (struct e1000_rx_desc);
241 	E1000_WRITE_REG(hw, E1000_RDLEN(0), size);
242 	size = E1000_READ_REG(hw, E1000_RDLEN(0));
243 
244 	/* To get lower order bits */
245 	buf_low = (uint32_t)rx_data->rbd_dma_addr;
246 	/* To get the higher order bits */
247 	buf_high = (uint32_t)(rx_data->rbd_dma_addr >> 32);
248 
249 	E1000_WRITE_REG(hw, E1000_RDBAH(0), buf_high);
250 	E1000_WRITE_REG(hw, E1000_RDBAL(0), buf_low);
251 
252 	/*
253 	 * Setup our HW Rx Head & Tail descriptor pointers
254 	 */
255 	E1000_WRITE_REG(hw, E1000_RDT(0),
256 	    (uint32_t)(rx_data->rbd_last - rx_data->rbd_first));
257 	E1000_WRITE_REG(hw, E1000_RDH(0), 0);
258 
259 	/*
260 	 * Setup the Receive Control Register (RCTL), and ENABLE the
261 	 * receiver. The initial configuration is to: Enable the receiver,
262 	 * accept broadcasts, discard bad packets (and long packets),
263 	 * disable VLAN filter checking, set the receive descriptor
264 	 * minimum threshold size to 1/2, and the receive buffer size to
265 	 * 2k.
266 	 */
267 	rctl = E1000_RCTL_EN |		/* Enable Receive Unit */
268 	    E1000_RCTL_BAM |		/* Accept Broadcast Packets */
269 	    E1000_RCTL_LPE |		/* Large Packet Enable bit */
270 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT) |
271 	    E1000_RCTL_RDMTS_HALF |
272 	    E1000_RCTL_LBM_NO;		/* Loopback Mode = none */
273 
274 	if (Adapter->strip_crc)
275 		rctl |= E1000_RCTL_SECRC;	/* Strip Ethernet CRC */
276 
277 	if (Adapter->mem_workaround_82546 &&
278 	    ((hw->mac.type == e1000_82545) ||
279 	    (hw->mac.type == e1000_82546) ||
280 	    (hw->mac.type == e1000_82546_rev_3))) {
281 		rctl |= E1000_RCTL_SZ_2048;
282 	} else {
283 		if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_2K) &&
284 		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_4K))
285 			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
286 		else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_4K) &&
287 		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_8K))
288 			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
289 		else if ((Adapter->max_frame_size > FRAME_SIZE_UPTO_8K) &&
290 		    (Adapter->max_frame_size <= FRAME_SIZE_UPTO_16K))
291 			rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX;
292 		else
293 			rctl |= E1000_RCTL_SZ_2048;
294 	}
295 
296 	if (e1000_tbi_sbp_enabled_82543(hw))
297 		rctl |= E1000_RCTL_SBP;
298 
299 	/*
300 	 * Enable Early Receive Threshold (ERT) on supported devices.
301 	 * Only takes effect when packet size is equal or larger than the
302 	 * specified value (in 8 byte units), e.g. using jumbo frames.
303 	 */
304 	if ((hw->mac.type == e1000_82573) ||
305 	    (hw->mac.type == e1000_82574) ||
306 	    (hw->mac.type == e1000_ich9lan) ||
307 	    (hw->mac.type == e1000_ich10lan)) {
308 
309 		ert = E1000_ERT_2048;
310 
311 		/*
312 		 * Special modification when ERT and
313 		 * jumbo frames are enabled
314 		 */
315 		if (Adapter->default_mtu > ETHERMTU) {
316 			rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
317 			E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 0x3);
318 			ert |= (1 << 13);
319 		}
320 
321 		E1000_WRITE_REG(hw, E1000_ERT, ert);
322 	}
323 
324 	/* Workaround errata on 82577/8 adapters with large frames */
325 	if ((hw->mac.type == e1000_pchlan) &&
326 	    (Adapter->default_mtu > ETHERMTU)) {
327 
328 		(void) e1000_read_phy_reg(hw, PHY_REG(770, 26), &phy_data);
329 		phy_data &= 0xfff8;
330 		phy_data |= (1 << 2);
331 		(void) e1000_write_phy_reg(hw, PHY_REG(770, 26), phy_data);
332 
333 		if (hw->phy.type == e1000_phy_82577) {
334 			(void) e1000_read_phy_reg(hw, 22, &phy_data);
335 			phy_data &= 0x0fff;
336 			phy_data |= (1 << 14);
337 			(void) e1000_write_phy_reg(hw, 0x10, 0x2823);
338 			(void) e1000_write_phy_reg(hw, 0x11, 0x0003);
339 			(void) e1000_write_phy_reg(hw, 22, phy_data);
340 		}
341 	}
342 
343 	reg_val =
344 	    E1000_RXCSUM_TUOFL |	/* TCP/UDP checksum offload Enable */
345 	    E1000_RXCSUM_IPOFL;		/* IP checksum offload Enable */
346 
347 	E1000_WRITE_REG(hw, E1000_RXCSUM, reg_val);
348 
349 	/*
350 	 * Workaround: Set bit 16 (IPv6_ExDIS) to disable the
351 	 * processing of received IPV6 extension headers
352 	 */
353 	if ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572)) {
354 		reg_val = E1000_READ_REG(hw, E1000_RFCTL);
355 		reg_val |= (E1000_RFCTL_IPV6_EX_DIS |
356 		    E1000_RFCTL_NEW_IPV6_EXT_DIS);
357 		E1000_WRITE_REG(hw, E1000_RFCTL, reg_val);
358 	}
359 
360 	/* Write to enable the receive unit */
361 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
362 }
363 
364 /*
365  * e1000g_get_buf - get an rx sw packet from the free_list
366  */
367 static p_rx_sw_packet_t
368 e1000g_get_buf(e1000g_rx_data_t *rx_data)
369 {
370 	p_rx_sw_packet_t packet;
371 
372 	mutex_enter(&rx_data->freelist_lock);
373 	packet = (p_rx_sw_packet_t)
374 	    QUEUE_POP_HEAD(&rx_data->free_list);
375 	if (packet != NULL) {
376 		rx_data->avail_freepkt--;
377 	} else {
378 		/*
379 		 * If the freelist has no packets, check the recycle list
380 		 * to see if there are any available descriptor there.
381 		 */
382 		mutex_enter(&rx_data->recycle_lock);
383 		QUEUE_SWITCH(&rx_data->free_list, &rx_data->recycle_list);
384 		rx_data->avail_freepkt = rx_data->recycle_freepkt;
385 		rx_data->recycle_freepkt = 0;
386 		mutex_exit(&rx_data->recycle_lock);
387 		packet = (p_rx_sw_packet_t)
388 		    QUEUE_POP_HEAD(&rx_data->free_list);
389 		if (packet != NULL)
390 			rx_data->avail_freepkt--;
391 	}
392 	mutex_exit(&rx_data->freelist_lock);
393 
394 	return (packet);
395 }
396 
397 /*
398  * e1000g_receive - main receive routine
399  *
400  * This routine will process packets received in an interrupt
401  */
402 mblk_t *
403 e1000g_receive(e1000g_rx_ring_t *rx_ring, mblk_t **tail, uint_t sz)
404 {
405 	struct e1000_hw *hw;
406 	mblk_t *nmp;
407 	mblk_t *ret_mp;
408 	mblk_t *ret_nmp;
409 	struct e1000_rx_desc *current_desc;
410 	struct e1000_rx_desc *last_desc;
411 	p_rx_sw_packet_t packet;
412 	p_rx_sw_packet_t newpkt;
413 	uint16_t length;
414 	uint32_t pkt_count;
415 	uint32_t desc_count;
416 	boolean_t accept_frame;
417 	boolean_t end_of_packet;
418 	boolean_t need_copy;
419 	struct e1000g *Adapter;
420 	dma_buffer_t *rx_buf;
421 	uint16_t cksumflags;
422 	uint_t chain_sz = 0;
423 	e1000g_rx_data_t *rx_data;
424 	uint32_t max_size;
425 	uint32_t min_size;
426 
427 	ret_mp = NULL;
428 	ret_nmp = NULL;
429 	pkt_count = 0;
430 	desc_count = 0;
431 	cksumflags = 0;
432 
433 	Adapter = rx_ring->adapter;
434 	rx_data = rx_ring->rx_data;
435 	hw = &Adapter->shared;
436 
437 	/* Sync the Rx descriptor DMA buffers */
438 	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
439 	    0, 0, DDI_DMA_SYNC_FORKERNEL);
440 
441 	if (e1000g_check_dma_handle(rx_data->rbd_dma_handle) != DDI_FM_OK) {
442 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
443 		Adapter->e1000g_state |= E1000G_ERROR;
444 		return (NULL);
445 	}
446 
447 	current_desc = rx_data->rbd_next;
448 	if (!(current_desc->status & E1000_RXD_STAT_DD)) {
449 		/*
450 		 * don't send anything up. just clear the RFD
451 		 */
452 		E1000G_DEBUG_STAT(rx_ring->stat_none);
453 		return (NULL);
454 	}
455 
456 	max_size = Adapter->max_frame_size - ETHERFCSL - VLAN_TAGSZ;
457 	min_size = ETHERMIN;
458 
459 	/*
460 	 * Loop through the receive descriptors starting at the last known
461 	 * descriptor owned by the hardware that begins a packet.
462 	 */
463 	while ((current_desc->status & E1000_RXD_STAT_DD) &&
464 	    (pkt_count < Adapter->rx_limit_onintr) &&
465 	    ((sz == E1000G_CHAIN_NO_LIMIT) || (chain_sz <= sz))) {
466 
467 		desc_count++;
468 		/*
469 		 * Now this can happen in Jumbo frame situation.
470 		 */
471 		if (current_desc->status & E1000_RXD_STAT_EOP) {
472 			/* packet has EOP set */
473 			end_of_packet = B_TRUE;
474 		} else {
475 			/*
476 			 * If this received buffer does not have the
477 			 * End-Of-Packet bit set, the received packet
478 			 * will consume multiple buffers. We won't send this
479 			 * packet upstack till we get all the related buffers.
480 			 */
481 			end_of_packet = B_FALSE;
482 		}
483 
484 		/*
485 		 * Get a pointer to the actual receive buffer
486 		 * The mp->b_rptr is mapped to The CurrentDescriptor
487 		 * Buffer Address.
488 		 */
489 		packet =
490 		    (p_rx_sw_packet_t)QUEUE_GET_HEAD(&rx_data->recv_list);
491 		ASSERT(packet != NULL);
492 
493 		rx_buf = packet->rx_buf;
494 
495 		length = current_desc->length;
496 
497 #ifdef __sparc
498 		if (packet->dma_type == USE_DVMA)
499 			dvma_sync(rx_buf->dma_handle, 0,
500 			    DDI_DMA_SYNC_FORKERNEL);
501 		else
502 			(void) ddi_dma_sync(rx_buf->dma_handle,
503 			    E1000G_IPALIGNROOM, length,
504 			    DDI_DMA_SYNC_FORKERNEL);
505 #else
506 		(void) ddi_dma_sync(rx_buf->dma_handle,
507 		    E1000G_IPALIGNROOM, length,
508 		    DDI_DMA_SYNC_FORKERNEL);
509 #endif
510 
511 		if (e1000g_check_dma_handle(
512 		    rx_buf->dma_handle) != DDI_FM_OK) {
513 			ddi_fm_service_impact(Adapter->dip,
514 			    DDI_SERVICE_DEGRADED);
515 			Adapter->e1000g_state |= E1000G_ERROR;
516 
517 			goto rx_drop;
518 		}
519 
520 		accept_frame = (current_desc->errors == 0) ||
521 		    ((current_desc->errors &
522 		    (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE)) != 0);
523 
524 		if (hw->mac.type == e1000_82543) {
525 			unsigned char last_byte;
526 
527 			last_byte =
528 			    *((unsigned char *)rx_buf->address + length - 1);
529 
530 			if (TBI_ACCEPT(hw,
531 			    current_desc->status, current_desc->errors,
532 			    current_desc->length, last_byte,
533 			    Adapter->min_frame_size, Adapter->max_frame_size)) {
534 
535 				e1000_tbi_adjust_stats(Adapter,
536 				    length, hw->mac.addr);
537 
538 				length--;
539 				accept_frame = B_TRUE;
540 			} else if (e1000_tbi_sbp_enabled_82543(hw) &&
541 			    (current_desc->errors == E1000_RXD_ERR_CE)) {
542 				accept_frame = B_TRUE;
543 			}
544 		}
545 
546 		/*
547 		 * Indicate the packet to the NOS if it was good.
548 		 * Normally, hardware will discard bad packets for us.
549 		 * Check for the packet to be a valid Ethernet packet
550 		 */
551 		if (!accept_frame) {
552 			/*
553 			 * error in incoming packet, either the packet is not a
554 			 * ethernet size packet, or the packet has an error. In
555 			 * either case, the packet will simply be discarded.
556 			 */
557 			E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
558 			    "Process Receive Interrupts: Error in Packet\n");
559 
560 			E1000G_STAT(rx_ring->stat_error);
561 			/*
562 			 * Returning here as we are done here. There is
563 			 * no point in waiting for while loop to elapse
564 			 * and the things which were done. More efficient
565 			 * and less error prone...
566 			 */
567 			goto rx_drop;
568 		}
569 
570 		/*
571 		 * If the Ethernet CRC is not stripped by the hardware,
572 		 * we need to strip it before sending it up to the stack.
573 		 */
574 		if (end_of_packet && !Adapter->strip_crc) {
575 			if (length > ETHERFCSL) {
576 				length -= ETHERFCSL;
577 			} else {
578 				/*
579 				 * If the fragment is smaller than the CRC,
580 				 * drop this fragment, do the processing of
581 				 * the end of the packet.
582 				 */
583 				ASSERT(rx_data->rx_mblk_tail != NULL);
584 				rx_data->rx_mblk_tail->b_wptr -=
585 				    ETHERFCSL - length;
586 				rx_data->rx_mblk_len -=
587 				    ETHERFCSL - length;
588 
589 				QUEUE_POP_HEAD(&rx_data->recv_list);
590 
591 				goto rx_end_of_packet;
592 			}
593 		}
594 
595 		need_copy = B_TRUE;
596 
597 		if (length <= Adapter->rx_bcopy_thresh)
598 			goto rx_copy;
599 
600 		/*
601 		 * Get the pre-constructed mblk that was associated
602 		 * to the receive data buffer.
603 		 */
604 		if (packet->mp == NULL) {
605 			packet->mp = desballoc((unsigned char *)
606 			    rx_buf->address, length,
607 			    BPRI_MED, &packet->free_rtn);
608 		}
609 
610 		if (packet->mp != NULL) {
611 			/*
612 			 * We have two sets of buffer pool. One associated with
613 			 * the Rxdescriptors and other a freelist buffer pool.
614 			 * Each time we get a good packet, Try to get a buffer
615 			 * from the freelist pool using e1000g_get_buf. If we
616 			 * get free buffer, then replace the descriptor buffer
617 			 * address with the free buffer we just got, and pass
618 			 * the pre-constructed mblk upstack. (note no copying)
619 			 *
620 			 * If we failed to get a free buffer, then try to
621 			 * allocate a new buffer(mp) and copy the recv buffer
622 			 * content to our newly allocated buffer(mp). Don't
623 			 * disturb the desriptor buffer address. (note copying)
624 			 */
625 			newpkt = e1000g_get_buf(rx_data);
626 
627 			if (newpkt != NULL) {
628 				/*
629 				 * Get the mblk associated to the data,
630 				 * and strip it off the sw packet.
631 				 */
632 				nmp = packet->mp;
633 				packet->mp = NULL;
634 				atomic_inc_32(&packet->ref_cnt);
635 
636 				/*
637 				 * Now replace old buffer with the new
638 				 * one we got from free list
639 				 * Both the RxSwPacket as well as the
640 				 * Receive Buffer Descriptor will now
641 				 * point to this new packet.
642 				 */
643 				packet = newpkt;
644 
645 				current_desc->buffer_addr =
646 				    newpkt->rx_buf->dma_address;
647 
648 				need_copy = B_FALSE;
649 			} else {
650 				/* EMPTY */
651 				E1000G_DEBUG_STAT(rx_ring->stat_no_freepkt);
652 			}
653 		}
654 
655 rx_copy:
656 		if (need_copy) {
657 			/*
658 			 * No buffers available on free list,
659 			 * bcopy the data from the buffer and
660 			 * keep the original buffer. Dont want to
661 			 * do this.. Yack but no other way
662 			 */
663 			if ((nmp = allocb(length + E1000G_IPALIGNROOM,
664 			    BPRI_MED)) == NULL) {
665 				/*
666 				 * The system has no buffers available
667 				 * to send up the incoming packet, hence
668 				 * the packet will have to be processed
669 				 * when there're more buffers available.
670 				 */
671 				E1000G_STAT(rx_ring->stat_allocb_fail);
672 				goto rx_drop;
673 			}
674 			nmp->b_rptr += E1000G_IPALIGNROOM;
675 			nmp->b_wptr += E1000G_IPALIGNROOM;
676 			/*
677 			 * The free list did not have any buffers
678 			 * available, so, the received packet will
679 			 * have to be copied into a mp and the original
680 			 * buffer will have to be retained for future
681 			 * packet reception.
682 			 */
683 			bcopy(rx_buf->address, nmp->b_wptr, length);
684 		}
685 
686 		/*
687 		 * The rx_sw_packet MUST be popped off the
688 		 * RxSwPacketList before either a putnext or freemsg
689 		 * is done on the mp that has now been created by the
690 		 * desballoc. If not, it is possible that the free
691 		 * routine will get called from the interrupt context
692 		 * and try to put this packet on the free list
693 		 */
694 		(p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_data->recv_list);
695 
696 		ASSERT(nmp != NULL);
697 		nmp->b_wptr += length;
698 
699 		if (rx_data->rx_mblk == NULL) {
700 			/*
701 			 *  TCP/UDP checksum offload and
702 			 *  IP checksum offload
703 			 */
704 			if (!(current_desc->status & E1000_RXD_STAT_IXSM)) {
705 				/*
706 				 * Check TCP/UDP checksum
707 				 */
708 				if ((current_desc->status &
709 				    E1000_RXD_STAT_TCPCS) &&
710 				    !(current_desc->errors &
711 				    E1000_RXD_ERR_TCPE))
712 					cksumflags |= HCK_FULLCKSUM |
713 					    HCK_FULLCKSUM_OK;
714 				/*
715 				 * Check IP Checksum
716 				 */
717 				if ((current_desc->status &
718 				    E1000_RXD_STAT_IPCS) &&
719 				    !(current_desc->errors &
720 				    E1000_RXD_ERR_IPE))
721 					cksumflags |= HCK_IPV4_HDRCKSUM;
722 			}
723 		}
724 
725 		/*
726 		 * We need to maintain our packet chain in the global
727 		 * Adapter structure, for the Rx processing can end
728 		 * with a fragment that has no EOP set.
729 		 */
730 		if (rx_data->rx_mblk == NULL) {
731 			/* Get the head of the message chain */
732 			rx_data->rx_mblk = nmp;
733 			rx_data->rx_mblk_tail = nmp;
734 			rx_data->rx_mblk_len = length;
735 		} else {	/* Not the first packet */
736 			/* Continue adding buffers */
737 			rx_data->rx_mblk_tail->b_cont = nmp;
738 			rx_data->rx_mblk_tail = nmp;
739 			rx_data->rx_mblk_len += length;
740 		}
741 		ASSERT(rx_data->rx_mblk != NULL);
742 		ASSERT(rx_data->rx_mblk_tail != NULL);
743 		ASSERT(rx_data->rx_mblk_tail->b_cont == NULL);
744 
745 		/*
746 		 * Now this MP is ready to travel upwards but some more
747 		 * fragments are coming.
748 		 * We will send packet upwards as soon as we get EOP
749 		 * set on the packet.
750 		 */
751 		if (!end_of_packet) {
752 			/*
753 			 * continue to get the next descriptor,
754 			 * Tail would be advanced at the end
755 			 */
756 			goto rx_next_desc;
757 		}
758 
759 rx_end_of_packet:
760 		if (E1000G_IS_VLAN_PACKET(rx_data->rx_mblk->b_rptr))
761 			max_size = Adapter->max_frame_size - ETHERFCSL;
762 
763 		if ((rx_data->rx_mblk_len > max_size) ||
764 		    (rx_data->rx_mblk_len < min_size)) {
765 			E1000G_STAT(rx_ring->stat_size_error);
766 			goto rx_drop;
767 		}
768 
769 		/*
770 		 * Found packet with EOP
771 		 * Process the last fragment.
772 		 */
773 		if (cksumflags != 0) {
774 			(void) hcksum_assoc(rx_data->rx_mblk,
775 			    NULL, NULL, 0, 0, 0, 0, cksumflags, 0);
776 			cksumflags = 0;
777 		}
778 
779 		/*
780 		 * Count packets that span multi-descriptors
781 		 */
782 		E1000G_DEBUG_STAT_COND(rx_ring->stat_multi_desc,
783 		    (rx_data->rx_mblk->b_cont != NULL));
784 
785 		/*
786 		 * Append to list to send upstream
787 		 */
788 		if (ret_mp == NULL) {
789 			ret_mp = ret_nmp = rx_data->rx_mblk;
790 		} else {
791 			ret_nmp->b_next = rx_data->rx_mblk;
792 			ret_nmp = rx_data->rx_mblk;
793 		}
794 		ret_nmp->b_next = NULL;
795 		*tail = ret_nmp;
796 		chain_sz += length;
797 
798 		rx_data->rx_mblk = NULL;
799 		rx_data->rx_mblk_tail = NULL;
800 		rx_data->rx_mblk_len = 0;
801 
802 		pkt_count++;
803 
804 rx_next_desc:
805 		/*
806 		 * Zero out the receive descriptors status
807 		 */
808 		current_desc->status = 0;
809 
810 		if (current_desc == rx_data->rbd_last)
811 			rx_data->rbd_next = rx_data->rbd_first;
812 		else
813 			rx_data->rbd_next++;
814 
815 		last_desc = current_desc;
816 		current_desc = rx_data->rbd_next;
817 
818 		/*
819 		 * Put the buffer that we just indicated back
820 		 * at the end of our list
821 		 */
822 		QUEUE_PUSH_TAIL(&rx_data->recv_list,
823 		    &packet->Link);
824 	}	/* while loop */
825 
826 	/* Sync the Rx descriptor DMA buffers */
827 	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
828 	    0, 0, DDI_DMA_SYNC_FORDEV);
829 
830 	/*
831 	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
832 	 */
833 	E1000_WRITE_REG(hw, E1000_RDT(0),
834 	    (uint32_t)(last_desc - rx_data->rbd_first));
835 
836 	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
837 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
838 		Adapter->e1000g_state |= E1000G_ERROR;
839 	}
840 
841 	Adapter->rx_pkt_cnt = pkt_count;
842 
843 	return (ret_mp);
844 
845 rx_drop:
846 	/*
847 	 * Zero out the receive descriptors status
848 	 */
849 	current_desc->status = 0;
850 
851 	/* Sync the Rx descriptor DMA buffers */
852 	(void) ddi_dma_sync(rx_data->rbd_dma_handle,
853 	    0, 0, DDI_DMA_SYNC_FORDEV);
854 
855 	if (current_desc == rx_data->rbd_last)
856 		rx_data->rbd_next = rx_data->rbd_first;
857 	else
858 		rx_data->rbd_next++;
859 
860 	last_desc = current_desc;
861 
862 	(p_rx_sw_packet_t)QUEUE_POP_HEAD(&rx_data->recv_list);
863 
864 	QUEUE_PUSH_TAIL(&rx_data->recv_list, &packet->Link);
865 	/*
866 	 * Reclaim all old buffers already allocated during
867 	 * Jumbo receives.....for incomplete reception
868 	 */
869 	if (rx_data->rx_mblk != NULL) {
870 		freemsg(rx_data->rx_mblk);
871 		rx_data->rx_mblk = NULL;
872 		rx_data->rx_mblk_tail = NULL;
873 		rx_data->rx_mblk_len = 0;
874 	}
875 	/*
876 	 * Advance the E1000's Receive Queue #0 "Tail Pointer".
877 	 */
878 	E1000_WRITE_REG(hw, E1000_RDT(0),
879 	    (uint32_t)(last_desc - rx_data->rbd_first));
880 
881 	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
882 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
883 		Adapter->e1000g_state |= E1000G_ERROR;
884 	}
885 
886 	return (ret_mp);
887 }
888