xref: /linux/drivers/net/ethernet/intel/i40e/i40e_txrx.c (revision a13d7201d7deedcbb6ac6efa94a1a7d34d3d79ec)
1 /*******************************************************************************
2  *
3  * Intel Ethernet Controller XL710 Family Linux Driver
4  * Copyright(c) 2013 - 2014 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  * The full GNU General Public License is included in this distribution in
19  * the file called "COPYING".
20  *
21  * Contact Information:
22  * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
23  * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
24  *
25  ******************************************************************************/
26 
27 #include <linux/prefetch.h>
28 #include <net/busy_poll.h>
29 #include "i40e.h"
30 #include "i40e_prototype.h"
31 
32 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
33 				u32 td_tag)
34 {
35 	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
36 			   ((u64)td_cmd  << I40E_TXD_QW1_CMD_SHIFT) |
37 			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
38 			   ((u64)size  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
39 			   ((u64)td_tag  << I40E_TXD_QW1_L2TAG1_SHIFT));
40 }
41 
42 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
43 #define I40E_FD_CLEAN_DELAY 10
44 /**
45  * i40e_program_fdir_filter - Program a Flow Director filter
46  * @fdir_data: Flow Director filter data supplying the filter parameters
47  * @raw_packet: the pre-allocated packet buffer for FDir
48  * @pf: The PF pointer
49  * @add: True for add/update, False for remove
50  **/
51 int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
52 			     struct i40e_pf *pf, bool add)
53 {
54 	struct i40e_filter_program_desc *fdir_desc;
55 	struct i40e_tx_buffer *tx_buf, *first;
56 	struct i40e_tx_desc *tx_desc;
57 	struct i40e_ring *tx_ring;
58 	unsigned int fpt, dcc;
59 	struct i40e_vsi *vsi;
60 	struct device *dev;
61 	dma_addr_t dma;
62 	u32 td_cmd = 0;
63 	u16 delay = 0;
64 	u16 i;
65 
66 	/* find existing FDIR VSI */
67 	vsi = NULL;
68 	for (i = 0; i < pf->num_alloc_vsi; i++)
69 		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
70 			vsi = pf->vsi[i];
71 	if (!vsi)
72 		return -ENOENT;
73 
74 	tx_ring = vsi->tx_rings[0];
75 	dev = tx_ring->dev;
76 
77 	/* we need two descriptors to add/del a filter and we can wait */
78 	do {
79 		if (I40E_DESC_UNUSED(tx_ring) > 1)
80 			break;
81 		msleep_interruptible(1);
82 		delay++;
83 	} while (delay < I40E_FD_CLEAN_DELAY);
84 
85 	if (!(I40E_DESC_UNUSED(tx_ring) > 1))
86 		return -EAGAIN;
87 
88 	dma = dma_map_single(dev, raw_packet,
89 			     I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
90 	if (dma_mapping_error(dev, dma))
91 		goto dma_fail;
92 
93 	/* grab the next descriptor */
94 	i = tx_ring->next_to_use;
95 	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
96 	first = &tx_ring->tx_bi[i];
97 	memset(first, 0, sizeof(struct i40e_tx_buffer));
98 
99 	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
100 
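	/* build the two descriptor words: fpt becomes qindex_flex_ptype_vsi
	 * and dcc becomes dtype_cmd_cntindex in the filter programming
	 * descriptor written out below
	 */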
101 	fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
102 	      I40E_TXD_FLTR_QW0_QINDEX_MASK;
103 
104 	fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
105 	       I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
106 
107 	fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
108 	       I40E_TXD_FLTR_QW0_PCTYPE_MASK;
109 
110 	/* Use LAN VSI Id if not programmed by user */
111 	if (fdir_data->dest_vsi == 0)
112 		fpt |= (pf->vsi[pf->lan_vsi]->id) <<
113 		       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
114 	else
115 		fpt |= ((u32)fdir_data->dest_vsi <<
116 			I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
117 		       I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
118 
119 	dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
120 
121 	if (add)
122 		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
123 		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
124 	else
125 		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
126 		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
127 
128 	dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
129 	       I40E_TXD_FLTR_QW1_DEST_MASK;
130 
131 	dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
132 	       I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
133 
134 	if (fdir_data->cnt_index != 0) {
135 		dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
136 		dcc |= ((u32)fdir_data->cnt_index <<
137 			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
138 			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
139 	}
140 
141 	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
142 	fdir_desc->rsvd = cpu_to_le32(0);
143 	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
144 	fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
145 
146 	/* Now program a dummy descriptor */
147 	i = tx_ring->next_to_use;
148 	tx_desc = I40E_TX_DESC(tx_ring, i);
149 	tx_buf = &tx_ring->tx_bi[i];
150 
151 	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
152 
153 	memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));
154 
155 	/* record length, and DMA address */
156 	dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
157 	dma_unmap_addr_set(tx_buf, dma, dma);
158 
159 	tx_desc->buffer_addr = cpu_to_le64(dma);
160 	td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
161 
162 	tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
163 	tx_buf->raw_buf = (void *)raw_packet;
164 
165 	tx_desc->cmd_type_offset_bsz =
166 		build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
167 
168 	/* Force memory writes to complete before letting h/w
169 	 * know there are new descriptors to fetch.
170 	 */
171 	wmb();
172 
173 	/* Mark the data descriptor to be watched */
174 	first->next_to_watch = tx_desc;
175 
176 	writel(tx_ring->next_to_use, tx_ring->tail);
177 	return 0;
178 
179 dma_fail:
180 	return -1;
181 }
182 
183 #define IP_HEADER_OFFSET 14
184 #define I40E_UDPIP_DUMMY_PACKET_LEN 42
185 /**
186  * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
187  * @vsi: pointer to the targeted VSI
188  * @fd_data: the flow director data required for the FDir descriptor
189  * @add: true adds a filter, false removes it
190  *
191  * Returns 0 if the filters were successfully added or removed
192  **/
193 static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
194 				   struct i40e_fdir_filter *fd_data,
195 				   bool add)
196 {
197 	struct i40e_pf *pf = vsi->back;
198 	struct udphdr *udp;
199 	struct iphdr *ip;
200 	bool err = false;
201 	u8 *raw_packet;
202 	int ret;
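	/* minimal Ethernet + IPv4 + UDP frame used as the FDir dummy packet;
	 * the addresses and ports are patched in below
	 */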
203 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
204 		0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
205 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
206 
207 	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
208 	if (!raw_packet)
209 		return -ENOMEM;
210 	memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
211 
212 	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
213 	udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
214 	      + sizeof(struct iphdr));
215 
216 	ip->daddr = fd_data->dst_ip[0];
217 	udp->dest = fd_data->dst_port;
218 	ip->saddr = fd_data->src_ip[0];
219 	udp->source = fd_data->src_port;
220 
221 	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
222 	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
223 	if (ret) {
224 		dev_info(&pf->pdev->dev,
225 			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
226 			 fd_data->pctype, fd_data->fd_id, ret);
227 		err = true;
228 	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
229 		if (add)
230 			dev_info(&pf->pdev->dev,
231 				 "Filter OK for PCTYPE %d loc = %d\n",
232 				 fd_data->pctype, fd_data->fd_id);
233 		else
234 			dev_info(&pf->pdev->dev,
235 				 "Filter deleted for PCTYPE %d loc = %d\n",
236 				 fd_data->pctype, fd_data->fd_id);
237 	}
238 	return err ? -EOPNOTSUPP : 0;
239 }
240 
241 #define I40E_TCPIP_DUMMY_PACKET_LEN 54
242 /**
243  * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
244  * @vsi: pointer to the targeted VSI
245  * @fd_data: the flow director data required for the FDir descriptor
246  * @add: true adds a filter, false removes it
247  *
248  * Returns 0 if the filters were successfully added or removed
249  **/
250 static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
251 				   struct i40e_fdir_filter *fd_data,
252 				   bool add)
253 {
254 	struct i40e_pf *pf = vsi->back;
255 	struct tcphdr *tcp;
256 	struct iphdr *ip;
257 	bool err = false;
258 	u8 *raw_packet;
259 	int ret;
260 	/* Dummy packet */
261 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
262 		0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
263 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
264 		0x0, 0x72, 0, 0, 0, 0};
265 
266 	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
267 	if (!raw_packet)
268 		return -ENOMEM;
269 	memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
270 
271 	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
272 	tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
273 	      + sizeof(struct iphdr));
274 
275 	ip->daddr = fd_data->dst_ip[0];
276 	tcp->dest = fd_data->dst_port;
277 	ip->saddr = fd_data->src_ip[0];
278 	tcp->source = fd_data->src_port;
279 
280 	if (add) {
281 		pf->fd_tcp_rule++;
282 		if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
283 			if (I40E_DEBUG_FD & pf->hw.debug_mask)
284 				dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
285 			pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
286 		}
287 	} else {
288 		pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
289 				  (pf->fd_tcp_rule - 1) : 0;
290 		if (pf->fd_tcp_rule == 0) {
291 			pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
292 			if (I40E_DEBUG_FD & pf->hw.debug_mask)
293 				dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
294 		}
295 	}
296 
297 	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
298 	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
299 
300 	if (ret) {
301 		dev_info(&pf->pdev->dev,
302 			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
303 			 fd_data->pctype, fd_data->fd_id, ret);
304 		err = true;
305 	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
306 		if (add)
307 			dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d)\n",
308 				 fd_data->pctype, fd_data->fd_id);
309 		else
310 			dev_info(&pf->pdev->dev,
311 				 "Filter deleted for PCTYPE %d loc = %d\n",
312 				 fd_data->pctype, fd_data->fd_id);
313 	}
314 
315 	return err ? -EOPNOTSUPP : 0;
316 }
317 
318 /**
319  * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
320  * a specific flow spec
321  * @vsi: pointer to the targeted VSI
322  * @fd_data: the flow director data required for the FDir descriptor
323  * @add: true adds a filter, false removes it
324  *
325  * Always returns -EOPNOTSUPP
326  **/
327 static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
328 				    struct i40e_fdir_filter *fd_data,
329 				    bool add)
330 {
331 	return -EOPNOTSUPP;
332 }
333 
334 #define I40E_IP_DUMMY_PACKET_LEN 34
335 /**
336  * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
337  * a specific flow spec
338  * @vsi: pointer to the targeted VSI
339  * @fd_data: the flow director data required for the FDir descriptor
340  * @add: true adds a filter, false removes it
341  *
342  * Returns 0 if the filters were successfully added or removed
343  **/
344 static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
345 				  struct i40e_fdir_filter *fd_data,
346 				  bool add)
347 {
348 	struct i40e_pf *pf = vsi->back;
349 	struct iphdr *ip;
350 	bool err = false;
351 	u8 *raw_packet;
352 	int ret;
353 	int i;
354 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
355 		0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
356 		0, 0, 0, 0};
357 
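	/* program a filter for every IPv4 pctype from NONF_IPV4_OTHER through
	 * FRAG_IPV4 so both non-fragmented and fragmented traffic match
	 */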
358 	for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
359 	     i <= I40E_FILTER_PCTYPE_FRAG_IPV4;	i++) {
360 		raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
361 		if (!raw_packet)
362 			return -ENOMEM;
363 		memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
364 		ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
365 
366 		ip->saddr = fd_data->src_ip[0];
367 		ip->daddr = fd_data->dst_ip[0];
368 		ip->protocol = 0;
369 
370 		fd_data->pctype = i;
371 		ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
372 
373 		if (ret) {
374 			dev_info(&pf->pdev->dev,
375 				 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
376 				 fd_data->pctype, fd_data->fd_id, ret);
377 			err = true;
378 		} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
379 			if (add)
380 				dev_info(&pf->pdev->dev,
381 					 "Filter OK for PCTYPE %d loc = %d\n",
382 					 fd_data->pctype, fd_data->fd_id);
383 			else
384 				dev_info(&pf->pdev->dev,
385 					 "Filter deleted for PCTYPE %d loc = %d\n",
386 					 fd_data->pctype, fd_data->fd_id);
387 		}
388 	}
389 
390 	return err ? -EOPNOTSUPP : 0;
391 }
392 
393 /**
394  * i40e_add_del_fdir - Build raw packets to add/del fdir filter
395  * @vsi: pointer to the targeted VSI
396  * @input: the Flow Director filter spec to add or remove
397  * @add: true adds a filter, false removes it
398  *
399  **/
400 int i40e_add_del_fdir(struct i40e_vsi *vsi,
401 		      struct i40e_fdir_filter *input, bool add)
402 {
403 	struct i40e_pf *pf = vsi->back;
404 	int ret;
405 
406 	switch (input->flow_type & ~FLOW_EXT) {
407 	case TCP_V4_FLOW:
408 		ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
409 		break;
410 	case UDP_V4_FLOW:
411 		ret = i40e_add_del_fdir_udpv4(vsi, input, add);
412 		break;
413 	case SCTP_V4_FLOW:
414 		ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
415 		break;
416 	case IPV4_FLOW:
417 		ret = i40e_add_del_fdir_ipv4(vsi, input, add);
418 		break;
419 	case IP_USER_FLOW:
420 		switch (input->ip4_proto) {
421 		case IPPROTO_TCP:
422 			ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
423 			break;
424 		case IPPROTO_UDP:
425 			ret = i40e_add_del_fdir_udpv4(vsi, input, add);
426 			break;
427 		case IPPROTO_SCTP:
428 			ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
429 			break;
430 		default:
431 			ret = i40e_add_del_fdir_ipv4(vsi, input, add);
432 			break;
433 		}
434 		break;
435 	default:
436 		dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
437 			 input->flow_type);
438 		ret = -EINVAL;
439 	}
440 
441 	/* The buffer allocated here is freed by i40e_clean_tx_ring() */
442 	return ret;
443 }
444 
445 /**
446  * i40e_fd_handle_status - check the Programming Status for FD
447  * @rx_ring: the Rx ring for this descriptor
448  * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
449  * @prog_id: the id originally used for programming
450  *
451  * This is used to verify whether the FD programming or invalidation
452  * requested by SW to the HW succeeded, and to take actions accordingly.
453  **/
454 static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
455 				  union i40e_rx_desc *rx_desc, u8 prog_id)
456 {
457 	struct i40e_pf *pf = rx_ring->vsi->back;
458 	struct pci_dev *pdev = pf->pdev;
459 	u32 fcnt_prog, fcnt_avail;
460 	u32 error;
461 	u64 qw;
462 
463 	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
464 	error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
465 		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
466 
467 	if (error == (0x1 << I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
468 		if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
469 		    (I40E_DEBUG_FD & pf->hw.debug_mask))
470 			dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
471 				 rx_desc->wb.qword0.hi_dword.fd_id);
472 
473 		/* Check if the programming error is for ATR.
474 		 * If so, auto disable ATR and set a state for
475 		 * flush in progress. Next time we come here if flush is in
476 		 * progress do nothing, once flush is complete the state will
477 		 * be cleared.
478 		 */
479 		if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
480 			return;
481 
482 		pf->fd_add_err++;
483 		/* store the current atr filter count */
484 		pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
485 
486 		if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
487 		    (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
488 			pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
489 			set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
490 		}
491 
492 		/* filter programming failed most likely due to table full */
493 		fcnt_prog = i40e_get_global_fd_count(pf);
494 		fcnt_avail = pf->fdir_pf_filter_count;
495 		/* If ATR is running fcnt_prog can quickly change,
496 		 * if we are very close to full, it makes sense to disable
497 		 * FD ATR/SB and then re-enable it when there is room.
498 		 */
499 		if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
500 			if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
501 			    !(pf->auto_disable_flags &
502 				     I40E_FLAG_FD_SB_ENABLED)) {
503 				if (I40E_DEBUG_FD & pf->hw.debug_mask)
504 					dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
505 				pf->auto_disable_flags |=
506 							I40E_FLAG_FD_SB_ENABLED;
507 			}
508 		} else {
509 			dev_info(&pdev->dev,
510 				"FD filter programming failed due to incorrect filter parameters\n");
511 		}
512 	} else if (error ==
513 			  (0x1 << I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
514 		if (I40E_DEBUG_FD & pf->hw.debug_mask)
515 			dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
516 				 rx_desc->wb.qword0.hi_dword.fd_id);
517 	}
518 }
519 
520 /**
521  * i40e_unmap_and_free_tx_resource - Release a Tx buffer
522  * @ring:      the ring that owns the buffer
523  * @tx_buffer: the buffer to free
524  **/
525 static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
526 					    struct i40e_tx_buffer *tx_buffer)
527 {
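	/* Flow Director sideband buffers hold a raw dummy packet (raw_buf)
	 * rather than an skb, so free whichever this buffer carries
	 */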
528 	if (tx_buffer->skb) {
529 		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
530 			kfree(tx_buffer->raw_buf);
531 		else
532 			dev_kfree_skb_any(tx_buffer->skb);
533 
534 		if (dma_unmap_len(tx_buffer, len))
535 			dma_unmap_single(ring->dev,
536 					 dma_unmap_addr(tx_buffer, dma),
537 					 dma_unmap_len(tx_buffer, len),
538 					 DMA_TO_DEVICE);
539 	} else if (dma_unmap_len(tx_buffer, len)) {
540 		dma_unmap_page(ring->dev,
541 			       dma_unmap_addr(tx_buffer, dma),
542 			       dma_unmap_len(tx_buffer, len),
543 			       DMA_TO_DEVICE);
544 	}
545 	tx_buffer->next_to_watch = NULL;
546 	tx_buffer->skb = NULL;
547 	dma_unmap_len_set(tx_buffer, len, 0);
548 	/* tx_buffer must be completely set up in the transmit path */
549 }
550 
551 /**
552  * i40e_clean_tx_ring - Free any empty Tx buffers
553  * @tx_ring: ring to be cleaned
554  **/
555 void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
556 {
557 	unsigned long bi_size;
558 	u16 i;
559 
560 	/* ring already cleared, nothing to do */
561 	if (!tx_ring->tx_bi)
562 		return;
563 
564 	/* Free all the Tx ring sk_buffs */
565 	for (i = 0; i < tx_ring->count; i++)
566 		i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
567 
568 	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
569 	memset(tx_ring->tx_bi, 0, bi_size);
570 
571 	/* Zero out the descriptor ring */
572 	memset(tx_ring->desc, 0, tx_ring->size);
573 
574 	tx_ring->next_to_use = 0;
575 	tx_ring->next_to_clean = 0;
576 
577 	if (!tx_ring->netdev)
578 		return;
579 
580 	/* cleanup Tx queue statistics */
581 	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
582 						  tx_ring->queue_index));
583 }
584 
585 /**
586  * i40e_free_tx_resources - Free Tx resources per queue
587  * @tx_ring: Tx descriptor ring for a specific queue
588  *
589  * Free all transmit software resources
590  **/
591 void i40e_free_tx_resources(struct i40e_ring *tx_ring)
592 {
593 	i40e_clean_tx_ring(tx_ring);
594 	kfree(tx_ring->tx_bi);
595 	tx_ring->tx_bi = NULL;
596 
597 	if (tx_ring->desc) {
598 		dma_free_coherent(tx_ring->dev, tx_ring->size,
599 				  tx_ring->desc, tx_ring->dma);
600 		tx_ring->desc = NULL;
601 	}
602 }
603 
604 /**
605  * i40e_get_head - Retrieve head from head writeback
606  * @tx_ring:  tx ring to fetch head of
607  *
608  * Returns value of Tx ring head based on value stored
609  * in head write-back location
610  **/
611 static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
612 {
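	/* the head write-back value lives in the u32 placed just past the
	 * last descriptor (see i40e_setup_tx_descriptors)
	 */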
613 	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
614 
615 	return le32_to_cpu(*(volatile __le32 *)head);
616 }
617 
618 /**
619  * i40e_get_tx_pending - how many tx descriptors not processed
620  * @ring: the ring of descriptors
621  *
622  * Since there is no access to the ring head register
623  * in XL710, we need to use our local copies
624  **/
625 static u32 i40e_get_tx_pending(struct i40e_ring *ring)
626 {
627 	u32 head, tail;
628 
629 	head = i40e_get_head(ring);
630 	tail = readl(ring->tail);
631 
632 	if (head != tail)
633 		return (head < tail) ?
634 			tail - head : (tail + ring->count - head);
635 
636 	return 0;
637 }
638 
639 /**
640  * i40e_check_tx_hang - Is there a hang in the Tx queue
641  * @tx_ring: the ring of descriptors
642  **/
643 static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
644 {
645 	u32 tx_done = tx_ring->stats.packets;
646 	u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
647 	u32 tx_pending = i40e_get_tx_pending(tx_ring);
648 	struct i40e_pf *pf = tx_ring->vsi->back;
649 	bool ret = false;
650 
651 	clear_check_for_tx_hang(tx_ring);
652 
653 	/* Check for a hung queue, but be thorough. This verifies
654 	 * that a transmit has been completed since the previous
655 	 * check AND there is at least one packet pending. The
656 	 * ARMED bit is set to indicate a potential hang. The
657 	 * bit is cleared if a pause frame is received to remove
658 	 * false hang detection due to PFC or 802.3x frames. By
659 	 * requiring this to fail twice we avoid races with
660 	 * PFC clearing the ARMED bit and conditions where we
661 	 * run the check_tx_hang logic with a transmit completion
662 	 * pending but without time to complete it yet.
663 	 */
664 	if ((tx_done_old == tx_done) && tx_pending) {
665 		/* make sure it is true for two checks in a row */
666 		ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
667 				       &tx_ring->state);
668 	} else if (tx_done_old == tx_done &&
669 		   (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) {
670 		if (I40E_DEBUG_FLOW & pf->hw.debug_mask)
671 			dev_info(tx_ring->dev, "HW needs some more descs to do a cacheline flush. tx_pending %d, queue %d",
672 				 tx_pending, tx_ring->queue_index);
673 		pf->tx_sluggish_count++;
674 	} else {
675 		/* update completed stats and disarm the hang check */
676 		tx_ring->tx_stats.tx_done_old = tx_done;
677 		clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
678 	}
679 
680 	return ret;
681 }
682 
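/* four 16-byte Tx descriptors share a 64-byte cache line; the WB_STRIDE mask
 * is used below to detect a clean that stopped partway through a cache line
 */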
683 #define WB_STRIDE 0x3
684 
685 /**
686  * i40e_clean_tx_irq - Reclaim resources after transmit completes
687  * @tx_ring:  tx ring to clean
688  * @budget:   how many cleans we're allowed
689  *
690  * Returns true if there's any budget left (i.e. the clean is finished)
691  **/
692 static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
693 {
694 	u16 i = tx_ring->next_to_clean;
695 	struct i40e_tx_buffer *tx_buf;
696 	struct i40e_tx_desc *tx_head;
697 	struct i40e_tx_desc *tx_desc;
698 	unsigned int total_packets = 0;
699 	unsigned int total_bytes = 0;
700 
701 	tx_buf = &tx_ring->tx_bi[i];
702 	tx_desc = I40E_TX_DESC(tx_ring, i);
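	/* bias the index negative by the ring size so the wrap checks
	 * below reduce to testing for i reaching zero
	 */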
703 	i -= tx_ring->count;
704 
705 	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
706 
707 	do {
708 		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
709 
710 		/* if next_to_watch is not set then there is no work pending */
711 		if (!eop_desc)
712 			break;
713 
714 		/* prevent any other reads prior to eop_desc */
715 		read_barrier_depends();
716 
717 		/* we have caught up to head, no work left to do */
718 		if (tx_head == tx_desc)
719 			break;
720 
721 		/* clear next_to_watch to prevent false hangs */
722 		tx_buf->next_to_watch = NULL;
723 
724 		/* update the statistics for this packet */
725 		total_bytes += tx_buf->bytecount;
726 		total_packets += tx_buf->gso_segs;
727 
728 		/* free the skb */
729 		dev_consume_skb_any(tx_buf->skb);
730 
731 		/* unmap skb header data */
732 		dma_unmap_single(tx_ring->dev,
733 				 dma_unmap_addr(tx_buf, dma),
734 				 dma_unmap_len(tx_buf, len),
735 				 DMA_TO_DEVICE);
736 
737 		/* clear tx_buffer data */
738 		tx_buf->skb = NULL;
739 		dma_unmap_len_set(tx_buf, len, 0);
740 
741 		/* unmap remaining buffers */
742 		while (tx_desc != eop_desc) {
743 
744 			tx_buf++;
745 			tx_desc++;
746 			i++;
747 			if (unlikely(!i)) {
748 				i -= tx_ring->count;
749 				tx_buf = tx_ring->tx_bi;
750 				tx_desc = I40E_TX_DESC(tx_ring, 0);
751 			}
752 
753 			/* unmap any remaining paged data */
754 			if (dma_unmap_len(tx_buf, len)) {
755 				dma_unmap_page(tx_ring->dev,
756 					       dma_unmap_addr(tx_buf, dma),
757 					       dma_unmap_len(tx_buf, len),
758 					       DMA_TO_DEVICE);
759 				dma_unmap_len_set(tx_buf, len, 0);
760 			}
761 		}
762 
763 		/* move us one more past the eop_desc for start of next pkt */
764 		tx_buf++;
765 		tx_desc++;
766 		i++;
767 		if (unlikely(!i)) {
768 			i -= tx_ring->count;
769 			tx_buf = tx_ring->tx_bi;
770 			tx_desc = I40E_TX_DESC(tx_ring, 0);
771 		}
772 
773 		prefetch(tx_desc);
774 
775 		/* update budget accounting */
776 		budget--;
777 	} while (likely(budget));
778 
779 	i += tx_ring->count;
780 	tx_ring->next_to_clean = i;
781 	u64_stats_update_begin(&tx_ring->syncp);
782 	tx_ring->stats.bytes += total_bytes;
783 	tx_ring->stats.packets += total_packets;
784 	u64_stats_update_end(&tx_ring->syncp);
785 	tx_ring->q_vector->tx.total_bytes += total_bytes;
786 	tx_ring->q_vector->tx.total_packets += total_packets;
787 
788 	/* check to see if there are any non-cache aligned descriptors
789 	 * waiting to be written back, and kick the hardware to force
790 	 * them to be written back in case of napi polling
791 	 */
792 	if (budget &&
793 	    !((i & WB_STRIDE) == WB_STRIDE) &&
794 	    !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
795 	    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
796 		tx_ring->arm_wb = true;
797 	else
798 		tx_ring->arm_wb = false;
799 
800 	if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
801 		/* schedule immediate reset if we believe we hung */
802 		dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
803 			 "  VSI                  <%d>\n"
804 			 "  Tx Queue             <%d>\n"
805 			 "  next_to_use          <%x>\n"
806 			 "  next_to_clean        <%x>\n",
807 			 tx_ring->vsi->seid,
808 			 tx_ring->queue_index,
809 			 tx_ring->next_to_use, i);
810 
811 		netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
812 
813 		dev_info(tx_ring->dev,
814 			 "tx hang detected on queue %d, reset requested\n",
815 			 tx_ring->queue_index);
816 
817 		/* do not fire the reset immediately, wait for the stack to
818 		 * decide we are truly stuck, also prevents every queue from
819 		 * simultaneously requesting a reset
820 		 */
821 
822 		/* the adapter is about to reset, no point in enabling polling */
823 		budget = 1;
824 	}
825 
826 	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
827 						      tx_ring->queue_index),
828 				  total_packets, total_bytes);
829 
830 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
831 	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
832 		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
833 		/* Make sure that anybody stopping the queue after this
834 		 * sees the new next_to_clean.
835 		 */
836 		smp_mb();
837 		if (__netif_subqueue_stopped(tx_ring->netdev,
838 					     tx_ring->queue_index) &&
839 		   !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
840 			netif_wake_subqueue(tx_ring->netdev,
841 					    tx_ring->queue_index);
842 			++tx_ring->tx_stats.restart_queue;
843 		}
844 	}
845 
846 	return !!budget;
847 }
848 
849 /**
850  * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors
851  * @vsi: the VSI we care about
852  * @q_vector: the vector on which to force writeback
853  *
854  **/
855 static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
856 {
857 	u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
858 		  I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
859 		  I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
860 		  I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
861 		  /* allow 00 to be written to the index */
862 
863 	wr32(&vsi->back->hw,
864 	     I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
865 	     val);
866 }
867 
868 /**
869  * i40e_set_new_dynamic_itr - Find new ITR level
870  * @rc: structure containing ring performance data
871  *
872  * Stores a new ITR value based on packets and byte counts during
873  * the last interrupt.  The advantage of per interrupt computation
874  * is faster updates and more accurate ITR for the current traffic
875  * pattern.  Constants in this function were computed based on
876  * theoretical maximum wire speed and thresholds were set based on
877  * testing data as well as attempting to minimize response time
878  * while increasing bulk throughput.
879  **/
880 static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
881 {
882 	enum i40e_latency_range new_latency_range = rc->latency_range;
883 	u32 new_itr = rc->itr;
884 	int bytes_per_int;
885 
886 	if (rc->total_packets == 0 || !rc->itr)
887 		return;
888 
889 	/* simple throttle rate management
890 	 *   0-10MB/s   lowest (100000 ints/s)
891 	 *  10-20MB/s   low    (20000 ints/s)
892 	 *  20-1249MB/s bulk   (8000 ints/s)
893 	 */
894 	bytes_per_int = rc->total_bytes / rc->itr;
895 	switch (new_latency_range) {
896 	case I40E_LOWEST_LATENCY:
897 		if (bytes_per_int > 10)
898 			new_latency_range = I40E_LOW_LATENCY;
899 		break;
900 	case I40E_LOW_LATENCY:
901 		if (bytes_per_int > 20)
902 			new_latency_range = I40E_BULK_LATENCY;
903 		else if (bytes_per_int <= 10)
904 			new_latency_range = I40E_LOWEST_LATENCY;
905 		break;
906 	case I40E_BULK_LATENCY:
907 		if (bytes_per_int <= 20)
908 			new_latency_range = I40E_LOW_LATENCY;
909 		break;
910 	}
	/* remember the new range so the next pass starts from updated state */
	rc->latency_range = new_latency_range;
911 
912 	switch (new_latency_range) {
913 	case I40E_LOWEST_LATENCY:
914 		new_itr = I40E_ITR_100K;
915 		break;
916 	case I40E_LOW_LATENCY:
917 		new_itr = I40E_ITR_20K;
918 		break;
919 	case I40E_BULK_LATENCY:
920 		new_itr = I40E_ITR_8K;
921 		break;
922 	default:
923 		break;
924 	}
925 
926 	if (new_itr != rc->itr) {
927 		/* do an exponential smoothing */
928 		new_itr = (10 * new_itr * rc->itr) /
929 			  ((9 * new_itr) + rc->itr);
930 		rc->itr = new_itr & I40E_MAX_ITR;
931 	}
932 
933 	rc->total_bytes = 0;
934 	rc->total_packets = 0;
935 }
936 
937 /**
938  * i40e_update_dynamic_itr - Adjust ITR based on bytes per int
939  * @q_vector: the vector to adjust
940  **/
941 static void i40e_update_dynamic_itr(struct i40e_q_vector *q_vector)
942 {
943 	u16 vector = q_vector->vsi->base_vector + q_vector->v_idx;
944 	struct i40e_hw *hw = &q_vector->vsi->back->hw;
945 	u32 reg_addr;
946 	u16 old_itr;
947 
948 	reg_addr = I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1);
949 	old_itr = q_vector->rx.itr;
950 	i40e_set_new_dynamic_itr(&q_vector->rx);
951 	if (old_itr != q_vector->rx.itr)
952 		wr32(hw, reg_addr, q_vector->rx.itr);
953 
954 	reg_addr = I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1);
955 	old_itr = q_vector->tx.itr;
956 	i40e_set_new_dynamic_itr(&q_vector->tx);
957 	if (old_itr != q_vector->tx.itr)
958 		wr32(hw, reg_addr, q_vector->tx.itr);
959 }
960 
961 /**
962  * i40e_clean_programming_status - clean the programming status descriptor
963  * @rx_ring: the rx ring that has this descriptor
964  * @rx_desc: the rx descriptor written back by HW
965  *
966  * Flow director should handle FD_FILTER_STATUS to check its filter programming
967  * status being successful or not and take actions accordingly. FCoE should
968  * handle its context/filter programming/invalidation status and take actions.
969  *
970  **/
971 static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
972 					  union i40e_rx_desc *rx_desc)
973 {
974 	u64 qw;
975 	u8 id;
976 
977 	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
978 	id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
979 		  I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
980 
981 	if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
982 		i40e_fd_handle_status(rx_ring, rx_desc, id);
983 #ifdef I40E_FCOE
984 	else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
985 		 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
986 		i40e_fcoe_handle_status(rx_ring, rx_desc, id);
987 #endif
988 }
989 
990 /**
991  * i40e_setup_tx_descriptors - Allocate the Tx descriptors
992  * @tx_ring: the tx ring to set up
993  *
994  * Return 0 on success, negative on error
995  **/
996 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
997 {
998 	struct device *dev = tx_ring->dev;
999 	int bi_size;
1000 
1001 	if (!dev)
1002 		return -ENOMEM;
1003 
1004 	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
1005 	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
1006 	if (!tx_ring->tx_bi)
1007 		goto err;
1008 
1009 	/* round up to nearest 4K */
1010 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
1011 	/* add a u32 for the head write-back value; the alignment below
1012 	 * guarantees this is at least one cache line in size
1013 	 */
1014 	tx_ring->size += sizeof(u32);
1015 	tx_ring->size = ALIGN(tx_ring->size, 4096);
1016 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
1017 					   &tx_ring->dma, GFP_KERNEL);
1018 	if (!tx_ring->desc) {
1019 		dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
1020 			 tx_ring->size);
1021 		goto err;
1022 	}
1023 
1024 	tx_ring->next_to_use = 0;
1025 	tx_ring->next_to_clean = 0;
1026 	return 0;
1027 
1028 err:
1029 	kfree(tx_ring->tx_bi);
1030 	tx_ring->tx_bi = NULL;
1031 	return -ENOMEM;
1032 }
1033 
1034 /**
1035  * i40e_clean_rx_ring - Free Rx buffers
1036  * @rx_ring: ring to be cleaned
1037  **/
1038 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
1039 {
1040 	struct device *dev = rx_ring->dev;
1041 	struct i40e_rx_buffer *rx_bi;
1042 	unsigned long bi_size;
1043 	u16 i;
1044 
1045 	/* ring already cleared, nothing to do */
1046 	if (!rx_ring->rx_bi)
1047 		return;
1048 
1049 	if (ring_is_ps_enabled(rx_ring)) {
1050 		int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count;
1051 
1052 		rx_bi = &rx_ring->rx_bi[0];
1053 		if (rx_bi->hdr_buf) {
1054 			dma_free_coherent(dev,
1055 					  bufsz,
1056 					  rx_bi->hdr_buf,
1057 					  rx_bi->dma);
1058 			for (i = 0; i < rx_ring->count; i++) {
1059 				rx_bi = &rx_ring->rx_bi[i];
1060 				rx_bi->dma = 0;
1061 				rx_bi->hdr_buf = NULL;
1062 			}
1063 		}
1064 	}
1065 	/* Free all the Rx ring sk_buffs */
1066 	for (i = 0; i < rx_ring->count; i++) {
1067 		rx_bi = &rx_ring->rx_bi[i];
1068 		if (rx_bi->dma) {
1069 			dma_unmap_single(dev,
1070 					 rx_bi->dma,
1071 					 rx_ring->rx_buf_len,
1072 					 DMA_FROM_DEVICE);
1073 			rx_bi->dma = 0;
1074 		}
1075 		if (rx_bi->skb) {
1076 			dev_kfree_skb(rx_bi->skb);
1077 			rx_bi->skb = NULL;
1078 		}
1079 		if (rx_bi->page) {
1080 			if (rx_bi->page_dma) {
1081 				dma_unmap_page(dev,
1082 					       rx_bi->page_dma,
1083 					       PAGE_SIZE / 2,
1084 					       DMA_FROM_DEVICE);
1085 				rx_bi->page_dma = 0;
1086 			}
1087 			__free_page(rx_bi->page);
1088 			rx_bi->page = NULL;
1089 			rx_bi->page_offset = 0;
1090 		}
1091 	}
1092 
1093 	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1094 	memset(rx_ring->rx_bi, 0, bi_size);
1095 
1096 	/* Zero out the descriptor ring */
1097 	memset(rx_ring->desc, 0, rx_ring->size);
1098 
1099 	rx_ring->next_to_clean = 0;
1100 	rx_ring->next_to_use = 0;
1101 }
1102 
1103 /**
1104  * i40e_free_rx_resources - Free Rx resources
1105  * @rx_ring: ring to clean the resources from
1106  *
1107  * Free all receive software resources
1108  **/
1109 void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1110 {
1111 	i40e_clean_rx_ring(rx_ring);
1112 	kfree(rx_ring->rx_bi);
1113 	rx_ring->rx_bi = NULL;
1114 
1115 	if (rx_ring->desc) {
1116 		dma_free_coherent(rx_ring->dev, rx_ring->size,
1117 				  rx_ring->desc, rx_ring->dma);
1118 		rx_ring->desc = NULL;
1119 	}
1120 }
1121 
1122 /**
1123  * i40e_alloc_rx_headers - allocate rx header buffers
1124  * @rx_ring: ring to alloc buffers
1125  *
1126  * Allocate rx header buffers for the entire ring. As these are static,
1127  * this is only called when setting up a new ring.
1128  **/
1129 void i40e_alloc_rx_headers(struct i40e_ring *rx_ring)
1130 {
1131 	struct device *dev = rx_ring->dev;
1132 	struct i40e_rx_buffer *rx_bi;
1133 	dma_addr_t dma;
1134 	void *buffer;
1135 	int buf_size;
1136 	int i;
1137 
1138 	if (rx_ring->rx_bi[0].hdr_buf)
1139 		return;
1140 	/* Make sure the buffers don't cross cache line boundaries. */
1141 	buf_size = ALIGN(rx_ring->rx_hdr_len, 256);
1142 	buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count,
1143 				    &dma, GFP_KERNEL);
1144 	if (!buffer)
1145 		return;
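	/* carve the single coherent allocation into one header buffer per
	 * descriptor slot
	 */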
1146 	for (i = 0; i < rx_ring->count; i++) {
1147 		rx_bi = &rx_ring->rx_bi[i];
1148 		rx_bi->dma = dma + (i * buf_size);
1149 		rx_bi->hdr_buf = buffer + (i * buf_size);
1150 	}
1151 }
1152 
1153 /**
1154  * i40e_setup_rx_descriptors - Allocate Rx descriptors
1155  * @rx_ring: Rx descriptor ring (for a specific queue) to setup
1156  *
1157  * Returns 0 on success, negative on failure
1158  **/
1159 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1160 {
1161 	struct device *dev = rx_ring->dev;
1162 	int bi_size;
1163 
1164 	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1165 	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
1166 	if (!rx_ring->rx_bi)
1167 		goto err;
1168 
1169 	u64_stats_init(&rx_ring->syncp);
1170 
1171 	/* Round up to nearest 4K */
1172 	rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
1173 		? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
1174 		: rx_ring->count * sizeof(union i40e_32byte_rx_desc);
1175 	rx_ring->size = ALIGN(rx_ring->size, 4096);
1176 	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
1177 					   &rx_ring->dma, GFP_KERNEL);
1178 
1179 	if (!rx_ring->desc) {
1180 		dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
1181 			 rx_ring->size);
1182 		goto err;
1183 	}
1184 
1185 	rx_ring->next_to_clean = 0;
1186 	rx_ring->next_to_use = 0;
1187 
1188 	return 0;
1189 err:
1190 	kfree(rx_ring->rx_bi);
1191 	rx_ring->rx_bi = NULL;
1192 	return -ENOMEM;
1193 }
1194 
1195 /**
1196  * i40e_release_rx_desc - Store the new tail and head values
1197  * @rx_ring: ring to bump
1198  * @val: new tail index (next descriptor to be used)
1199  **/
1200 static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
1201 {
1202 	rx_ring->next_to_use = val;
1203 	/* Force memory writes to complete before letting h/w
1204 	 * know there are new descriptors to fetch.  (Only
1205 	 * applicable for weak-ordered memory model archs,
1206 	 * such as IA-64).
1207 	 */
1208 	wmb();
1209 	writel(val, rx_ring->tail);
1210 }
1211 
1212 /**
1213  * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split
1214  * @rx_ring: ring to place buffers on
1215  * @cleaned_count: number of buffers to replace
1216  **/
1217 void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
1218 {
1219 	u16 i = rx_ring->next_to_use;
1220 	union i40e_rx_desc *rx_desc;
1221 	struct i40e_rx_buffer *bi;
1222 
1223 	/* do nothing if no valid netdev defined */
1224 	if (!rx_ring->netdev || !cleaned_count)
1225 		return;
1226 
1227 	while (cleaned_count--) {
1228 		rx_desc = I40E_RX_DESC(rx_ring, i);
1229 		bi = &rx_ring->rx_bi[i];
1230 
1231 		if (bi->skb) /* desc is in use */
1232 			goto no_buffers;
1233 		if (!bi->page) {
1234 			bi->page = alloc_page(GFP_ATOMIC);
1235 			if (!bi->page) {
1236 				rx_ring->rx_stats.alloc_page_failed++;
1237 				goto no_buffers;
1238 			}
1239 		}
1240 
1241 		if (!bi->page_dma) {
1242 			/* use a half page if we're re-using */
1243 			bi->page_offset ^= PAGE_SIZE / 2;
1244 			bi->page_dma = dma_map_page(rx_ring->dev,
1245 						    bi->page,
1246 						    bi->page_offset,
1247 						    PAGE_SIZE / 2,
1248 						    DMA_FROM_DEVICE);
1249 			if (dma_mapping_error(rx_ring->dev,
1250 					      bi->page_dma)) {
1251 				rx_ring->rx_stats.alloc_page_failed++;
1252 				bi->page_dma = 0;
1253 				goto no_buffers;
1254 			}
1255 		}
1256 
1257 		dma_sync_single_range_for_device(rx_ring->dev,
1258 						 bi->dma,
1259 						 0,
1260 						 rx_ring->rx_hdr_len,
1261 						 DMA_FROM_DEVICE);
1262 		/* Refresh the desc even if buffer_addrs didn't change
1263 		 * because each write-back erases this info.
1264 		 */
1265 		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
1266 		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
1267 		i++;
1268 		if (i == rx_ring->count)
1269 			i = 0;
1270 	}
1271 
1272 no_buffers:
1273 	if (rx_ring->next_to_use != i)
1274 		i40e_release_rx_desc(rx_ring, i);
1275 }
1276 
1277 /**
1278  * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
1279  * @rx_ring: ring to place buffers on
1280  * @cleaned_count: number of buffers to replace
1281  **/
1282 void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
1283 {
1284 	u16 i = rx_ring->next_to_use;
1285 	union i40e_rx_desc *rx_desc;
1286 	struct i40e_rx_buffer *bi;
1287 	struct sk_buff *skb;
1288 
1289 	/* do nothing if no valid netdev defined */
1290 	if (!rx_ring->netdev || !cleaned_count)
1291 		return;
1292 
1293 	while (cleaned_count--) {
1294 		rx_desc = I40E_RX_DESC(rx_ring, i);
1295 		bi = &rx_ring->rx_bi[i];
1296 		skb = bi->skb;
1297 
1298 		if (!skb) {
1299 			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1300 							rx_ring->rx_buf_len);
1301 			if (!skb) {
1302 				rx_ring->rx_stats.alloc_buff_failed++;
1303 				goto no_buffers;
1304 			}
1305 			/* initialize queue mapping */
1306 			skb_record_rx_queue(skb, rx_ring->queue_index);
1307 			bi->skb = skb;
1308 		}
1309 
1310 		if (!bi->dma) {
1311 			bi->dma = dma_map_single(rx_ring->dev,
1312 						 skb->data,
1313 						 rx_ring->rx_buf_len,
1314 						 DMA_FROM_DEVICE);
1315 			if (dma_mapping_error(rx_ring->dev, bi->dma)) {
1316 				rx_ring->rx_stats.alloc_buff_failed++;
1317 				bi->dma = 0;
1318 				goto no_buffers;
1319 			}
1320 		}
1321 
1322 		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
1323 		rx_desc->read.hdr_addr = 0;
1324 		i++;
1325 		if (i == rx_ring->count)
1326 			i = 0;
1327 	}
1328 
1329 no_buffers:
1330 	if (rx_ring->next_to_use != i)
1331 		i40e_release_rx_desc(rx_ring, i);
1332 }
1333 
1334 /**
1335  * i40e_receive_skb - Send a completed packet up the stack
1336  * @rx_ring:  rx ring in play
1337  * @skb: packet to send up
1338  * @vlan_tag: vlan tag for packet
1339  **/
1340 static void i40e_receive_skb(struct i40e_ring *rx_ring,
1341 			     struct sk_buff *skb, u16 vlan_tag)
1342 {
1343 	struct i40e_q_vector *q_vector = rx_ring->q_vector;
1344 	struct i40e_vsi *vsi = rx_ring->vsi;
1345 	u64 flags = vsi->back->flags;
1346 
1347 	if (vlan_tag & VLAN_VID_MASK)
1348 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
1349 
1350 	if (flags & I40E_FLAG_IN_NETPOLL)
1351 		netif_rx(skb);
1352 	else
1353 		napi_gro_receive(&q_vector->napi, skb);
1354 }
1355 
1356 /**
1357  * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
1358  * @vsi: the VSI we care about
1359  * @skb: skb currently being received and modified
1360  * @rx_status: status value of last descriptor in packet
1361  * @rx_error: error value of last descriptor in packet
1362  * @rx_ptype: ptype value of last descriptor in packet
1363  **/
1364 static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
1365 				    struct sk_buff *skb,
1366 				    u32 rx_status,
1367 				    u32 rx_error,
1368 				    u16 rx_ptype)
1369 {
1370 	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
1371 	bool ipv4 = false, ipv6 = false;
1372 	bool ipv4_tunnel, ipv6_tunnel;
1373 	__wsum rx_udp_csum;
1374 	struct iphdr *iph;
1375 	__sum16 csum;
1376 
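	/* ptypes in the GRENAT4/GRENAT6 ranges identify tunneled packets with
	 * an outer IPv4 or IPv6 header respectively
	 */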
1377 	ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
1378 		     (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
1379 	ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
1380 		     (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
1381 
1382 	skb->ip_summed = CHECKSUM_NONE;
1383 
1384 	/* Rx csum enabled and ip headers found? */
1385 	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
1386 		return;
1387 
1388 	/* did the hardware decode the packet and checksum? */
1389 	if (!(rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
1390 		return;
1391 
1392 	/* both known and outer_ip must be set for the below code to work */
1393 	if (!(decoded.known && decoded.outer_ip))
1394 		return;
1395 
1396 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1397 	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4)
1398 		ipv4 = true;
1399 	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1400 		 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1401 		ipv6 = true;
1402 
1403 	if (ipv4 &&
1404 	    (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1405 			 (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT))))
1406 		goto checksum_fail;
1407 
1408 	/* likely incorrect csum if alternate IP extension headers found */
1409 	if (ipv6 &&
1410 	    rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
1411 		/* don't increment checksum err here, non-fatal err */
1412 		return;
1413 
1414 	/* there was some L4 error, count error and punt packet to the stack */
1415 	if (rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
1416 		goto checksum_fail;
1417 
1418 	/* handle packets that were not able to be checksummed due
1419 	 * to arrival speed, in this case the stack can compute
1420 	 * the csum.
1421 	 */
1422 	if (rx_error & (1 << I40E_RX_DESC_ERROR_PPRS_SHIFT))
1423 		return;
1424 
1425 	/* If VXLAN traffic has an outer UDPv4 checksum we need to check
1426 	 * it in the driver, hardware does not do it for us.
1427 	 * Since L3L4P bit was set we assume a valid IHL value (>=5)
1428 	 * so the total length of IPv4 header is IHL*4 bytes
1429 	 * The UDP_0 bit *may* be set if the *inner* header is UDP
1430 	 */
1431 	if (ipv4_tunnel) {
1432 		skb->transport_header = skb->mac_header +
1433 					sizeof(struct ethhdr) +
1434 					(ip_hdr(skb)->ihl * 4);
1435 
1436 		/* Add 4 bytes for VLAN tagged packets */
1437 		skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) ||
1438 					  skb->protocol == htons(ETH_P_8021AD))
1439 					  ? VLAN_HLEN : 0;
1440 
1441 		if ((ip_hdr(skb)->protocol == IPPROTO_UDP) &&
1442 		    (udp_hdr(skb)->check != 0)) {
1443 			rx_udp_csum = udp_csum(skb);
1444 			iph = ip_hdr(skb);
1445 			csum = csum_tcpudp_magic(
1446 					iph->saddr, iph->daddr,
1447 					(skb->len - skb_transport_offset(skb)),
1448 					IPPROTO_UDP, rx_udp_csum);
1449 
1450 			if (udp_hdr(skb)->check != csum)
1451 				goto checksum_fail;
1452 
1453 		} /* else it's GRE and so no outer UDP header */
1454 	}
1455 
1456 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1457 	skb->csum_level = ipv4_tunnel || ipv6_tunnel;
1458 
1459 	return;
1460 
1461 checksum_fail:
1462 	vsi->back->hw_csum_rx_error++;
1463 }
1464 
1465 /**
1466  * i40e_rx_hash - returns the hash value from the Rx descriptor
1467  * @ring: descriptor ring
1468  * @rx_desc: specific descriptor
1469  **/
1470 static inline u32 i40e_rx_hash(struct i40e_ring *ring,
1471 			       union i40e_rx_desc *rx_desc)
1472 {
1473 	const __le64 rss_mask =
1474 		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
1475 			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
1476 
1477 	if ((ring->netdev->features & NETIF_F_RXHASH) &&
1478 	    (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask)
1479 		return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
1480 	else
1481 		return 0;
1482 }
1483 
1484 /**
1485  * i40e_ptype_to_hash - get a hash type
1486  * @ptype: the ptype value from the descriptor
1487  *
1488  * Returns a hash type to be used by skb_set_hash
1489  **/
1490 static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
1491 {
1492 	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
1493 
1494 	if (!decoded.known)
1495 		return PKT_HASH_TYPE_NONE;
1496 
1497 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1498 	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
1499 		return PKT_HASH_TYPE_L4;
1500 	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1501 		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
1502 		return PKT_HASH_TYPE_L3;
1503 	else
1504 		return PKT_HASH_TYPE_L2;
1505 }
1506 
1507 /**
1508  * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
1509  * @rx_ring:  rx ring to clean
1510  * @budget:   how many cleans we're allowed
1511  *
1512  * Returns the number of packets cleaned
1513  **/
1514 static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
1515 {
1516 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1517 	u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
1518 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1519 	const int current_node = numa_node_id();
1520 	struct i40e_vsi *vsi = rx_ring->vsi;
1521 	u16 i = rx_ring->next_to_clean;
1522 	union i40e_rx_desc *rx_desc;
1523 	u32 rx_error, rx_status;
1524 	u8 rx_ptype;
1525 	u64 qword;
1526 
1527 	if (budget <= 0)
1528 		return 0;
1529 
1530 	do {
1531 		struct i40e_rx_buffer *rx_bi;
1532 		struct sk_buff *skb;
1533 		u16 vlan_tag;
1534 		/* return some buffers to hardware, one at a time is too slow */
1535 		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1536 			i40e_alloc_rx_buffers_ps(rx_ring, cleaned_count);
1537 			cleaned_count = 0;
1538 		}
1539 
1540 		i = rx_ring->next_to_clean;
1541 		rx_desc = I40E_RX_DESC(rx_ring, i);
1542 		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1543 		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1544 			I40E_RXD_QW1_STATUS_SHIFT;
1545 
1546 		if (!(rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
1547 			break;
1548 
1549 		/* This memory barrier is needed to keep us from reading
1550 		 * any other fields out of the rx_desc until we know the
1551 		 * DD bit is set.
1552 		 */
1553 		dma_rmb();
1554 		if (i40e_rx_is_programming_status(qword)) {
1555 			i40e_clean_programming_status(rx_ring, rx_desc);
1556 			I40E_RX_INCREMENT(rx_ring, i);
1557 			continue;
1558 		}
1559 		rx_bi = &rx_ring->rx_bi[i];
1560 		skb = rx_bi->skb;
1561 		if (likely(!skb)) {
1562 			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1563 							rx_ring->rx_hdr_len);
1564 			if (!skb) {
1565 				rx_ring->rx_stats.alloc_buff_failed++;
1566 				break;
1567 			}
1568 
1569 			/* initialize queue mapping */
1570 			skb_record_rx_queue(skb, rx_ring->queue_index);
1571 			/* we are reusing so sync this buffer for CPU use */
1572 			dma_sync_single_range_for_cpu(rx_ring->dev,
1573 						      rx_bi->dma,
1574 						      0,
1575 						      rx_ring->rx_hdr_len,
1576 						      DMA_FROM_DEVICE);
1577 		}
1578 		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1579 				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1580 		rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
1581 				I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1582 		rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
1583 			 I40E_RXD_QW1_LENGTH_SPH_SHIFT;
1584 
1585 		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1586 			   I40E_RXD_QW1_ERROR_SHIFT;
1587 		rx_hbo = rx_error & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
1588 		rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
1589 
1590 		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1591 			   I40E_RXD_QW1_PTYPE_SHIFT;
1592 		prefetch(rx_bi->page);
1593 		rx_bi->skb = NULL;
1594 		cleaned_count++;
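		/* if the hardware placed the header in the header buffer
		 * (split header, or header buffer overflow), copy it into
		 * the skb before any packet-buffer data is attached
		 */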
1595 		if (rx_hbo || rx_sph) {
1596 			int len;
1597 			if (rx_hbo)
1598 				len = I40E_RX_HDR_SIZE;
1599 			else
1600 				len = rx_header_len;
1601 			memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
1602 		} else if (skb->len == 0) {
1603 			int len;
1604 
1605 			len = (rx_packet_len > skb_headlen(skb) ?
1606 				skb_headlen(skb) : rx_packet_len);
1607 			memcpy(__skb_put(skb, len),
1608 			       rx_bi->page + rx_bi->page_offset,
1609 			       len);
1610 			rx_bi->page_offset += len;
1611 			rx_packet_len -= len;
1612 		}
1613 
1614 		/* Get the rest of the data if this was a header split */
1615 		if (rx_packet_len) {
1616 			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
1617 					   rx_bi->page,
1618 					   rx_bi->page_offset,
1619 					   rx_packet_len);
1620 
1621 			skb->len += rx_packet_len;
1622 			skb->data_len += rx_packet_len;
1623 			skb->truesize += rx_packet_len;
1624 
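			/* keep the half page for reuse only if we are the
			 * sole owner and it is local to this NUMA node,
			 * otherwise drop it so a fresh page is allocated
			 */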
1625 			if ((page_count(rx_bi->page) == 1) &&
1626 			    (page_to_nid(rx_bi->page) == current_node))
1627 				get_page(rx_bi->page);
1628 			else
1629 				rx_bi->page = NULL;
1630 
1631 			dma_unmap_page(rx_ring->dev,
1632 				       rx_bi->page_dma,
1633 				       PAGE_SIZE / 2,
1634 				       DMA_FROM_DEVICE);
1635 			rx_bi->page_dma = 0;
1636 		}
1637 		I40E_RX_INCREMENT(rx_ring, i);
1638 
1639 		if (unlikely(
1640 		    !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1641 			struct i40e_rx_buffer *next_buffer;
1642 
1643 			next_buffer = &rx_ring->rx_bi[i];
1644 			next_buffer->skb = skb;
1645 			rx_ring->rx_stats.non_eop_descs++;
1646 			continue;
1647 		}
1648 
1649 		/* ERR_MASK will only have valid bits if EOP set */
1650 		if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1651 			dev_kfree_skb_any(skb);
1652 			continue;
1653 		}
1654 
1655 		skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
1656 			     i40e_ptype_to_hash(rx_ptype));
1657 		if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1658 			i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1659 					   I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1660 					   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1661 			rx_ring->last_rx_timestamp = jiffies;
1662 		}
1663 
1664 		/* probably a little skewed due to removing CRC */
1665 		total_rx_bytes += skb->len;
1666 		total_rx_packets++;
1667 
1668 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1669 
1670 		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1671 
1672 		vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1673 			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1674 			 : 0;
1675 #ifdef I40E_FCOE
1676 		if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1677 			dev_kfree_skb_any(skb);
1678 			continue;
1679 		}
1680 #endif
1681 		skb_mark_napi_id(skb, &rx_ring->q_vector->napi);
1682 		i40e_receive_skb(rx_ring, skb, vlan_tag);
1683 
1684 		rx_desc->wb.qword1.status_error_len = 0;
1685 
1686 	} while (likely(total_rx_packets < budget));
1687 
1688 	u64_stats_update_begin(&rx_ring->syncp);
1689 	rx_ring->stats.packets += total_rx_packets;
1690 	rx_ring->stats.bytes += total_rx_bytes;
1691 	u64_stats_update_end(&rx_ring->syncp);
1692 	rx_ring->q_vector->rx.total_packets += total_rx_packets;
1693 	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1694 
1695 	return total_rx_packets;
1696 }
1697 
1698 /**
1699  * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer
1700  * @rx_ring:  rx ring to clean
1701  * @budget:   how many cleans we're allowed
1702  *
1703  * Returns number of packets cleaned
1704  **/
1705 static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
1706 {
1707 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1708 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1709 	struct i40e_vsi *vsi = rx_ring->vsi;
1710 	union i40e_rx_desc *rx_desc;
1711 	u32 rx_error, rx_status;
1712 	u16 rx_packet_len;
1713 	u8 rx_ptype;
1714 	u64 qword;
1715 	u16 i;
1716 
1717 	do {
1718 		struct i40e_rx_buffer *rx_bi;
1719 		struct sk_buff *skb;
1720 		u16 vlan_tag;
1721 		/* return some buffers to hardware, one at a time is too slow */
1722 		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1723 			i40e_alloc_rx_buffers_1buf(rx_ring, cleaned_count);
1724 			cleaned_count = 0;
1725 		}
1726 
1727 		i = rx_ring->next_to_clean;
1728 		rx_desc = I40E_RX_DESC(rx_ring, i);
1729 		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1730 		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1731 			I40E_RXD_QW1_STATUS_SHIFT;
1732 
1733 		if (!(rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
1734 			break;
1735 
1736 		/* This memory barrier is needed to keep us from reading
1737 		 * any other fields out of the rx_desc until we know the
1738 		 * DD bit is set.
1739 		 */
1740 		dma_rmb();
1741 
1742 		if (i40e_rx_is_programming_status(qword)) {
1743 			i40e_clean_programming_status(rx_ring, rx_desc);
1744 			I40E_RX_INCREMENT(rx_ring, i);
1745 			continue;
1746 		}
1747 		rx_bi = &rx_ring->rx_bi[i];
1748 		skb = rx_bi->skb;
1749 		prefetch(skb->data);
1750 
1751 		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1752 				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1753 
1754 		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1755 			   I40E_RXD_QW1_ERROR_SHIFT;
1756 		rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
1757 
1758 		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1759 			   I40E_RXD_QW1_PTYPE_SHIFT;
1760 		rx_bi->skb = NULL;
1761 		cleaned_count++;
1762 
1763 		/* Get the header and possibly the whole packet.
1764 		 * If this is an skb from a previous receive, dma will be 0.
1765 		 */
1766 		skb_put(skb, rx_packet_len);
1767 		dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len,
1768 				 DMA_FROM_DEVICE);
1769 		rx_bi->dma = 0;
1770 
1771 		I40E_RX_INCREMENT(rx_ring, i);
1772 
1773 		if (unlikely(
1774 		    !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1775 			rx_ring->rx_stats.non_eop_descs++;
1776 			continue;
1777 		}
1778 
1779 		/* ERR_MASK will only have valid bits if EOP set */
1780 		if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1781 			dev_kfree_skb_any(skb);
1782 			/* TODO: shouldn't we increment a counter indicating the
1783 			 * drop?
1784 			 */
1785 			continue;
1786 		}
1787 
1788 		skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
1789 			     i40e_ptype_to_hash(rx_ptype));
1790 		if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1791 			i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1792 					   I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1793 					   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1794 			rx_ring->last_rx_timestamp = jiffies;
1795 		}
1796 
1797 		/* probably a little skewed due to removing CRC */
1798 		total_rx_bytes += skb->len;
1799 		total_rx_packets++;
1800 
1801 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1802 
1803 		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1804 
1805 		vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1806 			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1807 			 : 0;
1808 #ifdef I40E_FCOE
1809 		if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1810 			dev_kfree_skb_any(skb);
1811 			continue;
1812 		}
1813 #endif
1814 		i40e_receive_skb(rx_ring, skb, vlan_tag);
1815 
1816 		rx_desc->wb.qword1.status_error_len = 0;
1817 	} while (likely(total_rx_packets < budget));
1818 
1819 	u64_stats_update_begin(&rx_ring->syncp);
1820 	rx_ring->stats.packets += total_rx_packets;
1821 	rx_ring->stats.bytes += total_rx_bytes;
1822 	u64_stats_update_end(&rx_ring->syncp);
1823 	rx_ring->q_vector->rx.total_packets += total_rx_packets;
1824 	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1825 
1826 	return total_rx_packets;
1827 }
1828 
1829 /**
1830  * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1831  * @napi: napi struct with our devices info in it
1832  * @budget: amount of work driver is allowed to do this pass, in packets
1833  *
1834  * This function will clean all queues associated with a q_vector.
1835  *
1836  * Returns the amount of work done
1837  **/
1838 int i40e_napi_poll(struct napi_struct *napi, int budget)
1839 {
1840 	struct i40e_q_vector *q_vector =
1841 			       container_of(napi, struct i40e_q_vector, napi);
1842 	struct i40e_vsi *vsi = q_vector->vsi;
1843 	struct i40e_ring *ring;
1844 	bool clean_complete = true;
1845 	bool arm_wb = false;
1846 	int budget_per_ring;
1847 	int cleaned;
1848 
1849 	if (test_bit(__I40E_DOWN, &vsi->state)) {
1850 		napi_complete(napi);
1851 		return 0;
1852 	}
1853 
1854 	/* Since the actual Tx work is minimal, we can give the Tx a larger
1855 	 * budget and be more aggressive about cleaning up the Tx descriptors.
1856 	 */
1857 	i40e_for_each_ring(ring, q_vector->tx) {
1858 		clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
1859 		arm_wb |= ring->arm_wb;
1860 	}
1861 
1862 	/* We attempt to distribute budget to each Rx queue fairly, but don't
1863 	 * allow the budget to go below 1 because that would exit polling early.
1864 	 */
1865 	budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
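	/* Illustrative math: with a typical NAPI budget of 64 and two ring
	 * pairs on this vector, each Rx ring gets 64 / 2 = 32 packets of
	 * budget; the max() keeps the per-ring budget at least 1 even when
	 * a vector carries more ring pairs than budget.
	 */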
1866 
1867 	i40e_for_each_ring(ring, q_vector->rx) {
1868 		if (ring_is_ps_enabled(ring))
1869 			cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
1870 		else
1871 			cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
1872 		/* if we didn't clean as many as budgeted, we must be done */
1873 		clean_complete &= (budget_per_ring != cleaned);
1874 	}
1875 
1876 	/* If work not completed, return budget and polling will return */
1877 	if (!clean_complete) {
1878 		if (arm_wb)
1879 			i40e_force_wb(vsi, q_vector);
1880 		return budget;
1881 	}
1882 
1883 	/* Work is done so exit the polling mode and re-enable the interrupt */
1884 	napi_complete(napi);
1885 	if (ITR_IS_DYNAMIC(vsi->rx_itr_setting) ||
1886 	    ITR_IS_DYNAMIC(vsi->tx_itr_setting))
1887 		i40e_update_dynamic_itr(q_vector);
1888 
1889 	if (!test_bit(__I40E_DOWN, &vsi->state)) {
1890 		if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
1891 			i40e_irq_dynamic_enable(vsi,
1892 					q_vector->v_idx + vsi->base_vector);
1893 		} else {
1894 			struct i40e_hw *hw = &vsi->back->hw;
1895 			/* We re-enable the queue 0 interrupt cause, but
1896 			 * don't worry about dynamic_enable because we
1897 			 * left it on for the other possible interrupts
1898 			 * during NAPI
1899 			 */
1900 			u32 qval = rd32(hw, I40E_QINT_RQCTL(0));
1901 			qval |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
1902 			wr32(hw, I40E_QINT_RQCTL(0), qval);
1903 
1904 			qval = rd32(hw, I40E_QINT_TQCTL(0));
1905 			qval |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
1906 			wr32(hw, I40E_QINT_TQCTL(0), qval);
1907 
1908 			i40e_irq_dynamic_enable_icr0(vsi->back);
1909 		}
1910 	}
1911 
1912 	return 0;
1913 }
1914 
1915 /**
1916  * i40e_atr - Add a Flow Director ATR filter
1917  * @tx_ring:  ring to add programming descriptor to
1918  * @skb:      send buffer
1919  * @tx_flags: send tx flags
1920  * @protocol: wire protocol
1921  **/
1922 static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
1923 		     u32 tx_flags, __be16 protocol)
1924 {
1925 	struct i40e_filter_program_desc *fdir_desc;
1926 	struct i40e_pf *pf = tx_ring->vsi->back;
1927 	union {
1928 		unsigned char *network;
1929 		struct iphdr *ipv4;
1930 		struct ipv6hdr *ipv6;
1931 	} hdr;
1932 	struct tcphdr *th;
1933 	unsigned int hlen;
1934 	u32 flex_ptype, dtype_cmd;
1935 	u16 i;
1936 
1937 	/* make sure ATR is enabled */
1938 	if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
1939 		return;
1940 
1941 	if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
1942 		return;
1943 
1944 	/* if sampling is disabled do nothing */
1945 	if (!tx_ring->atr_sample_rate)
1946 		return;
1947 
1948 	if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
1949 		return;
1950 
1951 	if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL)) {
1952 		/* snag network header to get L4 type and address */
1953 		hdr.network = skb_network_header(skb);
1954 
1955 		/* Currently only IPv4/IPv6 with TCP is supported;
1956 		 * access ihl as a u8 to avoid unaligned access on ia64
1957 		 */
1958 		if (tx_flags & I40E_TX_FLAGS_IPV4)
1959 			hlen = (hdr.network[0] & 0x0F) << 2;
1960 		else if (protocol == htons(ETH_P_IPV6))
1961 			hlen = sizeof(struct ipv6hdr);
1962 		else
1963 			return;
1964 	} else {
1965 		hdr.network = skb_inner_network_header(skb);
1966 		hlen = skb_inner_network_header_len(skb);
1967 	}
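	/* For the non-tunneled IPv4 case above, the low nibble of the first
	 * IP header byte is the header length in 32-bit words, so a typical
	 * IHL of 5 yields hlen = 5 << 2 = 20 bytes.
	 */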
1968 
1969 	/* Currently only IPv4/IPv6 with TCP is supported.
1970 	 * Note: tx_flags gets modified to reflect inner protocols in
1971 	 * i40e_tx_enable_csum() if encapsulation is enabled.
1972 	 */
1973 	if ((tx_flags & I40E_TX_FLAGS_IPV4) &&
1974 	    (hdr.ipv4->protocol != IPPROTO_TCP))
1975 		return;
1976 	else if ((tx_flags & I40E_TX_FLAGS_IPV6) &&
1977 		 (hdr.ipv6->nexthdr != IPPROTO_TCP))
1978 		return;
1979 
1980 	th = (struct tcphdr *)(hdr.network + hlen);
1981 
1982 	/* Due to lack of space, no more new filters can be programmed */
1983 	if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
1984 		return;
1985 
1986 	tx_ring->atr_count++;
1987 
1988 	/* sample on all syn/fin/rst packets or once every atr sample rate */
1989 	if (!th->fin &&
1990 	    !th->syn &&
1991 	    !th->rst &&
1992 	    (tx_ring->atr_count < tx_ring->atr_sample_rate))
1993 		return;
1994 
1995 	tx_ring->atr_count = 0;
1996 
1997 	/* grab the next descriptor */
1998 	i = tx_ring->next_to_use;
1999 	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
2000 
2001 	i++;
2002 	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2003 
2004 	flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
2005 		      I40E_TXD_FLTR_QW0_QINDEX_MASK;
2006 	flex_ptype |= (protocol == htons(ETH_P_IP)) ?
2007 		      (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
2008 		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
2009 		      (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
2010 		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
2011 
2012 	flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
2013 
2014 	dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
2015 
2016 	dtype_cmd |= (th->fin || th->rst) ?
2017 		     (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
2018 		      I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
2019 		     (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
2020 		      I40E_TXD_FLTR_QW1_PCMD_SHIFT);
2021 
2022 	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
2023 		     I40E_TXD_FLTR_QW1_DEST_SHIFT;
2024 
2025 	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
2026 		     I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
2027 
2028 	dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
2029 	if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL))
2030 		dtype_cmd |=
2031 			((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
2032 			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2033 			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2034 	else
2035 		dtype_cmd |=
2036 			((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) <<
2037 			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2038 			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2039 
2040 	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
2041 	fdir_desc->rsvd = cpu_to_le32(0);
2042 	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
2043 	fdir_desc->fd_id = cpu_to_le32(0);
2044 }
2045 
2046 /**
2047  * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
2048  * @skb:     send buffer
2049  * @tx_ring: ring to send buffer on
2050  * @flags:   the tx flags to be set
2051  *
2052  * Checks the skb and sets up several generic transmit flags related to
2053  * VLAN tagging for the HW, such as VLAN, DCB, etc.
2054  *
2055  * Returns an error code to indicate the frame should be dropped upon error;
2056  * otherwise returns 0 to indicate the flags have been set properly.
2057  **/
2058 #ifdef I40E_FCOE
2059 inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2060 				      struct i40e_ring *tx_ring,
2061 				      u32 *flags)
2062 #else
2063 static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2064 					     struct i40e_ring *tx_ring,
2065 					     u32 *flags)
2066 #endif
2067 {
2068 	__be16 protocol = skb->protocol;
2069 	u32  tx_flags = 0;
2070 
2071 	if (protocol == htons(ETH_P_8021Q) &&
2072 	    !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
2073 		/* When HW VLAN acceleration is turned off by the user the
2074 		 * stack sets the protocol to 8021q so that the driver
2075 		 * can take any steps required to support the SW only
2076 		 * VLAN handling.  In our case the driver doesn't need
2077 		 * to take any further steps so just set the protocol
2078 		 * to the encapsulated ethertype.
2079 		 */
2080 		skb->protocol = vlan_get_protocol(skb);
2081 		goto out;
2082 	}
2083 
2084 	/* if we have a HW VLAN tag being added, default to the HW one */
2085 	if (skb_vlan_tag_present(skb)) {
2086 		tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
2087 		tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2088 	/* else if it is a SW VLAN, check the next protocol and store the tag */
2089 	} else if (protocol == htons(ETH_P_8021Q)) {
2090 		struct vlan_hdr *vhdr, _vhdr;
2091 		vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
2092 		if (!vhdr)
2093 			return -EINVAL;
2094 
2095 		protocol = vhdr->h_vlan_encapsulated_proto;
2096 		tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
2097 		tx_flags |= I40E_TX_FLAGS_SW_VLAN;
2098 	}
2099 
2100 	if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED))
2101 		goto out;
2102 
2103 	/* Insert 802.1p priority into VLAN header */
2104 	if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
2105 	    (skb->priority != TC_PRIO_CONTROL)) {
2106 		tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
2107 		tx_flags |= (skb->priority & 0x7) <<
2108 				I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
2109 		if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
2110 			struct vlan_ethhdr *vhdr;
2111 			int rc;
2112 
2113 			rc = skb_cow_head(skb, 0);
2114 			if (rc < 0)
2115 				return rc;
2116 			vhdr = (struct vlan_ethhdr *)skb->data;
2117 			vhdr->h_vlan_TCI = htons(tx_flags >>
2118 						 I40E_TX_FLAGS_VLAN_SHIFT);
2119 		} else {
2120 			tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2121 		}
2122 	}
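	/* In the SW VLAN case the 802.1p priority is written back into the
	 * TCI already present in the packet (priority occupies the top three
	 * bits of the TCI); in the HW VLAN case the tag, including the
	 * priority bits accumulated in tx_flags, is inserted by the hardware
	 * from the L2TAG1 field of the data descriptor.
	 */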
2123 
2124 out:
2125 	*flags = tx_flags;
2126 	return 0;
2127 }
2128 
2129 /**
2130  * i40e_tso - set up the tso context descriptor
2131  * @tx_ring:  ptr to the ring to send
2132  * @skb:      ptr to the skb we're sending
2133  * @hdr_len:  ptr to the size of the packet header
 * @cd_type_cmd_tso_mss: ptr to the Quad Word 1 context descriptor bits
2134  * @cd_tunneling: ptr to context descriptor bits
2135  *
2136  * Returns 0 if no TSO is needed, 1 if TSO is set up, or a negative error code
2137  **/
2138 static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
2139 		    u8 *hdr_len, u64 *cd_type_cmd_tso_mss,
2140 		    u32 *cd_tunneling)
2141 {
2142 	u32 cd_cmd, cd_tso_len, cd_mss;
2143 	struct ipv6hdr *ipv6h;
2144 	struct tcphdr *tcph;
2145 	struct iphdr *iph;
2146 	u32 l4len;
2147 	int err;
2148 
2149 	if (!skb_is_gso(skb))
2150 		return 0;
2151 
2152 	err = skb_cow_head(skb, 0);
2153 	if (err < 0)
2154 		return err;
2155 
2156 	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
2157 	ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
2158 
2159 	if (iph->version == 4) {
2160 		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
2161 		iph->tot_len = 0;
2162 		iph->check = 0;
2163 		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
2164 						 0, IPPROTO_TCP, 0);
2165 	} else if (ipv6h->version == 6) {
2166 		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
2167 		ipv6h->payload_len = 0;
2168 		tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
2169 					       0, IPPROTO_TCP, 0);
2170 	}
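	/* The checksum written above is the (inverted) pseudo-header checksum
	 * computed with a zero length, so the hardware can add the real
	 * per-segment length and finish the TCP checksum for each segment it
	 * emits.
	 */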
2171 
2172 	l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
2173 	*hdr_len = (skb->encapsulation
2174 		    ? (skb_inner_transport_header(skb) - skb->data)
2175 		    : skb_transport_offset(skb)) + l4len;
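	/* Worked example (non-encapsulated TCP/IPv4, no VLAN tag or TCP
	 * options): hdr_len = 14 (Ethernet) + 20 (IP) + 20 (TCP) = 54 bytes.
	 */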
2176 
2177 	/* find the field values */
2178 	cd_cmd = I40E_TX_CTX_DESC_TSO;
2179 	cd_tso_len = skb->len - *hdr_len;
2180 	cd_mss = skb_shinfo(skb)->gso_size;
2181 	*cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
2182 				((u64)cd_tso_len <<
2183 				 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
2184 				((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
2185 	return 1;
2186 }
2187 
2188 /**
2189  * i40e_tsyn - set up the tsyn context descriptor
2190  * @tx_ring:  ptr to the ring to send
2191  * @skb:      ptr to the skb we're sending
2192  * @tx_flags: the collected send information
 * @cd_type_cmd_tso_mss: ptr to the Quad Word 1 context descriptor bits
2193  *
2194  * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
2195  **/
2196 static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
2197 		     u32 tx_flags, u64 *cd_type_cmd_tso_mss)
2198 {
2199 	struct i40e_pf *pf;
2200 
2201 	if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
2202 		return 0;
2203 
2204 	/* Tx timestamps cannot be sampled when doing TSO */
2205 	if (tx_flags & I40E_TX_FLAGS_TSO)
2206 		return 0;
2207 
2208 	/* only timestamp the outbound packet if the user has requested it and
2209 	 * we are not already transmitting a packet to be timestamped
2210 	 */
2211 	pf = i40e_netdev_to_pf(tx_ring->netdev);
2212 	if (!(pf->flags & I40E_FLAG_PTP))
2213 		return 0;
2214 
2215 	if (pf->ptp_tx &&
2216 	    !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
2217 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
2218 		pf->ptp_tx_skb = skb_get(skb);
2219 	} else {
2220 		return 0;
2221 	}
2222 
2223 	*cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
2224 				I40E_TXD_CTX_QW1_CMD_SHIFT;
2225 
2226 	return 1;
2227 }
2228 
2229 /**
2230  * i40e_tx_enable_csum - Enable Tx checksum offloads
2231  * @skb: send buffer
2232  * @tx_flags: pointer to Tx flags currently set
2233  * @td_cmd: Tx descriptor command bits to set
2234  * @td_offset: Tx descriptor header offsets to set
 * @tx_ring: ptr to the ring to send on
2235  * @cd_tunneling: ptr to context desc bits
2236  **/
2237 static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
2238 				u32 *td_cmd, u32 *td_offset,
2239 				struct i40e_ring *tx_ring,
2240 				u32 *cd_tunneling)
2241 {
2242 	struct ipv6hdr *this_ipv6_hdr;
2243 	unsigned int this_tcp_hdrlen;
2244 	struct iphdr *this_ip_hdr;
2245 	u32 network_hdr_len;
2246 	u8 l4_hdr = 0;
2247 	u32 l4_tunnel = 0;
2248 
2249 	if (skb->encapsulation) {
2250 		switch (ip_hdr(skb)->protocol) {
2251 		case IPPROTO_UDP:
2252 			l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING;
2253 			*tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL;
2254 			break;
2255 		default:
2256 			return;
2257 		}
2258 		network_hdr_len = skb_inner_network_header_len(skb);
2259 		this_ip_hdr = inner_ip_hdr(skb);
2260 		this_ipv6_hdr = inner_ipv6_hdr(skb);
2261 		this_tcp_hdrlen = inner_tcp_hdrlen(skb);
2262 
2263 		if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2264 			if (*tx_flags & I40E_TX_FLAGS_TSO) {
2265 				*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
2266 				ip_hdr(skb)->check = 0;
2267 			} else {
2268 				*cd_tunneling |=
2269 					 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
2270 			}
2271 		} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2272 			*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
2273 			if (*tx_flags & I40E_TX_FLAGS_TSO)
2274 				ip_hdr(skb)->check = 0;
2275 		}
2276 
2277 		/* Now set the ctx descriptor fields */
2278 		*cd_tunneling |= (skb_network_header_len(skb) >> 2) <<
2279 				   I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT      |
2280 				   l4_tunnel                             |
2281 				   ((skb_inner_network_offset(skb) -
2282 					skb_transport_offset(skb)) >> 1) <<
2283 				   I40E_TXD_CTX_QW0_NATLEN_SHIFT;
2284 		if (this_ip_hdr->version == 6) {
2285 			*tx_flags &= ~I40E_TX_FLAGS_IPV4;
2286 			*tx_flags |= I40E_TX_FLAGS_IPV6;
2287 		}
2288 	} else {
2289 		network_hdr_len = skb_network_header_len(skb);
2290 		this_ip_hdr = ip_hdr(skb);
2291 		this_ipv6_hdr = ipv6_hdr(skb);
2292 		this_tcp_hdrlen = tcp_hdrlen(skb);
2293 	}
2294 
2295 	/* Enable IP checksum offloads */
2296 	if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2297 		l4_hdr = this_ip_hdr->protocol;
2298 		/* the stack computes the IP header checksum already; the only
2299 		 * time we need the hardware to recompute it is for TSO.
2300 		 */
2301 		if (*tx_flags & I40E_TX_FLAGS_TSO) {
2302 			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
2303 			this_ip_hdr->check = 0;
2304 		} else {
2305 			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
2306 		}
2307 		/* Now set the td_offset for IP header length */
2308 		*td_offset = (network_hdr_len >> 2) <<
2309 			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2310 	} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2311 		l4_hdr = this_ipv6_hdr->nexthdr;
2312 		*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
2313 		/* Now set the td_offset for IP header length */
2314 		*td_offset = (network_hdr_len >> 2) <<
2315 			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2316 	}
2317 	/* words in MACLEN + dwords in IPLEN + dwords in L4Len */
2318 	*td_offset |= (skb_network_offset(skb) >> 1) <<
2319 		       I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
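	/* Worked example for a plain Ethernet + IPv4 + TCP frame with no
	 * options: MACLEN = 14 >> 1 = 7 words, IPLEN = 20 >> 2 = 5 dwords,
	 * and L4LEN below = 20 >> 2 = 5 dwords.
	 */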
2320 
2321 	/* Enable L4 checksum offloads */
2322 	switch (l4_hdr) {
2323 	case IPPROTO_TCP:
2324 		/* enable checksum offloads */
2325 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
2326 		*td_offset |= (this_tcp_hdrlen >> 2) <<
2327 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2328 		break;
2329 	case IPPROTO_SCTP:
2330 		/* enable SCTP checksum offload */
2331 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
2332 		*td_offset |= (sizeof(struct sctphdr) >> 2) <<
2333 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2334 		break;
2335 	case IPPROTO_UDP:
2336 		/* enable UDP checksum offload */
2337 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
2338 		*td_offset |= (sizeof(struct udphdr) >> 2) <<
2339 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2340 		break;
2341 	default:
2342 		break;
2343 	}
2344 }
2345 
2346 /**
2347  * i40e_create_tx_ctx - Build the Tx context descriptor
2348  * @tx_ring:  ring to create the descriptor on
2349  * @cd_type_cmd_tso_mss: Quad Word 1
2350  * @cd_tunneling: Quad Word 0 - bits 0-31
2351  * @cd_l2tag2: Quad Word 0 - bits 32-63
2352  **/
2353 static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
2354 			       const u64 cd_type_cmd_tso_mss,
2355 			       const u32 cd_tunneling, const u32 cd_l2tag2)
2356 {
2357 	struct i40e_tx_context_desc *context_desc;
2358 	int i = tx_ring->next_to_use;
2359 
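	/* If nothing beyond the base CONTEXT dtype is requested (no TSO/TSYN
	 * command bits, no tunneling parameters, no L2TAG2), the packet does
	 * not need a context descriptor at all, so skip it.
	 */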
2360 	if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
2361 	    !cd_tunneling && !cd_l2tag2)
2362 		return;
2363 
2364 	/* grab the next descriptor */
2365 	context_desc = I40E_TX_CTXTDESC(tx_ring, i);
2366 
2367 	i++;
2368 	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2369 
2370 	/* cpu_to_le32 and assign to struct fields */
2371 	context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
2372 	context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
2373 	context_desc->rsvd = cpu_to_le16(0);
2374 	context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
2375 }
2376 
2377 /**
2378  * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
2379  * @tx_ring: the ring to be checked
2380  * @size:    the size buffer we want to assure is available
2381  *
2382  * Returns -EBUSY if a stop is needed, else 0
2383  **/
2384 static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2385 {
2386 	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
2387 	/* Memory barrier before checking head and tail */
2388 	smp_mb();
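	/* The barrier is meant to order the queue stop above against the
	 * re-read of the free-descriptor count below, so a concurrent Tx
	 * clean that frees descriptors either sees the stopped queue and
	 * wakes it, or we see its freed descriptors and restart the queue
	 * ourselves.
	 */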
2389 
2390 	/* Check again in a case another CPU has just made room available. */
2391 	if (likely(I40E_DESC_UNUSED(tx_ring) < size))
2392 		return -EBUSY;
2393 
2394 	/* A reprieve! - use start_queue because it doesn't call schedule */
2395 	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
2396 	++tx_ring->tx_stats.restart_queue;
2397 	return 0;
2398 }
2399 
2400 /**
2401  * i40e_maybe_stop_tx - 1st level check for tx stop conditions
2402  * @tx_ring: the ring to be checked
2403  * @size:    the size buffer we want to assure is available
2404  *
2405  * Returns 0 if stop is not needed
2406  **/
2407 #ifdef I40E_FCOE
2408 inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2409 #else
2410 static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2411 #endif
2412 {
2413 	if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
2414 		return 0;
2415 	return __i40e_maybe_stop_tx(tx_ring, size);
2416 }
2417 
2418 /**
2419  * i40e_chk_linearize - Check if there are more than 8 fragments per packet
2420  * @skb:      send buffer
2421  * @tx_flags: collected send information
2422  *
2423  * Note: Our HW can't scatter-gather more than 8 fragments to build
2424  * a packet on the wire and so we need to figure out the cases where we
2425  * need to linearize the skb.
2426  **/
2427 static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags)
2428 {
2429 	struct skb_frag_struct *frag;
2430 	bool linearize = false;
2431 	unsigned int size = 0;
2432 	u16 num_frags;
2433 	u16 gso_segs;
2434 
2435 	num_frags = skb_shinfo(skb)->nr_frags;
2436 	gso_segs = skb_shinfo(skb)->gso_segs;
2437 
2438 	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
2439 		u16 j = 0;
2440 
2441 		if (num_frags < (I40E_MAX_BUFFER_TXD))
2442 			goto linearize_chk_done;
2443 		/* try the simple math to see if we have too many frags per segment */
2444 		if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) >
2445 		    I40E_MAX_BUFFER_TXD) {
2446 			linearize = true;
2447 			goto linearize_chk_done;
2448 		}
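		/* Illustrative numbers, taking the 8-fragment limit noted in
		 * the function comment: 24 frags over 4 segments gives
		 * DIV_ROUND_UP(28, 4) = 7, so we fall through to the exact
		 * walk below; 40 frags over 4 segments gives
		 * DIV_ROUND_UP(44, 4) = 11 and we linearize right away.
		 */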
2449 		frag = &skb_shinfo(skb)->frags[0];
2450 		/* we might still have more fragments per segment */
2451 		do {
2452 			size += skb_frag_size(frag);
2453 			frag++; j++;
2454 			if ((size >= skb_shinfo(skb)->gso_size) &&
2455 			    (j < I40E_MAX_BUFFER_TXD)) {
2456 				size = (size % skb_shinfo(skb)->gso_size);
2457 				j = (size) ? 1 : 0;
2458 			}
2459 			if (j == I40E_MAX_BUFFER_TXD) {
2460 				linearize = true;
2461 				break;
2462 			}
2463 			num_frags--;
2464 		} while (num_frags);
2465 	} else {
2466 		if (num_frags >= I40E_MAX_BUFFER_TXD)
2467 			linearize = true;
2468 	}
2469 
2470 linearize_chk_done:
2471 	return linearize;
2472 }
2473 
2474 /**
2475  * i40e_tx_map - Build the Tx descriptor
2476  * @tx_ring:  ring to send buffer on
2477  * @skb:      send buffer
2478  * @first:    first buffer info buffer to use
2479  * @tx_flags: collected send information
2480  * @hdr_len:  size of the packet header
2481  * @td_cmd:   the command field in the descriptor
2482  * @td_offset: offset for checksum or crc
2483  **/
2484 #ifdef I40E_FCOE
2485 inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2486 			struct i40e_tx_buffer *first, u32 tx_flags,
2487 			const u8 hdr_len, u32 td_cmd, u32 td_offset)
2488 #else
2489 static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2490 			       struct i40e_tx_buffer *first, u32 tx_flags,
2491 			       const u8 hdr_len, u32 td_cmd, u32 td_offset)
2492 #endif
2493 {
2494 	unsigned int data_len = skb->data_len;
2495 	unsigned int size = skb_headlen(skb);
2496 	struct skb_frag_struct *frag;
2497 	struct i40e_tx_buffer *tx_bi;
2498 	struct i40e_tx_desc *tx_desc;
2499 	u16 i = tx_ring->next_to_use;
2500 	u32 td_tag = 0;
2501 	dma_addr_t dma;
2502 	u16 gso_segs;
2503 
2504 	if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
2505 		td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
2506 		td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
2507 			 I40E_TX_FLAGS_VLAN_SHIFT;
2508 	}
2509 
2510 	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
2511 		gso_segs = skb_shinfo(skb)->gso_segs;
2512 	else
2513 		gso_segs = 1;
2514 
2515 	/* multiply data chunks by size of headers */
2516 	first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
2517 	first->gso_segs = gso_segs;
2518 	first->skb = skb;
2519 	first->tx_flags = tx_flags;
2520 
2521 	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
2522 
2523 	tx_desc = I40E_TX_DESC(tx_ring, i);
2524 	tx_bi = first;
2525 
2526 	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
2527 		if (dma_mapping_error(tx_ring->dev, dma))
2528 			goto dma_error;
2529 
2530 		/* record length, and DMA address */
2531 		dma_unmap_len_set(tx_bi, len, size);
2532 		dma_unmap_addr_set(tx_bi, dma, dma);
2533 
2534 		tx_desc->buffer_addr = cpu_to_le64(dma);
2535 
2536 		while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
2537 			tx_desc->cmd_type_offset_bsz =
2538 				build_ctob(td_cmd, td_offset,
2539 					   I40E_MAX_DATA_PER_TXD, td_tag);
2540 
2541 			tx_desc++;
2542 			i++;
2543 			if (i == tx_ring->count) {
2544 				tx_desc = I40E_TX_DESC(tx_ring, 0);
2545 				i = 0;
2546 			}
2547 
2548 			dma += I40E_MAX_DATA_PER_TXD;
2549 			size -= I40E_MAX_DATA_PER_TXD;
2550 
2551 			tx_desc->buffer_addr = cpu_to_le64(dma);
2552 		}
2553 
2554 		if (likely(!data_len))
2555 			break;
2556 
2557 		tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
2558 							  size, td_tag);
2559 
2560 		tx_desc++;
2561 		i++;
2562 		if (i == tx_ring->count) {
2563 			tx_desc = I40E_TX_DESC(tx_ring, 0);
2564 			i = 0;
2565 		}
2566 
2567 		size = skb_frag_size(frag);
2568 		data_len -= size;
2569 
2570 		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
2571 				       DMA_TO_DEVICE);
2572 
2573 		tx_bi = &tx_ring->tx_bi[i];
2574 	}
2575 
2576 	/* Place RS bit on last descriptor of any packet that spans across the
2577 	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
2578 	 */
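	/* Roughly: when the packet both starts and ends inside the same
	 * aligned group of four descriptors and does not end on the last
	 * slot of that group, only EOP is set and the descriptor write-back
	 * is left to a later packet (or a forced write-back) that does set
	 * RS, which reduces write-back traffic.
	 */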
2579 	if (((i & WB_STRIDE) != WB_STRIDE) &&
2580 	    (first <= &tx_ring->tx_bi[i]) &&
2581 	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
2582 		tx_desc->cmd_type_offset_bsz =
2583 			build_ctob(td_cmd, td_offset, size, td_tag) |
2584 			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
2585 					 I40E_TXD_QW1_CMD_SHIFT);
2586 	} else {
2587 		tx_desc->cmd_type_offset_bsz =
2588 			build_ctob(td_cmd, td_offset, size, td_tag) |
2589 			cpu_to_le64((u64)I40E_TXD_CMD <<
2590 					 I40E_TXD_QW1_CMD_SHIFT);
2591 	}
2592 
2593 	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
2594 						 tx_ring->queue_index),
2595 			     first->bytecount);
2596 
2597 	/* Force memory writes to complete before letting h/w
2598 	 * know there are new descriptors to fetch.  (Only
2599 	 * applicable for weak-ordered memory model archs,
2600 	 * such as IA-64).
2601 	 */
2602 	wmb();
2603 
2604 	/* set next_to_watch value indicating a packet is present */
2605 	first->next_to_watch = tx_desc;
2606 
2607 	i++;
2608 	if (i == tx_ring->count)
2609 		i = 0;
2610 
2611 	tx_ring->next_to_use = i;
2612 
2613 	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
2614 	/* notify HW of packet */
2615 	if (!skb->xmit_more ||
2616 	    netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2617 						   tx_ring->queue_index)))
2618 		writel(i, tx_ring->tail);
2619 
2620 	return;
2621 
2622 dma_error:
2623 	dev_info(tx_ring->dev, "TX DMA map failed\n");
2624 
2625 	/* clear dma mappings for failed tx_bi map */
2626 	for (;;) {
2627 		tx_bi = &tx_ring->tx_bi[i];
2628 		i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
2629 		if (tx_bi == first)
2630 			break;
2631 		if (i == 0)
2632 			i = tx_ring->count;
2633 		i--;
2634 	}
2635 
2636 	tx_ring->next_to_use = i;
2637 }
2638 
2639 /**
2640  * i40e_xmit_descriptor_count - calculate number of tx descriptors needed
2641  * @skb:     send buffer
2642  * @tx_ring: ring to send buffer on
2643  *
2644  * Returns the number of data descriptors needed for this skb. Returns 0 to
2645  * indicate there are not enough descriptors available in this ring since we
2646  * need at least one descriptor.
2647  **/
2648 #ifdef I40E_FCOE
2649 inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
2650 				      struct i40e_ring *tx_ring)
2651 #else
2652 static inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
2653 					     struct i40e_ring *tx_ring)
2654 #endif
2655 {
2656 	unsigned int f;
2657 	int count = 0;
2658 
2659 	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
2660 	 *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
2661 	 *       + 4 desc gap to avoid the cache line where head is,
2662 	 *       + 1 desc for context descriptor,
2663 	 * otherwise try next time
2664 	 */
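	/* Illustrative count, assuming each fragment fits in a single
	 * descriptor: an skb with a 300 byte linear area and three page
	 * fragments needs 1 + 3 = 4 data descriptors, so the ring must have
	 * at least 4 + 4 + 1 = 9 free slots before we commit to the send.
	 */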
2665 	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
2666 		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
2667 
2668 	count += TXD_USE_COUNT(skb_headlen(skb));
2669 	if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
2670 		tx_ring->tx_stats.tx_busy++;
2671 		return 0;
2672 	}
2673 	return count;
2674 }
2675 
2676 /**
2677  * i40e_xmit_frame_ring - Sends buffer on Tx ring
2678  * @skb:     send buffer
2679  * @tx_ring: ring to send buffer on
2680  *
2681  * Returns NETDEV_TX_OK if sent, else an error code
2682  **/
2683 static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
2684 					struct i40e_ring *tx_ring)
2685 {
2686 	u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
2687 	u32 cd_tunneling = 0, cd_l2tag2 = 0;
2688 	struct i40e_tx_buffer *first;
2689 	u32 td_offset = 0;
2690 	u32 tx_flags = 0;
2691 	__be16 protocol;
2692 	u32 td_cmd = 0;
2693 	u8 hdr_len = 0;
2694 	int tsyn;
2695 	int tso;

2696 	if (0 == i40e_xmit_descriptor_count(skb, tx_ring))
2697 		return NETDEV_TX_BUSY;
2698 
2699 	/* prepare the xmit flags */
2700 	if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
2701 		goto out_drop;
2702 
2703 	/* obtain protocol of skb */
2704 	protocol = vlan_get_protocol(skb);
2705 
2706 	/* record the location of the first descriptor for this packet */
2707 	first = &tx_ring->tx_bi[tx_ring->next_to_use];
2708 
2709 	/* setup IPv4/IPv6 offloads */
2710 	if (protocol == htons(ETH_P_IP))
2711 		tx_flags |= I40E_TX_FLAGS_IPV4;
2712 	else if (protocol == htons(ETH_P_IPV6))
2713 		tx_flags |= I40E_TX_FLAGS_IPV6;
2714 
2715 	tso = i40e_tso(tx_ring, skb, &hdr_len,
2716 		       &cd_type_cmd_tso_mss, &cd_tunneling);
2717 
2718 	if (tso < 0)
2719 		goto out_drop;
2720 	else if (tso)
2721 		tx_flags |= I40E_TX_FLAGS_TSO;
2722 
2723 	tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
2724 
2725 	if (tsyn)
2726 		tx_flags |= I40E_TX_FLAGS_TSYN;
2727 
2728 	if (i40e_chk_linearize(skb, tx_flags))
2729 		if (skb_linearize(skb))
2730 			goto out_drop;
2731 
2732 	skb_tx_timestamp(skb);
2733 
2734 	/* always enable CRC insertion offload */
2735 	td_cmd |= I40E_TX_DESC_CMD_ICRC;
2736 
2737 	/* Always offload the checksum, since it's in the data descriptor */
2738 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
2739 		tx_flags |= I40E_TX_FLAGS_CSUM;
2740 
2741 		i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
2742 				    tx_ring, &cd_tunneling);
2743 	}
2744 
2745 	i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
2746 			   cd_tunneling, cd_l2tag2);
2747 
2748 	/* Add Flow Director ATR if it's enabled.
2749 	 *
2750 	 * NOTE: this must always be directly before the data descriptor.
2751 	 */
2752 	i40e_atr(tx_ring, skb, tx_flags, protocol);
2753 
2754 	i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
2755 		    td_cmd, td_offset);
2756 
2757 	return NETDEV_TX_OK;
2758 
2759 out_drop:
2760 	dev_kfree_skb_any(skb);
2761 	return NETDEV_TX_OK;
2762 }
2763 
2764 /**
2765  * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
2766  * @skb:    send buffer
2767  * @netdev: network interface device structure
2768  *
2769  * Returns NETDEV_TX_OK if sent, else an error code
2770  **/
2771 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
2772 {
2773 	struct i40e_netdev_priv *np = netdev_priv(netdev);
2774 	struct i40e_vsi *vsi = np->vsi;
2775 	struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
2776 
2777 	/* hardware can't handle really short frames; hardware padding works
2778 	 * beyond this point, so pad anything shorter in software first
2779 	 */
2780 	if (skb_put_padto(skb, I40E_MIN_TX_LEN))
2781 		return NETDEV_TX_OK;
2782 
2783 	return i40e_xmit_frame_ring(skb, tx_ring);
2784 }
2785