xref: /linux/drivers/net/ethernet/emulex/benet/be_main.c (revision fcc8487d477a3452a1d0ccbdd4c5e0e1e3cb8bed)
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17 
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27 
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32 
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39 
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43 
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48 
49 static const struct pci_device_id be_dev_ids[] = {
50 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58 	{ 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61 
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64 
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67 	"CEV",
68 	"CTX",
69 	"DBUF",
70 	"ERX",
71 	"Host",
72 	"MPU",
73 	"NDMA",
74 	"PTC ",
75 	"RDMA ",
76 	"RXF ",
77 	"RXIPS ",
78 	"RXULP0 ",
79 	"RXULP1 ",
80 	"RXULP2 ",
81 	"TIM ",
82 	"TPOST ",
83 	"TPRE ",
84 	"TXIPS ",
85 	"TXULP0 ",
86 	"TXULP1 ",
87 	"UC ",
88 	"WDMA ",
89 	"TXULP2 ",
90 	"HOST1 ",
91 	"P0_OB_LINK ",
92 	"P1_OB_LINK ",
93 	"HOST_GPIO ",
94 	"MBOX ",
95 	"ERX2 ",
96 	"SPARE ",
97 	"JTAG ",
98 	"MPU_INTPEND "
99 };
100 
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103 	"LPCMEMHOST",
104 	"MGMT_MAC",
105 	"PCS0ONLINE",
106 	"MPU_IRAM",
107 	"PCS1ONLINE",
108 	"PCTL0",
109 	"PCTL1",
110 	"PMEM",
111 	"RR",
112 	"TXPB",
113 	"RXPP",
114 	"XAUI",
115 	"TXP",
116 	"ARM",
117 	"IPC",
118 	"HOST2",
119 	"HOST3",
120 	"HOST4",
121 	"HOST5",
122 	"HOST6",
123 	"HOST7",
124 	"ECRC",
125 	"Poison TLP",
126 	"NETC",
127 	"PERIPH",
128 	"LLTXULP",
129 	"D2P",
130 	"RCON",
131 	"LDMA",
132 	"LLTXP",
133 	"LLTXPB",
134 	"Unknown"
135 };
136 
137 #define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
138 				 BE_IF_FLAGS_BROADCAST | \
139 				 BE_IF_FLAGS_MULTICAST | \
140 				 BE_IF_FLAGS_PASS_L3L4_ERRORS)
141 
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144 	struct be_dma_mem *mem = &q->dma_mem;
145 
146 	if (mem->va) {
147 		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148 				  mem->dma);
149 		mem->va = NULL;
150 	}
151 }
152 
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154 			  u16 len, u16 entry_size)
155 {
156 	struct be_dma_mem *mem = &q->dma_mem;
157 
158 	memset(q, 0, sizeof(*q));
159 	q->len = len;
160 	q->entry_size = entry_size;
161 	mem->size = len * entry_size;
162 	mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163 				      GFP_KERNEL);
164 	if (!mem->va)
165 		return -ENOMEM;
166 	return 0;
167 }
168 
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171 	u32 reg, enabled;
172 
173 	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174 			      &reg);
175 	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176 
177 	if (!enabled && enable)
178 		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179 	else if (enabled && !enable)
180 		reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181 	else
182 		return;
183 
184 	pci_write_config_dword(adapter->pdev,
185 			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187 
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190 	int status = 0;
191 
192 	/* On Lancer, interrupts can't be controlled via this register */
193 	if (lancer_chip(adapter))
194 		return;
195 
196 	if (be_check_error(adapter, BE_ERROR_EEH))
197 		return;
198 
199 	status = be_cmd_intr_set(adapter, enable);
200 	if (status)
201 		be_reg_intr_set(adapter, enable);
202 }
203 
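/* Doorbell helpers (below): each notify routine composes a 32-bit doorbell
 * value with the ring id in the low bits and a posted/popped count (plus
 * control flags for EQ/CQ) shifted into the upper bits, then writes it to
 * the ring's doorbell offset off adapter->db. Illustrative example: posting
 * 8 RX buffers to ring 3 writes
 * (3 & DB_RQ_RING_ID_MASK) | (8 << DB_RQ_NUM_POSTED_SHIFT) to DB_RQ_OFFSET.
 * The wmb() orders the descriptor updates before the doorbell write.
 */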
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206 	u32 val = 0;
207 
208 	if (be_check_error(adapter, BE_ERROR_HW))
209 		return;
210 
211 	val |= qid & DB_RQ_RING_ID_MASK;
212 	val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213 
214 	wmb();
215 	iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217 
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219 			  u16 posted)
220 {
221 	u32 val = 0;
222 
223 	if (be_check_error(adapter, BE_ERROR_HW))
224 		return;
225 
226 	val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227 	val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228 
229 	wmb();
230 	iowrite32(val, adapter->db + txo->db_offset);
231 }
232 
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234 			 bool arm, bool clear_int, u16 num_popped,
235 			 u32 eq_delay_mult_enc)
236 {
237 	u32 val = 0;
238 
239 	val |= qid & DB_EQ_RING_ID_MASK;
240 	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241 
242 	if (be_check_error(adapter, BE_ERROR_HW))
243 		return;
244 
245 	if (arm)
246 		val |= 1 << DB_EQ_REARM_SHIFT;
247 	if (clear_int)
248 		val |= 1 << DB_EQ_CLR_SHIFT;
249 	val |= 1 << DB_EQ_EVNT_SHIFT;
250 	val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251 	val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252 	iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254 
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257 	u32 val = 0;
258 
259 	val |= qid & DB_CQ_RING_ID_MASK;
260 	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261 			DB_CQ_RING_ID_EXT_MASK_SHIFT);
262 
263 	if (be_check_error(adapter, BE_ERROR_HW))
264 		return;
265 
266 	if (arm)
267 		val |= 1 << DB_CQ_REARM_SHIFT;
268 	val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269 	iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271 
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274 	int i;
275 
276 	/* Check if mac has already been added as part of uc-list */
277 	for (i = 0; i < adapter->uc_macs; i++) {
278 		if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279 			/* mac already added, skip addition */
280 			adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281 			return 0;
282 		}
283 	}
284 
285 	return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286 			       &adapter->pmac_id[0], 0);
287 }
288 
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291 	int i;
292 
293 	/* Skip deletion if the programmed mac is
294 	 * being used in uc-list
295 	 */
296 	for (i = 0; i < adapter->uc_macs; i++) {
297 		if (adapter->pmac_id[i + 1] == pmac_id)
298 			return;
299 	}
300 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302 
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305 	struct be_adapter *adapter = netdev_priv(netdev);
306 	struct device *dev = &adapter->pdev->dev;
307 	struct sockaddr *addr = p;
308 	int status;
309 	u8 mac[ETH_ALEN];
310 	u32 old_pmac_id = adapter->pmac_id[0];
311 
312 	if (!is_valid_ether_addr(addr->sa_data))
313 		return -EADDRNOTAVAIL;
314 
315 	/* Proceed further only if the user-provided MAC is different
316 	 * from the active MAC
317 	 */
318 	if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319 		return 0;
320 
321 	/* BE3 VFs without FILTMGMT privilege are not allowed to set their MAC
322 	 * address
323 	 */
324 	if (BEx_chip(adapter) && be_virtfn(adapter) &&
325 	    !check_privilege(adapter, BE_PRIV_FILTMGMT))
326 		return -EPERM;
327 
328 	/* if device is not running, copy MAC to netdev->dev_addr */
329 	if (!netif_running(netdev))
330 		goto done;
331 
332 	/* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
333 	 * privilege or if PF did not provision the new MAC address.
334 	 * On BE3, this cmd will always fail if the VF doesn't have the
335 	 * FILTMGMT privilege. This failure is OK only if the PF programmed
336 	 * the MAC for the VF.
337 	 */
338 	mutex_lock(&adapter->rx_filter_lock);
339 	status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
340 	if (!status) {
342 		/* Delete the old programmed MAC. This call may fail if the
343 		 * old MAC was already deleted by the PF driver.
344 		 */
345 		if (adapter->pmac_id[0] != old_pmac_id)
346 			be_dev_mac_del(adapter, old_pmac_id);
347 	}
348 
349 	mutex_unlock(&adapter->rx_filter_lock);
350 	/* Decide if the new MAC is successfully activated only after
351 	 * querying the FW
352 	 */
353 	status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
354 				       adapter->if_handle, true, 0);
355 	if (status)
356 		goto err;
357 
358 	/* The MAC change did not happen, either due to lack of privilege
359 	 * or because the PF didn't pre-provision the new MAC.
360 	 */
361 	if (!ether_addr_equal(addr->sa_data, mac)) {
362 		status = -EPERM;
363 		goto err;
364 	}
365 
366 	/* Remember currently programmed MAC */
367 	ether_addr_copy(adapter->dev_mac, addr->sa_data);
368 done:
369 	ether_addr_copy(netdev->dev_addr, addr->sa_data);
370 	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
371 	return 0;
372 err:
373 	dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
374 	return status;
375 }
376 
377 /* BE2 supports only v0 cmd */
378 static void *hw_stats_from_cmd(struct be_adapter *adapter)
379 {
380 	if (BE2_chip(adapter)) {
381 		struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
382 
383 		return &cmd->hw_stats;
384 	} else if (BE3_chip(adapter)) {
385 		struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
386 
387 		return &cmd->hw_stats;
388 	} else {
389 		struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
390 
391 		return &cmd->hw_stats;
392 	}
393 }
394 
395 /* BE2 supports only v0 cmd */
396 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
397 {
398 	if (BE2_chip(adapter)) {
399 		struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
400 
401 		return &hw_stats->erx;
402 	} else if (BE3_chip(adapter)) {
403 		struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
404 
405 		return &hw_stats->erx;
406 	} else {
407 		struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
408 
409 		return &hw_stats->erx;
410 	}
411 }
412 
413 static void populate_be_v0_stats(struct be_adapter *adapter)
414 {
415 	struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
416 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
417 	struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
418 	struct be_port_rxf_stats_v0 *port_stats =
419 					&rxf_stats->port[adapter->port_num];
420 	struct be_drv_stats *drvs = &adapter->drv_stats;
421 
422 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
423 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
424 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
425 	drvs->rx_control_frames = port_stats->rx_control_frames;
426 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
427 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
428 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
429 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
430 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
431 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
432 	drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
433 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
434 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
435 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
436 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
437 	drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
438 	drvs->rx_dropped_header_too_small =
439 		port_stats->rx_dropped_header_too_small;
440 	drvs->rx_address_filtered =
441 					port_stats->rx_address_filtered +
442 					port_stats->rx_vlan_filtered;
443 	drvs->rx_alignment_symbol_errors =
444 		port_stats->rx_alignment_symbol_errors;
445 
446 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
447 	drvs->tx_controlframes = port_stats->tx_controlframes;
448 
449 	if (adapter->port_num)
450 		drvs->jabber_events = rxf_stats->port1_jabber_events;
451 	else
452 		drvs->jabber_events = rxf_stats->port0_jabber_events;
453 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
454 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
455 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
456 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
457 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
458 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
459 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
460 }
461 
462 static void populate_be_v1_stats(struct be_adapter *adapter)
463 {
464 	struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
465 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
466 	struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
467 	struct be_port_rxf_stats_v1 *port_stats =
468 					&rxf_stats->port[adapter->port_num];
469 	struct be_drv_stats *drvs = &adapter->drv_stats;
470 
471 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
472 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
473 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
474 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
475 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
476 	drvs->rx_control_frames = port_stats->rx_control_frames;
477 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
478 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
479 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
480 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
481 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
482 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
483 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
484 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
485 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
486 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
487 	drvs->rx_dropped_header_too_small =
488 		port_stats->rx_dropped_header_too_small;
489 	drvs->rx_input_fifo_overflow_drop =
490 		port_stats->rx_input_fifo_overflow_drop;
491 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
492 	drvs->rx_alignment_symbol_errors =
493 		port_stats->rx_alignment_symbol_errors;
494 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
495 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
496 	drvs->tx_controlframes = port_stats->tx_controlframes;
497 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
498 	drvs->jabber_events = port_stats->jabber_events;
499 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
500 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
501 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
502 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
503 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
504 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
505 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
506 }
507 
508 static void populate_be_v2_stats(struct be_adapter *adapter)
509 {
510 	struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
511 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
512 	struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
513 	struct be_port_rxf_stats_v2 *port_stats =
514 					&rxf_stats->port[adapter->port_num];
515 	struct be_drv_stats *drvs = &adapter->drv_stats;
516 
517 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
518 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
519 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
520 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
521 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
522 	drvs->rx_control_frames = port_stats->rx_control_frames;
523 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
524 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
525 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
526 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
527 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
528 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
529 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
530 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
531 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
532 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
533 	drvs->rx_dropped_header_too_small =
534 		port_stats->rx_dropped_header_too_small;
535 	drvs->rx_input_fifo_overflow_drop =
536 		port_stats->rx_input_fifo_overflow_drop;
537 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
538 	drvs->rx_alignment_symbol_errors =
539 		port_stats->rx_alignment_symbol_errors;
540 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
541 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
542 	drvs->tx_controlframes = port_stats->tx_controlframes;
543 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
544 	drvs->jabber_events = port_stats->jabber_events;
545 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
546 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
547 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
548 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
549 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
550 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
551 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
552 	if (be_roce_supported(adapter)) {
553 		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
554 		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
555 		drvs->rx_roce_frames = port_stats->roce_frames_received;
556 		drvs->roce_drops_crc = port_stats->roce_drops_crc;
557 		drvs->roce_drops_payload_len =
558 			port_stats->roce_drops_payload_len;
559 	}
560 }
561 
562 static void populate_lancer_stats(struct be_adapter *adapter)
563 {
564 	struct be_drv_stats *drvs = &adapter->drv_stats;
565 	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
566 
567 	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
568 	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
569 	drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
570 	drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
571 	drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
572 	drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
573 	drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
574 	drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
575 	drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
576 	drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
577 	drvs->rx_dropped_tcp_length =
578 				pport_stats->rx_dropped_invalid_tcp_length;
579 	drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
580 	drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
581 	drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
582 	drvs->rx_dropped_header_too_small =
583 				pport_stats->rx_dropped_header_too_small;
584 	drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
585 	drvs->rx_address_filtered =
586 					pport_stats->rx_address_filtered +
587 					pport_stats->rx_vlan_filtered;
588 	drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
589 	drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
590 	drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
591 	drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
592 	drvs->jabber_events = pport_stats->rx_jabbers;
593 	drvs->forwarded_packets = pport_stats->num_forwards_lo;
594 	drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
595 	drvs->rx_drops_too_many_frags =
596 				pport_stats->rx_drops_too_many_frags_lo;
597 }
598 
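/* Fold a 16-bit HW counter (which wraps at 65535) into a 32-bit software
 * accumulator. The low 16 bits of *acc mirror the last HW reading, so a new
 * reading smaller than that implies a wrap and 65536 is added.
 * Worked example: *acc = 0x0001fff0 and val = 0x0005 is a wrap, giving
 * 0x00010000 + 0x0005 + 0x10000 = 0x00020005.
 */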
599 static void accumulate_16bit_val(u32 *acc, u16 val)
600 {
601 #define lo(x)			(x & 0xFFFF)
602 #define hi(x)			(x & 0xFFFF0000)
603 	bool wrapped = val < lo(*acc);
604 	u32 newacc = hi(*acc) + val;
605 
606 	if (wrapped)
607 		newacc += 65536;
608 	ACCESS_ONCE(*acc) = newacc;
609 }
610 
611 static void populate_erx_stats(struct be_adapter *adapter,
612 			       struct be_rx_obj *rxo, u32 erx_stat)
613 {
614 	if (!BEx_chip(adapter))
615 		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
616 	else
617 		/* the erx HW counter below can actually wrap around after
618 		 * 65535; the driver accumulates it into a 32-bit value
619 		 */
620 		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
621 				     (u16)erx_stat);
622 }
623 
624 void be_parse_stats(struct be_adapter *adapter)
625 {
626 	struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
627 	struct be_rx_obj *rxo;
628 	int i;
629 	u32 erx_stat;
630 
631 	if (lancer_chip(adapter)) {
632 		populate_lancer_stats(adapter);
633 	} else {
634 		if (BE2_chip(adapter))
635 			populate_be_v0_stats(adapter);
636 		else if (BE3_chip(adapter))
637 			/* for BE3 */
638 			populate_be_v1_stats(adapter);
639 		else
640 			populate_be_v2_stats(adapter);
641 
642 		/* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
643 		for_all_rx_queues(adapter, rxo, i) {
644 			erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
645 			populate_erx_stats(adapter, rxo, erx_stat);
646 		}
647 	}
648 }
649 
650 static void be_get_stats64(struct net_device *netdev,
651 			   struct rtnl_link_stats64 *stats)
652 {
653 	struct be_adapter *adapter = netdev_priv(netdev);
654 	struct be_drv_stats *drvs = &adapter->drv_stats;
655 	struct be_rx_obj *rxo;
656 	struct be_tx_obj *txo;
657 	u64 pkts, bytes;
658 	unsigned int start;
659 	int i;
660 
661 	for_all_rx_queues(adapter, rxo, i) {
662 		const struct be_rx_stats *rx_stats = rx_stats(rxo);
663 
664 		do {
665 			start = u64_stats_fetch_begin_irq(&rx_stats->sync);
666 			pkts = rx_stats(rxo)->rx_pkts;
667 			bytes = rx_stats(rxo)->rx_bytes;
668 		} while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
669 		stats->rx_packets += pkts;
670 		stats->rx_bytes += bytes;
671 		stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
672 		stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
673 					rx_stats(rxo)->rx_drops_no_frags;
674 	}
675 
676 	for_all_tx_queues(adapter, txo, i) {
677 		const struct be_tx_stats *tx_stats = tx_stats(txo);
678 
679 		do {
680 			start = u64_stats_fetch_begin_irq(&tx_stats->sync);
681 			pkts = tx_stats(txo)->tx_pkts;
682 			bytes = tx_stats(txo)->tx_bytes;
683 		} while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
684 		stats->tx_packets += pkts;
685 		stats->tx_bytes += bytes;
686 	}
687 
688 	/* bad pkts received */
689 	stats->rx_errors = drvs->rx_crc_errors +
690 		drvs->rx_alignment_symbol_errors +
691 		drvs->rx_in_range_errors +
692 		drvs->rx_out_range_errors +
693 		drvs->rx_frame_too_long +
694 		drvs->rx_dropped_too_small +
695 		drvs->rx_dropped_too_short +
696 		drvs->rx_dropped_header_too_small +
697 		drvs->rx_dropped_tcp_length +
698 		drvs->rx_dropped_runt;
699 
700 	/* detailed rx errors */
701 	stats->rx_length_errors = drvs->rx_in_range_errors +
702 		drvs->rx_out_range_errors +
703 		drvs->rx_frame_too_long;
704 
705 	stats->rx_crc_errors = drvs->rx_crc_errors;
706 
707 	/* frame alignment errors */
708 	stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
709 
710 	/* receiver fifo overrun */
711 	/* drops_no_pbuf is not per i/f, it's per BE card */
712 	stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
713 				drvs->rx_input_fifo_overflow_drop +
714 				drvs->rx_drops_no_pbuf;
715 }
716 
717 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
718 {
719 	struct net_device *netdev = adapter->netdev;
720 
721 	if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
722 		netif_carrier_off(netdev);
723 		adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
724 	}
725 
726 	if (link_status)
727 		netif_carrier_on(netdev);
728 	else
729 		netif_carrier_off(netdev);
730 
731 	netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
732 }
733 
734 static int be_gso_hdr_len(struct sk_buff *skb)
735 {
736 	if (skb->encapsulation)
737 		return skb_inner_transport_offset(skb) +
738 		       inner_tcp_hdrlen(skb);
739 	return skb_transport_offset(skb) + tcp_hdrlen(skb);
740 }
741 
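/* Tx stats accounting: for a GSO skb the HW transmits gso_segs segments,
 * each carrying its own copy of the L2-L4 headers, while skb->len counts
 * those headers only once. tx_bytes is therefore bumped by
 * be_gso_hdr_len() * (gso_segs - 1) to reflect the bytes actually sent.
 * Illustrative example: 3 segments with 66-byte headers add 132 bytes on
 * top of skb->len.
 */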
742 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
743 {
744 	struct be_tx_stats *stats = tx_stats(txo);
745 	u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
746 	/* Account for headers which get duplicated in TSO pkt */
747 	u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
748 
749 	u64_stats_update_begin(&stats->sync);
750 	stats->tx_reqs++;
751 	stats->tx_bytes += skb->len + dup_hdr_len;
752 	stats->tx_pkts += tx_pkts;
753 	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
754 		stats->tx_vxlan_offload_pkts += tx_pkts;
755 	u64_stats_update_end(&stats->sync);
756 }
757 
758 /* Returns number of WRBs needed for the skb */
759 static u32 skb_wrb_cnt(struct sk_buff *skb)
760 {
761 	/* +1 for the header wrb */
762 	return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
763 }
764 
765 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
766 {
767 	wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
768 	wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
769 	wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
770 	wrb->rsvd0 = 0;
771 }
772 
773 /* A dummy wrb is just all zeros. A separate routine is used for the dummy
774  * wrb to avoid the swap and shift/mask operations in wrb_fill().
775  */
776 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
777 {
778 	wrb->frag_pa_hi = 0;
779 	wrb->frag_pa_lo = 0;
780 	wrb->frag_len = 0;
781 	wrb->rsvd0 = 0;
782 }
783 
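/* Pick the VLAN tag to program into the Tx WRB. If the 802.1p priority
 * requested by the stack (top 3 bits of the tag) is not set in the priority
 * bitmap available to this function, those bits are replaced with the
 * FW-recommended priority bits. Illustrative example: tag 0x6005 carries
 * priority 3; if bit 3 of vlan_prio_bmap is clear, the tag becomes
 * 0x0005 | adapter->recommended_prio_bits.
 */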
784 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
785 				     struct sk_buff *skb)
786 {
787 	u8 vlan_prio;
788 	u16 vlan_tag;
789 
790 	vlan_tag = skb_vlan_tag_get(skb);
791 	vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
792 	/* If vlan priority provided by OS is NOT in available bmap */
793 	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
794 		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
795 				adapter->recommended_prio_bits;
796 
797 	return vlan_tag;
798 }
799 
800 /* Used only for IP tunnel packets */
801 static u16 skb_inner_ip_proto(struct sk_buff *skb)
802 {
803 	return (inner_ip_hdr(skb)->version == 4) ?
804 		inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
805 }
806 
807 static u16 skb_ip_proto(struct sk_buff *skb)
808 {
809 	return (ip_hdr(skb)->version == 4) ?
810 		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
811 }
812 
813 static inline bool be_is_txq_full(struct be_tx_obj *txo)
814 {
815 	return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
816 }
817 
818 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
819 {
820 	return atomic_read(&txo->q.used) < txo->q.len / 2;
821 }
822 
823 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
824 {
825 	return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
826 }
827 
828 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
829 				       struct sk_buff *skb,
830 				       struct be_wrb_params *wrb_params)
831 {
832 	u16 proto;
833 
834 	if (skb_is_gso(skb)) {
835 		BE_WRB_F_SET(wrb_params->features, LSO, 1);
836 		wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
837 		if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
838 			BE_WRB_F_SET(wrb_params->features, LSO6, 1);
839 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
840 		if (skb->encapsulation) {
841 			BE_WRB_F_SET(wrb_params->features, IPCS, 1);
842 			proto = skb_inner_ip_proto(skb);
843 		} else {
844 			proto = skb_ip_proto(skb);
845 		}
846 		if (proto == IPPROTO_TCP)
847 			BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
848 		else if (proto == IPPROTO_UDP)
849 			BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
850 	}
851 
852 	if (skb_vlan_tag_present(skb)) {
853 		BE_WRB_F_SET(wrb_params->features, VLAN, 1);
854 		wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
855 	}
856 
857 	BE_WRB_F_SET(wrb_params->features, CRC, 1);
858 }
859 
860 static void wrb_fill_hdr(struct be_adapter *adapter,
861 			 struct be_eth_hdr_wrb *hdr,
862 			 struct be_wrb_params *wrb_params,
863 			 struct sk_buff *skb)
864 {
865 	memset(hdr, 0, sizeof(*hdr));
866 
867 	SET_TX_WRB_HDR_BITS(crc, hdr,
868 			    BE_WRB_F_GET(wrb_params->features, CRC));
869 	SET_TX_WRB_HDR_BITS(ipcs, hdr,
870 			    BE_WRB_F_GET(wrb_params->features, IPCS));
871 	SET_TX_WRB_HDR_BITS(tcpcs, hdr,
872 			    BE_WRB_F_GET(wrb_params->features, TCPCS));
873 	SET_TX_WRB_HDR_BITS(udpcs, hdr,
874 			    BE_WRB_F_GET(wrb_params->features, UDPCS));
875 
876 	SET_TX_WRB_HDR_BITS(lso, hdr,
877 			    BE_WRB_F_GET(wrb_params->features, LSO));
878 	SET_TX_WRB_HDR_BITS(lso6, hdr,
879 			    BE_WRB_F_GET(wrb_params->features, LSO6));
880 	SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
881 
882 	/* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
883 	 * hack is not needed, the evt bit is set while ringing DB.
884 	 */
885 	SET_TX_WRB_HDR_BITS(event, hdr,
886 			    BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
887 	SET_TX_WRB_HDR_BITS(vlan, hdr,
888 			    BE_WRB_F_GET(wrb_params->features, VLAN));
889 	SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
890 
891 	SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
892 	SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
893 	SET_TX_WRB_HDR_BITS(mgmt, hdr,
894 			    BE_WRB_F_GET(wrb_params->features, OS2BMC));
895 }
896 
897 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
898 			  bool unmap_single)
899 {
900 	dma_addr_t dma;
901 	u32 frag_len = le32_to_cpu(wrb->frag_len);
902 
904 	dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
905 		(u64)le32_to_cpu(wrb->frag_pa_lo);
906 	if (frag_len) {
907 		if (unmap_single)
908 			dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
909 		else
910 			dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
911 	}
912 }
913 
914 /* Grab a WRB header for xmit */
915 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
916 {
917 	u32 head = txo->q.head;
918 
919 	queue_head_inc(&txo->q);
920 	return head;
921 }
922 
923 /* Set up the WRB header for xmit */
924 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
925 				struct be_tx_obj *txo,
926 				struct be_wrb_params *wrb_params,
927 				struct sk_buff *skb, u16 head)
928 {
929 	u32 num_frags = skb_wrb_cnt(skb);
930 	struct be_queue_info *txq = &txo->q;
931 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
932 
933 	wrb_fill_hdr(adapter, hdr, wrb_params, skb);
934 	be_dws_cpu_to_le(hdr, sizeof(*hdr));
935 
936 	BUG_ON(txo->sent_skb_list[head]);
937 	txo->sent_skb_list[head] = skb;
938 	txo->last_req_hdr = head;
939 	atomic_add(num_frags, &txq->used);
940 	txo->last_req_wrb_cnt = num_frags;
941 	txo->pend_wrb_cnt += num_frags;
942 }
943 
944 /* Setup a WRB fragment (buffer descriptor) for xmit */
945 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
946 				 int len)
947 {
948 	struct be_eth_wrb *wrb;
949 	struct be_queue_info *txq = &txo->q;
950 
951 	wrb = queue_head_node(txq);
952 	wrb_fill(wrb, busaddr, len);
953 	queue_head_inc(txq);
954 }
955 
956 /* Bring the queue back to the state it was in before be_xmit_enqueue()
957  * was invoked. The producer index is restored to the previous packet and the
958  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
959  */
960 static void be_xmit_restore(struct be_adapter *adapter,
961 			    struct be_tx_obj *txo, u32 head, bool map_single,
962 			    u32 copied)
963 {
964 	struct device *dev;
965 	struct be_eth_wrb *wrb;
966 	struct be_queue_info *txq = &txo->q;
967 
968 	dev = &adapter->pdev->dev;
969 	txq->head = head;
970 
971 	/* skip the first wrb (hdr); it's not mapped */
972 	queue_head_inc(txq);
973 	while (copied) {
974 		wrb = queue_head_node(txq);
975 		unmap_tx_frag(dev, wrb, map_single);
976 		map_single = false;
977 		copied -= le32_to_cpu(wrb->frag_len);
978 		queue_head_inc(txq);
979 	}
980 
981 	txq->head = head;
982 }
983 
984 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
985  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
986  * of WRBs used up by the packet.
987  */
988 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
989 			   struct sk_buff *skb,
990 			   struct be_wrb_params *wrb_params)
991 {
992 	u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
993 	struct device *dev = &adapter->pdev->dev;
994 	struct be_queue_info *txq = &txo->q;
995 	bool map_single = false;
996 	u32 head = txq->head;
997 	dma_addr_t busaddr;
998 	int len;
999 
1000 	head = be_tx_get_wrb_hdr(txo);
1001 
1002 	if (skb->len > skb->data_len) {
1003 		len = skb_headlen(skb);
1004 
1005 		busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1006 		if (dma_mapping_error(dev, busaddr))
1007 			goto dma_err;
1008 		map_single = true;
1009 		be_tx_setup_wrb_frag(txo, busaddr, len);
1010 		copied += len;
1011 	}
1012 
1013 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1014 		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1015 		len = skb_frag_size(frag);
1016 
1017 		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1018 		if (dma_mapping_error(dev, busaddr))
1019 			goto dma_err;
1020 		be_tx_setup_wrb_frag(txo, busaddr, len);
1021 		copied += len;
1022 	}
1023 
1024 	be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1025 
1026 	be_tx_stats_update(txo, skb);
1027 	return wrb_cnt;
1028 
1029 dma_err:
1030 	adapter->drv_stats.dma_map_errors++;
1031 	be_xmit_restore(adapter, txo, head, map_single, copied);
1032 	return 0;
1033 }
1034 
1035 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1036 {
1037 	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1038 }
1039 
1040 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1041 					     struct sk_buff *skb,
1042 					     struct be_wrb_params
1043 					     *wrb_params)
1044 {
1045 	u16 vlan_tag = 0;
1046 
1047 	skb = skb_share_check(skb, GFP_ATOMIC);
1048 	if (unlikely(!skb))
1049 		return skb;
1050 
1051 	if (skb_vlan_tag_present(skb))
1052 		vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1053 
1054 	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1055 		if (!vlan_tag)
1056 			vlan_tag = adapter->pvid;
1057 		/* F/W workaround: setting skip_hw_vlan = 1 informs the F/W to
1058 		 * skip VLAN insertion
1059 		 */
1060 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1061 	}
1062 
1063 	if (vlan_tag) {
1064 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1065 						vlan_tag);
1066 		if (unlikely(!skb))
1067 			return skb;
1068 		skb->vlan_tci = 0;
1069 	}
1070 
1071 	/* Insert the outer VLAN, if any */
1072 	if (adapter->qnq_vid) {
1073 		vlan_tag = adapter->qnq_vid;
1074 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1075 						vlan_tag);
1076 		if (unlikely(!skb))
1077 			return skb;
1078 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1079 	}
1080 
1081 	return skb;
1082 }
1083 
1084 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1085 {
1086 	struct ethhdr *eh = (struct ethhdr *)skb->data;
1087 	u16 offset = ETH_HLEN;
1088 
1089 	if (eh->h_proto == htons(ETH_P_IPV6)) {
1090 		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1091 
1092 		offset += sizeof(struct ipv6hdr);
1093 		if (ip6h->nexthdr != NEXTHDR_TCP &&
1094 		    ip6h->nexthdr != NEXTHDR_UDP) {
1095 			struct ipv6_opt_hdr *ehdr =
1096 				(struct ipv6_opt_hdr *)(skb->data + offset);
1097 
1098 			/* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1099 			if (ehdr->hdrlen == 0xff)
1100 				return true;
1101 		}
1102 	}
1103 	return false;
1104 }
1105 
1106 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1107 {
1108 	return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1109 }
1110 
1111 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1112 {
1113 	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1114 }
1115 
1116 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1117 						  struct sk_buff *skb,
1118 						  struct be_wrb_params
1119 						  *wrb_params)
1120 {
1121 	struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1122 	unsigned int eth_hdr_len;
1123 	struct iphdr *ip;
1124 
1125 	/* For padded packets, BE HW modifies the tot_len field in the IP header
1126 	 * incorrectly when the VLAN tag is inserted by HW.
1127 	 * For padded packets, Lancer computes incorrect checksum.
1128 	 */
1129 	eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1130 						VLAN_ETH_HLEN : ETH_HLEN;
1131 	if (skb->len <= 60 &&
1132 	    (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1133 	    is_ipv4_pkt(skb)) {
1134 		ip = (struct iphdr *)ip_hdr(skb);
1135 		pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1136 	}
1137 
1138 	/* If vlan tag is already inlined in the packet, skip HW VLAN
1139 	 * tagging in pvid-tagging mode
1140 	 */
1141 	if (be_pvid_tagging_enabled(adapter) &&
1142 	    veh->h_vlan_proto == htons(ETH_P_8021Q))
1143 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1144 
1145 	/* HW has a bug wherein it will calculate CSUM for VLAN
1146 	 * pkts even when CSUM offload is not requested.
1147 	 * Manually insert the VLAN in the pkt.
1148 	 */
1149 	if (skb->ip_summed != CHECKSUM_PARTIAL &&
1150 	    skb_vlan_tag_present(skb)) {
1151 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1152 		if (unlikely(!skb))
1153 			goto err;
1154 	}
1155 
1156 	/* HW may lock up when VLAN HW tagging is requested on
1157 	 * certain ipv6 packets. Drop such pkts if the HW workaround to
1158 	 * skip HW tagging is not enabled by FW.
1159 	 */
1160 	if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1161 		     (adapter->pvid || adapter->qnq_vid) &&
1162 		     !qnq_async_evt_rcvd(adapter)))
1163 		goto tx_drop;
1164 
1165 	/* Manual VLAN tag insertion to prevent an ASIC lockup that
1166 	 * occurs when the ASIC inserts a VLAN tag into certain
1167 	 * ipv6 packets. Insert the VLAN tag in the driver and set
1168 	 * the event, completion and vlan bits accordingly in the
1169 	 * Tx WRB.
1170 	 */
1171 	if (be_ipv6_tx_stall_chk(adapter, skb) &&
1172 	    be_vlan_tag_tx_chk(adapter, skb)) {
1173 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1174 		if (unlikely(!skb))
1175 			goto err;
1176 	}
1177 
1178 	return skb;
1179 tx_drop:
1180 	dev_kfree_skb_any(skb);
1181 err:
1182 	return NULL;
1183 }
1184 
1185 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1186 					   struct sk_buff *skb,
1187 					   struct be_wrb_params *wrb_params)
1188 {
1189 	int err;
1190 
1191 	/* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1192 	 * packets that are 32 bytes or less may cause a transmit stall
1193 	 * on that port. The workaround is to pad such packets
1194 	 * (len <= 32 bytes) to a minimum length of 36 bytes.
1195 	 */
1196 	if (skb->len <= 32) {
1197 		if (skb_put_padto(skb, 36))
1198 			return NULL;
1199 	}
1200 
1201 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
1202 		skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1203 		if (!skb)
1204 			return NULL;
1205 	}
1206 
1207 	/* The stack can send us skbs with length greater than
1208 	 * what the HW can handle. Trim the extra bytes.
1209 	 */
1210 	WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1211 	err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1212 	WARN_ON(err);
1213 
1214 	return skb;
1215 }
1216 
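/* Ring the Tx doorbell for all WRBs pending on this queue. The last request
 * is marked eventable/completable so a Tx completion is raised for it. On
 * BE chips (not Lancer), an odd pending-WRB count is padded with an all-zero
 * dummy WRB and the num_wrb field of the last header WRB is bumped to cover
 * it, so the count notified to HW is always even.
 */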
1217 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1218 {
1219 	struct be_queue_info *txq = &txo->q;
1220 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1221 
1222 	/* Mark the last request eventable if it hasn't been marked already */
1223 	if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1224 		hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1225 
1226 	/* compose a dummy wrb if there is an odd number of wrbs to notify */
1227 	if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1228 		wrb_fill_dummy(queue_head_node(txq));
1229 		queue_head_inc(txq);
1230 		atomic_inc(&txq->used);
1231 		txo->pend_wrb_cnt++;
1232 		hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1233 					   TX_HDR_WRB_NUM_SHIFT);
1234 		hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1235 					  TX_HDR_WRB_NUM_SHIFT);
1236 	}
1237 	be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1238 	txo->pend_wrb_cnt = 0;
1239 }
1240 
1241 /* OS2BMC related */
1242 
1243 #define DHCP_CLIENT_PORT	68
1244 #define DHCP_SERVER_PORT	67
1245 #define NET_BIOS_PORT1		137
1246 #define NET_BIOS_PORT2		138
1247 #define DHCPV6_RAS_PORT		547
1248 
1249 #define is_mc_allowed_on_bmc(adapter, eh)	\
1250 	(!is_multicast_filt_enabled(adapter) &&	\
1251 	 is_multicast_ether_addr(eh->h_dest) &&	\
1252 	 !is_broadcast_ether_addr(eh->h_dest))
1253 
1254 #define is_bc_allowed_on_bmc(adapter, eh)	\
1255 	(!is_broadcast_filt_enabled(adapter) &&	\
1256 	 is_broadcast_ether_addr(eh->h_dest))
1257 
1258 #define is_arp_allowed_on_bmc(adapter, skb)	\
1259 	(is_arp(skb) && is_arp_filt_enabled(adapter))
1260 
1261 #define is_broadcast_packet(eh, adapter)	\
1262 		(is_multicast_ether_addr(eh->h_dest) && \
1263 		!compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1264 
1265 #define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))
1266 
1267 #define is_arp_filt_enabled(adapter)	\
1268 		(adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1269 
1270 #define is_dhcp_client_filt_enabled(adapter)	\
1271 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1272 
1273 #define is_dhcp_srvr_filt_enabled(adapter)	\
1274 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1275 
1276 #define is_nbios_filt_enabled(adapter)	\
1277 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1278 
1279 #define is_ipv6_na_filt_enabled(adapter)	\
1280 		(adapter->bmc_filt_mask &	\
1281 			BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1282 
1283 #define is_ipv6_ra_filt_enabled(adapter)	\
1284 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1285 
1286 #define is_ipv6_ras_filt_enabled(adapter)	\
1287 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1288 
1289 #define is_broadcast_filt_enabled(adapter)	\
1290 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1291 
1292 #define is_multicast_filt_enabled(adapter)	\
1293 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1294 
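/* Decide whether a copy of this Tx packet must also be sent to the BMC
 * (OS2BMC). Only multicast/broadcast frames are candidates; the decision is
 * based on adapter->bmc_filt_mask (ARP, DHCP, NetBIOS, IPv6 RA/NA, etc.).
 * When a copy is needed and the packet is VLAN-tagged, the tag is inserted
 * inline since the ASIC expects it in the packet for BMC-bound frames.
 */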
1295 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1296 			       struct sk_buff **skb)
1297 {
1298 	struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1299 	bool os2bmc = false;
1300 
1301 	if (!be_is_os2bmc_enabled(adapter))
1302 		goto done;
1303 
1304 	if (!is_multicast_ether_addr(eh->h_dest))
1305 		goto done;
1306 
1307 	if (is_mc_allowed_on_bmc(adapter, eh) ||
1308 	    is_bc_allowed_on_bmc(adapter, eh) ||
1309 	    is_arp_allowed_on_bmc(adapter, (*skb))) {
1310 		os2bmc = true;
1311 		goto done;
1312 	}
1313 
1314 	if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1315 		struct ipv6hdr *hdr = ipv6_hdr((*skb));
1316 		u8 nexthdr = hdr->nexthdr;
1317 
1318 		if (nexthdr == IPPROTO_ICMPV6) {
1319 			struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1320 
1321 			switch (icmp6->icmp6_type) {
1322 			case NDISC_ROUTER_ADVERTISEMENT:
1323 				os2bmc = is_ipv6_ra_filt_enabled(adapter);
1324 				goto done;
1325 			case NDISC_NEIGHBOUR_ADVERTISEMENT:
1326 				os2bmc = is_ipv6_na_filt_enabled(adapter);
1327 				goto done;
1328 			default:
1329 				break;
1330 			}
1331 		}
1332 	}
1333 
1334 	if (is_udp_pkt((*skb))) {
1335 		struct udphdr *udp = udp_hdr((*skb));
1336 
1337 		switch (ntohs(udp->dest)) {
1338 		case DHCP_CLIENT_PORT:
1339 			os2bmc = is_dhcp_client_filt_enabled(adapter);
1340 			goto done;
1341 		case DHCP_SERVER_PORT:
1342 			os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1343 			goto done;
1344 		case NET_BIOS_PORT1:
1345 		case NET_BIOS_PORT2:
1346 			os2bmc = is_nbios_filt_enabled(adapter);
1347 			goto done;
1348 		case DHCPV6_RAS_PORT:
1349 			os2bmc = is_ipv6_ras_filt_enabled(adapter);
1350 			goto done;
1351 		default:
1352 			break;
1353 		}
1354 	}
1355 done:
1356 	/* For vlan packets that are destined to the BMC,
1357 	 * the asic expects the vlan tag to be inline in the packet.
1358 	 */
1359 	if (os2bmc)
1360 		*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1361 
1362 	return os2bmc;
1363 }
1364 
1365 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1366 {
1367 	struct be_adapter *adapter = netdev_priv(netdev);
1368 	u16 q_idx = skb_get_queue_mapping(skb);
1369 	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1370 	struct be_wrb_params wrb_params = { 0 };
1371 	bool flush = !skb->xmit_more;
1372 	u16 wrb_cnt;
1373 
1374 	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1375 	if (unlikely(!skb))
1376 		goto drop;
1377 
1378 	be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1379 
1380 	wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1381 	if (unlikely(!wrb_cnt)) {
1382 		dev_kfree_skb_any(skb);
1383 		goto drop;
1384 	}
1385 
1386 	/* if os2bmc is enabled and if the pkt is destined to bmc,
1387 	 * enqueue the pkt a 2nd time with mgmt bit set.
1388 	 */
1389 	if (be_send_pkt_to_bmc(adapter, &skb)) {
1390 		BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1391 		wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1392 		if (unlikely(!wrb_cnt))
1393 			goto drop;
1394 		else
1395 			skb_get(skb);
1396 	}
1397 
1398 	if (be_is_txq_full(txo)) {
1399 		netif_stop_subqueue(netdev, q_idx);
1400 		tx_stats(txo)->tx_stops++;
1401 	}
1402 
1403 	if (flush || __netif_subqueue_stopped(netdev, q_idx))
1404 		be_xmit_flush(adapter, txo);
1405 
1406 	return NETDEV_TX_OK;
1407 drop:
1408 	tx_stats(txo)->tx_drv_drops++;
1409 	/* Flush the already enqueued tx requests */
1410 	if (flush && txo->pend_wrb_cnt)
1411 		be_xmit_flush(adapter, txo);
1412 
1413 	return NETDEV_TX_OK;
1414 }
1415 
1416 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1417 {
1418 	return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1419 			BE_IF_FLAGS_ALL_PROMISCUOUS;
1420 }
1421 
1422 static int be_set_vlan_promisc(struct be_adapter *adapter)
1423 {
1424 	struct device *dev = &adapter->pdev->dev;
1425 	int status;
1426 
1427 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1428 		return 0;
1429 
1430 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1431 	if (!status) {
1432 		dev_info(dev, "Enabled VLAN promiscuous mode\n");
1433 		adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1434 	} else {
1435 		dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1436 	}
1437 	return status;
1438 }
1439 
1440 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1441 {
1442 	struct device *dev = &adapter->pdev->dev;
1443 	int status;
1444 
1445 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1446 	if (!status) {
1447 		dev_info(dev, "Disabling VLAN promiscuous mode\n");
1448 		adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1449 	}
1450 	return status;
1451 }
1452 
1453 /*
1454  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1455  * If the user configures more, place BE in vlan promiscuous mode.
1456  */
1457 static int be_vid_config(struct be_adapter *adapter)
1458 {
1459 	struct device *dev = &adapter->pdev->dev;
1460 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1461 	u16 num = 0, i = 0;
1462 	int status = 0;
1463 
1464 	/* No need to change the VLAN state if the I/F is in promiscuous */
1465 	if (adapter->netdev->flags & IFF_PROMISC)
1466 		return 0;
1467 
1468 	if (adapter->vlans_added > be_max_vlans(adapter))
1469 		return be_set_vlan_promisc(adapter);
1470 
1471 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1472 		status = be_clear_vlan_promisc(adapter);
1473 		if (status)
1474 			return status;
1475 	}
1476 	/* Construct VLAN Table to give to HW */
1477 	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1478 		vids[num++] = cpu_to_le16(i);
1479 
1480 	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1481 	if (status) {
1482 		dev_err(dev, "Setting HW VLAN filtering failed\n");
1483 		/* Set to VLAN promisc mode as setting VLAN filter failed */
1484 		if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1485 		    addl_status(status) ==
1486 				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1487 			return be_set_vlan_promisc(adapter);
1488 	}
1489 	return status;
1490 }
1491 
1492 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1493 {
1494 	struct be_adapter *adapter = netdev_priv(netdev);
1495 	int status = 0;
1496 
1497 	mutex_lock(&adapter->rx_filter_lock);
1498 
1499 	/* Packets with VID 0 are always received by Lancer by default */
1500 	if (lancer_chip(adapter) && vid == 0)
1501 		goto done;
1502 
1503 	if (test_bit(vid, adapter->vids))
1504 		goto done;
1505 
1506 	set_bit(vid, adapter->vids);
1507 	adapter->vlans_added++;
1508 
1509 	status = be_vid_config(adapter);
1510 done:
1511 	mutex_unlock(&adapter->rx_filter_lock);
1512 	return status;
1513 }
1514 
1515 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1516 {
1517 	struct be_adapter *adapter = netdev_priv(netdev);
1518 	int status = 0;
1519 
1520 	mutex_lock(&adapter->rx_filter_lock);
1521 
1522 	/* Packets with VID 0 are always received by Lancer by default */
1523 	if (lancer_chip(adapter) && vid == 0)
1524 		goto done;
1525 
1526 	if (!test_bit(vid, adapter->vids))
1527 		goto done;
1528 
1529 	clear_bit(vid, adapter->vids);
1530 	adapter->vlans_added--;
1531 
1532 	status = be_vid_config(adapter);
1533 done:
1534 	mutex_unlock(&adapter->rx_filter_lock);
1535 	return status;
1536 }
1537 
1538 static void be_set_all_promisc(struct be_adapter *adapter)
1539 {
1540 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1541 	adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1542 }
1543 
1544 static void be_set_mc_promisc(struct be_adapter *adapter)
1545 {
1546 	int status;
1547 
1548 	if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1549 		return;
1550 
1551 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1552 	if (!status)
1553 		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1554 }
1555 
1556 static void be_set_uc_promisc(struct be_adapter *adapter)
1557 {
1558 	int status;
1559 
1560 	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1561 		return;
1562 
1563 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1564 	if (!status)
1565 		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1566 }
1567 
1568 static void be_clear_uc_promisc(struct be_adapter *adapter)
1569 {
1570 	int status;
1571 
1572 	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1573 		return;
1574 
1575 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1576 	if (!status)
1577 		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1578 }
1579 
1580 /* The below 2 functions are the callbacks for __dev_mc_sync()/__dev_uc_sync().
1581  * The same function is used for both sync and unsync. Addresses are not really
1582  * added/removed through these callbacks; they are only used to detect changes
1583  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1584  */
1585 static int be_uc_list_update(struct net_device *netdev,
1586 			     const unsigned char *addr)
1587 {
1588 	struct be_adapter *adapter = netdev_priv(netdev);
1589 
1590 	adapter->update_uc_list = true;
1591 	return 0;
1592 }
1593 
1594 static int be_mc_list_update(struct net_device *netdev,
1595 			     const unsigned char *addr)
1596 {
1597 	struct be_adapter *adapter = netdev_priv(netdev);
1598 
1599 	adapter->update_mc_list = true;
1600 	return 0;
1601 }
1602 
1603 static void be_set_mc_list(struct be_adapter *adapter)
1604 {
1605 	struct net_device *netdev = adapter->netdev;
1606 	struct netdev_hw_addr *ha;
1607 	bool mc_promisc = false;
1608 	int status;
1609 
1610 	netif_addr_lock_bh(netdev);
1611 	__dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1612 
1613 	if (netdev->flags & IFF_PROMISC) {
1614 		adapter->update_mc_list = false;
1615 	} else if (netdev->flags & IFF_ALLMULTI ||
1616 		   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1617 		/* Enable multicast promisc if num configured exceeds
1618 		 * what we support
1619 		 */
1620 		mc_promisc = true;
1621 		adapter->update_mc_list = false;
1622 	} else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1623 		/* Update mc-list unconditionally if the iface was previously
1624 		 * in mc-promisc mode and now is out of that mode.
1625 		 */
1626 		adapter->update_mc_list = true;
1627 	}
1628 
1629 	if (adapter->update_mc_list) {
1630 		int i = 0;
1631 
1632 		/* cache the mc-list in adapter */
1633 		netdev_for_each_mc_addr(ha, netdev) {
1634 			ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1635 			i++;
1636 		}
1637 		adapter->mc_count = netdev_mc_count(netdev);
1638 	}
1639 	netif_addr_unlock_bh(netdev);
1640 
1641 	if (mc_promisc) {
1642 		be_set_mc_promisc(adapter);
1643 	} else if (adapter->update_mc_list) {
1644 		status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1645 		if (!status)
1646 			adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1647 		else
1648 			be_set_mc_promisc(adapter);
1649 
1650 		adapter->update_mc_list = false;
1651 	}
1652 }
1653 
1654 static void be_clear_mc_list(struct be_adapter *adapter)
1655 {
1656 	struct net_device *netdev = adapter->netdev;
1657 
1658 	__dev_mc_unsync(netdev, NULL);
1659 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1660 	adapter->mc_count = 0;
1661 }
1662 
1663 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1664 {
1665 	if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1666 		adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1667 		return 0;
1668 	}
1669 
1670 	return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1671 			       adapter->if_handle,
1672 			       &adapter->pmac_id[uc_idx + 1], 0);
1673 }
1674 
1675 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1676 {
1677 	if (pmac_id == adapter->pmac_id[0])
1678 		return;
1679 
1680 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1681 }
1682 
1683 static void be_set_uc_list(struct be_adapter *adapter)
1684 {
1685 	struct net_device *netdev = adapter->netdev;
1686 	struct netdev_hw_addr *ha;
1687 	bool uc_promisc = false;
1688 	int curr_uc_macs = 0, i;
1689 
1690 	netif_addr_lock_bh(netdev);
1691 	__dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1692 
1693 	if (netdev->flags & IFF_PROMISC) {
1694 		adapter->update_uc_list = false;
1695 	} else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1696 		uc_promisc = true;
1697 		adapter->update_uc_list = false;
1698 	}  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1699 		/* Update uc-list unconditionally if the iface was previously
1700 		 * in uc-promisc mode and now is out of that mode.
1701 		 */
1702 		adapter->update_uc_list = true;
1703 	}
1704 
1705 	if (adapter->update_uc_list) {
1706 		/* cache the uc-list in adapter array */
1707 		i = 0;
1708 		netdev_for_each_uc_addr(ha, netdev) {
1709 			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1710 			i++;
1711 		}
1712 		curr_uc_macs = netdev_uc_count(netdev);
1713 	}
1714 	netif_addr_unlock_bh(netdev);
1715 
1716 	if (uc_promisc) {
1717 		be_set_uc_promisc(adapter);
1718 	} else if (adapter->update_uc_list) {
1719 		be_clear_uc_promisc(adapter);
1720 
1721 		for (i = 0; i < adapter->uc_macs; i++)
1722 			be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1723 
1724 		for (i = 0; i < curr_uc_macs; i++)
1725 			be_uc_mac_add(adapter, i);
1726 		adapter->uc_macs = curr_uc_macs;
1727 		adapter->update_uc_list = false;
1728 	}
1729 }
1730 
1731 static void be_clear_uc_list(struct be_adapter *adapter)
1732 {
1733 	struct net_device *netdev = adapter->netdev;
1734 	int i;
1735 
1736 	__dev_uc_unsync(netdev, NULL);
1737 	for (i = 0; i < adapter->uc_macs; i++)
1738 		be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1739 
1740 	adapter->uc_macs = 0;
1741 }
1742 
1743 static void __be_set_rx_mode(struct be_adapter *adapter)
1744 {
1745 	struct net_device *netdev = adapter->netdev;
1746 
1747 	mutex_lock(&adapter->rx_filter_lock);
1748 
1749 	if (netdev->flags & IFF_PROMISC) {
1750 		if (!be_in_all_promisc(adapter))
1751 			be_set_all_promisc(adapter);
1752 	} else if (be_in_all_promisc(adapter)) {
1753 		/* We need to re-program the vlan-list or clear
1754 		 * vlan-promisc mode (if needed) when the interface
1755 		 * comes out of promisc mode.
1756 		 */
1757 		be_vid_config(adapter);
1758 	}
1759 
1760 	be_set_uc_list(adapter);
1761 	be_set_mc_list(adapter);
1762 
1763 	mutex_unlock(&adapter->rx_filter_lock);
1764 }
1765 
1766 static void be_work_set_rx_mode(struct work_struct *work)
1767 {
1768 	struct be_cmd_work *cmd_work =
1769 				container_of(work, struct be_cmd_work, work);
1770 
1771 	__be_set_rx_mode(cmd_work->adapter);
1772 	kfree(cmd_work);
1773 }
1774 
1775 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1776 {
1777 	struct be_adapter *adapter = netdev_priv(netdev);
1778 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1779 	int status;
1780 
1781 	if (!sriov_enabled(adapter))
1782 		return -EPERM;
1783 
1784 	if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1785 		return -EINVAL;
1786 
1787 	/* Proceed further only if user provided MAC is different
1788 	 * from active MAC
1789 	 */
1790 	if (ether_addr_equal(mac, vf_cfg->mac_addr))
1791 		return 0;
1792 
1793 	if (BEx_chip(adapter)) {
1794 		be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1795 				vf + 1);
1796 
1797 		status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1798 					 &vf_cfg->pmac_id, vf + 1);
1799 	} else {
1800 		status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1801 					vf + 1);
1802 	}
1803 
1804 	if (status) {
1805 		dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d failed: %#x\n",
1806 			mac, vf, status);
1807 		return be_cmd_status(status);
1808 	}
1809 
1810 	ether_addr_copy(vf_cfg->mac_addr, mac);
1811 
1812 	return 0;
1813 }
1814 
1815 static int be_get_vf_config(struct net_device *netdev, int vf,
1816 			    struct ifla_vf_info *vi)
1817 {
1818 	struct be_adapter *adapter = netdev_priv(netdev);
1819 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1820 
1821 	if (!sriov_enabled(adapter))
1822 		return -EPERM;
1823 
1824 	if (vf >= adapter->num_vfs)
1825 		return -EINVAL;
1826 
1827 	vi->vf = vf;
1828 	vi->max_tx_rate = vf_cfg->tx_rate;
1829 	vi->min_tx_rate = 0;
1830 	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1831 	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1832 	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1833 	vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1834 	vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1835 
1836 	return 0;
1837 }
1838 
1839 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1840 {
1841 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1842 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1843 	int vf_if_id = vf_cfg->if_handle;
1844 	int status;
1845 
1846 	/* Enable Transparent VLAN Tagging */
1847 	status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1848 	if (status)
1849 		return status;
1850 
1851 	/* Clear any pre-programmed VLAN filters on the VF once TVT is enabled */
1852 	vids[0] = 0;
1853 	status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1854 	if (!status)
1855 		dev_info(&adapter->pdev->dev,
1856 			 "Cleared guest VLANs on VF%d\n", vf);
1857 
1858 	/* After TVT is enabled, do not allow the VF to program VLAN filters */
1859 	if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1860 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1861 						  ~BE_PRIV_FILTMGMT, vf + 1);
1862 		if (!status)
1863 			vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1864 	}
1865 	return 0;
1866 }
1867 
1868 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1869 {
1870 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1871 	struct device *dev = &adapter->pdev->dev;
1872 	int status;
1873 
1874 	/* Reset Transparent VLAN Tagging. */
1875 	status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1876 				       vf_cfg->if_handle, 0, 0);
1877 	if (status)
1878 		return status;
1879 
1880 	/* Allow VFs to program VLAN filtering */
1881 	if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1882 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1883 						  BE_PRIV_FILTMGMT, vf + 1);
1884 		if (!status) {
1885 			vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1886 			dev_info(dev, "VF%d: FILTMGMT priv enabled\n", vf);
1887 		}
1888 	}
1889 
1890 	dev_info(dev,
1891 		 "Disable/re-enable i/f in VM to clear Transparent VLAN tag\n");
1892 	return 0;
1893 }
1894 
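/* Set (or clear) a transparent port VLAN on a VF: a non-zero vid/qos pair
 * enables transparent tagging with the tag encoded as
 * (qos << VLAN_PRIO_SHIFT) | vid; passing zero for both disables it again.
 */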
1895 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1896 			  __be16 vlan_proto)
1897 {
1898 	struct be_adapter *adapter = netdev_priv(netdev);
1899 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1900 	int status;
1901 
1902 	if (!sriov_enabled(adapter))
1903 		return -EPERM;
1904 
1905 	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1906 		return -EINVAL;
1907 
1908 	if (vlan_proto != htons(ETH_P_8021Q))
1909 		return -EPROTONOSUPPORT;
1910 
1911 	if (vlan || qos) {
1912 		vlan |= qos << VLAN_PRIO_SHIFT;
1913 		status = be_set_vf_tvt(adapter, vf, vlan);
1914 	} else {
1915 		status = be_clear_vf_tvt(adapter, vf);
1916 	}
1917 
1918 	if (status) {
1919 		dev_err(&adapter->pdev->dev,
1920 			"VLAN %d config on VF %d failed: %#x\n", vlan, vf,
1921 			status);
1922 		return be_cmd_status(status);
1923 	}
1924 
1925 	vf_cfg->vlan_tag = vlan;
1926 	return 0;
1927 }
1928 
1929 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1930 			     int min_tx_rate, int max_tx_rate)
1931 {
1932 	struct be_adapter *adapter = netdev_priv(netdev);
1933 	struct device *dev = &adapter->pdev->dev;
1934 	int percent_rate, status = 0;
1935 	u16 link_speed = 0;
1936 	u8 link_status;
1937 
1938 	if (!sriov_enabled(adapter))
1939 		return -EPERM;
1940 
1941 	if (vf >= adapter->num_vfs)
1942 		return -EINVAL;
1943 
1944 	if (min_tx_rate)
1945 		return -EINVAL;
1946 
1947 	if (!max_tx_rate)
1948 		goto config_qos;
1949 
1950 	status = be_cmd_link_status_query(adapter, &link_speed,
1951 					  &link_status, 0);
1952 	if (status)
1953 		goto err;
1954 
1955 	if (!link_status) {
1956 		dev_err(dev, "TX-rate setting not allowed when link is down\n");
1957 		status = -ENETDOWN;
1958 		goto err;
1959 	}
1960 
1961 	if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1962 		dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1963 			link_speed);
1964 		status = -EINVAL;
1965 		goto err;
1966 	}
1967 
1968 	/* On Skyhawk the QOS setting must be done only as a % value */
1969 	percent_rate = link_speed / 100;
1970 	if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1971 		dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1972 			percent_rate);
1973 		status = -EINVAL;
1974 		goto err;
1975 	}
1976 
1977 config_qos:
1978 	status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1979 	if (status)
1980 		goto err;
1981 
1982 	adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1983 	return 0;
1984 
1985 err:
1986 	dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1987 		max_tx_rate, vf);
1988 	return be_cmd_status(status);
1989 }
1990 
1991 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1992 				int link_state)
1993 {
1994 	struct be_adapter *adapter = netdev_priv(netdev);
1995 	int status;
1996 
1997 	if (!sriov_enabled(adapter))
1998 		return -EPERM;
1999 
2000 	if (vf >= adapter->num_vfs)
2001 		return -EINVAL;
2002 
2003 	status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
2004 	if (status) {
2005 		dev_err(&adapter->pdev->dev,
2006 			"Link state change on VF %d failed: %#x\n", vf, status);
2007 		return be_cmd_status(status);
2008 	}
2009 
2010 	adapter->vf_cfg[vf].plink_tracking = link_state;
2011 
2012 	return 0;
2013 }
2014 
2015 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2016 {
2017 	struct be_adapter *adapter = netdev_priv(netdev);
2018 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2019 	u8 spoofchk;
2020 	int status;
2021 
2022 	if (!sriov_enabled(adapter))
2023 		return -EPERM;
2024 
2025 	if (vf >= adapter->num_vfs)
2026 		return -EINVAL;
2027 
2028 	if (BEx_chip(adapter))
2029 		return -EOPNOTSUPP;
2030 
2031 	if (enable == vf_cfg->spoofchk)
2032 		return 0;
2033 
2034 	spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2035 
2036 	status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2037 				       0, spoofchk);
2038 	if (status) {
2039 		dev_err(&adapter->pdev->dev,
2040 			"Spoofchk change on VF %d failed: %#x\n", vf, status);
2041 		return be_cmd_status(status);
2042 	}
2043 
2044 	vf_cfg->spoofchk = enable;
2045 	return 0;
2046 }
2047 
2048 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2049 			  ulong now)
2050 {
2051 	aic->rx_pkts_prev = rx_pkts;
2052 	aic->tx_reqs_prev = tx_pkts;
2053 	aic->jiffies = now;
2054 }
2055 
2056 static int be_get_new_eqd(struct be_eq_obj *eqo)
2057 {
2058 	struct be_adapter *adapter = eqo->adapter;
2059 	int eqd, start;
2060 	struct be_aic_obj *aic;
2061 	struct be_rx_obj *rxo;
2062 	struct be_tx_obj *txo;
2063 	u64 rx_pkts = 0, tx_pkts = 0;
2064 	ulong now;
2065 	u32 pps, delta;
2066 	int i;
2067 
2068 	aic = &adapter->aic_obj[eqo->idx];
2069 	if (!aic->enable) {
2070 		if (aic->jiffies)
2071 			aic->jiffies = 0;
2072 		eqd = aic->et_eqd;
2073 		return eqd;
2074 	}
2075 
2076 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2077 		do {
2078 			start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2079 			rx_pkts += rxo->stats.rx_pkts;
2080 		} while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2081 	}
2082 
2083 	for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2084 		do {
2085 			start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2086 			tx_pkts += txo->stats.tx_reqs;
2087 		} while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2088 	}
2089 
2090 	/* Skip if the counters wrapped around or this is the first calculation */
2091 	now = jiffies;
2092 	if (!aic->jiffies || time_before(now, aic->jiffies) ||
2093 	    rx_pkts < aic->rx_pkts_prev ||
2094 	    tx_pkts < aic->tx_reqs_prev) {
2095 		be_aic_update(aic, rx_pkts, tx_pkts, now);
2096 		return aic->prev_eqd;
2097 	}
2098 
2099 	delta = jiffies_to_msecs(now - aic->jiffies);
2100 	if (delta == 0)
2101 		return aic->prev_eqd;
2102 
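	/* Derive a new event-queue delay (eqd) from the aggregate packet rate
	 * since the last sample: roughly 4 per 15K pkts/sec, e.g. ~60K pps
	 * maps to an eqd of 16.  Rates below ~30K pps (eqd < 8) disable
	 * moderation entirely; the result is clamped to the
	 * [min_eqd, max_eqd] range below.
	 */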
2103 	pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2104 		(((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2105 	eqd = (pps / 15000) << 2;
2106 
2107 	if (eqd < 8)
2108 		eqd = 0;
2109 	eqd = min_t(u32, eqd, aic->max_eqd);
2110 	eqd = max_t(u32, eqd, aic->min_eqd);
2111 
2112 	be_aic_update(aic, rx_pkts, tx_pkts, now);
2113 
2114 	return eqd;
2115 }
2116 
2117 /* For Skyhawk-R only */
2118 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2119 {
2120 	struct be_adapter *adapter = eqo->adapter;
2121 	struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2122 	ulong now = jiffies;
2123 	int eqd;
2124 	u32 mult_enc;
2125 
2126 	if (!aic->enable)
2127 		return 0;
2128 
2129 	if (jiffies_to_msecs(now - aic->jiffies) < 1)
2130 		eqd = aic->prev_eqd;
2131 	else
2132 		eqd = be_get_new_eqd(eqo);
2133 
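	/* Map the delay into one of the four coarse delay-multiplier
	 * encodings that Skyhawk-R's EQ doorbell accepts when re-arming the
	 * EQ (see the be_eq_notify() call in be_poll() passing mult_enc).
	 */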
2134 	if (eqd > 100)
2135 		mult_enc = R2I_DLY_ENC_1;
2136 	else if (eqd > 60)
2137 		mult_enc = R2I_DLY_ENC_2;
2138 	else if (eqd > 20)
2139 		mult_enc = R2I_DLY_ENC_3;
2140 	else
2141 		mult_enc = R2I_DLY_ENC_0;
2142 
2143 	aic->prev_eqd = eqd;
2144 
2145 	return mult_enc;
2146 }
2147 
2148 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2149 {
2150 	struct be_set_eqd set_eqd[MAX_EVT_QS];
2151 	struct be_aic_obj *aic;
2152 	struct be_eq_obj *eqo;
2153 	int i, num = 0, eqd;
2154 
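	/* Recompute the delay for every EQ, but batch only those whose value
	 * actually changed (or all of them on force_update) into a single
	 * be_cmd_modify_eqd() call to keep FW-command traffic low.
	 */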
2155 	for_all_evt_queues(adapter, eqo, i) {
2156 		aic = &adapter->aic_obj[eqo->idx];
2157 		eqd = be_get_new_eqd(eqo);
2158 		if (force_update || eqd != aic->prev_eqd) {
2159 			set_eqd[num].delay_multiplier = (eqd * 65)/100;
2160 			set_eqd[num].eq_id = eqo->q.id;
2161 			aic->prev_eqd = eqd;
2162 			num++;
2163 		}
2164 	}
2165 
2166 	if (num)
2167 		be_cmd_modify_eqd(adapter, set_eqd, num);
2168 }
2169 
2170 static void be_rx_stats_update(struct be_rx_obj *rxo,
2171 			       struct be_rx_compl_info *rxcp)
2172 {
2173 	struct be_rx_stats *stats = rx_stats(rxo);
2174 
2175 	u64_stats_update_begin(&stats->sync);
2176 	stats->rx_compl++;
2177 	stats->rx_bytes += rxcp->pkt_size;
2178 	stats->rx_pkts++;
2179 	if (rxcp->tunneled)
2180 		stats->rx_vxlan_offload_pkts++;
2181 	if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2182 		stats->rx_mcast_pkts++;
2183 	if (rxcp->err)
2184 		stats->rx_compl_err++;
2185 	u64_stats_update_end(&stats->sync);
2186 }
2187 
2188 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2189 {
2190 	/* L4 checksum is not reliable for non-TCP/UDP packets.
2191 	 * Also ignore ipcksm for IPv6 pkts
2192 	 */
2193 	return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2194 		(rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2195 }
2196 
2197 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2198 {
2199 	struct be_adapter *adapter = rxo->adapter;
2200 	struct be_rx_page_info *rx_page_info;
2201 	struct be_queue_info *rxq = &rxo->q;
2202 	u32 frag_idx = rxq->tail;
2203 
2204 	rx_page_info = &rxo->page_info_tbl[frag_idx];
2205 	BUG_ON(!rx_page_info->page);
2206 
2207 	if (rx_page_info->last_frag) {
2208 		dma_unmap_page(&adapter->pdev->dev,
2209 			       dma_unmap_addr(rx_page_info, bus),
2210 			       adapter->big_page_size, DMA_FROM_DEVICE);
2211 		rx_page_info->last_frag = false;
2212 	} else {
2213 		dma_sync_single_for_cpu(&adapter->pdev->dev,
2214 					dma_unmap_addr(rx_page_info, bus),
2215 					rx_frag_size, DMA_FROM_DEVICE);
2216 	}
2217 
2218 	queue_tail_inc(rxq);
2219 	atomic_dec(&rxq->used);
2220 	return rx_page_info;
2221 }
2222 
2223 /* Throw away the data in the Rx completion */
2224 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2225 				struct be_rx_compl_info *rxcp)
2226 {
2227 	struct be_rx_page_info *page_info;
2228 	u16 i, num_rcvd = rxcp->num_rcvd;
2229 
2230 	for (i = 0; i < num_rcvd; i++) {
2231 		page_info = get_rx_page_info(rxo);
2232 		put_page(page_info->page);
2233 		memset(page_info, 0, sizeof(*page_info));
2234 	}
2235 }
2236 
2237 /*
2238  * skb_fill_rx_data forms a complete skb for an ether frame
2239  * indicated by rxcp.
2240  */
2241 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2242 			     struct be_rx_compl_info *rxcp)
2243 {
2244 	struct be_rx_page_info *page_info;
2245 	u16 i, j;
2246 	u16 hdr_len, curr_frag_len, remaining;
2247 	u8 *start;
2248 
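	/* Frames no larger than BE_HDR_LEN are copied entirely into the skb
	 * linear area; for bigger frames only the Ethernet header is copied
	 * and the payload is attached as page fragments to avoid copying.
	 */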
2249 	page_info = get_rx_page_info(rxo);
2250 	start = page_address(page_info->page) + page_info->page_offset;
2251 	prefetch(start);
2252 
2253 	/* Copy data in the first descriptor of this completion */
2254 	curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2255 
2256 	skb->len = curr_frag_len;
2257 	if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2258 		memcpy(skb->data, start, curr_frag_len);
2259 		/* Complete packet has now been moved to data */
2260 		put_page(page_info->page);
2261 		skb->data_len = 0;
2262 		skb->tail += curr_frag_len;
2263 	} else {
2264 		hdr_len = ETH_HLEN;
2265 		memcpy(skb->data, start, hdr_len);
2266 		skb_shinfo(skb)->nr_frags = 1;
2267 		skb_frag_set_page(skb, 0, page_info->page);
2268 		skb_shinfo(skb)->frags[0].page_offset =
2269 					page_info->page_offset + hdr_len;
2270 		skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2271 				  curr_frag_len - hdr_len);
2272 		skb->data_len = curr_frag_len - hdr_len;
2273 		skb->truesize += rx_frag_size;
2274 		skb->tail += hdr_len;
2275 	}
2276 	page_info->page = NULL;
2277 
2278 	if (rxcp->pkt_size <= rx_frag_size) {
2279 		BUG_ON(rxcp->num_rcvd != 1);
2280 		return;
2281 	}
2282 
2283 	/* More frags present for this completion */
2284 	remaining = rxcp->pkt_size - curr_frag_len;
2285 	for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2286 		page_info = get_rx_page_info(rxo);
2287 		curr_frag_len = min(remaining, rx_frag_size);
2288 
2289 		/* Coalesce all frags from the same physical page in one slot */
2290 		if (page_info->page_offset == 0) {
2291 			/* Fresh page */
2292 			j++;
2293 			skb_frag_set_page(skb, j, page_info->page);
2294 			skb_shinfo(skb)->frags[j].page_offset =
2295 							page_info->page_offset;
2296 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2297 			skb_shinfo(skb)->nr_frags++;
2298 		} else {
2299 			put_page(page_info->page);
2300 		}
2301 
2302 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2303 		skb->len += curr_frag_len;
2304 		skb->data_len += curr_frag_len;
2305 		skb->truesize += rx_frag_size;
2306 		remaining -= curr_frag_len;
2307 		page_info->page = NULL;
2308 	}
2309 	BUG_ON(j > MAX_SKB_FRAGS);
2310 }
2311 
2312 /* Process the RX completion indicated by rxcp when GRO is disabled */
2313 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2314 				struct be_rx_compl_info *rxcp)
2315 {
2316 	struct be_adapter *adapter = rxo->adapter;
2317 	struct net_device *netdev = adapter->netdev;
2318 	struct sk_buff *skb;
2319 
2320 	skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2321 	if (unlikely(!skb)) {
2322 		rx_stats(rxo)->rx_drops_no_skbs++;
2323 		be_rx_compl_discard(rxo, rxcp);
2324 		return;
2325 	}
2326 
2327 	skb_fill_rx_data(rxo, skb, rxcp);
2328 
2329 	if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2330 		skb->ip_summed = CHECKSUM_UNNECESSARY;
2331 	else
2332 		skb_checksum_none_assert(skb);
2333 
2334 	skb->protocol = eth_type_trans(skb, netdev);
2335 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2336 	if (netdev->features & NETIF_F_RXHASH)
2337 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2338 
2339 	skb->csum_level = rxcp->tunneled;
2340 	skb_mark_napi_id(skb, napi);
2341 
2342 	if (rxcp->vlanf)
2343 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2344 
2345 	netif_receive_skb(skb);
2346 }
2347 
2348 /* Process the RX completion indicated by rxcp when GRO is enabled */
2349 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2350 				    struct napi_struct *napi,
2351 				    struct be_rx_compl_info *rxcp)
2352 {
2353 	struct be_adapter *adapter = rxo->adapter;
2354 	struct be_rx_page_info *page_info;
2355 	struct sk_buff *skb = NULL;
2356 	u16 remaining, curr_frag_len;
2357 	u16 i, j;
2358 
2359 	skb = napi_get_frags(napi);
2360 	if (!skb) {
2361 		be_rx_compl_discard(rxo, rxcp);
2362 		return;
2363 	}
2364 
2365 	remaining = rxcp->pkt_size;
2366 	for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2367 		page_info = get_rx_page_info(rxo);
2368 
2369 		curr_frag_len = min(remaining, rx_frag_size);
2370 
2371 		/* Coalesce all frags from the same physical page in one slot */
2372 		if (i == 0 || page_info->page_offset == 0) {
2373 			/* First frag or Fresh page */
2374 			j++;
2375 			skb_frag_set_page(skb, j, page_info->page);
2376 			skb_shinfo(skb)->frags[j].page_offset =
2377 							page_info->page_offset;
2378 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2379 		} else {
2380 			put_page(page_info->page);
2381 		}
2382 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2383 		skb->truesize += rx_frag_size;
2384 		remaining -= curr_frag_len;
2385 		memset(page_info, 0, sizeof(*page_info));
2386 	}
2387 	BUG_ON(j > MAX_SKB_FRAGS);
2388 
2389 	skb_shinfo(skb)->nr_frags = j + 1;
2390 	skb->len = rxcp->pkt_size;
2391 	skb->data_len = rxcp->pkt_size;
2392 	skb->ip_summed = CHECKSUM_UNNECESSARY;
2393 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2394 	if (adapter->netdev->features & NETIF_F_RXHASH)
2395 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2396 
2397 	skb->csum_level = rxcp->tunneled;
2398 
2399 	if (rxcp->vlanf)
2400 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2401 
2402 	napi_gro_frags(napi);
2403 }
2404 
2405 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2406 				 struct be_rx_compl_info *rxcp)
2407 {
2408 	rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2409 	rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2410 	rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2411 	rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2412 	rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2413 	rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2414 	rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2415 	rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2416 	rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2417 	rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2418 	rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2419 	if (rxcp->vlanf) {
2420 		rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2421 		rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2422 	}
2423 	rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2424 	rxcp->tunneled =
2425 		GET_RX_COMPL_V1_BITS(tunneled, compl);
2426 }
2427 
2428 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2429 				 struct be_rx_compl_info *rxcp)
2430 {
2431 	rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2432 	rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2433 	rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2434 	rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2435 	rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2436 	rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2437 	rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2438 	rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2439 	rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2440 	rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2441 	rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2442 	if (rxcp->vlanf) {
2443 		rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2444 		rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2445 	}
2446 	rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2447 	rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2448 }
2449 
2450 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2451 {
2452 	struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2453 	struct be_rx_compl_info *rxcp = &rxo->rxcp;
2454 	struct be_adapter *adapter = rxo->adapter;
2455 
2456 	/* For checking the valid bit it is OK to use either definition as the
2457 	 * valid bit is at the same position in both v0 and v1 Rx compl */
2458 	if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2459 		return NULL;
2460 
2461 	rmb();
2462 	be_dws_le_to_cpu(compl, sizeof(*compl));
2463 
2464 	if (adapter->be3_native)
2465 		be_parse_rx_compl_v1(compl, rxcp);
2466 	else
2467 		be_parse_rx_compl_v0(compl, rxcp);
2468 
2469 	if (rxcp->ip_frag)
2470 		rxcp->l4_csum = 0;
2471 
2472 	if (rxcp->vlanf) {
2473 		/* In QNQ modes, if qnq bit is not set, then the packet was
2474 		 * tagged only with the transparent outer vlan-tag and must
2475 		 * not be treated as a vlan packet by host
2476 		 */
2477 		if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2478 			rxcp->vlanf = 0;
2479 
2480 		if (!lancer_chip(adapter))
2481 			rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2482 
2483 		if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2484 		    !test_bit(rxcp->vlan_tag, adapter->vids))
2485 			rxcp->vlanf = 0;
2486 	}
2487 
2488 	/* As the compl has been parsed, reset it; we won't touch it again */
2489 	compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2490 
2491 	queue_tail_inc(&rxo->cq);
2492 	return rxcp;
2493 }
2494 
2495 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2496 {
2497 	u32 order = get_order(size);
2498 
2499 	if (order > 0)
2500 		gfp |= __GFP_COMP;
2501 	return alloc_pages(gfp, order);
2502 }
2503 
2504 /*
2505  * Allocate a page, split it into fragments of size rx_frag_size and post as
2506  * receive buffers to BE
2507  */
2508 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2509 {
2510 	struct be_adapter *adapter = rxo->adapter;
2511 	struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2512 	struct be_queue_info *rxq = &rxo->q;
2513 	struct page *pagep = NULL;
2514 	struct device *dev = &adapter->pdev->dev;
2515 	struct be_eth_rx_d *rxd;
2516 	u64 page_dmaaddr = 0, frag_dmaaddr;
2517 	u32 posted, page_offset = 0, notify = 0;
2518 
2519 	page_info = &rxo->page_info_tbl[rxq->head];
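	/* Each big_page_size compound page is carved into
	 * big_page_size / rx_frag_size receive fragments (e.g. 2 frags per
	 * page with 2048-byte frags on a 4K-page system).  Only the
	 * descriptor holding a page's last fragment records the page-level
	 * DMA address, so the page gets unmapped exactly once in
	 * get_rx_page_info().
	 */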
2520 	for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2521 		if (!pagep) {
2522 			pagep = be_alloc_pages(adapter->big_page_size, gfp);
2523 			if (unlikely(!pagep)) {
2524 				rx_stats(rxo)->rx_post_fail++;
2525 				break;
2526 			}
2527 			page_dmaaddr = dma_map_page(dev, pagep, 0,
2528 						    adapter->big_page_size,
2529 						    DMA_FROM_DEVICE);
2530 			if (dma_mapping_error(dev, page_dmaaddr)) {
2531 				put_page(pagep);
2532 				pagep = NULL;
2533 				adapter->drv_stats.dma_map_errors++;
2534 				break;
2535 			}
2536 			page_offset = 0;
2537 		} else {
2538 			get_page(pagep);
2539 			page_offset += rx_frag_size;
2540 		}
2541 		page_info->page_offset = page_offset;
2542 		page_info->page = pagep;
2543 
2544 		rxd = queue_head_node(rxq);
2545 		frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2546 		rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2547 		rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2548 
2549 		/* Any space left in the current big page for another frag? */
2550 		if ((page_offset + rx_frag_size + rx_frag_size) >
2551 					adapter->big_page_size) {
2552 			pagep = NULL;
2553 			page_info->last_frag = true;
2554 			dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2555 		} else {
2556 			dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2557 		}
2558 
2559 		prev_page_info = page_info;
2560 		queue_head_inc(rxq);
2561 		page_info = &rxo->page_info_tbl[rxq->head];
2562 	}
2563 
2564 	/* Mark the last frag of a page when we break out of the above loop
2565 	 * with no more slots available in the RXQ
2566 	 */
2567 	if (pagep) {
2568 		prev_page_info->last_frag = true;
2569 		dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2570 	}
2571 
2572 	if (posted) {
2573 		atomic_add(posted, &rxq->used);
2574 		if (rxo->rx_post_starved)
2575 			rxo->rx_post_starved = false;
2576 		do {
2577 			notify = min(MAX_NUM_POST_ERX_DB, posted);
2578 			be_rxq_notify(adapter, rxq->id, notify);
2579 			posted -= notify;
2580 		} while (posted);
2581 	} else if (atomic_read(&rxq->used) == 0) {
2582 		/* Let be_worker replenish when memory is available */
2583 		rxo->rx_post_starved = true;
2584 	}
2585 }
2586 
2587 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2588 {
2589 	struct be_queue_info *tx_cq = &txo->cq;
2590 	struct be_tx_compl_info *txcp = &txo->txcp;
2591 	struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2592 
2593 	if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2594 		return NULL;
2595 
2596 	/* Ensure load ordering of valid bit dword and other dwords below */
2597 	rmb();
2598 	be_dws_le_to_cpu(compl, sizeof(*compl));
2599 
2600 	txcp->status = GET_TX_COMPL_BITS(status, compl);
2601 	txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2602 
2603 	compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2604 	queue_tail_inc(tx_cq);
2605 	return txcp;
2606 }
2607 
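/* Walk the TX ring from its tail up to (and including) last_index, unmapping
 * each WRB and freeing the completed skb(s).  Returns the number of WRBs
 * consumed so the caller can credit them back against txq->used.
 */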
2608 static u16 be_tx_compl_process(struct be_adapter *adapter,
2609 			       struct be_tx_obj *txo, u16 last_index)
2610 {
2611 	struct sk_buff **sent_skbs = txo->sent_skb_list;
2612 	struct be_queue_info *txq = &txo->q;
2613 	struct sk_buff *skb = NULL;
2614 	bool unmap_skb_hdr = false;
2615 	struct be_eth_wrb *wrb;
2616 	u16 num_wrbs = 0;
2617 	u32 frag_index;
2618 
2619 	do {
2620 		if (sent_skbs[txq->tail]) {
2621 			/* Free skb from prev req */
2622 			if (skb)
2623 				dev_consume_skb_any(skb);
2624 			skb = sent_skbs[txq->tail];
2625 			sent_skbs[txq->tail] = NULL;
2626 			queue_tail_inc(txq);  /* skip hdr wrb */
2627 			num_wrbs++;
2628 			unmap_skb_hdr = true;
2629 		}
2630 		wrb = queue_tail_node(txq);
2631 		frag_index = txq->tail;
2632 		unmap_tx_frag(&adapter->pdev->dev, wrb,
2633 			      (unmap_skb_hdr && skb_headlen(skb)));
2634 		unmap_skb_hdr = false;
2635 		queue_tail_inc(txq);
2636 		num_wrbs++;
2637 	} while (frag_index != last_index);
2638 	dev_consume_skb_any(skb);
2639 
2640 	return num_wrbs;
2641 }
2642 
2643 /* Return the number of events in the event queue */
2644 static inline int events_get(struct be_eq_obj *eqo)
2645 {
2646 	struct be_eq_entry *eqe;
2647 	int num = 0;
2648 
2649 	do {
2650 		eqe = queue_tail_node(&eqo->q);
2651 		if (eqe->evt == 0)
2652 			break;
2653 
2654 		rmb();
2655 		eqe->evt = 0;
2656 		num++;
2657 		queue_tail_inc(&eqo->q);
2658 	} while (true);
2659 
2660 	return num;
2661 }
2662 
2663 /* Leaves the EQ in disarmed state */
2664 static void be_eq_clean(struct be_eq_obj *eqo)
2665 {
2666 	int num = events_get(eqo);
2667 
2668 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2669 }
2670 
2671 /* Free posted rx buffers that were not used */
2672 static void be_rxq_clean(struct be_rx_obj *rxo)
2673 {
2674 	struct be_queue_info *rxq = &rxo->q;
2675 	struct be_rx_page_info *page_info;
2676 
2677 	while (atomic_read(&rxq->used) > 0) {
2678 		page_info = get_rx_page_info(rxo);
2679 		put_page(page_info->page);
2680 		memset(page_info, 0, sizeof(*page_info));
2681 	}
2682 	BUG_ON(atomic_read(&rxq->used));
2683 	rxq->tail = 0;
2684 	rxq->head = 0;
2685 }
2686 
2687 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2688 {
2689 	struct be_queue_info *rx_cq = &rxo->cq;
2690 	struct be_rx_compl_info *rxcp;
2691 	struct be_adapter *adapter = rxo->adapter;
2692 	int flush_wait = 0;
2693 
2694 	/* Consume pending rx completions.
2695 	 * Wait for the flush completion (identified by zero num_rcvd)
2696 	 * to arrive. Notify CQ even when there are no more CQ entries
2697 	 * for HW to flush partially coalesced CQ entries.
2698 	 * In Lancer, there is no need to wait for flush compl.
2699 	 */
2700 	for (;;) {
2701 		rxcp = be_rx_compl_get(rxo);
2702 		if (!rxcp) {
2703 			if (lancer_chip(adapter))
2704 				break;
2705 
2706 			if (flush_wait++ > 50 ||
2707 			    be_check_error(adapter,
2708 					   BE_ERROR_HW)) {
2709 				dev_warn(&adapter->pdev->dev,
2710 					 "did not receive flush compl\n");
2711 				break;
2712 			}
2713 			be_cq_notify(adapter, rx_cq->id, true, 0);
2714 			mdelay(1);
2715 		} else {
2716 			be_rx_compl_discard(rxo, rxcp);
2717 			be_cq_notify(adapter, rx_cq->id, false, 1);
2718 			if (rxcp->num_rcvd == 0)
2719 				break;
2720 		}
2721 	}
2722 
2723 	/* After cleanup, leave the CQ in unarmed state */
2724 	be_cq_notify(adapter, rx_cq->id, false, 0);
2725 }
2726 
2727 static void be_tx_compl_clean(struct be_adapter *adapter)
2728 {
2729 	struct device *dev = &adapter->pdev->dev;
2730 	u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2731 	struct be_tx_compl_info *txcp;
2732 	struct be_queue_info *txq;
2733 	u32 end_idx, notified_idx;
2734 	struct be_tx_obj *txo;
2735 	int i, pending_txqs;
2736 
2737 	/* Stop polling for compls when HW has been silent for 10ms */
2738 	do {
2739 		pending_txqs = adapter->num_tx_qs;
2740 
2741 		for_all_tx_queues(adapter, txo, i) {
2742 			cmpl = 0;
2743 			num_wrbs = 0;
2744 			txq = &txo->q;
2745 			while ((txcp = be_tx_compl_get(txo))) {
2746 				num_wrbs +=
2747 					be_tx_compl_process(adapter, txo,
2748 							    txcp->end_index);
2749 				cmpl++;
2750 			}
2751 			if (cmpl) {
2752 				be_cq_notify(adapter, txo->cq.id, false, cmpl);
2753 				atomic_sub(num_wrbs, &txq->used);
2754 				timeo = 0;
2755 			}
2756 			if (!be_is_tx_compl_pending(txo))
2757 				pending_txqs--;
2758 		}
2759 
2760 		if (pending_txqs == 0 || ++timeo > 10 ||
2761 		    be_check_error(adapter, BE_ERROR_HW))
2762 			break;
2763 
2764 		mdelay(1);
2765 	} while (true);
2766 
2767 	/* Free enqueued TX that was never notified to HW */
2768 	for_all_tx_queues(adapter, txo, i) {
2769 		txq = &txo->q;
2770 
2771 		if (atomic_read(&txq->used)) {
2772 			dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2773 				 i, atomic_read(&txq->used));
2774 			notified_idx = txq->tail;
2775 			end_idx = txq->tail;
2776 			index_adv(&end_idx, atomic_read(&txq->used) - 1,
2777 				  txq->len);
2778 			/* Use the tx-compl process logic to handle requests
2779 			 * that were not sent to the HW.
2780 			 */
2781 			num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2782 			atomic_sub(num_wrbs, &txq->used);
2783 			BUG_ON(atomic_read(&txq->used));
2784 			txo->pend_wrb_cnt = 0;
2785 			/* Since hw was never notified of these requests,
2786 			 * reset TXQ indices
2787 			 */
2788 			txq->head = notified_idx;
2789 			txq->tail = notified_idx;
2790 		}
2791 	}
2792 }
2793 
2794 static void be_evt_queues_destroy(struct be_adapter *adapter)
2795 {
2796 	struct be_eq_obj *eqo;
2797 	int i;
2798 
2799 	for_all_evt_queues(adapter, eqo, i) {
2800 		if (eqo->q.created) {
2801 			be_eq_clean(eqo);
2802 			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2803 			netif_napi_del(&eqo->napi);
2804 			free_cpumask_var(eqo->affinity_mask);
2805 		}
2806 		be_queue_free(adapter, &eqo->q);
2807 	}
2808 }
2809 
2810 static int be_evt_queues_create(struct be_adapter *adapter)
2811 {
2812 	struct be_queue_info *eq;
2813 	struct be_eq_obj *eqo;
2814 	struct be_aic_obj *aic;
2815 	int i, rc;
2816 
2817 	/* need enough EQs to service both RX and TX queues */
2818 	adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2819 				    max(adapter->cfg_num_rx_irqs,
2820 					adapter->cfg_num_tx_irqs));
2821 
2822 	for_all_evt_queues(adapter, eqo, i) {
2823 		int numa_node = dev_to_node(&adapter->pdev->dev);
2824 
2825 		aic = &adapter->aic_obj[i];
2826 		eqo->adapter = adapter;
2827 		eqo->idx = i;
2828 		aic->max_eqd = BE_MAX_EQD;
2829 		aic->enable = true;
2830 
2831 		eq = &eqo->q;
2832 		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2833 				    sizeof(struct be_eq_entry));
2834 		if (rc)
2835 			return rc;
2836 
2837 		rc = be_cmd_eq_create(adapter, eqo);
2838 		if (rc)
2839 			return rc;
2840 
2841 		if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2842 			return -ENOMEM;
2843 		cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2844 				eqo->affinity_mask);
2845 		netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2846 			       BE_NAPI_WEIGHT);
2847 	}
2848 	return 0;
2849 }
2850 
2851 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2852 {
2853 	struct be_queue_info *q;
2854 
2855 	q = &adapter->mcc_obj.q;
2856 	if (q->created)
2857 		be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2858 	be_queue_free(adapter, q);
2859 
2860 	q = &adapter->mcc_obj.cq;
2861 	if (q->created)
2862 		be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2863 	be_queue_free(adapter, q);
2864 }
2865 
2866 /* Must be called only after TX qs are created as MCC shares TX EQ */
2867 static int be_mcc_queues_create(struct be_adapter *adapter)
2868 {
2869 	struct be_queue_info *q, *cq;
2870 
2871 	cq = &adapter->mcc_obj.cq;
2872 	if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2873 			   sizeof(struct be_mcc_compl)))
2874 		goto err;
2875 
2876 	/* Use the default EQ for MCC completions */
2877 	if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2878 		goto mcc_cq_free;
2879 
2880 	q = &adapter->mcc_obj.q;
2881 	if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2882 		goto mcc_cq_destroy;
2883 
2884 	if (be_cmd_mccq_create(adapter, q, cq))
2885 		goto mcc_q_free;
2886 
2887 	return 0;
2888 
2889 mcc_q_free:
2890 	be_queue_free(adapter, q);
2891 mcc_cq_destroy:
2892 	be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2893 mcc_cq_free:
2894 	be_queue_free(adapter, cq);
2895 err:
2896 	return -1;
2897 }
2898 
2899 static void be_tx_queues_destroy(struct be_adapter *adapter)
2900 {
2901 	struct be_queue_info *q;
2902 	struct be_tx_obj *txo;
2903 	u8 i;
2904 
2905 	for_all_tx_queues(adapter, txo, i) {
2906 		q = &txo->q;
2907 		if (q->created)
2908 			be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2909 		be_queue_free(adapter, q);
2910 
2911 		q = &txo->cq;
2912 		if (q->created)
2913 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2914 		be_queue_free(adapter, q);
2915 	}
2916 }
2917 
2918 static int be_tx_qs_create(struct be_adapter *adapter)
2919 {
2920 	struct be_queue_info *cq;
2921 	struct be_tx_obj *txo;
2922 	struct be_eq_obj *eqo;
2923 	int status, i;
2924 
2925 	adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2926 
2927 	for_all_tx_queues(adapter, txo, i) {
2928 		cq = &txo->cq;
2929 		status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2930 					sizeof(struct be_eth_tx_compl));
2931 		if (status)
2932 			return status;
2933 
2934 		u64_stats_init(&txo->stats.sync);
2935 		u64_stats_init(&txo->stats.sync_compl);
2936 
2937 		/* If num_evt_qs is less than num_tx_qs, then more than
2938 		 * one txq shares an eq
2939 		 */
2940 		eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2941 		status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2942 		if (status)
2943 			return status;
2944 
2945 		status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2946 					sizeof(struct be_eth_wrb));
2947 		if (status)
2948 			return status;
2949 
2950 		status = be_cmd_txq_create(adapter, txo);
2951 		if (status)
2952 			return status;
2953 
2954 		netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2955 				    eqo->idx);
2956 	}
2957 
2958 	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2959 		 adapter->num_tx_qs);
2960 	return 0;
2961 }
2962 
2963 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2964 {
2965 	struct be_queue_info *q;
2966 	struct be_rx_obj *rxo;
2967 	int i;
2968 
2969 	for_all_rx_queues(adapter, rxo, i) {
2970 		q = &rxo->cq;
2971 		if (q->created)
2972 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2973 		be_queue_free(adapter, q);
2974 	}
2975 }
2976 
2977 static int be_rx_cqs_create(struct be_adapter *adapter)
2978 {
2979 	struct be_queue_info *eq, *cq;
2980 	struct be_rx_obj *rxo;
2981 	int rc, i;
2982 
2983 	adapter->num_rss_qs =
2984 			min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2985 
2986 	/* We'll use RSS only if at least 2 RSS rings are supported. */
2987 	if (adapter->num_rss_qs < 2)
2988 		adapter->num_rss_qs = 0;
2989 
2990 	adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2991 
2992 	/* When the interface is not capable of RSS rings (and there is no
2993 	 * need to create a default RXQ) we'll still need one RXQ
2994 	 */
2995 	if (adapter->num_rx_qs == 0)
2996 		adapter->num_rx_qs = 1;
2997 
2998 	adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2999 	for_all_rx_queues(adapter, rxo, i) {
3000 		rxo->adapter = adapter;
3001 		cq = &rxo->cq;
3002 		rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3003 				    sizeof(struct be_eth_rx_compl));
3004 		if (rc)
3005 			return rc;
3006 
3007 		u64_stats_init(&rxo->stats.sync);
3008 		eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3009 		rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3010 		if (rc)
3011 			return rc;
3012 	}
3013 
3014 	dev_info(&adapter->pdev->dev,
3015 		 "created %d RX queue(s)\n", adapter->num_rx_qs);
3016 	return 0;
3017 }
3018 
3019 static irqreturn_t be_intx(int irq, void *dev)
3020 {
3021 	struct be_eq_obj *eqo = dev;
3022 	struct be_adapter *adapter = eqo->adapter;
3023 	int num_evts = 0;
3024 
3025 	/* IRQ is not expected when NAPI is scheduled as the EQ
3026 	 * will not be armed.
3027 	 * But, this can happen on Lancer INTx where it takes
3028 	 * a while to de-assert INTx or in BE2 where occasionally
3029 	 * an interrupt may be raised even when EQ is unarmed.
3030 	 * If NAPI is already scheduled, then counting & notifying
3031 	 * events will orphan them.
3032 	 */
3033 	if (napi_schedule_prep(&eqo->napi)) {
3034 		num_evts = events_get(eqo);
3035 		__napi_schedule(&eqo->napi);
3036 		if (num_evts)
3037 			eqo->spurious_intr = 0;
3038 	}
3039 	be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3040 
3041 	/* Return IRQ_HANDLED only for the first spurious intr
3042 	 * after a valid intr to stop the kernel from branding
3043 	 * this irq as a bad one!
3044 	 */
3045 	if (num_evts || eqo->spurious_intr++ == 0)
3046 		return IRQ_HANDLED;
3047 	else
3048 		return IRQ_NONE;
3049 }
3050 
3051 static irqreturn_t be_msix(int irq, void *dev)
3052 {
3053 	struct be_eq_obj *eqo = dev;
3054 
3055 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3056 	napi_schedule(&eqo->napi);
3057 	return IRQ_HANDLED;
3058 }
3059 
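/* GRO is attempted only for error-free TCP segments whose L4 checksum was
 * validated by HW; everything else goes through the regular RX path.
 */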
3060 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3061 {
3062 	return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3063 }
3064 
3065 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3066 			 int budget)
3067 {
3068 	struct be_adapter *adapter = rxo->adapter;
3069 	struct be_queue_info *rx_cq = &rxo->cq;
3070 	struct be_rx_compl_info *rxcp;
3071 	u32 work_done;
3072 	u32 frags_consumed = 0;
3073 
3074 	for (work_done = 0; work_done < budget; work_done++) {
3075 		rxcp = be_rx_compl_get(rxo);
3076 		if (!rxcp)
3077 			break;
3078 
3079 		/* Is it a flush compl that has no data */
3080 		if (unlikely(rxcp->num_rcvd == 0))
3081 			goto loop_continue;
3082 
3083 		/* Discard compls with partial DMA (seen on Lancer B0) */
3084 		if (unlikely(!rxcp->pkt_size)) {
3085 			be_rx_compl_discard(rxo, rxcp);
3086 			goto loop_continue;
3087 		}
3088 
3089 		/* On BE drop pkts that arrive due to imperfect filtering in
3090 		 * promiscuous mode on some SKUs
3091 		 */
3092 		if (unlikely(rxcp->port != adapter->port_num &&
3093 			     !lancer_chip(adapter))) {
3094 			be_rx_compl_discard(rxo, rxcp);
3095 			goto loop_continue;
3096 		}
3097 
3098 		if (do_gro(rxcp))
3099 			be_rx_compl_process_gro(rxo, napi, rxcp);
3100 		else
3101 			be_rx_compl_process(rxo, napi, rxcp);
3102 
3103 loop_continue:
3104 		frags_consumed += rxcp->num_rcvd;
3105 		be_rx_stats_update(rxo, rxcp);
3106 	}
3107 
3108 	if (work_done) {
3109 		be_cq_notify(adapter, rx_cq->id, true, work_done);
3110 
3111 		/* When an rx-obj gets into post_starved state, just
3112 		 * let be_worker do the posting.
3113 		 */
3114 		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3115 		    !rxo->rx_post_starved)
3116 			be_post_rx_frags(rxo, GFP_ATOMIC,
3117 					 max_t(u32, MAX_RX_POST,
3118 					       frags_consumed));
3119 	}
3120 
3121 	return work_done;
3122 }
3123 
3124 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3125 {
3126 	switch (status) {
3127 	case BE_TX_COMP_HDR_PARSE_ERR:
3128 		tx_stats(txo)->tx_hdr_parse_err++;
3129 		break;
3130 	case BE_TX_COMP_NDMA_ERR:
3131 		tx_stats(txo)->tx_dma_err++;
3132 		break;
3133 	case BE_TX_COMP_ACL_ERR:
3134 		tx_stats(txo)->tx_spoof_check_err++;
3135 		break;
3136 	}
3137 }
3138 
3139 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3140 {
3141 	switch (status) {
3142 	case LANCER_TX_COMP_LSO_ERR:
3143 		tx_stats(txo)->tx_tso_err++;
3144 		break;
3145 	case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3146 	case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3147 		tx_stats(txo)->tx_spoof_check_err++;
3148 		break;
3149 	case LANCER_TX_COMP_QINQ_ERR:
3150 		tx_stats(txo)->tx_qinq_err++;
3151 		break;
3152 	case LANCER_TX_COMP_PARITY_ERR:
3153 		tx_stats(txo)->tx_internal_parity_err++;
3154 		break;
3155 	case LANCER_TX_COMP_DMA_ERR:
3156 		tx_stats(txo)->tx_dma_err++;
3157 		break;
3158 	}
3159 }
3160 
3161 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3162 			  int idx)
3163 {
3164 	int num_wrbs = 0, work_done = 0;
3165 	struct be_tx_compl_info *txcp;
3166 
3167 	while ((txcp = be_tx_compl_get(txo))) {
3168 		num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3169 		work_done++;
3170 
3171 		if (txcp->status) {
3172 			if (lancer_chip(adapter))
3173 				lancer_update_tx_err(txo, txcp->status);
3174 			else
3175 				be_update_tx_err(txo, txcp->status);
3176 		}
3177 	}
3178 
3179 	if (work_done) {
3180 		be_cq_notify(adapter, txo->cq.id, true, work_done);
3181 		atomic_sub(num_wrbs, &txo->q.used);
3182 
3183 		/* As Tx wrbs have been freed up, wake up netdev queue
3184 		 * if it was stopped due to lack of tx wrbs. */
3185 		if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3186 		    be_can_txq_wake(txo)) {
3187 			netif_wake_subqueue(adapter->netdev, idx);
3188 		}
3189 
3190 		u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3191 		tx_stats(txo)->tx_compl += work_done;
3192 		u64_stats_update_end(&tx_stats(txo)->sync_compl);
3193 	}
3194 }
3195 
3196 int be_poll(struct napi_struct *napi, int budget)
3197 {
3198 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3199 	struct be_adapter *adapter = eqo->adapter;
3200 	int max_work = 0, work, i, num_evts;
3201 	struct be_rx_obj *rxo;
3202 	struct be_tx_obj *txo;
3203 	u32 mult_enc = 0;
3204 
3205 	num_evts = events_get(eqo);
3206 
3207 	for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3208 		be_process_tx(adapter, txo, i);
3209 
3210 	/* This loop will iterate twice for EQ0 in which
3211 	 * completions of the last RXQ (default one) are also processed.
3212 	 * For other EQs the loop iterates only once.
3213 	 */
3214 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3215 		work = be_process_rx(rxo, napi, budget);
3216 		max_work = max(work, max_work);
3217 	}
3218 
3219 	if (is_mcc_eqo(eqo))
3220 		be_process_mcc(adapter);
3221 
3222 	if (max_work < budget) {
3223 		napi_complete_done(napi, max_work);
3224 
3225 		/* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3226 		 * delay via a delay multiplier encoding value
3227 		 */
3228 		if (skyhawk_chip(adapter))
3229 			mult_enc = be_get_eq_delay_mult_enc(eqo);
3230 
3231 		be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3232 			     mult_enc);
3233 	} else {
3234 		/* As we'll continue in polling mode, count and clear events */
3235 		be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3236 	}
3237 	return max_work;
3238 }
3239 
3240 void be_detect_error(struct be_adapter *adapter)
3241 {
3242 	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3243 	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3244 	u32 i;
3245 	struct device *dev = &adapter->pdev->dev;
3246 
3247 	if (be_check_error(adapter, BE_ERROR_HW))
3248 		return;
3249 
3250 	if (lancer_chip(adapter)) {
3251 		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3252 		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3253 			be_set_error(adapter, BE_ERROR_UE);
3254 			sliport_err1 = ioread32(adapter->db +
3255 						SLIPORT_ERROR1_OFFSET);
3256 			sliport_err2 = ioread32(adapter->db +
3257 						SLIPORT_ERROR2_OFFSET);
3258 			/* Do not log error messages if it's a FW reset */
3259 			if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3260 			    sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3261 				dev_info(dev, "Firmware update in progress\n");
3262 			} else {
3263 				dev_err(dev, "Error detected in the card\n");
3264 				dev_err(dev, "ERR: sliport status 0x%x\n",
3265 					sliport_status);
3266 				dev_err(dev, "ERR: sliport error1 0x%x\n",
3267 					sliport_err1);
3268 				dev_err(dev, "ERR: sliport error2 0x%x\n",
3269 					sliport_err2);
3270 			}
3271 		}
3272 	} else {
3273 		ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3274 		ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3275 		ue_lo_mask = ioread32(adapter->pcicfg +
3276 				      PCICFG_UE_STATUS_LOW_MASK);
3277 		ue_hi_mask = ioread32(adapter->pcicfg +
3278 				      PCICFG_UE_STATUS_HI_MASK);
3279 
3280 		ue_lo = (ue_lo & ~ue_lo_mask);
3281 		ue_hi = (ue_hi & ~ue_hi_mask);
3282 
3283 		/* On certain platforms BE hardware can indicate spurious UEs.
3284 		 * In case of a real UE the HW will stop working anyway, so
3285 		 * don't set the hw_error flag merely because a UE was detected.
3286 		 */
3287 
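		/* Each set bit in the unmasked UE status words names a HW
		 * block in ue_status_low_desc[]/ue_status_hi_desc[]; log
		 * every block that reported an unrecoverable error.
		 */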
3288 		if (ue_lo || ue_hi) {
3289 			dev_err(dev, "Error detected in the adapter\n");
3290 			if (skyhawk_chip(adapter))
3291 				be_set_error(adapter, BE_ERROR_UE);
3292 
3293 			for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3294 				if (ue_lo & 1)
3295 					dev_err(dev, "UE: %s bit set\n",
3296 						ue_status_low_desc[i]);
3297 			}
3298 			for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3299 				if (ue_hi & 1)
3300 					dev_err(dev, "UE: %s bit set\n",
3301 						ue_status_hi_desc[i]);
3302 			}
3303 		}
3304 	}
3305 }
3306 
3307 static void be_msix_disable(struct be_adapter *adapter)
3308 {
3309 	if (msix_enabled(adapter)) {
3310 		pci_disable_msix(adapter->pdev);
3311 		adapter->num_msix_vec = 0;
3312 		adapter->num_msix_roce_vec = 0;
3313 	}
3314 }
3315 
3316 static int be_msix_enable(struct be_adapter *adapter)
3317 {
3318 	unsigned int i, max_roce_eqs;
3319 	struct device *dev = &adapter->pdev->dev;
3320 	int num_vec;
3321 
3322 	/* If RoCE is supported, program the max number of vectors that
3323 	 * could be used for NIC and RoCE, else, just program the number
3324 	 * could be used for NIC and RoCE; otherwise, just program the number
3325 	 */
3326 	if (be_roce_supported(adapter)) {
3327 		max_roce_eqs =
3328 			be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3329 		max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3330 		num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3331 	} else {
3332 		num_vec = max(adapter->cfg_num_rx_irqs,
3333 			      adapter->cfg_num_tx_irqs);
3334 	}
3335 
3336 	for (i = 0; i < num_vec; i++)
3337 		adapter->msix_entries[i].entry = i;
3338 
3339 	num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3340 					MIN_MSIX_VECTORS, num_vec);
3341 	if (num_vec < 0)
3342 		goto fail;
3343 
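	/* When RoCE shares the MSI-X range and more than the minimum was
	 * granted, hand half of the granted vectors to RoCE; the NIC keeps
	 * the remainder (num_msix_vec = num_vec - num_msix_roce_vec).
	 */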
3344 	if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3345 		adapter->num_msix_roce_vec = num_vec / 2;
3346 		dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3347 			 adapter->num_msix_roce_vec);
3348 	}
3349 
3350 	adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3351 
3352 	dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3353 		 adapter->num_msix_vec);
3354 	return 0;
3355 
3356 fail:
3357 	dev_warn(dev, "MSIx enable failed\n");
3358 
3359 	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
3360 	if (be_virtfn(adapter))
3361 		return num_vec;
3362 	return 0;
3363 }
3364 
3365 static inline int be_msix_vec_get(struct be_adapter *adapter,
3366 				  struct be_eq_obj *eqo)
3367 {
3368 	return adapter->msix_entries[eqo->msix_idx].vector;
3369 }
3370 
3371 static int be_msix_register(struct be_adapter *adapter)
3372 {
3373 	struct net_device *netdev = adapter->netdev;
3374 	struct be_eq_obj *eqo;
3375 	int status, i, vec;
3376 
3377 	for_all_evt_queues(adapter, eqo, i) {
3378 		sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3379 		vec = be_msix_vec_get(adapter, eqo);
3380 		status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3381 		if (status)
3382 			goto err_msix;
3383 
3384 		irq_set_affinity_hint(vec, eqo->affinity_mask);
3385 	}
3386 
3387 	return 0;
3388 err_msix:
3389 	for (i--; i >= 0; i--) {
3390 		eqo = &adapter->eq_obj[i];
3391 		free_irq(be_msix_vec_get(adapter, eqo), eqo);
3392 	}
3393 	dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3394 		 status);
3395 	be_msix_disable(adapter);
3396 	return status;
3397 }
3398 
3399 static int be_irq_register(struct be_adapter *adapter)
3400 {
3401 	struct net_device *netdev = adapter->netdev;
3402 	int status;
3403 
3404 	if (msix_enabled(adapter)) {
3405 		status = be_msix_register(adapter);
3406 		if (status == 0)
3407 			goto done;
3408 		/* INTx is not supported for VF */
3409 		if (be_virtfn(adapter))
3410 			return status;
3411 	}
3412 
3413 	/* INTx: only the first EQ is used */
3414 	netdev->irq = adapter->pdev->irq;
3415 	status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3416 			     &adapter->eq_obj[0]);
3417 	if (status) {
3418 		dev_err(&adapter->pdev->dev,
3419 			"INTx request IRQ failed - err %d\n", status);
3420 		return status;
3421 	}
3422 done:
3423 	adapter->isr_registered = true;
3424 	return 0;
3425 }
3426 
3427 static void be_irq_unregister(struct be_adapter *adapter)
3428 {
3429 	struct net_device *netdev = adapter->netdev;
3430 	struct be_eq_obj *eqo;
3431 	int i, vec;
3432 
3433 	if (!adapter->isr_registered)
3434 		return;
3435 
3436 	/* INTx */
3437 	if (!msix_enabled(adapter)) {
3438 		free_irq(netdev->irq, &adapter->eq_obj[0]);
3439 		goto done;
3440 	}
3441 
3442 	/* MSIx */
3443 	for_all_evt_queues(adapter, eqo, i) {
3444 		vec = be_msix_vec_get(adapter, eqo);
3445 		irq_set_affinity_hint(vec, NULL);
3446 		free_irq(vec, eqo);
3447 	}
3448 
3449 done:
3450 	adapter->isr_registered = false;
3451 }
3452 
3453 static void be_rx_qs_destroy(struct be_adapter *adapter)
3454 {
3455 	struct rss_info *rss = &adapter->rss_info;
3456 	struct be_queue_info *q;
3457 	struct be_rx_obj *rxo;
3458 	int i;
3459 
3460 	for_all_rx_queues(adapter, rxo, i) {
3461 		q = &rxo->q;
3462 		if (q->created) {
3463 			/* If RXQs are destroyed while in an "out of buffer"
3464 			 * state, there is a possibility of an HW stall on
3465 			 * Lancer. So, post 64 buffers to each queue to relieve
3466 			 * the "out of buffer" condition.
3467 			 * Make sure there's space in the RXQ before posting.
3468 			 */
3469 			if (lancer_chip(adapter)) {
3470 				be_rx_cq_clean(rxo);
3471 				if (atomic_read(&q->used) == 0)
3472 					be_post_rx_frags(rxo, GFP_KERNEL,
3473 							 MAX_RX_POST);
3474 			}
3475 
3476 			be_cmd_rxq_destroy(adapter, q);
3477 			be_rx_cq_clean(rxo);
3478 			be_rxq_clean(rxo);
3479 		}
3480 		be_queue_free(adapter, q);
3481 	}
3482 
3483 	if (rss->rss_flags) {
3484 		rss->rss_flags = RSS_ENABLE_NONE;
3485 		be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3486 				  128, rss->rss_hkey);
3487 	}
3488 }
3489 
3490 static void be_disable_if_filters(struct be_adapter *adapter)
3491 {
3492 	/* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3493 	if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3494 	    check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3495 		be_dev_mac_del(adapter, adapter->pmac_id[0]);
3496 		eth_zero_addr(adapter->dev_mac);
3497 	}
3498 
3499 	be_clear_uc_list(adapter);
3500 	be_clear_mc_list(adapter);
3501 
3502 	/* The IFACE flags are enabled in the open path and cleared
3503 	 * in the close path. When a VF gets detached from the host and
3504 	 * assigned to a VM the following happens:
3505 	 *	- VF's IFACE flags get cleared in the detach path
3506 	 *	- IFACE create is issued by the VF in the attach path
3507 	 * Due to a bug in the BE3/Skyhawk-R FW
3508 	 * (Lancer FW doesn't have the bug), the IFACE capability flags
3509 	 * specified along with the IFACE create cmd issued by a VF are not
3510 	 * honoured by FW.  As a consequence, if a *new* driver
3511 	 * (that enables/disables IFACE flags in open/close)
3512 	 * is loaded in the host and an *old* driver is * used by a VM/VF,
3513 	 * is loaded in the host and an *old* driver is used by a VM/VF,
3514 	 * To avoid this, disable RX-filter flags only for Lancer.
3515 	 */
3516 	if (lancer_chip(adapter)) {
3517 		be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3518 		adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3519 	}
3520 }
3521 
3522 static int be_close(struct net_device *netdev)
3523 {
3524 	struct be_adapter *adapter = netdev_priv(netdev);
3525 	struct be_eq_obj *eqo;
3526 	int i;
3527 
3528 	/* This protection is needed as be_close() may be called even when the
3529 	 * adapter is in cleared state (after eeh perm failure)
3530 	 */
3531 	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3532 		return 0;
3533 
3534 	/* Before attempting cleanup ensure all the pending cmds in the
3535 	 * config_wq have finished execution
3536 	 */
3537 	flush_workqueue(be_wq);
3538 
3539 	be_disable_if_filters(adapter);
3540 
3541 	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3542 		for_all_evt_queues(adapter, eqo, i) {
3543 			napi_disable(&eqo->napi);
3544 		}
3545 		adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3546 	}
3547 
3548 	be_async_mcc_disable(adapter);
3549 
3550 	/* Wait for all pending tx completions to arrive so that
3551 	 * all tx skbs are freed.
3552 	 */
3553 	netif_tx_disable(netdev);
3554 	be_tx_compl_clean(adapter);
3555 
3556 	be_rx_qs_destroy(adapter);
3557 
3558 	for_all_evt_queues(adapter, eqo, i) {
3559 		if (msix_enabled(adapter))
3560 			synchronize_irq(be_msix_vec_get(adapter, eqo));
3561 		else
3562 			synchronize_irq(netdev->irq);
3563 		be_eq_clean(eqo);
3564 	}
3565 
3566 	be_irq_unregister(adapter);
3567 
3568 	return 0;
3569 }
3570 
3571 static int be_rx_qs_create(struct be_adapter *adapter)
3572 {
3573 	struct rss_info *rss = &adapter->rss_info;
3574 	u8 rss_key[RSS_HASH_KEY_LEN];
3575 	struct be_rx_obj *rxo;
3576 	int rc, i, j;
3577 
3578 	for_all_rx_queues(adapter, rxo, i) {
3579 		rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3580 				    sizeof(struct be_eth_rx_d));
3581 		if (rc)
3582 			return rc;
3583 	}
3584 
3585 	if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3586 		rxo = default_rxo(adapter);
3587 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3588 				       rx_frag_size, adapter->if_handle,
3589 				       false, &rxo->rss_id);
3590 		if (rc)
3591 			return rc;
3592 	}
3593 
3594 	for_all_rss_queues(adapter, rxo, i) {
3595 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3596 				       rx_frag_size, adapter->if_handle,
3597 				       true, &rxo->rss_id);
3598 		if (rc)
3599 			return rc;
3600 	}
3601 
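	/* Fill the RSS indirection table by assigning the RSS queue ids
	 * round-robin across all RSS_INDIR_TABLE_LEN entries.
	 */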
3602 	if (be_multi_rxq(adapter)) {
3603 		for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3604 			for_all_rss_queues(adapter, rxo, i) {
3605 				if ((j + i) >= RSS_INDIR_TABLE_LEN)
3606 					break;
3607 				rss->rsstable[j + i] = rxo->rss_id;
3608 				rss->rss_queue[j + i] = i;
3609 			}
3610 		}
3611 		rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3612 			RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3613 
3614 		if (!BEx_chip(adapter))
3615 			rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3616 				RSS_ENABLE_UDP_IPV6;
3617 
3618 		netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3619 		rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3620 				       RSS_INDIR_TABLE_LEN, rss_key);
3621 		if (rc) {
3622 			rss->rss_flags = RSS_ENABLE_NONE;
3623 			return rc;
3624 		}
3625 
3626 		memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3627 	} else {
3628 		/* Disable RSS, if only default RX Q is created */
3629 		rss->rss_flags = RSS_ENABLE_NONE;
3630 	}
3631 
3633 	/* Post 1 less than RXQ-len to avoid head being equal to tail,
3634 	 * which is a queue empty condition
3635 	 */
3636 	for_all_rx_queues(adapter, rxo, i)
3637 		be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3638 
3639 	return 0;
3640 }
3641 
3642 static int be_enable_if_filters(struct be_adapter *adapter)
3643 {
3644 	int status;
3645 
3646 	status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3647 	if (status)
3648 		return status;
3649 
3650 	/* This condition is usually true as ->dev_mac is zeroed.
3651 	 * But on BE3 VFs the initial MAC is pre-programmed by the PF and
3652 	 * the subsequent be_dev_mac_add() can fail (after a fresh boot)
3653 	 */
3654 	if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3655 		int old_pmac_id = -1;
3656 
3657 		/* Remember old programmed MAC if any - can happen on BE3 VF */
3658 		if (!is_zero_ether_addr(adapter->dev_mac))
3659 			old_pmac_id = adapter->pmac_id[0];
3660 
3661 		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3662 		if (status)
3663 			return status;
3664 
3665 		/* Delete the old programmed MAC as we successfully programmed
3666 		 * a new MAC
3667 		 */
3668 		if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3669 			be_dev_mac_del(adapter, old_pmac_id);
3670 
3671 		ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3672 	}
3673 
3674 	if (adapter->vlans_added)
3675 		be_vid_config(adapter);
3676 
3677 	__be_set_rx_mode(adapter);
3678 
3679 	return 0;
3680 }
3681 
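/* ndo_open entry point: create the RX queues, program the RX filters,
 * register IRQs, enable NAPI/EQ interrupts and start the TX queues.
 */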
3682 static int be_open(struct net_device *netdev)
3683 {
3684 	struct be_adapter *adapter = netdev_priv(netdev);
3685 	struct be_eq_obj *eqo;
3686 	struct be_rx_obj *rxo;
3687 	struct be_tx_obj *txo;
3688 	u8 link_status;
3689 	int status, i;
3690 
3691 	status = be_rx_qs_create(adapter);
3692 	if (status)
3693 		goto err;
3694 
3695 	status = be_enable_if_filters(adapter);
3696 	if (status)
3697 		goto err;
3698 
3699 	status = be_irq_register(adapter);
3700 	if (status)
3701 		goto err;
3702 
3703 	for_all_rx_queues(adapter, rxo, i)
3704 		be_cq_notify(adapter, rxo->cq.id, true, 0);
3705 
3706 	for_all_tx_queues(adapter, txo, i)
3707 		be_cq_notify(adapter, txo->cq.id, true, 0);
3708 
3709 	be_async_mcc_enable(adapter);
3710 
3711 	for_all_evt_queues(adapter, eqo, i) {
3712 		napi_enable(&eqo->napi);
3713 		be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3714 	}
3715 	adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3716 
3717 	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3718 	if (!status)
3719 		be_link_status_update(adapter, link_status);
3720 
3721 	netif_tx_start_all_queues(netdev);
3722 	if (skyhawk_chip(adapter))
3723 		udp_tunnel_get_rx_info(netdev);
3724 
3725 	return 0;
3726 err:
3727 	be_close(adapter->netdev);
3728 	return -EIO;
3729 }
3730 
3731 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3732 {
3733 	u32 addr;
3734 
3735 	addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3736 
3737 	mac[5] = (u8)(addr & 0xFF);
3738 	mac[4] = (u8)((addr >> 8) & 0xFF);
3739 	mac[3] = (u8)((addr >> 16) & 0xFF);
3740 	/* Use the OUI from the current MAC address */
3741 	memcpy(mac, adapter->netdev->dev_addr, 3);
3742 }
3743 
3744 /*
3745  * Generate a seed MAC address from the PF MAC Address using jhash.
3746  * MAC addresses for VFs are assigned incrementally starting from the seed.
3747  * These addresses are programmed in the ASIC by the PF and the VF driver
3748  * queries for the MAC address during its probe.
3749  */
3750 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3751 {
3752 	u32 vf;
3753 	int status = 0;
3754 	u8 mac[ETH_ALEN];
3755 	struct be_vf_cfg *vf_cfg;
3756 
3757 	be_vf_eth_addr_generate(adapter, mac);
3758 
3759 	for_all_vfs(adapter, vf_cfg, vf) {
3760 		if (BEx_chip(adapter))
3761 			status = be_cmd_pmac_add(adapter, mac,
3762 						 vf_cfg->if_handle,
3763 						 &vf_cfg->pmac_id, vf + 1);
3764 		else
3765 			status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3766 						vf + 1);
3767 
3768 		if (status)
3769 			dev_err(&adapter->pdev->dev,
3770 				"Mac address assignment failed for VF %d\n",
3771 				vf);
3772 		else
3773 			memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3774 
3775 		mac[5] += 1;
3776 	}
3777 	return status;
3778 }
3779 
3780 static int be_vfs_mac_query(struct be_adapter *adapter)
3781 {
3782 	int status, vf;
3783 	u8 mac[ETH_ALEN];
3784 	struct be_vf_cfg *vf_cfg;
3785 
3786 	for_all_vfs(adapter, vf_cfg, vf) {
3787 		status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3788 					       mac, vf_cfg->if_handle,
3789 					       false, vf+1);
3790 		if (status)
3791 			return status;
3792 		memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3793 	}
3794 	return 0;
3795 }
3796 
3797 static void be_vf_clear(struct be_adapter *adapter)
3798 {
3799 	struct be_vf_cfg *vf_cfg;
3800 	u32 vf;
3801 
3802 	if (pci_vfs_assigned(adapter->pdev)) {
3803 		dev_warn(&adapter->pdev->dev,
3804 			 "VFs are assigned to VMs: not disabling VFs\n");
3805 		goto done;
3806 	}
3807 
3808 	pci_disable_sriov(adapter->pdev);
3809 
3810 	for_all_vfs(adapter, vf_cfg, vf) {
3811 		if (BEx_chip(adapter))
3812 			be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3813 					vf_cfg->pmac_id, vf + 1);
3814 		else
3815 			be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3816 				       vf + 1);
3817 
3818 		be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3819 	}
3820 
3821 	if (BE3_chip(adapter))
3822 		be_cmd_set_hsw_config(adapter, 0, 0,
3823 				      adapter->if_handle,
3824 				      PORT_FWD_TYPE_PASSTHRU, 0);
3825 done:
3826 	kfree(adapter->vf_cfg);
3827 	adapter->num_vfs = 0;
3828 	adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3829 }
3830 
3831 static void be_clear_queues(struct be_adapter *adapter)
3832 {
3833 	be_mcc_queues_destroy(adapter);
3834 	be_rx_cqs_destroy(adapter);
3835 	be_tx_queues_destroy(adapter);
3836 	be_evt_queues_destroy(adapter);
3837 }
3838 
3839 static void be_cancel_worker(struct be_adapter *adapter)
3840 {
3841 	if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3842 		cancel_delayed_work_sync(&adapter->work);
3843 		adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3844 	}
3845 }
3846 
3847 static void be_cancel_err_detection(struct be_adapter *adapter)
3848 {
3849 	struct be_error_recovery *err_rec = &adapter->error_recovery;
3850 
3851 	if (!be_err_recovery_workq)
3852 		return;
3853 
3854 	if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3855 		cancel_delayed_work_sync(&err_rec->err_detection_work);
3856 		adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3857 	}
3858 }
3859 
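/* Convert the IFACE to tunnel mode, program the VxLAN UDP dport in FW and
 * advertise the tunnel offload features on the netdev.
 */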
3860 static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3861 {
3862 	struct net_device *netdev = adapter->netdev;
3863 	struct device *dev = &adapter->pdev->dev;
3864 	struct be_vxlan_port *vxlan_port;
3865 	__be16 port;
3866 	int status;
3867 
3868 	vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3869 				      struct be_vxlan_port, list);
3870 	port = vxlan_port->port;
3871 
3872 	status = be_cmd_manage_iface(adapter, adapter->if_handle,
3873 				     OP_CONVERT_NORMAL_TO_TUNNEL);
3874 	if (status) {
3875 		dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3876 		return status;
3877 	}
3878 	adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3879 
3880 	status = be_cmd_set_vxlan_port(adapter, port);
3881 	if (status) {
3882 		dev_warn(dev, "Failed to add VxLAN port\n");
3883 		return status;
3884 	}
3885 	adapter->vxlan_port = port;
3886 
3887 	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3888 				   NETIF_F_TSO | NETIF_F_TSO6 |
3889 				   NETIF_F_GSO_UDP_TUNNEL;
3890 	netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
3891 	netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
3892 
3893 	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
3894 		 be16_to_cpu(port));
3895 	return 0;
3896 }
3897 
3898 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3899 {
3900 	struct net_device *netdev = adapter->netdev;
3901 
3902 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3903 		be_cmd_manage_iface(adapter, adapter->if_handle,
3904 				    OP_CONVERT_TUNNEL_TO_NORMAL);
3905 
3906 	if (adapter->vxlan_port)
3907 		be_cmd_set_vxlan_port(adapter, 0);
3908 
3909 	adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3910 	adapter->vxlan_port = 0;
3911 
3912 	netdev->hw_enc_features = 0;
3913 	netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3914 	netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3915 }
3916 
3917 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3918 				struct be_resources *vft_res)
3919 {
3920 	struct be_resources res = adapter->pool_res;
3921 	u32 vf_if_cap_flags = res.vf_if_cap_flags;
3922 	struct be_resources res_mod = {0};
3923 	u16 num_vf_qs = 1;
3924 
3925 	/* Distribute the queue resources among the PF and its VFs */
3926 	if (num_vfs) {
3927 		/* Divide the rx queues evenly among the VFs and the PF, capped
3928 		 * at VF-EQ-count. Any remainder queues belong to the PF.
3929 		 */
3930 		num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3931 				res.max_rss_qs / (num_vfs + 1));
3932 
3933 		/* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3934 		 * RSS Tables per port. Provide RSS on VFs only if the number of
3935 		 * VFs requested is less than its PF Pool's RSS Tables limit.
3936 		 */
3937 		if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
3938 			num_vf_qs = 1;
3939 	}
3940 
3941 	/* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
3942 	 * which are modifiable using SET_PROFILE_CONFIG cmd.
3943 	 */
3944 	be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
3945 				  RESOURCE_MODIFIABLE, 0);
3946 
3947 	/* If RSS IFACE capability flags are modifiable for a VF, set the
3948 	 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
3949 	 * more than 1 RSSQ is available for a VF.
3950 	 * Otherwise, provision only 1 queue pair for VF.
3951 	 */
3952 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
3953 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3954 		if (num_vf_qs > 1) {
3955 			vf_if_cap_flags |= BE_IF_FLAGS_RSS;
3956 			if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
3957 				vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
3958 		} else {
3959 			vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
3960 					     BE_IF_FLAGS_DEFQ_RSS);
3961 		}
3962 	} else {
3963 		num_vf_qs = 1;
3964 	}
3965 
3966 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
3967 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3968 		vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3969 	}
3970 
3971 	vft_res->vf_if_cap_flags = vf_if_cap_flags;
3972 	vft_res->max_rx_qs = num_vf_qs;
3973 	vft_res->max_rss_qs = num_vf_qs;
3974 	vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
3975 	vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
3976 
3977 	/* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
3978 	 * among the PF and its VFs, if the fields are changeable
3979 	 */
3980 	if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
3981 		vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
3982 
3983 	if (res_mod.max_vlans == FIELD_MODIFIABLE)
3984 		vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
3985 
3986 	if (res_mod.max_iface_count == FIELD_MODIFIABLE)
3987 		vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
3988 
3989 	if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
3990 		vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
3991 }
3992 
3993 static void be_if_destroy(struct be_adapter *adapter)
3994 {
3995 	be_cmd_if_destroy(adapter, adapter->if_handle,  0);
3996 
3997 	kfree(adapter->pmac_id);
3998 	adapter->pmac_id = NULL;
3999 
4000 	kfree(adapter->mc_list);
4001 	adapter->mc_list = NULL;
4002 
4003 	kfree(adapter->uc_list);
4004 	adapter->uc_list = NULL;
4005 }
4006 
4007 static int be_clear(struct be_adapter *adapter)
4008 {
4009 	struct pci_dev *pdev = adapter->pdev;
4010 	struct  be_resources vft_res = {0};
4011 
4012 	be_cancel_worker(adapter);
4013 
4014 	flush_workqueue(be_wq);
4015 
4016 	if (sriov_enabled(adapter))
4017 		be_vf_clear(adapter);
4018 
4019 	/* Re-configure FW to distribute resources evenly across max-supported
4020 	 * number of VFs, only when VFs are not already enabled.
4021 	 */
4022 	if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4023 	    !pci_vfs_assigned(pdev)) {
4024 		be_calculate_vf_res(adapter,
4025 				    pci_sriov_get_totalvfs(pdev),
4026 				    &vft_res);
4027 		be_cmd_set_sriov_config(adapter, adapter->pool_res,
4028 					pci_sriov_get_totalvfs(pdev),
4029 					&vft_res);
4030 	}
4031 
4032 	be_disable_vxlan_offloads(adapter);
4033 
4034 	be_if_destroy(adapter);
4035 
4036 	be_clear_queues(adapter);
4037 
4038 	be_msix_disable(adapter);
4039 	adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4040 	return 0;
4041 }
4042 
4043 static int be_vfs_if_create(struct be_adapter *adapter)
4044 {
4045 	struct be_resources res = {0};
4046 	u32 cap_flags, en_flags, vf;
4047 	struct be_vf_cfg *vf_cfg;
4048 	int status;
4049 
4050 	/* If a FW profile exists, then cap_flags are updated */
4051 	cap_flags = BE_VF_IF_EN_FLAGS;
4052 
4053 	for_all_vfs(adapter, vf_cfg, vf) {
4054 		if (!BE3_chip(adapter)) {
4055 			status = be_cmd_get_profile_config(adapter, &res, NULL,
4056 							   ACTIVE_PROFILE_TYPE,
4057 							   RESOURCE_LIMITS,
4058 							   vf + 1);
4059 			if (!status) {
4060 				cap_flags = res.if_cap_flags;
4061 				/* Prevent VFs from enabling VLAN promiscuous
4062 				 * mode
4063 				 */
4064 				cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4065 			}
4066 		}
4067 
4068 		/* PF should enable IF flags during proxy if_create call */
4069 		en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4070 		status = be_cmd_if_create(adapter, cap_flags, en_flags,
4071 					  &vf_cfg->if_handle, vf + 1);
4072 		if (status)
4073 			return status;
4074 	}
4075 
4076 	return 0;
4077 }
4078 
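/* Allocate the per-VF config state; if_handle/pmac_id are marked invalid
 * until the VF interfaces are actually created.
 */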
4079 static int be_vf_setup_init(struct be_adapter *adapter)
4080 {
4081 	struct be_vf_cfg *vf_cfg;
4082 	int vf;
4083 
4084 	adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4085 				  GFP_KERNEL);
4086 	if (!adapter->vf_cfg)
4087 		return -ENOMEM;
4088 
4089 	for_all_vfs(adapter, vf_cfg, vf) {
4090 		vf_cfg->if_handle = -1;
4091 		vf_cfg->pmac_id = -1;
4092 	}
4093 	return 0;
4094 }
4095 
4096 static int be_vf_setup(struct be_adapter *adapter)
4097 {
4098 	struct device *dev = &adapter->pdev->dev;
4099 	struct be_vf_cfg *vf_cfg;
4100 	int status, old_vfs, vf;
4101 	bool spoofchk;
4102 
4103 	old_vfs = pci_num_vf(adapter->pdev);
4104 
4105 	status = be_vf_setup_init(adapter);
4106 	if (status)
4107 		goto err;
4108 
4109 	if (old_vfs) {
4110 		for_all_vfs(adapter, vf_cfg, vf) {
4111 			status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4112 			if (status)
4113 				goto err;
4114 		}
4115 
4116 		status = be_vfs_mac_query(adapter);
4117 		if (status)
4118 			goto err;
4119 	} else {
4120 		status = be_vfs_if_create(adapter);
4121 		if (status)
4122 			goto err;
4123 
4124 		status = be_vf_eth_addr_config(adapter);
4125 		if (status)
4126 			goto err;
4127 	}
4128 
4129 	for_all_vfs(adapter, vf_cfg, vf) {
4130 		/* Allow VFs to program MAC/VLAN filters */
4131 		status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4132 						  vf + 1);
4133 		if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4134 			status = be_cmd_set_fn_privileges(adapter,
4135 							  vf_cfg->privileges |
4136 							  BE_PRIV_FILTMGMT,
4137 							  vf + 1);
4138 			if (!status) {
4139 				vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4140 				dev_info(dev, "VF%d has FILTMGMT privilege\n",
4141 					 vf);
4142 			}
4143 		}
4144 
4145 		/* Allow full available bandwidth */
4146 		if (!old_vfs)
4147 			be_cmd_config_qos(adapter, 0, 0, vf + 1);
4148 
4149 		status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4150 					       vf_cfg->if_handle, NULL,
4151 					       &spoofchk);
4152 		if (!status)
4153 			vf_cfg->spoofchk = spoofchk;
4154 
4155 		if (!old_vfs) {
4156 			be_cmd_enable_vf(adapter, vf + 1);
4157 			be_cmd_set_logical_link_config(adapter,
4158 						       IFLA_VF_LINK_STATE_AUTO,
4159 						       vf+1);
4160 		}
4161 	}
4162 
4163 	if (!old_vfs) {
4164 		status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4165 		if (status) {
4166 			dev_err(dev, "SRIOV enable failed\n");
4167 			adapter->num_vfs = 0;
4168 			goto err;
4169 		}
4170 	}
4171 
4172 	if (BE3_chip(adapter)) {
4173 		/* On BE3, enable VEB only when SRIOV is enabled */
4174 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4175 					       adapter->if_handle,
4176 					       PORT_FWD_TYPE_VEB, 0);
4177 		if (status)
4178 			goto err;
4179 	}
4180 
4181 	adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4182 	return 0;
4183 err:
4184 	dev_err(dev, "VF setup failed\n");
4185 	be_vf_clear(adapter);
4186 	return status;
4187 }
4188 
4189 /* Converting function_mode bits on BE3 to SH mc_type enums */
4190 
4191 static u8 be_convert_mc_type(u32 function_mode)
4192 {
4193 	if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4194 		return vNIC1;
4195 	else if (function_mode & QNQ_MODE)
4196 		return FLEX10;
4197 	else if (function_mode & VNIC_MODE)
4198 		return vNIC2;
4199 	else if (function_mode & UMC_ENABLED)
4200 		return UMC;
4201 	else
4202 		return MC_NONE;
4203 }
4204 
4205 /* On BE2/BE3 FW does not suggest the supported limits */
4206 static void BEx_get_resources(struct be_adapter *adapter,
4207 			      struct be_resources *res)
4208 {
4209 	bool use_sriov = adapter->num_vfs ? 1 : 0;
4210 
4211 	if (be_physfn(adapter))
4212 		res->max_uc_mac = BE_UC_PMAC_COUNT;
4213 	else
4214 		res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4215 
4216 	adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4217 
4218 	if (be_is_mc(adapter)) {
4219 		/* Assuming that there are 4 channels per port,
4220 		 * when multi-channel is enabled
4221 		 */
4222 		if (be_is_qnq_mode(adapter))
4223 			res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4224 		else
4225 			/* In a non-qnq multichannel mode, the pvid
4226 			 * takes up one vlan entry
4227 			 */
4228 			res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4229 	} else {
4230 		res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4231 	}
4232 
4233 	res->max_mcast_mac = BE_MAX_MC;
4234 
4235 	/* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4236 	 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4237 	 *    *only* if it is RSS-capable.
4238 	 */
4239 	if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4240 	    be_virtfn(adapter) ||
4241 	    (be_is_mc(adapter) &&
4242 	     !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4243 		res->max_tx_qs = 1;
4244 	} else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4245 		struct be_resources super_nic_res = {0};
4246 
4247 		/* On a SuperNIC profile, the driver needs to use the
4248 		 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4249 		 */
4250 		be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4251 					  ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4252 					  0);
4253 		/* Some old versions of BE3 FW don't report max_tx_qs value */
4254 		res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4255 	} else {
4256 		res->max_tx_qs = BE3_MAX_TX_QS;
4257 	}
4258 
4259 	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4260 	    !use_sriov && be_physfn(adapter))
4261 		res->max_rss_qs = (adapter->be3_native) ?
4262 					   BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4263 	res->max_rx_qs = res->max_rss_qs + 1;
4264 
4265 	if (be_physfn(adapter))
4266 		res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4267 					BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4268 	else
4269 		res->max_evt_qs = 1;
4270 
4271 	res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4272 	res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4273 	if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4274 		res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4275 }
4276 
4277 static void be_setup_init(struct be_adapter *adapter)
4278 {
4279 	adapter->vlan_prio_bmap = 0xff;
4280 	adapter->phy.link_speed = -1;
4281 	adapter->if_handle = -1;
4282 	adapter->be3_native = false;
4283 	adapter->if_flags = 0;
4284 	adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4285 	if (be_physfn(adapter))
4286 		adapter->cmd_privileges = MAX_PRIVILEGES;
4287 	else
4288 		adapter->cmd_privileges = MIN_PRIVILEGES;
4289 }
4290 
4291 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4292  * However, this HW limitation is not exposed to the host via any SLI cmd.
4293  * As a result, in the case of SRIOV and in particular multi-partition configs
4294  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4295  * for distribution among the VFs. This self-imposed limit will determine the
4296  * number of VFs for which RSS can be enabled.
4297  */
4298 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4299 {
4300 	struct be_port_resources port_res = {0};
4301 	u8 rss_tables_on_port;
4302 	u16 max_vfs = be_max_vfs(adapter);
4303 
4304 	be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4305 				  RESOURCE_LIMITS, 0);
4306 
4307 	rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4308 
4309 	/* Each PF Pool's RSS Tables limit =
4310 	 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4311 	 */
4312 	adapter->pool_res.max_rss_tables =
4313 		max_vfs * rss_tables_on_port / port_res.max_vfs;
4314 }
4315 
4316 static int be_get_sriov_config(struct be_adapter *adapter)
4317 {
4318 	struct be_resources res = {0};
4319 	int max_vfs, old_vfs;
4320 
4321 	be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4322 				  RESOURCE_LIMITS, 0);
4323 
4324 	/* Some old versions of BE3 FW don't report max_vfs value */
4325 	if (BE3_chip(adapter) && !res.max_vfs) {
4326 		max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4327 		res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4328 	}
4329 
4330 	adapter->pool_res = res;
4331 
4332 	/* If, during a previous unload of the driver, the VFs were not disabled,
4333 	 * then we cannot rely on the PF POOL limits for the TotalVFs value.
4334 	 * Instead use the TotalVFs value stored in the pci-dev struct.
4335 	 */
4336 	old_vfs = pci_num_vf(adapter->pdev);
4337 	if (old_vfs) {
4338 		dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4339 			 old_vfs);
4340 
4341 		adapter->pool_res.max_vfs =
4342 			pci_sriov_get_totalvfs(adapter->pdev);
4343 		adapter->num_vfs = old_vfs;
4344 	}
4345 
4346 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4347 		be_calculate_pf_pool_rss_tables(adapter);
4348 		dev_info(&adapter->pdev->dev,
4349 			 "RSS can be enabled for all VFs if num_vfs <= %d\n",
4350 			 be_max_pf_pool_rss_tables(adapter));
4351 	}
4352 	return 0;
4353 }
4354 
4355 static void be_alloc_sriov_res(struct be_adapter *adapter)
4356 {
4357 	int old_vfs = pci_num_vf(adapter->pdev);
4358 	struct  be_resources vft_res = {0};
4359 	int status;
4360 
4361 	be_get_sriov_config(adapter);
4362 
4363 	if (!old_vfs)
4364 		pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4365 
4366 	/* When the HW is in an SRIOV-capable configuration, the PF-pool
4367 	 * resources are given to the PF during driver load, if there are no
4368 	 * old VFs. This facility is not available in BE3 FW.
4369 	 * Also, this is done by FW in Lancer chip.
4370 	 */
4371 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4372 		be_calculate_vf_res(adapter, 0, &vft_res);
4373 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4374 						 &vft_res);
4375 		if (status)
4376 			dev_err(&adapter->pdev->dev,
4377 				"Failed to optimize SRIOV resources\n");
4378 	}
4379 }
4380 
4381 static int be_get_resources(struct be_adapter *adapter)
4382 {
4383 	struct device *dev = &adapter->pdev->dev;
4384 	struct be_resources res = {0};
4385 	int status;
4386 
4387 	/* For Lancer, SH etc. read per-function resource limits from FW.
4388 	 * GET_FUNC_CONFIG returns per-function guaranteed limits.
4389 	 * GET_PROFILE_CONFIG returns PCI-E related and PF-pool limits.
4390 	 */
4391 	if (BEx_chip(adapter)) {
4392 		BEx_get_resources(adapter, &res);
4393 	} else {
4394 		status = be_cmd_get_func_config(adapter, &res);
4395 		if (status)
4396 			return status;
4397 
4398 		/* If a default RXQ must be created, we'll use up one RSSQ */
4399 		if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4400 		    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4401 			res.max_rss_qs -= 1;
4402 	}
4403 
4404 	/* If RoCE is supported, stash away half the EQs for RoCE */
4405 	res.max_nic_evt_qs = be_roce_supported(adapter) ?
4406 				res.max_evt_qs / 2 : res.max_evt_qs;
4407 	adapter->res = res;
4408 
4409 	/* If FW supports RSS default queue, then skip creating non-RSS
4410 	 * queue for non-IP traffic.
4411 	 */
4412 	adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4413 				 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4414 
4415 	dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4416 		 be_max_txqs(adapter), be_max_rxqs(adapter),
4417 		 be_max_rss(adapter), be_max_nic_eqs(adapter),
4418 		 be_max_vfs(adapter));
4419 	dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4420 		 be_max_uc(adapter), be_max_mc(adapter),
4421 		 be_max_vlans(adapter));
4422 
4423 	/* Ensure RX and TX queues are created in pairs at init time */
4424 	adapter->cfg_num_rx_irqs =
4425 				min_t(u16, netif_get_num_default_rss_queues(),
4426 				      be_max_qp_irqs(adapter));
4427 	adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4428 	return 0;
4429 }
4430 
4431 static int be_get_config(struct be_adapter *adapter)
4432 {
4433 	int status, level;
4434 	u16 profile_id;
4435 
4436 	status = be_cmd_get_cntl_attributes(adapter);
4437 	if (status)
4438 		return status;
4439 
4440 	status = be_cmd_query_fw_cfg(adapter);
4441 	if (status)
4442 		return status;
4443 
4444 	if (!lancer_chip(adapter) && be_physfn(adapter))
4445 		be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4446 
4447 	if (BEx_chip(adapter)) {
4448 		level = be_cmd_get_fw_log_level(adapter);
4449 		adapter->msg_enable =
4450 			level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4451 	}
4452 
4453 	be_cmd_get_acpi_wol_cap(adapter);
4454 	pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4455 	pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4456 
4457 	be_cmd_query_port_name(adapter);
4458 
4459 	if (be_physfn(adapter)) {
4460 		status = be_cmd_get_active_profile(adapter, &profile_id);
4461 		if (!status)
4462 			dev_info(&adapter->pdev->dev,
4463 				 "Using profile 0x%x\n", profile_id);
4464 	}
4465 
4466 	return 0;
4467 }
4468 
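/* If the netdev doesn't have a MAC address yet, use the permanent MAC
 * address queried from FW.
 */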
4469 static int be_mac_setup(struct be_adapter *adapter)
4470 {
4471 	u8 mac[ETH_ALEN];
4472 	int status;
4473 
4474 	if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4475 		status = be_cmd_get_perm_mac(adapter, mac);
4476 		if (status)
4477 			return status;
4478 
4479 		memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4480 		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4481 
4482 		/* Initial MAC for BE3 VFs is already programmed by PF */
4483 		if (BEx_chip(adapter) && be_virtfn(adapter))
4484 			memcpy(adapter->dev_mac, mac, ETH_ALEN);
4485 	}
4486 
4487 	return 0;
4488 }
4489 
4490 static void be_schedule_worker(struct be_adapter *adapter)
4491 {
4492 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4493 	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4494 }
4495 
4496 static void be_destroy_err_recovery_workq(void)
4497 {
4498 	if (!be_err_recovery_workq)
4499 		return;
4500 
4501 	flush_workqueue(be_err_recovery_workq);
4502 	destroy_workqueue(be_err_recovery_workq);
4503 	be_err_recovery_workq = NULL;
4504 }
4505 
4506 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4507 {
4508 	struct be_error_recovery *err_rec = &adapter->error_recovery;
4509 
4510 	if (!be_err_recovery_workq)
4511 		return;
4512 
4513 	queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4514 			   msecs_to_jiffies(delay));
4515 	adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4516 }
4517 
4518 static int be_setup_queues(struct be_adapter *adapter)
4519 {
4520 	struct net_device *netdev = adapter->netdev;
4521 	int status;
4522 
4523 	status = be_evt_queues_create(adapter);
4524 	if (status)
4525 		goto err;
4526 
4527 	status = be_tx_qs_create(adapter);
4528 	if (status)
4529 		goto err;
4530 
4531 	status = be_rx_cqs_create(adapter);
4532 	if (status)
4533 		goto err;
4534 
4535 	status = be_mcc_queues_create(adapter);
4536 	if (status)
4537 		goto err;
4538 
4539 	status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4540 	if (status)
4541 		goto err;
4542 
4543 	status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4544 	if (status)
4545 		goto err;
4546 
4547 	return 0;
4548 err:
4549 	dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4550 	return status;
4551 }
4552 
4553 static int be_if_create(struct be_adapter *adapter)
4554 {
4555 	u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4556 	u32 cap_flags = be_if_cap_flags(adapter);
4557 	int status;
4558 
4559 	/* alloc required memory for other filtering fields */
4560 	adapter->pmac_id = kcalloc(be_max_uc(adapter),
4561 				   sizeof(*adapter->pmac_id), GFP_KERNEL);
4562 	if (!adapter->pmac_id)
4563 		return -ENOMEM;
4564 
4565 	adapter->mc_list = kcalloc(be_max_mc(adapter),
4566 				   sizeof(*adapter->mc_list), GFP_KERNEL);
4567 	if (!adapter->mc_list)
4568 		return -ENOMEM;
4569 
4570 	adapter->uc_list = kcalloc(be_max_uc(adapter),
4571 				   sizeof(*adapter->uc_list), GFP_KERNEL);
4572 	if (!adapter->uc_list)
4573 		return -ENOMEM;
4574 
4575 	if (adapter->cfg_num_rx_irqs == 1)
4576 		cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4577 
4578 	en_flags &= cap_flags;
4579 	/* will enable all the needed filter flags in be_open() */
4580 	status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4581 				  &adapter->if_handle, 0);
4582 
4583 	if (status)
4584 		return status;
4585 
4586 	return 0;
4587 }
4588 
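/* Destroy and re-create the IFACE, EQs and all queues, re-enabling MSIx
 * if needed, and reopen the netdev if it was running.
 */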
4589 int be_update_queues(struct be_adapter *adapter)
4590 {
4591 	struct net_device *netdev = adapter->netdev;
4592 	int status;
4593 
4594 	if (netif_running(netdev))
4595 		be_close(netdev);
4596 
4597 	be_cancel_worker(adapter);
4598 
4599 	/* If any vectors have been shared with RoCE we cannot re-program
4600 	 * the MSIx table.
4601 	 */
4602 	if (!adapter->num_msix_roce_vec)
4603 		be_msix_disable(adapter);
4604 
4605 	be_clear_queues(adapter);
4606 	status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4607 	if (status)
4608 		return status;
4609 
4610 	if (!msix_enabled(adapter)) {
4611 		status = be_msix_enable(adapter);
4612 		if (status)
4613 			return status;
4614 	}
4615 
4616 	status = be_if_create(adapter);
4617 	if (status)
4618 		return status;
4619 
4620 	status = be_setup_queues(adapter);
4621 	if (status)
4622 		return status;
4623 
4624 	be_schedule_worker(adapter);
4625 
4626 	if (netif_running(netdev))
4627 		status = be_open(netdev);
4628 
4629 	return status;
4630 }
4631 
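/* Return the leading major number of a FW version string (e.g. "4.x.y.z"),
 * or 0 if it cannot be parsed.
 */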
4632 static inline int fw_major_num(const char *fw_ver)
4633 {
4634 	int fw_major = 0, i;
4635 
4636 	i = sscanf(fw_ver, "%d.", &fw_major);
4637 	if (i != 1)
4638 		return 0;
4639 
4640 	return fw_major;
4641 }
4642 
4643 /* If it is error recovery, FLR the PF
4644  * Else, if any VFs are already enabled, don't FLR the PF
4645  */
4646 static bool be_reset_required(struct be_adapter *adapter)
4647 {
4648 	if (be_error_recovering(adapter))
4649 		return true;
4650 	else
4651 		return pci_num_vf(adapter->pdev) == 0;
4652 }
4653 
4654 /* Wait for the FW to be ready and perform the required initialization */
4655 static int be_func_init(struct be_adapter *adapter)
4656 {
4657 	int status;
4658 
4659 	status = be_fw_wait_ready(adapter);
4660 	if (status)
4661 		return status;
4662 
4663 	/* FW is now ready; clear errors to allow cmds/doorbell */
4664 	be_clear_error(adapter, BE_CLEAR_ALL);
4665 
4666 	if (be_reset_required(adapter)) {
4667 		status = be_cmd_reset_function(adapter);
4668 		if (status)
4669 			return status;
4670 
4671 		/* Wait for interrupts to quiesce after an FLR */
4672 		msleep(100);
4673 	}
4674 
4675 	/* Tell FW we're ready to fire cmds */
4676 	status = be_cmd_fw_init(adapter);
4677 	if (status)
4678 		return status;
4679 
4680 	/* Allow interrupts for other ULPs running on NIC function */
4681 	be_intr_set(adapter, true);
4682 
4683 	return 0;
4684 }
4685 
4686 static int be_setup(struct be_adapter *adapter)
4687 {
4688 	struct device *dev = &adapter->pdev->dev;
4689 	int status;
4690 
4691 	status = be_func_init(adapter);
4692 	if (status)
4693 		return status;
4694 
4695 	be_setup_init(adapter);
4696 
4697 	if (!lancer_chip(adapter))
4698 		be_cmd_req_native_mode(adapter);
4699 
4700 	/* invoke this cmd first to get pf_num and vf_num which are needed
4701 	 * for issuing profile related cmds
4702 	 */
4703 	if (!BEx_chip(adapter)) {
4704 		status = be_cmd_get_func_config(adapter, NULL);
4705 		if (status)
4706 			return status;
4707 	}
4708 
4709 	status = be_get_config(adapter);
4710 	if (status)
4711 		goto err;
4712 
4713 	if (!BE2_chip(adapter) && be_physfn(adapter))
4714 		be_alloc_sriov_res(adapter);
4715 
4716 	status = be_get_resources(adapter);
4717 	if (status)
4718 		goto err;
4719 
4720 	status = be_msix_enable(adapter);
4721 	if (status)
4722 		goto err;
4723 
4724 	/* will enable all the needed filter flags in be_open() */
4725 	status = be_if_create(adapter);
4726 	if (status)
4727 		goto err;
4728 
4729 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4730 	rtnl_lock();
4731 	status = be_setup_queues(adapter);
4732 	rtnl_unlock();
4733 	if (status)
4734 		goto err;
4735 
4736 	be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4737 
4738 	status = be_mac_setup(adapter);
4739 	if (status)
4740 		goto err;
4741 
4742 	be_cmd_get_fw_ver(adapter);
4743 	dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4744 
4745 	if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4746 		dev_err(dev, "Firmware on card is old (%s), IRQs may not work",
4747 			adapter->fw_ver);
4748 		dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4749 	}
4750 
4751 	status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4752 					 adapter->rx_fc);
4753 	if (status)
4754 		be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4755 					&adapter->rx_fc);
4756 
4757 	dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4758 		 adapter->tx_fc, adapter->rx_fc);
4759 
4760 	if (be_physfn(adapter))
4761 		be_cmd_set_logical_link_config(adapter,
4762 					       IFLA_VF_LINK_STATE_AUTO, 0);
4763 
4764 	/* BE3 EVB echoes broadcast/multicast packets back to PF's vport
4765 	 * confusing any Linux bridge or OVS it might be connected to.
4766 	 * Set the EVB to PASSTHRU mode, which effectively disables the EVB
4767 	 * when SRIOV is not enabled.
4768 	 */
4769 	if (BE3_chip(adapter))
4770 		be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4771 				      PORT_FWD_TYPE_PASSTHRU, 0);
4772 
4773 	if (adapter->num_vfs)
4774 		be_vf_setup(adapter);
4775 
4776 	status = be_cmd_get_phy_info(adapter);
4777 	if (!status && be_pause_supported(adapter))
4778 		adapter->phy.fc_autoneg = 1;
4779 
4780 	if (be_physfn(adapter) && !lancer_chip(adapter))
4781 		be_cmd_set_features(adapter);
4782 
4783 	be_schedule_worker(adapter);
4784 	adapter->flags |= BE_FLAGS_SETUP_DONE;
4785 	return 0;
4786 err:
4787 	be_clear(adapter);
4788 	return status;
4789 }
4790 
4791 #ifdef CONFIG_NET_POLL_CONTROLLER
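/* Kick NAPI on every EQ so completions are processed without relying on
 * interrupts (netpoll path).
 */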
4792 static void be_netpoll(struct net_device *netdev)
4793 {
4794 	struct be_adapter *adapter = netdev_priv(netdev);
4795 	struct be_eq_obj *eqo;
4796 	int i;
4797 
4798 	for_all_evt_queues(adapter, eqo, i) {
4799 		be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4800 		napi_schedule(&eqo->napi);
4801 	}
4802 }
4803 #endif
4804 
4805 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4806 {
4807 	const struct firmware *fw;
4808 	int status;
4809 
4810 	if (!netif_running(adapter->netdev)) {
4811 		dev_err(&adapter->pdev->dev,
4812 			"Firmware load not allowed (interface is down)\n");
4813 		return -ENETDOWN;
4814 	}
4815 
4816 	status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4817 	if (status)
4818 		goto fw_exit;
4819 
4820 	dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4821 
4822 	if (lancer_chip(adapter))
4823 		status = lancer_fw_download(adapter, fw);
4824 	else
4825 		status = be_fw_download(adapter, fw);
4826 
4827 	if (!status)
4828 		be_cmd_get_fw_ver(adapter);
4829 
4830 fw_exit:
4831 	release_firmware(fw);
4832 	return status;
4833 }
4834 
4835 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4836 				 u16 flags)
4837 {
4838 	struct be_adapter *adapter = netdev_priv(dev);
4839 	struct nlattr *attr, *br_spec;
4840 	int rem;
4841 	int status = 0;
4842 	u16 mode = 0;
4843 
4844 	if (!sriov_enabled(adapter))
4845 		return -EOPNOTSUPP;
4846 
4847 	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4848 	if (!br_spec)
4849 		return -EINVAL;
4850 
4851 	nla_for_each_nested(attr, br_spec, rem) {
4852 		if (nla_type(attr) != IFLA_BRIDGE_MODE)
4853 			continue;
4854 
4855 		if (nla_len(attr) < sizeof(mode))
4856 			return -EINVAL;
4857 
4858 		mode = nla_get_u16(attr);
4859 		if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4860 			return -EOPNOTSUPP;
4861 
4862 		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4863 			return -EINVAL;
4864 
4865 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4866 					       adapter->if_handle,
4867 					       mode == BRIDGE_MODE_VEPA ?
4868 					       PORT_FWD_TYPE_VEPA :
4869 					       PORT_FWD_TYPE_VEB, 0);
4870 		if (status)
4871 			goto err;
4872 
4873 		dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4874 			 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4875 
4876 		return status;
4877 	}
4878 err:
4879 	dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4880 		mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4881 
4882 	return status;
4883 }
4884 
4885 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4886 				 struct net_device *dev, u32 filter_mask,
4887 				 int nlflags)
4888 {
4889 	struct be_adapter *adapter = netdev_priv(dev);
4890 	int status = 0;
4891 	u8 hsw_mode;
4892 
4893 	/* BE and Lancer chips support VEB mode only */
4894 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
4895 		/* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4896 		if (!pci_sriov_get_totalvfs(adapter->pdev))
4897 			return 0;
4898 		hsw_mode = PORT_FWD_TYPE_VEB;
4899 	} else {
4900 		status = be_cmd_get_hsw_config(adapter, NULL, 0,
4901 					       adapter->if_handle, &hsw_mode,
4902 					       NULL);
4903 		if (status)
4904 			return 0;
4905 
4906 		if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4907 			return 0;
4908 	}
4909 
4910 	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4911 				       hsw_mode == PORT_FWD_TYPE_VEPA ?
4912 				       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4913 				       0, 0, nlflags, filter_mask, NULL);
4914 }
4915 
4916 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4917 					 void (*func)(struct work_struct *))
4918 {
4919 	struct be_cmd_work *work;
4920 
4921 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
4922 	if (!work) {
4923 		dev_err(&adapter->pdev->dev,
4924 			"be_work memory allocation failed\n");
4925 		return NULL;
4926 	}
4927 
4928 	INIT_WORK(&work->work, func);
4929 	work->adapter = adapter;
4930 	return work;
4931 }
4932 
4933 /* VxLAN offload Notes:
4934  *
4935  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4936  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4937  * is expected to work across all types of IP tunnels once exported. Skyhawk
4938  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4939  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
4940  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
4941  * those other tunnels are unexported on the fly through ndo_features_check().
4942  *
4943  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
4944  * adds more than one port, disable offloads and re-enable them again when
4945  * there's only one port left. We maintain a list of ports for this purpose.
4946  */
4947 static void be_work_add_vxlan_port(struct work_struct *work)
4948 {
4949 	struct be_cmd_work *cmd_work =
4950 				container_of(work, struct be_cmd_work, work);
4951 	struct be_adapter *adapter = cmd_work->adapter;
4952 	struct device *dev = &adapter->pdev->dev;
4953 	__be16 port = cmd_work->info.vxlan_port;
4954 	struct be_vxlan_port *vxlan_port;
4955 	int status;
4956 
4957 	/* Bump up the alias count if it is an existing port */
4958 	list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
4959 		if (vxlan_port->port == port) {
4960 			vxlan_port->port_aliases++;
4961 			goto done;
4962 		}
4963 	}
4964 
4965 	/* Add a new port to our list. We don't need a lock here since port
4966 	 * add/delete are done only in the context of a single-threaded work
4967 	 * queue (be_wq).
4968 	 */
4969 	vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
4970 	if (!vxlan_port)
4971 		goto done;
4972 
4973 	vxlan_port->port = port;
4974 	INIT_LIST_HEAD(&vxlan_port->list);
4975 	list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
4976 	adapter->vxlan_port_count++;
4977 
4978 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
4979 		dev_info(dev,
4980 			 "Only one UDP port supported for VxLAN offloads\n");
4981 		dev_info(dev, "Disabling VxLAN offloads\n");
4982 		goto err;
4983 	}
4984 
4985 	if (adapter->vxlan_port_count > 1)
4986 		goto done;
4987 
4988 	status = be_enable_vxlan_offloads(adapter);
4989 	if (!status)
4990 		goto done;
4991 
4992 err:
4993 	be_disable_vxlan_offloads(adapter);
4994 done:
4995 	kfree(cmd_work);
4996 	return;
4997 }
4998 
4999 static void be_work_del_vxlan_port(struct work_struct *work)
5000 {
5001 	struct be_cmd_work *cmd_work =
5002 				container_of(work, struct be_cmd_work, work);
5003 	struct be_adapter *adapter = cmd_work->adapter;
5004 	__be16 port = cmd_work->info.vxlan_port;
5005 	struct be_vxlan_port *vxlan_port;
5006 
5007 	/* Nothing to be done if a port alias is being deleted */
5008 	list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5009 		if (vxlan_port->port == port) {
5010 			if (vxlan_port->port_aliases) {
5011 				vxlan_port->port_aliases--;
5012 				goto done;
5013 			}
5014 			break;
5015 		}
5016 	}
5017 
5018 	/* No port aliases left; delete the port from the list */
5019 	list_del(&vxlan_port->list);
5020 	adapter->vxlan_port_count--;
5021 
5022 	/* Disable VxLAN offload if this is the offloaded port */
5023 	if (adapter->vxlan_port == vxlan_port->port) {
5024 		WARN_ON(adapter->vxlan_port_count);
5025 		be_disable_vxlan_offloads(adapter);
5026 		dev_info(&adapter->pdev->dev,
5027 			 "Disabled VxLAN offloads for UDP port %d\n",
5028 			 be16_to_cpu(port));
5029 		goto out;
5030 	}
5031 
5032 	/* If only 1 port is left, re-enable VxLAN offload */
5033 	if (adapter->vxlan_port_count == 1)
5034 		be_enable_vxlan_offloads(adapter);
5035 
5036 out:
5037 	kfree(vxlan_port);
5038 done:
5039 	kfree(cmd_work);
5040 }
5041 
5042 static void be_cfg_vxlan_port(struct net_device *netdev,
5043 			      struct udp_tunnel_info *ti,
5044 			      void (*func)(struct work_struct *))
5045 {
5046 	struct be_adapter *adapter = netdev_priv(netdev);
5047 	struct be_cmd_work *cmd_work;
5048 
5049 	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5050 		return;
5051 
5052 	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5053 		return;
5054 
5055 	cmd_work = be_alloc_work(adapter, func);
5056 	if (cmd_work) {
5057 		cmd_work->info.vxlan_port = ti->port;
5058 		queue_work(be_wq, &cmd_work->work);
5059 	}
5060 }
5061 
5062 static void be_del_vxlan_port(struct net_device *netdev,
5063 			      struct udp_tunnel_info *ti)
5064 {
5065 	be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5066 }
5067 
5068 static void be_add_vxlan_port(struct net_device *netdev,
5069 			      struct udp_tunnel_info *ti)
5070 {
5071 	be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5072 }
5073 
5074 static netdev_features_t be_features_check(struct sk_buff *skb,
5075 					   struct net_device *dev,
5076 					   netdev_features_t features)
5077 {
5078 	struct be_adapter *adapter = netdev_priv(dev);
5079 	u8 l4_hdr = 0;
5080 
5081 	/* The code below restricts offload features for some tunneled packets.
5082 	 * Offload features for normal (non tunnel) packets are unchanged.
5083 	 */
5084 	if (!skb->encapsulation ||
5085 	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5086 		return features;
5087 
5088 	/* It's an encapsulated packet and VxLAN offloads are enabled. We
5089 	 * should disable tunnel offload features if it's not a VxLAN packet,
5090 	 * as tunnel offloads have been enabled only for VxLAN. This is done to
5091 	 * allow other tunneled traffic like GRE to work fine while VxLAN
5092 	 * offloads are configured in Skyhawk-R.
5093 	 */
5094 	switch (vlan_get_protocol(skb)) {
5095 	case htons(ETH_P_IP):
5096 		l4_hdr = ip_hdr(skb)->protocol;
5097 		break;
5098 	case htons(ETH_P_IPV6):
5099 		l4_hdr = ipv6_hdr(skb)->nexthdr;
5100 		break;
5101 	default:
5102 		return features;
5103 	}
5104 
5105 	if (l4_hdr != IPPROTO_UDP ||
5106 	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5107 	    skb->inner_protocol != htons(ETH_P_TEB) ||
5108 	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5109 		sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5110 	    !adapter->vxlan_port ||
5111 	    udp_hdr(skb)->dest != adapter->vxlan_port)
5112 		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5113 
5114 	return features;
5115 }
5116 
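/* Build a physical port id from the HBA port number followed by the
 * controller serial number words in reverse order.
 */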
5117 static int be_get_phys_port_id(struct net_device *dev,
5118 			       struct netdev_phys_item_id *ppid)
5119 {
5120 	int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5121 	struct be_adapter *adapter = netdev_priv(dev);
5122 	u8 *id;
5123 
5124 	if (MAX_PHYS_ITEM_ID_LEN < id_len)
5125 		return -ENOSPC;
5126 
5127 	ppid->id[0] = adapter->hba_port_num + 1;
5128 	id = &ppid->id[1];
5129 	for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5130 	     i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5131 		memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5132 
5133 	ppid->id_len = id_len;
5134 
5135 	return 0;
5136 }
5137 
5138 static void be_set_rx_mode(struct net_device *dev)
5139 {
5140 	struct be_adapter *adapter = netdev_priv(dev);
5141 	struct be_cmd_work *work;
5142 
5143 	work = be_alloc_work(adapter, be_work_set_rx_mode);
5144 	if (work)
5145 		queue_work(be_wq, &work->work);
5146 }
5147 
5148 static const struct net_device_ops be_netdev_ops = {
5149 	.ndo_open		= be_open,
5150 	.ndo_stop		= be_close,
5151 	.ndo_start_xmit		= be_xmit,
5152 	.ndo_set_rx_mode	= be_set_rx_mode,
5153 	.ndo_set_mac_address	= be_mac_addr_set,
5154 	.ndo_get_stats64	= be_get_stats64,
5155 	.ndo_validate_addr	= eth_validate_addr,
5156 	.ndo_vlan_rx_add_vid	= be_vlan_add_vid,
5157 	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
5158 	.ndo_set_vf_mac		= be_set_vf_mac,
5159 	.ndo_set_vf_vlan	= be_set_vf_vlan,
5160 	.ndo_set_vf_rate	= be_set_vf_tx_rate,
5161 	.ndo_get_vf_config	= be_get_vf_config,
5162 	.ndo_set_vf_link_state  = be_set_vf_link_state,
5163 	.ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5164 #ifdef CONFIG_NET_POLL_CONTROLLER
5165 	.ndo_poll_controller	= be_netpoll,
5166 #endif
5167 	.ndo_bridge_setlink	= be_ndo_bridge_setlink,
5168 	.ndo_bridge_getlink	= be_ndo_bridge_getlink,
5169 	.ndo_udp_tunnel_add	= be_add_vxlan_port,
5170 	.ndo_udp_tunnel_del	= be_del_vxlan_port,
5171 	.ndo_features_check	= be_features_check,
5172 	.ndo_get_phys_port_id   = be_get_phys_port_id,
5173 };
5174 
5175 static void be_netdev_init(struct net_device *netdev)
5176 {
5177 	struct be_adapter *adapter = netdev_priv(netdev);
5178 
5179 	netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5180 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5181 		NETIF_F_HW_VLAN_CTAG_TX;
5182 	if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5183 		netdev->hw_features |= NETIF_F_RXHASH;
5184 
5185 	netdev->features |= netdev->hw_features |
5186 		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5187 
5188 	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5189 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5190 
5191 	netdev->priv_flags |= IFF_UNICAST_FLT;
5192 
5193 	netdev->flags |= IFF_MULTICAST;
5194 
5195 	netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5196 
5197 	netdev->netdev_ops = &be_netdev_ops;
5198 
5199 	netdev->ethtool_ops = &be_ethtool_ops;
5200 
5201 	/* MTU range: 256 - 9000 */
5202 	netdev->min_mtu = BE_MIN_MTU;
5203 	netdev->max_mtu = BE_MAX_MTU;
5204 }
5205 
5206 static void be_cleanup(struct be_adapter *adapter)
5207 {
5208 	struct net_device *netdev = adapter->netdev;
5209 
5210 	rtnl_lock();
5211 	netif_device_detach(netdev);
5212 	if (netif_running(netdev))
5213 		be_close(netdev);
5214 	rtnl_unlock();
5215 
5216 	be_clear(adapter);
5217 }
5218 
5219 static int be_resume(struct be_adapter *adapter)
5220 {
5221 	struct net_device *netdev = adapter->netdev;
5222 	int status;
5223 
5224 	status = be_setup(adapter);
5225 	if (status)
5226 		return status;
5227 
5228 	rtnl_lock();
5229 	if (netif_running(netdev))
5230 		status = be_open(netdev);
5231 	rtnl_unlock();
5232 
5233 	if (status)
5234 		return status;
5235 
5236 	netif_device_attach(netdev);
5237 
5238 	return 0;
5239 }
5240 
5241 static void be_soft_reset(struct be_adapter *adapter)
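/* Initiate a chip soft reset by setting the SR bit in the
 * SLIPORT_SOFTRESET register.
 */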
5242 {
5243 	u32 val;
5244 
5245 	dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5246 	val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5247 	val |= SLIPORT_SOFTRESET_SR_MASK;
5248 	iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5249 }
5250 
5251 static bool be_err_is_recoverable(struct be_adapter *adapter)
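/* Recovery is attempted only if the POST stage reports a recoverable error,
 * enough time has passed since driver load and since the last recovery, and
 * the error code differs from the previous one (i.e. not a repeated TPE).
 */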
5252 {
5253 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5254 	unsigned long initial_idle_time =
5255 		msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5256 	unsigned long recovery_interval =
5257 		msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5258 	u16 ue_err_code;
5259 	u32 val;
5260 
5261 	val = be_POST_stage_get(adapter);
5262 	if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5263 		return false;
5264 	ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5265 	if (ue_err_code == 0)
5266 		return false;
5267 
5268 	dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5269 		ue_err_code);
5270 
5271 	if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5272 		dev_err(&adapter->pdev->dev,
5273 			"Cannot recover within %lu sec from driver load\n",
5274 			jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5275 		return false;
5276 	}
5277 
5278 	if (err_rec->last_recovery_time && time_before_eq(
5279 		jiffies - err_rec->last_recovery_time, recovery_interval)) {
5280 		dev_err(&adapter->pdev->dev,
5281 			"Cannot recover within %lu sec from last recovery\n",
5282 			jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5283 		return false;
5284 	}
5285 
5286 	if (ue_err_code == err_rec->last_err_code) {
5287 		dev_err(&adapter->pdev->dev,
5288 			"Cannot recover from a consecutive TPE error\n");
5289 		return false;
5290 	}
5291 
5292 	err_rec->last_recovery_time = jiffies;
5293 	err_rec->last_err_code = ue_err_code;
5294 	return true;
5295 }
5296 
5297 static int be_tpe_recover(struct be_adapter *adapter)
5298 {
5299 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5300 	int status = -EAGAIN;
5301 	u32 val;
5302 
5303 	switch (err_rec->recovery_state) {
5304 	case ERR_RECOVERY_ST_NONE:
5305 		err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5306 		err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5307 		break;
5308 
5309 	case ERR_RECOVERY_ST_DETECT:
5310 		val = be_POST_stage_get(adapter);
5311 		if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5312 		    POST_STAGE_RECOVERABLE_ERR) {
5313 			dev_err(&adapter->pdev->dev,
5314 				"Unrecoverable HW error detected: 0x%x\n", val);
5315 			status = -EINVAL;
5316 			err_rec->resched_delay = 0;
5317 			break;
5318 		}
5319 
5320 		dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5321 
5322 		/* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5323 		 * milliseconds before it checks for final error status in
5324 		 * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5325 		 * If it does, then PF0 initiates a Soft Reset.
5326 		 */
5327 		if (adapter->pf_num == 0) {
5328 			err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5329 			err_rec->resched_delay = err_rec->ue_to_reset_time -
5330 					ERR_RECOVERY_UE_DETECT_DURATION;
5331 			break;
5332 		}
5333 
5334 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5335 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5336 					ERR_RECOVERY_UE_DETECT_DURATION;
5337 		break;
5338 
5339 	case ERR_RECOVERY_ST_RESET:
5340 		if (!be_err_is_recoverable(adapter)) {
5341 			dev_err(&adapter->pdev->dev,
5342 				"Failed to meet recovery criteria\n");
5343 			status = -EIO;
5344 			err_rec->resched_delay = 0;
5345 			break;
5346 		}
5347 		be_soft_reset(adapter);
5348 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5349 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5350 					err_rec->ue_to_reset_time;
5351 		break;
5352 
5353 	case ERR_RECOVERY_ST_PRE_POLL:
5354 		err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5355 		err_rec->resched_delay = 0;
5356 		status = 0;			/* done */
5357 		break;
5358 
5359 	default:
5360 		status = -EINVAL;
5361 		err_rec->resched_delay = 0;
5362 		break;
5363 	}
5364 
5365 	return status;
5366 }
5367 
5368 static int be_err_recover(struct be_adapter *adapter)
5369 {
5370 	int status;
5371 
5372 	if (!lancer_chip(adapter)) {
5373 		if (!adapter->error_recovery.recovery_supported ||
5374 		    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5375 			return -EIO;
5376 		status = be_tpe_recover(adapter);
5377 		if (status)
5378 			goto err;
5379 	}
5380 
5381 	/* Wait for adapter to reach quiescent state before
5382 	 * destroying queues
5383 	 */
5384 	status = be_fw_wait_ready(adapter);
5385 	if (status)
5386 		goto err;
5387 
5388 	adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5389 
5390 	be_cleanup(adapter);
5391 
5392 	status = be_resume(adapter);
5393 	if (status)
5394 		goto err;
5395 
5396 	adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5397 
5398 err:
5399 	return status;
5400 }
5401 
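/* Delayed work that polls the adapter for HW errors and attempts recovery
 * when one is found. Depending on the chip and the recovery state it
 * either reschedules itself with an appropriate delay or reports permanent
 * failure; Lancer retries up to ERR_RECOVERY_MAX_RETRY_COUNT times before
 * giving up.
 */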
5402 static void be_err_detection_task(struct work_struct *work)
5403 {
5404 	struct be_error_recovery *err_rec =
5405 			container_of(work, struct be_error_recovery,
5406 				     err_detection_work.work);
5407 	struct be_adapter *adapter =
5408 			container_of(err_rec, struct be_adapter,
5409 				     error_recovery);
5410 	u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5411 	struct device *dev = &adapter->pdev->dev;
5412 	int recovery_status;
5413 
5414 	be_detect_error(adapter);
5415 	if (!be_check_error(adapter, BE_ERROR_HW))
5416 		goto reschedule_task;
5417 
5418 	recovery_status = be_err_recover(adapter);
5419 	if (!recovery_status) {
5420 		err_rec->recovery_retries = 0;
5421 		err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5422 		dev_info(dev, "Adapter recovery successful\n");
5423 		goto reschedule_task;
5424 	} else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5425 		/* BEx/SH recovery state machine */
5426 		if (adapter->pf_num == 0 &&
5427 		    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5428 			dev_err(dev, "Adapter recovery in progress\n");
5430 		resched_delay = err_rec->resched_delay;
5431 		goto reschedule_task;
5432 	} else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5433 		/* For VFs, check every second whether the PF has
5434 		 * allocated resources.
5435 		 */
5436 		dev_err(dev, "Re-trying adapter recovery\n");
5437 		goto reschedule_task;
5438 	} else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5439 		   ERR_RECOVERY_MAX_RETRY_COUNT) {
5440 		/* If another error occurs during recovery, the adapter takes
5441 		 * 30 seconds to come out of the error state. Retry error
5442 		 * recovery after this interval.
5443 		 */
5444 		dev_err(dev, "Re-trying adapter recovery\n");
5445 		resched_delay = ERR_RECOVERY_RETRY_DELAY;
5446 		goto reschedule_task;
5447 	} else {
5448 		dev_err(dev, "Adapter recovery failed\n");
5449 		dev_err(dev, "Please reboot server to recover\n");
5450 	}
5451 
5452 	return;
5453 
5454 reschedule_task:
5455 	be_schedule_err_detection(adapter, resched_delay);
5456 }
5457 
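/* Log the SFP/transceiver vendor details for a port that reported a PHY
 * misconfiguration event, then clear the flag so the event is logged only
 * once per occurrence.
 */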
5458 static void be_log_sfp_info(struct be_adapter *adapter)
5459 {
5460 	int status;
5461 
5462 	status = be_cmd_query_sfp_info(adapter);
5463 	if (!status) {
5464 		dev_err(&adapter->pdev->dev,
5465 			"Port %c: %s Vendor: %s part no: %s\n",
5466 			adapter->port_name,
5467 			be_misconfig_evt_port_state[adapter->phy_state],
5468 			adapter->phy.vendor_name,
5469 			adapter->phy.vendor_pn);
5470 	}
5471 	adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5472 }
5473 
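/* Periodic housekeeping, re-armed every second on the shared be_wq:
 * poll the die temperature (PF only), reap MCC completions while the
 * netdev is down, refresh stats, replenish RX queues that starved on
 * memory allocation, update EQ delays (non-Skyhawk) and log SFP info
 * on a PHY misconfiguration.
 */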
5474 static void be_worker(struct work_struct *work)
5475 {
5476 	struct be_adapter *adapter =
5477 		container_of(work, struct be_adapter, work.work);
5478 	struct be_rx_obj *rxo;
5479 	int i;
5480 
5481 	if (be_physfn(adapter) &&
5482 	    MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5483 		be_cmd_get_die_temperature(adapter);
5484 
5485 	/* When interrupts are not yet enabled, just reap any pending
5486 	 * MCC completions.
5487 	 */
5488 	if (!netif_running(adapter->netdev)) {
5489 		local_bh_disable();
5490 		be_process_mcc(adapter);
5491 		local_bh_enable();
5492 		goto reschedule;
5493 	}
5494 
5495 	if (!adapter->stats_cmd_sent) {
5496 		if (lancer_chip(adapter))
5497 			lancer_cmd_get_pport_stats(adapter,
5498 						   &adapter->stats_cmd);
5499 		else
5500 			be_cmd_get_stats(adapter, &adapter->stats_cmd);
5501 	}
5502 
5503 	for_all_rx_queues(adapter, rxo, i) {
5504 		/* Replenish RX-queues starved due to memory
5505 		 * allocation failures.
5506 		 */
5507 		if (rxo->rx_post_starved)
5508 			be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5509 	}
5510 
5511 	/* EQ-delay update for Skyhawk is done while notifying EQ */
5512 	if (!skyhawk_chip(adapter))
5513 		be_eqd_update(adapter, false);
5514 
5515 	if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5516 		be_log_sfp_info(adapter);
5517 
5518 reschedule:
5519 	adapter->work_counter++;
5520 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5521 }
5522 
5523 static void be_unmap_pci_bars(struct be_adapter *adapter)
5524 {
5525 	if (adapter->csr)
5526 		pci_iounmap(adapter->pdev, adapter->csr);
5527 	if (adapter->db)
5528 		pci_iounmap(adapter->pdev, adapter->db);
5529 	if (adapter->pcicfg && adapter->pcicfg_mapped)
5530 		pci_iounmap(adapter->pdev, adapter->pcicfg);
5531 }
5532 
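/* Doorbell BAR: Lancer chips and VFs expose doorbells in BAR 0, all other
 * PFs in BAR 4.
 */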
5533 static int db_bar(struct be_adapter *adapter)
5534 {
5535 	if (lancer_chip(adapter) || be_virtfn(adapter))
5536 		return 0;
5537 	else
5538 		return 4;
5539 }
5540 
5541 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5542 {
5543 	if (skyhawk_chip(adapter)) {
5544 		adapter->roce_db.size = 4096;
5545 		adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5546 							      db_bar(adapter));
5547 		adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5548 							       db_bar(adapter));
5549 	}
5550 	return 0;
5551 }
5552 
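/* Read SLI_INTF to determine the SLI family and VF/PF mode, then map the
 * BARs needed by the driver: CSR (BEx PFs only), the doorbell BAR and, on
 * BEx/Skyhawk, the PCICFG area. VFs reach PCICFG through a fixed offset
 * inside the doorbell mapping rather than a separate BAR.
 */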
5553 static int be_map_pci_bars(struct be_adapter *adapter)
5554 {
5555 	struct pci_dev *pdev = adapter->pdev;
5556 	u8 __iomem *addr;
5557 	u32 sli_intf;
5558 
5559 	pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5560 	adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5561 				SLI_INTF_FAMILY_SHIFT;
5562 	adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5563 
5564 	if (BEx_chip(adapter) && be_physfn(adapter)) {
5565 		adapter->csr = pci_iomap(pdev, 2, 0);
5566 		if (!adapter->csr)
5567 			return -ENOMEM;
5568 	}
5569 
5570 	addr = pci_iomap(pdev, db_bar(adapter), 0);
5571 	if (!addr)
5572 		goto pci_map_err;
5573 	adapter->db = addr;
5574 
5575 	if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5576 		if (be_physfn(adapter)) {
5577 			/* PCICFG is the 2nd BAR in BE2 */
5578 			addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5579 			if (!addr)
5580 				goto pci_map_err;
5581 			adapter->pcicfg = addr;
5582 			adapter->pcicfg_mapped = true;
5583 		} else {
5584 			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5585 			adapter->pcicfg_mapped = false;
5586 		}
5587 	}
5588 
5589 	be_roce_map_pci_bars(adapter);
5590 	return 0;
5591 
5592 pci_map_err:
5593 	dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5594 	be_unmap_pci_bars(adapter);
5595 	return -ENOMEM;
5596 }
5597 
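/* Free the DMA-coherent buffers (mailbox, RX-filter cmd, stats cmd)
 * allocated by be_drv_init().
 */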
5598 static void be_drv_cleanup(struct be_adapter *adapter)
5599 {
5600 	struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5601 	struct device *dev = &adapter->pdev->dev;
5602 
5603 	if (mem->va)
5604 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5605 
5606 	mem = &adapter->rx_filter;
5607 	if (mem->va)
5608 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5609 
5610 	mem = &adapter->stats_cmd;
5611 	if (mem->va)
5612 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5613 }
5614 
5615 /* Allocate and initialize various fields in be_adapter struct */
5616 static int be_drv_init(struct be_adapter *adapter)
5617 {
5618 	struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5619 	struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5620 	struct be_dma_mem *rx_filter = &adapter->rx_filter;
5621 	struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5622 	struct device *dev = &adapter->pdev->dev;
5623 	int status = 0;
5624 
5625 	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5626 	mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5627 						 &mbox_mem_alloc->dma,
5628 						 GFP_KERNEL);
5629 	if (!mbox_mem_alloc->va)
5630 		return -ENOMEM;
5631 
5632 	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5633 	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5634 	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5635 
5636 	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5637 	rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5638 					    &rx_filter->dma, GFP_KERNEL);
5639 	if (!rx_filter->va) {
5640 		status = -ENOMEM;
5641 		goto free_mbox;
5642 	}
5643 
5644 	if (lancer_chip(adapter))
5645 		stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5646 	else if (BE2_chip(adapter))
5647 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5648 	else if (BE3_chip(adapter))
5649 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5650 	else
5651 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5652 	stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5653 					    &stats_cmd->dma, GFP_KERNEL);
5654 	if (!stats_cmd->va) {
5655 		status = -ENOMEM;
5656 		goto free_rx_filter;
5657 	}
5658 
5659 	mutex_init(&adapter->mbox_lock);
5660 	mutex_init(&adapter->mcc_lock);
5661 	mutex_init(&adapter->rx_filter_lock);
5662 	spin_lock_init(&adapter->mcc_cq_lock);
5663 	init_completion(&adapter->et_cmd_compl);
5664 
5665 	pci_save_state(adapter->pdev);
5666 
5667 	INIT_DELAYED_WORK(&adapter->work, be_worker);
5668 
5669 	adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5670 	adapter->error_recovery.resched_delay = 0;
5671 	INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5672 			  be_err_detection_task);
5673 
5674 	adapter->rx_fc = true;
5675 	adapter->tx_fc = true;
5676 
5677 	/* Must be a power of 2 or else MODULO will BUG_ON */
5678 	adapter->be_get_temp_freq = 64;
5679 
5680 	INIT_LIST_HEAD(&adapter->vxlan_port_list);
5681 	return 0;
5682 
5683 free_rx_filter:
5684 	dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5685 free_mbox:
5686 	dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5687 			  mbox_mem_alloc->dma);
5688 	return status;
5689 }
5690 
5691 static void be_remove(struct pci_dev *pdev)
5692 {
5693 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5694 
5695 	if (!adapter)
5696 		return;
5697 
5698 	be_roce_dev_remove(adapter);
5699 	be_intr_set(adapter, false);
5700 
5701 	be_cancel_err_detection(adapter);
5702 
5703 	unregister_netdev(adapter->netdev);
5704 
5705 	be_clear(adapter);
5706 
5707 	if (!pci_vfs_assigned(adapter->pdev))
5708 		be_cmd_reset_function(adapter);
5709 
5710 	/* tell fw we're done with firing cmds */
5711 	/* Tell the FW that we are done issuing cmds */
5712 
5713 	be_unmap_pci_bars(adapter);
5714 	be_drv_cleanup(adapter);
5715 
5716 	pci_disable_pcie_error_reporting(pdev);
5717 
5718 	pci_release_regions(pdev);
5719 	pci_disable_device(pdev);
5720 
5721 	free_netdev(adapter->netdev);
5722 }
5723 
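/* hwmon "temp1_input" show handler. The die temperature is exported in
 * millidegrees Celsius and can be read from userspace, e.g. via
 * /sys/class/hwmon/hwmonN/temp1_input, once the hwmon device is
 * registered in be_probe().
 */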
5724 static ssize_t be_hwmon_show_temp(struct device *dev,
5725 				  struct device_attribute *dev_attr,
5726 				  char *buf)
5727 {
5728 	struct be_adapter *adapter = dev_get_drvdata(dev);
5729 
5730 	/* Unit: millidegree Celsius */
5731 	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5732 		return -EIO;
5733 	else
5734 		return sprintf(buf, "%u\n",
5735 			       adapter->hwmon_info.be_on_die_temp * 1000);
5736 }
5737 
5738 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5739 			  be_hwmon_show_temp, NULL, 1);
5740 
5741 static struct attribute *be_hwmon_attrs[] = {
5742 	&sensor_dev_attr_temp1_input.dev_attr.attr,
5743 	NULL
5744 };
5745 
5746 ATTRIBUTE_GROUPS(be_hwmon);
5747 
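/* Human-readable name of the multi-channel mode the function runs in,
 * e.g. for the banner printed at the end of probe.
 */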
5748 static char *mc_name(struct be_adapter *adapter)
5749 {
5750 	char *str = "";	/* default */
5751 
5752 	switch (adapter->mc_type) {
5753 	case UMC:
5754 		str = "UMC";
5755 		break;
5756 	case FLEX10:
5757 		str = "FLEX10";
5758 		break;
5759 	case vNIC1:
5760 		str = "vNIC-1";
5761 		break;
5762 	case nPAR:
5763 		str = "nPAR";
5764 		break;
5765 	case UFP:
5766 		str = "UFP";
5767 		break;
5768 	case vNIC2:
5769 		str = "vNIC-2";
5770 		break;
5771 	default:
5772 		str = "";
5773 	}
5774 
5775 	return str;
5776 }
5777 
5778 static inline char *func_name(struct be_adapter *adapter)
5779 {
5780 	return be_physfn(adapter) ? "PF" : "VF";
5781 }
5782 
5783 static inline char *nic_name(struct pci_dev *pdev)
5784 {
5785 	switch (pdev->device) {
5786 	case OC_DEVICE_ID1:
5787 		return OC_NAME;
5788 	case OC_DEVICE_ID2:
5789 		return OC_NAME_BE;
5790 	case OC_DEVICE_ID3:
5791 	case OC_DEVICE_ID4:
5792 		return OC_NAME_LANCER;
5793 	case BE_DEVICE_ID2:
5794 		return BE3_NAME;
5795 	case OC_DEVICE_ID5:
5796 	case OC_DEVICE_ID6:
5797 		return OC_NAME_SH;
5798 	default:
5799 		return BE_NAME;
5800 	}
5801 }
5802 
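/* PCI probe: enable the device, map BARs, allocate the netdev and driver
 * state, bring the adapter up via be_setup(), register the netdev and
 * start periodic error detection plus (PF-only) hwmon registration.
 * Failures unwind in reverse order through the labels at the bottom.
 */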
5803 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5804 {
5805 	struct be_adapter *adapter;
5806 	struct net_device *netdev;
5807 	int status = 0;
5808 
5809 	dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5810 
5811 	status = pci_enable_device(pdev);
5812 	if (status)
5813 		goto do_none;
5814 
5815 	status = pci_request_regions(pdev, DRV_NAME);
5816 	if (status)
5817 		goto disable_dev;
5818 	pci_set_master(pdev);
5819 
5820 	netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5821 	if (!netdev) {
5822 		status = -ENOMEM;
5823 		goto rel_reg;
5824 	}
5825 	adapter = netdev_priv(netdev);
5826 	adapter->pdev = pdev;
5827 	pci_set_drvdata(pdev, adapter);
5828 	adapter->netdev = netdev;
5829 	SET_NETDEV_DEV(netdev, &pdev->dev);
5830 
5831 	status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5832 	if (!status) {
5833 		netdev->features |= NETIF_F_HIGHDMA;
5834 	} else {
5835 		status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5836 		if (status) {
5837 			dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5838 			goto free_netdev;
5839 		}
5840 	}
5841 
5842 	status = pci_enable_pcie_error_reporting(pdev);
5843 	if (!status)
5844 		dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5845 
5846 	status = be_map_pci_bars(adapter);
5847 	if (status)
5848 		goto free_netdev;
5849 
5850 	status = be_drv_init(adapter);
5851 	if (status)
5852 		goto unmap_bars;
5853 
5854 	status = be_setup(adapter);
5855 	if (status)
5856 		goto drv_cleanup;
5857 
5858 	be_netdev_init(netdev);
5859 	status = register_netdev(netdev);
5860 	if (status != 0)
5861 		goto unsetup;
5862 
5863 	be_roce_dev_add(adapter);
5864 
5865 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5866 	adapter->error_recovery.probe_time = jiffies;
5867 
5868 	/* On-die temperature is not supported for VFs. */
5869 	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5870 		adapter->hwmon_info.hwmon_dev =
5871 			devm_hwmon_device_register_with_groups(&pdev->dev,
5872 							       DRV_NAME,
5873 							       adapter,
5874 							       be_hwmon_groups);
5875 		adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5876 	}
5877 
5878 	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5879 		 func_name(adapter), mc_name(adapter), adapter->port_name);
5880 
5881 	return 0;
5882 
5883 unsetup:
5884 	be_clear(adapter);
5885 drv_cleanup:
5886 	be_drv_cleanup(adapter);
5887 unmap_bars:
5888 	be_unmap_pci_bars(adapter);
5889 free_netdev:
5890 	free_netdev(netdev);
5891 rel_reg:
5892 	pci_release_regions(pdev);
5893 disable_dev:
5894 	pci_disable_device(pdev);
5895 do_none:
5896 	dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5897 	return status;
5898 }
5899 
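/* Legacy PCI power-management hooks: suspend quiesces the function and
 * powers the device down; resume re-enables it, restores the saved config
 * space, re-initializes the function and restarts error detection.
 */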
5900 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5901 {
5902 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5903 
5904 	be_intr_set(adapter, false);
5905 	be_cancel_err_detection(adapter);
5906 
5907 	be_cleanup(adapter);
5908 
5909 	pci_save_state(pdev);
5910 	pci_disable_device(pdev);
5911 	pci_set_power_state(pdev, pci_choose_state(pdev, state));
5912 	return 0;
5913 }
5914 
5915 static int be_pci_resume(struct pci_dev *pdev)
5916 {
5917 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5918 	int status = 0;
5919 
5920 	status = pci_enable_device(pdev);
5921 	if (status)
5922 		return status;
5923 
5924 	pci_restore_state(pdev);
5925 
5926 	status = be_resume(adapter);
5927 	if (status)
5928 		return status;
5929 
5930 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5931 
5932 	return 0;
5933 }
5934 
5935 /*
5936  * An FLR will stop BE from DMAing any data.
5937  */
5938 static void be_shutdown(struct pci_dev *pdev)
5939 {
5940 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5941 
5942 	if (!adapter)
5943 		return;
5944 
5945 	be_roce_dev_shutdown(adapter);
5946 	cancel_delayed_work_sync(&adapter->work);
5947 	be_cancel_err_detection(adapter);
5948 
5949 	netif_device_detach(adapter->netdev);
5950 
5951 	be_cmd_reset_function(adapter);
5952 
5953 	pci_disable_device(pdev);
5954 }
5955 
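/* EEH/AER error handling: error_detected quiesces the function and
 * disables the device, slot_reset re-enables it and waits for the FW to
 * become ready, and resume restores the config and restarts error
 * detection.
 */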
5956 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5957 					    pci_channel_state_t state)
5958 {
5959 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5960 
5961 	dev_err(&adapter->pdev->dev, "EEH error detected\n");
5962 
5963 	be_roce_dev_remove(adapter);
5964 
5965 	if (!be_check_error(adapter, BE_ERROR_EEH)) {
5966 		be_set_error(adapter, BE_ERROR_EEH);
5967 
5968 		be_cancel_err_detection(adapter);
5969 
5970 		be_cleanup(adapter);
5971 	}
5972 
5973 	if (state == pci_channel_io_perm_failure)
5974 		return PCI_ERS_RESULT_DISCONNECT;
5975 
5976 	pci_disable_device(pdev);
5977 
5978 	/* The error could cause the FW to trigger a flash debug dump.
5979 	 * Resetting the card while the flash dump is in progress can
5980 	 * prevent it from recovering; wait for the dump to finish.
5981 	 * Only the first function needs to wait, as the dump happens
5982 	 * once per adapter.
5983 	 */
5984 	if (pdev->devfn == 0)
5985 		ssleep(30);
5986 
5987 	return PCI_ERS_RESULT_NEED_RESET;
5988 }
5989 
5990 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
5991 {
5992 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5993 	int status;
5994 
5995 	dev_info(&adapter->pdev->dev, "EEH reset\n");
5996 
5997 	status = pci_enable_device(pdev);
5998 	if (status)
5999 		return PCI_ERS_RESULT_DISCONNECT;
6000 
6001 	pci_set_master(pdev);
6002 	pci_restore_state(pdev);
6003 
6004 	/* Check if card is ok and fw is ready */
6005 	dev_info(&adapter->pdev->dev,
6006 		 "Waiting for FW to be ready after EEH reset\n");
6007 	status = be_fw_wait_ready(adapter);
6008 	if (status)
6009 		return PCI_ERS_RESULT_DISCONNECT;
6010 
6011 	pci_cleanup_aer_uncorrect_error_status(pdev);
6012 	be_clear_error(adapter, BE_CLEAR_ALL);
6013 	return PCI_ERS_RESULT_RECOVERED;
6014 }
6015 
6016 static void be_eeh_resume(struct pci_dev *pdev)
6017 {
6018 	int status = 0;
6019 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6020 
6021 	dev_info(&adapter->pdev->dev, "EEH resume\n");
6022 
6023 	pci_save_state(pdev);
6024 
6025 	status = be_resume(adapter);
6026 	if (status)
6027 		goto err;
6028 
6029 	be_roce_dev_add(adapter);
6030 
6031 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6032 	return;
6033 err:
6034 	dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6035 }
6036 
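/* sriov_configure hook, invoked when the user writes the PF's
 * sriov_numvfs sysfs attribute, e.g.:
 *   echo 4 > /sys/bus/pci/devices/<BDF>/sriov_numvfs
 * num_vfs == 0 disables VFs; a non-zero value redistributes the PF-pool
 * resources (Skyhawk only) and sets up the requested number of VFs.
 */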
6037 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6038 {
6039 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6040 	struct be_resources vft_res = {0};
6041 	int status;
6042 
6043 	if (!num_vfs)
6044 		be_vf_clear(adapter);
6045 
6046 	adapter->num_vfs = num_vfs;
6047 
6048 	if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6049 		dev_warn(&pdev->dev,
6050 			 "Cannot disable VFs while they are assigned\n");
6051 		return -EBUSY;
6052 	}
6053 
6054 	/* When the HW is in an SR-IOV capable configuration, the PF-pool
6055 	 * resources are distributed equally across the maximum number of
6056 	 * VFs. The user may request that only a subset of the max VFs be
6057 	 * enabled; based on num_vfs, redistribute the resources so that
6058 	 * each enabled VF gets a larger share of them.
6059 	 * This facility is not available in BE3 FW, and on Lancer chips
6060 	 * the FW performs this redistribution itself.
6061 	 */
6062 	if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6063 		be_calculate_vf_res(adapter, adapter->num_vfs,
6064 				    &vft_res);
6065 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6066 						 adapter->num_vfs, &vft_res);
6067 		if (status)
6068 			dev_err(&pdev->dev,
6069 				"Failed to optimize SR-IOV resources\n");
6070 	}
6071 
6072 	status = be_get_resources(adapter);
6073 	if (status)
6074 		return be_cmd_status(status);
6075 
6076 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6077 	rtnl_lock();
6078 	status = be_update_queues(adapter);
6079 	rtnl_unlock();
6080 	if (status)
6081 		return be_cmd_status(status);
6082 
6083 	if (adapter->num_vfs)
6084 		status = be_vf_setup(adapter);
6085 
6086 	if (!status)
6087 		return adapter->num_vfs;
6088 
6089 	return 0;
6090 }
6091 
6092 static const struct pci_error_handlers be_eeh_handlers = {
6093 	.error_detected = be_eeh_err_detected,
6094 	.slot_reset = be_eeh_reset,
6095 	.resume = be_eeh_resume,
6096 };
6097 
6098 static struct pci_driver be_driver = {
6099 	.name = DRV_NAME,
6100 	.id_table = be_dev_ids,
6101 	.probe = be_probe,
6102 	.remove = be_remove,
6103 	.suspend = be_suspend,
6104 	.resume = be_pci_resume,
6105 	.shutdown = be_shutdown,
6106 	.sriov_configure = be_pci_sriov_configure,
6107 	.err_handler = &be_eeh_handlers
6108 };
6109 
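/* Module init: validate rx_frag_size, create the shared workqueues and
 * register the PCI driver; tear the workqueues down again if registration
 * fails.
 */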
6110 static int __init be_init_module(void)
6111 {
6112 	int status;
6113 
6114 	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6115 	    rx_frag_size != 2048) {
6116 		printk(KERN_WARNING DRV_NAME
6117 			" : Module param rx_frag_size must be 2048/4096/8192."
6118 			" Using 2048\n");
6119 		rx_frag_size = 2048;
6120 	}
6121 
6122 	if (num_vfs > 0) {
6123 		pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6124 		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6125 	}
6126 
6127 	be_wq = create_singlethread_workqueue("be_wq");
6128 	if (!be_wq) {
6129 		pr_warn(DRV_NAME " : workqueue creation failed\n");
6130 		return -ENOMEM;
6131 	}
6132 
6133 	be_err_recovery_workq =
6134 		create_singlethread_workqueue("be_err_recover");
6135 	if (!be_err_recovery_workq)
6136 		pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6137 
6138 	status = pci_register_driver(&be_driver);
6139 	if (status) {
6140 		destroy_workqueue(be_wq);
6141 		be_destroy_err_recovery_workq();
6142 	}
6143 	return status;
6144 }
6145 module_init(be_init_module);
6146 
6147 static void __exit be_exit_module(void)
6148 {
6149 	pci_unregister_driver(&be_driver);
6150 
6151 	be_destroy_err_recovery_workq();
6152 
6153 	if (be_wq)
6154 		destroy_workqueue(be_wq);
6155 }
6156 module_exit(be_exit_module);
6157