xref: /linux/drivers/net/ethernet/mellanox/mlx5/core/main.c (revision 8dd765a5d769c521d73931850d1c8708fbc490cb)
1 /*
2  * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/highmem.h>
34 #include <linux/module.h>
35 #include <linux/init.h>
36 #include <linux/errno.h>
37 #include <linux/pci.h>
38 #include <linux/dma-mapping.h>
39 #include <linux/slab.h>
40 #include <linux/interrupt.h>
41 #include <linux/delay.h>
42 #include <linux/mlx5/driver.h>
43 #include <linux/mlx5/cq.h>
44 #include <linux/mlx5/qp.h>
45 #include <linux/debugfs.h>
46 #include <linux/kmod.h>
47 #include <linux/mlx5/mlx5_ifc.h>
48 #include <linux/mlx5/vport.h>
49 #include <linux/version.h>
50 #include <net/devlink.h>
51 #include "mlx5_core.h"
52 #include "lib/eq.h"
53 #include "fs_core.h"
54 #include "lib/mpfs.h"
55 #include "eswitch.h"
56 #include "devlink.h"
57 #include "fw_reset.h"
58 #include "lib/mlx5.h"
59 #include "lib/tout.h"
60 #include "fpga/core.h"
61 #include "en_accel/ipsec.h"
62 #include "lib/clock.h"
63 #include "lib/vxlan.h"
64 #include "lib/geneve.h"
65 #include "lib/devcom.h"
66 #include "lib/pci_vsc.h"
67 #include "diag/fw_tracer.h"
68 #include "ecpf.h"
69 #include "lib/hv_vhca.h"
70 #include "diag/rsc_dump.h"
71 #include "sf/vhca_event.h"
72 #include "sf/dev/dev.h"
73 #include "sf/sf.h"
74 #include "mlx5_irq.h"
75 #include "hwmon.h"
76 
/* Module metadata. */
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
MODULE_LICENSE("Dual BSD/GPL");

/* Debug bitmask exposed as the "debug_mask" module parameter (mode 0644).
 * Bit meanings are given in the parameter description below. The variable
 * has external linkage, so other translation units can reference it.
 */
unsigned int mlx5_core_debug_mask;
module_param_named(debug_mask, mlx5_core_debug_mask, uint, 0644);
MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");

/* Profile selector exposed as the "prof_sel" module parameter (mode 0444);
 * presumably an index into profile[] below - it is not consumed in this part
 * of the file.
 * NOTE(review): the description advertises 0 - 2 while profile[] defines
 * entries 0..3 - confirm whether entry 3 is intentionally not user-selectable.
 */
static unsigned int prof_sel = MLX5_DEFAULT_PROF;
module_param_named(prof_sel, prof_sel, uint, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");

/* File-scope state; none of the three is referenced in this part of the
 * file.
 * MAX_SW_VHCA_ID is BIT(width) - 1, where width is what __mlx5_bit_sz()
 * reports for the sw_vhca_id field of cmd_hca_cap_2, i.e. the largest value
 * that field can hold.
 */
static u32 sw_owner_id[4];
#define MAX_SW_VHCA_ID (BIT(__mlx5_bit_sz(cmd_hca_cap_2, sw_vhca_id)) - 1)
static DEFINE_IDA(sw_vhca_ida);

/* Values of the atomic requestor endianness mode; see
 * handle_hca_cap_atomic(), which compares against and programs
 * MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS.
 */
enum {
	MLX5_ATOMIC_REQ_MODE_BE = 0x0,
	MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
};

/* Sentinel for mlx5_profile.log_max_qp: handle_hca_cap() replaces it with
 * min(18, device maximum log_max_qp).
 */
#define LOG_MAX_SUPPORTED_QPS 0xff
99 
100 static struct mlx5_profile profile[] = {
101 	[0] = {
102 		.mask           = 0,
103 		.num_cmd_caches = MLX5_NUM_COMMAND_CACHES,
104 	},
105 	[1] = {
106 		.mask		= MLX5_PROF_MASK_QP_SIZE,
107 		.log_max_qp	= 12,
108 		.num_cmd_caches = MLX5_NUM_COMMAND_CACHES,
109 
110 	},
111 	[2] = {
112 		.mask		= MLX5_PROF_MASK_QP_SIZE |
113 				  MLX5_PROF_MASK_MR_CACHE,
114 		.log_max_qp	= LOG_MAX_SUPPORTED_QPS,
115 		.num_cmd_caches = MLX5_NUM_COMMAND_CACHES,
116 		.mr_cache[0]	= {
117 			.size	= 500,
118 			.limit	= 250
119 		},
120 		.mr_cache[1]	= {
121 			.size	= 500,
122 			.limit	= 250
123 		},
124 		.mr_cache[2]	= {
125 			.size	= 500,
126 			.limit	= 250
127 		},
128 		.mr_cache[3]	= {
129 			.size	= 500,
130 			.limit	= 250
131 		},
132 		.mr_cache[4]	= {
133 			.size	= 500,
134 			.limit	= 250
135 		},
136 		.mr_cache[5]	= {
137 			.size	= 500,
138 			.limit	= 250
139 		},
140 		.mr_cache[6]	= {
141 			.size	= 500,
142 			.limit	= 250
143 		},
144 		.mr_cache[7]	= {
145 			.size	= 500,
146 			.limit	= 250
147 		},
148 		.mr_cache[8]	= {
149 			.size	= 500,
150 			.limit	= 250
151 		},
152 		.mr_cache[9]	= {
153 			.size	= 500,
154 			.limit	= 250
155 		},
156 		.mr_cache[10]	= {
157 			.size	= 500,
158 			.limit	= 250
159 		},
160 		.mr_cache[11]	= {
161 			.size	= 500,
162 			.limit	= 250
163 		},
164 		.mr_cache[12]	= {
165 			.size	= 64,
166 			.limit	= 32
167 		},
168 		.mr_cache[13]	= {
169 			.size	= 32,
170 			.limit	= 16
171 		},
172 		.mr_cache[14]	= {
173 			.size	= 16,
174 			.limit	= 8
175 		},
176 		.mr_cache[15]	= {
177 			.size	= 8,
178 			.limit	= 4
179 		},
180 	},
181 	[3] = {
182 		.mask		= MLX5_PROF_MASK_QP_SIZE,
183 		.log_max_qp	= LOG_MAX_SUPPORTED_QPS,
184 		.num_cmd_caches = 0,
185 	},
186 };
187 
188 static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
189 			u32 warn_time_mili)
190 {
191 	unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili);
192 	unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
193 	u32 fw_initializing;
194 	int err = 0;
195 
196 	do {
197 		fw_initializing = ioread32be(&dev->iseg->initializing);
198 		if (!(fw_initializing >> 31))
199 			break;
200 		if (time_after(jiffies, end) ||
201 		    test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
202 			err = -EBUSY;
203 			break;
204 		}
205 		if (warn_time_mili && time_after(jiffies, warn)) {
206 			mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds (0x%x)\n",
207 				       jiffies_to_msecs(end - warn) / 1000, fw_initializing);
208 			warn = jiffies + msecs_to_jiffies(warn_time_mili);
209 		}
210 		msleep(mlx5_tout_ms(dev, FW_PRE_INIT_WAIT));
211 	} while (true);
212 
213 	return err;
214 }
215 
216 static void mlx5_set_driver_version(struct mlx5_core_dev *dev)
217 {
218 	int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in,
219 					      driver_version);
220 	u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {};
221 	int remaining_size = driver_ver_sz;
222 	char *string;
223 
224 	if (!MLX5_CAP_GEN(dev, driver_version))
225 		return;
226 
227 	string = MLX5_ADDR_OF(set_driver_version_in, in, driver_version);
228 
229 	strncpy(string, "Linux", remaining_size);
230 
231 	remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
232 	strncat(string, ",", remaining_size);
233 
234 	remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
235 	strncat(string, KBUILD_MODNAME, remaining_size);
236 
237 	remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
238 	strncat(string, ",", remaining_size);
239 
240 	remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
241 
242 	snprintf(string + strlen(string), remaining_size, "%u.%u.%u",
243 		LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL,
244 		LINUX_VERSION_SUBLEVEL);
245 
246 	/*Send the command*/
247 	MLX5_SET(set_driver_version_in, in, opcode,
248 		 MLX5_CMD_OP_SET_DRIVER_VERSION);
249 
250 	mlx5_cmd_exec_in(dev, set_driver_version, in);
251 }
252 
253 static int set_dma_caps(struct pci_dev *pdev)
254 {
255 	int err;
256 
257 	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
258 	if (err) {
259 		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
260 		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
261 		if (err) {
262 			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
263 			return err;
264 		}
265 	}
266 
267 	dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
268 	return err;
269 }
270 
271 static int mlx5_pci_enable_device(struct mlx5_core_dev *dev)
272 {
273 	struct pci_dev *pdev = dev->pdev;
274 	int err = 0;
275 
276 	mutex_lock(&dev->pci_status_mutex);
277 	if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) {
278 		err = pci_enable_device(pdev);
279 		if (!err)
280 			dev->pci_status = MLX5_PCI_STATUS_ENABLED;
281 	}
282 	mutex_unlock(&dev->pci_status_mutex);
283 
284 	return err;
285 }
286 
287 static void mlx5_pci_disable_device(struct mlx5_core_dev *dev)
288 {
289 	struct pci_dev *pdev = dev->pdev;
290 
291 	mutex_lock(&dev->pci_status_mutex);
292 	if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) {
293 		pci_disable_device(pdev);
294 		dev->pci_status = MLX5_PCI_STATUS_DISABLED;
295 	}
296 	mutex_unlock(&dev->pci_status_mutex);
297 }
298 
299 static int request_bar(struct pci_dev *pdev)
300 {
301 	int err = 0;
302 
303 	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
304 		dev_err(&pdev->dev, "Missing registers BAR, aborting\n");
305 		return -ENODEV;
306 	}
307 
308 	err = pci_request_regions(pdev, KBUILD_MODNAME);
309 	if (err)
310 		dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
311 
312 	return err;
313 }
314 
/* Undo request_bar(): release the PCI regions claimed for @pdev. */
static void release_bar(struct pci_dev *pdev)
{
	pci_release_regions(pdev);
}
319 
/* Payload passed to mlx5_core_access_reg() for MLX5_REG_HOST_ENDIANNESS
 * (see set_hca_ctrl()). 16 bytes in total.
 */
struct mlx5_reg_host_endianness {
	u8	he;		/* set_hca_ctrl() writes MLX5_SET_HOST_ENDIANNESS here */
	u8      rsvd[15];	/* remaining bytes; zeroed by set_hca_ctrl() on input */
};
324 
325 static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size)
326 {
327 	switch (size) {
328 	case 128:
329 		return 0;
330 	case 256:
331 		return 1;
332 	case 512:
333 		return 2;
334 	case 1024:
335 		return 3;
336 	case 2048:
337 		return 4;
338 	case 4096:
339 		return 5;
340 	default:
341 		mlx5_core_warn(dev, "invalid pkey table size %d\n", size);
342 		return 0;
343 	}
344 }
345 
346 void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *dev, struct net_device *netdev)
347 {
348 	mutex_lock(&dev->mlx5e_res.uplink_netdev_lock);
349 	dev->mlx5e_res.uplink_netdev = netdev;
350 	mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_UPLINK_NETDEV,
351 					  netdev);
352 	mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock);
353 }
354 
355 void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *dev)
356 {
357 	mutex_lock(&dev->mlx5e_res.uplink_netdev_lock);
358 	mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_UPLINK_NETDEV,
359 					  dev->mlx5e_res.uplink_netdev);
360 	mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock);
361 }
362 EXPORT_SYMBOL(mlx5_core_uplink_netdev_event_replay);
363 
364 int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type,
365 			    enum mlx5_cap_mode cap_mode)
366 {
367 	u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
368 	int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
369 	void *out, *hca_caps;
370 	u16 opmod = (cap_type << 1) | (cap_mode & 0x01);
371 	int err;
372 
373 	memset(in, 0, sizeof(in));
374 	out = kzalloc(out_sz, GFP_KERNEL);
375 	if (!out)
376 		return -ENOMEM;
377 
378 	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
379 	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
380 	err = mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
381 	if (err) {
382 		mlx5_core_warn(dev,
383 			       "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
384 			       cap_type, cap_mode, err);
385 		goto query_ex;
386 	}
387 
388 	hca_caps =  MLX5_ADDR_OF(query_hca_cap_out, out, capability);
389 
390 	switch (cap_mode) {
391 	case HCA_CAP_OPMOD_GET_MAX:
392 		memcpy(dev->caps.hca[cap_type]->max, hca_caps,
393 		       MLX5_UN_SZ_BYTES(hca_cap_union));
394 		break;
395 	case HCA_CAP_OPMOD_GET_CUR:
396 		memcpy(dev->caps.hca[cap_type]->cur, hca_caps,
397 		       MLX5_UN_SZ_BYTES(hca_cap_union));
398 		break;
399 	default:
400 		mlx5_core_warn(dev,
401 			       "Tried to query dev cap type(%x) with wrong opmode(%x)\n",
402 			       cap_type, cap_mode);
403 		err = -EINVAL;
404 		break;
405 	}
406 query_ex:
407 	kfree(out);
408 	return err;
409 }
410 
411 int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
412 {
413 	int ret;
414 
415 	ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR);
416 	if (ret)
417 		return ret;
418 	return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
419 }
420 
421 static int set_caps(struct mlx5_core_dev *dev, void *in, int opmod)
422 {
423 	MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
424 	MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
425 	return mlx5_cmd_exec_in(dev, set_hca_cap, in);
426 }
427 
428 static int handle_hca_cap_atomic(struct mlx5_core_dev *dev, void *set_ctx)
429 {
430 	void *set_hca_cap;
431 	int req_endianness;
432 	int err;
433 
434 	if (!MLX5_CAP_GEN(dev, atomic))
435 		return 0;
436 
437 	err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
438 	if (err)
439 		return err;
440 
441 	req_endianness =
442 		MLX5_CAP_ATOMIC(dev,
443 				supported_atomic_req_8B_endianness_mode_1);
444 
445 	if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
446 		return 0;
447 
448 	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
449 
450 	/* Set requestor to host endianness */
451 	MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianness_mode,
452 		 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
453 
454 	return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
455 }
456 
457 static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
458 {
459 	void *set_hca_cap;
460 	bool do_set = false;
461 	int err;
462 
463 	if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) ||
464 	    !MLX5_CAP_GEN(dev, pg))
465 		return 0;
466 
467 	err = mlx5_core_get_caps(dev, MLX5_CAP_ODP);
468 	if (err)
469 		return err;
470 
471 	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
472 	memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ODP]->cur,
473 	       MLX5_ST_SZ_BYTES(odp_cap));
474 
475 #define ODP_CAP_SET_MAX(dev, field)                                            \
476 	do {                                                                   \
477 		u32 _res = MLX5_CAP_ODP_MAX(dev, field);                       \
478 		if (_res) {                                                    \
479 			do_set = true;                                         \
480 			MLX5_SET(odp_cap, set_hca_cap, field, _res);           \
481 		}                                                              \
482 	} while (0)
483 
484 	ODP_CAP_SET_MAX(dev, ud_odp_caps.srq_receive);
485 	ODP_CAP_SET_MAX(dev, rc_odp_caps.srq_receive);
486 	ODP_CAP_SET_MAX(dev, xrc_odp_caps.srq_receive);
487 	ODP_CAP_SET_MAX(dev, xrc_odp_caps.send);
488 	ODP_CAP_SET_MAX(dev, xrc_odp_caps.receive);
489 	ODP_CAP_SET_MAX(dev, xrc_odp_caps.write);
490 	ODP_CAP_SET_MAX(dev, xrc_odp_caps.read);
491 	ODP_CAP_SET_MAX(dev, xrc_odp_caps.atomic);
492 	ODP_CAP_SET_MAX(dev, dc_odp_caps.srq_receive);
493 	ODP_CAP_SET_MAX(dev, dc_odp_caps.send);
494 	ODP_CAP_SET_MAX(dev, dc_odp_caps.receive);
495 	ODP_CAP_SET_MAX(dev, dc_odp_caps.write);
496 	ODP_CAP_SET_MAX(dev, dc_odp_caps.read);
497 	ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic);
498 
499 	if (!do_set)
500 		return 0;
501 
502 	return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP);
503 }
504 
505 static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev)
506 {
507 	struct devlink *devlink = priv_to_devlink(dev);
508 	union devlink_param_value val;
509 	int err;
510 
511 	err = devl_param_driverinit_value_get(devlink,
512 					      DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
513 					      &val);
514 	if (!err)
515 		return val.vu32;
516 	mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err);
517 	return err;
518 }
519 
520 bool mlx5_is_roce_on(struct mlx5_core_dev *dev)
521 {
522 	struct devlink *devlink = priv_to_devlink(dev);
523 	union devlink_param_value val;
524 	int err;
525 
526 	err = devl_param_driverinit_value_get(devlink,
527 					      DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
528 					      &val);
529 
530 	if (!err)
531 		return val.vbool;
532 
533 	mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err);
534 	return MLX5_CAP_GEN(dev, roce);
535 }
536 EXPORT_SYMBOL(mlx5_is_roce_on);
537 
538 static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx)
539 {
540 	void *set_hca_cap;
541 	int err;
542 
543 	if (!MLX5_CAP_GEN_MAX(dev, hca_cap_2))
544 		return 0;
545 
546 	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2);
547 	if (err)
548 		return err;
549 
550 	if (!MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) ||
551 	    !(dev->priv.sw_vhca_id > 0))
552 		return 0;
553 
554 	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
555 				   capability);
556 	memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL_2]->cur,
557 	       MLX5_ST_SZ_BYTES(cmd_hca_cap_2));
558 	MLX5_SET(cmd_hca_cap_2, set_hca_cap, sw_vhca_id_valid, 1);
559 
560 	return set_caps(dev, set_ctx, MLX5_CAP_GENERAL_2);
561 }
562 
/*
 * Program the general device capability group.
 *
 * The current capability values are queried and copied into @set_ctx, then
 * individual fields are overridden (mostly: "enable if the maximum caps say
 * it is supported") before the group is written back with SET_HCA_CAP.
 *
 * Side effect: dev->profile.log_max_qp may be rewritten so that it never
 * exceeds what the device supports.
 *
 * @set_ctx: zeroed set_hca_cap_in buffer provided by the caller.
 *
 * Returns 0 or the error from querying / setting the capability.
 */
static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
{
	struct mlx5_profile *prof = &dev->profile;
	void *set_hca_cap;
	int max_uc_list;
	int err;

	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
	if (err)
		return err;

	/* Start from the current values so untouched fields are preserved. */
	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
				   capability);
	memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL]->cur,
	       MLX5_ST_SZ_BYTES(cmd_hca_cap));

	mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n",
		      mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)),
		      128);
	/* we limit the size of the pkey table to 128 entries for now */
	MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
		 to_fw_pkey_sz(dev, 128));

	/* Check log_max_qp from HCA caps to set in current profile */
	if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) {
		/* sentinel: use the device maximum, capped at 18 */
		prof->log_max_qp = min_t(u8, 18, MLX5_CAP_GEN_MAX(dev, log_max_qp));
	} else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) {
		mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n",
			       prof->log_max_qp,
			       MLX5_CAP_GEN_MAX(dev, log_max_qp));
		prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp);
	}
	/* The (possibly clamped) value is only programmed when the profile
	 * asks for a QP size override.
	 */
	if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
		MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
			 prof->log_max_qp);

	/* disable cmdif checksum */
	MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);

	/* Enable 4K UAR only when HCA supports it and page size is bigger
	 * than 4K.
	 */
	if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096)
		MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1);

	/* UAR page size as log2 relative to 4K (PAGE_SHIFT - 12) */
	MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);

	/* Report whether the host cache line is at least 128 bytes. */
	if (MLX5_CAP_GEN_MAX(dev, cache_line_128byte))
		MLX5_SET(cmd_hca_cap,
			 set_hca_cap,
			 cache_line_128byte,
			 cache_line_size() >= 128 ? 1 : 0);

	if (MLX5_CAP_GEN_MAX(dev, dct))
		MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1);

	if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_event))
		MLX5_SET(cmd_hca_cap, set_hca_cap, pci_sync_for_fw_update_event, 1);
	if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_with_driver_unload))
		MLX5_SET(cmd_hca_cap, set_hca_cap,
			 pci_sync_for_fw_update_with_driver_unload, 1);

	/* Request the maximum number of vhca ports the device offers. */
	if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports))
		MLX5_SET(cmd_hca_cap,
			 set_hca_cap,
			 num_vhca_ports,
			 MLX5_CAP_GEN_MAX(dev, num_vhca_ports));

	if (MLX5_CAP_GEN_MAX(dev, release_all_pages))
		MLX5_SET(cmd_hca_cap, set_hca_cap, release_all_pages, 1);

	if (MLX5_CAP_GEN_MAX(dev, mkey_by_name))
		MLX5_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1);

	/* vhca state related fields are filled in by a helper defined
	 * outside this file section.
	 */
	mlx5_vhca_state_cap_handle(dev, set_hca_cap);

	if (MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix))
		MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix,
			 MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix));

	/* RoCE is only written when the cap is writable and supported; the
	 * requested state comes from mlx5_is_roce_on().
	 */
	if (MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce))
		MLX5_SET(cmd_hca_cap, set_hca_cap, roce,
			 mlx5_is_roce_on(dev));

	/* A non-positive value (error or 0) leaves log_max_current_uc_list
	 * at its current setting.
	 */
	max_uc_list = max_uc_list_get_devlink_param(dev);
	if (max_uc_list > 0)
		MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_current_uc_list,
			 ilog2(max_uc_list));

	return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
}
654 
655 /* Cached MLX5_CAP_GEN(dev, roce) can be out of sync this early in the
656  * boot process.
657  * In case RoCE cap is writable in FW and user/devlink requested to change the
658  * cap, we are yet to query the final state of the above cap.
659  * Hence, the need for this function.
660  *
661  * Returns
662  * True:
663  * 1) RoCE cap is read only in FW and already disabled
664  * OR:
665  * 2) RoCE cap is writable in FW and user/devlink requested it off.
666  *
667  * In any other case, return False.
668  */
669 static bool is_roce_fw_disabled(struct mlx5_core_dev *dev)
670 {
671 	return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_on(dev)) ||
672 		(!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce));
673 }
674 
675 static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx)
676 {
677 	void *set_hca_cap;
678 	int err;
679 
680 	if (is_roce_fw_disabled(dev))
681 		return 0;
682 
683 	err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE);
684 	if (err)
685 		return err;
686 
687 	if (MLX5_CAP_ROCE(dev, sw_r_roce_src_udp_port) ||
688 	    !MLX5_CAP_ROCE_MAX(dev, sw_r_roce_src_udp_port))
689 		return 0;
690 
691 	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
692 	memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ROCE]->cur,
693 	       MLX5_ST_SZ_BYTES(roce_cap));
694 	MLX5_SET(roce_cap, set_hca_cap, sw_r_roce_src_udp_port, 1);
695 
696 	if (MLX5_CAP_ROCE_MAX(dev, qp_ooo_transmit_default))
697 		MLX5_SET(roce_cap, set_hca_cap, qp_ooo_transmit_default, 1);
698 
699 	err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ROCE);
700 	return err;
701 }
702 
703 static int handle_hca_cap_port_selection(struct mlx5_core_dev *dev,
704 					 void *set_ctx)
705 {
706 	void *set_hca_cap;
707 	int err;
708 
709 	if (!MLX5_CAP_GEN(dev, port_selection_cap))
710 		return 0;
711 
712 	err = mlx5_core_get_caps(dev, MLX5_CAP_PORT_SELECTION);
713 	if (err)
714 		return err;
715 
716 	if (MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass) ||
717 	    !MLX5_CAP_PORT_SELECTION_MAX(dev, port_select_flow_table_bypass))
718 		return 0;
719 
720 	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
721 	memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_PORT_SELECTION]->cur,
722 	       MLX5_ST_SZ_BYTES(port_selection_cap));
723 	MLX5_SET(port_selection_cap, set_hca_cap, port_select_flow_table_bypass, 1);
724 
725 	err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_PORT_SELECTION);
726 
727 	return err;
728 }
729 
730 static int set_hca_cap(struct mlx5_core_dev *dev)
731 {
732 	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
733 	void *set_ctx;
734 	int err;
735 
736 	set_ctx = kzalloc(set_sz, GFP_KERNEL);
737 	if (!set_ctx)
738 		return -ENOMEM;
739 
740 	err = handle_hca_cap(dev, set_ctx);
741 	if (err) {
742 		mlx5_core_err(dev, "handle_hca_cap failed\n");
743 		goto out;
744 	}
745 
746 	memset(set_ctx, 0, set_sz);
747 	err = handle_hca_cap_atomic(dev, set_ctx);
748 	if (err) {
749 		mlx5_core_err(dev, "handle_hca_cap_atomic failed\n");
750 		goto out;
751 	}
752 
753 	memset(set_ctx, 0, set_sz);
754 	err = handle_hca_cap_odp(dev, set_ctx);
755 	if (err) {
756 		mlx5_core_err(dev, "handle_hca_cap_odp failed\n");
757 		goto out;
758 	}
759 
760 	memset(set_ctx, 0, set_sz);
761 	err = handle_hca_cap_roce(dev, set_ctx);
762 	if (err) {
763 		mlx5_core_err(dev, "handle_hca_cap_roce failed\n");
764 		goto out;
765 	}
766 
767 	memset(set_ctx, 0, set_sz);
768 	err = handle_hca_cap_2(dev, set_ctx);
769 	if (err) {
770 		mlx5_core_err(dev, "handle_hca_cap_2 failed\n");
771 		goto out;
772 	}
773 
774 	memset(set_ctx, 0, set_sz);
775 	err = handle_hca_cap_port_selection(dev, set_ctx);
776 	if (err) {
777 		mlx5_core_err(dev, "handle_hca_cap_port_selection failed\n");
778 		goto out;
779 	}
780 
781 out:
782 	kfree(set_ctx);
783 	return err;
784 }
785 
786 static int set_hca_ctrl(struct mlx5_core_dev *dev)
787 {
788 	struct mlx5_reg_host_endianness he_in;
789 	struct mlx5_reg_host_endianness he_out;
790 	int err;
791 
792 	if (!mlx5_core_is_pf(dev))
793 		return 0;
794 
795 	memset(&he_in, 0, sizeof(he_in));
796 	he_in.he = MLX5_SET_HOST_ENDIANNESS;
797 	err = mlx5_core_access_reg(dev, &he_in,  sizeof(he_in),
798 					&he_out, sizeof(he_out),
799 					MLX5_REG_HOST_ENDIANNESS, 0, 1);
800 	return err;
801 }
802 
803 static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev)
804 {
805 	int ret = 0;
806 
807 	/* Disable local_lb by default */
808 	if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH)
809 		ret = mlx5_nic_vport_update_local_lb(dev, false);
810 
811 	return ret;
812 }
813 
814 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
815 {
816 	u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {};
817 
818 	MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
819 	MLX5_SET(enable_hca_in, in, function_id, func_id);
820 	MLX5_SET(enable_hca_in, in, embedded_cpu_function,
821 		 dev->caps.embedded_cpu);
822 	return mlx5_cmd_exec_in(dev, enable_hca, in);
823 }
824 
825 int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
826 {
827 	u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {};
828 
829 	MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
830 	MLX5_SET(disable_hca_in, in, function_id, func_id);
831 	MLX5_SET(enable_hca_in, in, embedded_cpu_function,
832 		 dev->caps.embedded_cpu);
833 	return mlx5_cmd_exec_in(dev, disable_hca, in);
834 }
835 
836 static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
837 {
838 	u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {};
839 	u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {};
840 	u32 sup_issi;
841 	int err;
842 
843 	MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);
844 	err = mlx5_cmd_exec_inout(dev, query_issi, query_in, query_out);
845 	if (err) {
846 		u32 syndrome = MLX5_GET(query_issi_out, query_out, syndrome);
847 		u8 status = MLX5_GET(query_issi_out, query_out, status);
848 
849 		if (!status || syndrome == MLX5_DRIVER_SYND) {
850 			mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n",
851 				      err, status, syndrome);
852 			return err;
853 		}
854 
855 		mlx5_core_warn(dev, "Query ISSI is not supported by FW, ISSI is 0\n");
856 		dev->issi = 0;
857 		return 0;
858 	}
859 
860 	sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);
861 
862 	if (sup_issi & (1 << 1)) {
863 		u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {};
864 
865 		MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
866 		MLX5_SET(set_issi_in, set_in, current_issi, 1);
867 		err = mlx5_cmd_exec_in(dev, set_issi, set_in);
868 		if (err) {
869 			mlx5_core_err(dev, "Failed to set ISSI to 1 err(%d)\n",
870 				      err);
871 			return err;
872 		}
873 
874 		dev->issi = 1;
875 
876 		return 0;
877 	} else if (sup_issi & (1 << 0) || !sup_issi) {
878 		return 0;
879 	}
880 
881 	return -EOPNOTSUPP;
882 }
883 
884 static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
885 			 const struct pci_device_id *id)
886 {
887 	int err = 0;
888 
889 	mutex_init(&dev->pci_status_mutex);
890 	pci_set_drvdata(dev->pdev, dev);
891 
892 	dev->bar_addr = pci_resource_start(pdev, 0);
893 
894 	err = mlx5_pci_enable_device(dev);
895 	if (err) {
896 		mlx5_core_err(dev, "Cannot enable PCI device, aborting\n");
897 		return err;
898 	}
899 
900 	err = request_bar(pdev);
901 	if (err) {
902 		mlx5_core_err(dev, "error requesting BARs, aborting\n");
903 		goto err_disable;
904 	}
905 
906 	pci_set_master(pdev);
907 
908 	err = set_dma_caps(pdev);
909 	if (err) {
910 		mlx5_core_err(dev, "Failed setting DMA capabilities mask, aborting\n");
911 		goto err_clr_master;
912 	}
913 
914 	if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) &&
915 	    pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) &&
916 	    pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128))
917 		mlx5_core_dbg(dev, "Enabling pci atomics failed\n");
918 
919 	dev->iseg_base = dev->bar_addr;
920 	dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
921 	if (!dev->iseg) {
922 		err = -ENOMEM;
923 		mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n");
924 		goto err_clr_master;
925 	}
926 
927 	mlx5_pci_vsc_init(dev);
928 	return 0;
929 
930 err_clr_master:
931 	release_bar(dev->pdev);
932 err_disable:
933 	mlx5_pci_disable_device(dev);
934 	return err;
935 }
936 
/*
 * Tear down what mlx5_pci_init() set up: unmap the initialization segment,
 * release the PCI regions and disable the device. The health workqueue is
 * drained first.
 */
static void mlx5_pci_close(struct mlx5_core_dev *dev)
{
	/* health work might still be active, and it needs pci bar in
	 * order to know the NIC state. Therefore, drain the health WQ
	 * before removing the pci bars
	 */
	mlx5_drain_health_wq(dev);
	iounmap(dev->iseg);
	release_bar(dev->pdev);
	mlx5_pci_disable_device(dev);
}
948 
/*
 * Initialise the software objects hanging off @dev.
 *
 * Sub-systems are initialised in a fixed order; when one of the checked
 * steps fails, everything initialised before it is torn down in reverse
 * order through the goto ladder at the end and the error is returned.
 *
 * Not every step is fatal: a devcom registration or device-memory creation
 * failure is only warned about, and the results of the vxlan, geneve,
 * fw tracer, hv_vhca and rsc_dump creators are stored without being
 * checked here.
 *
 * Returns 0 on success or the error of the first failing checked step.
 */
static int mlx5_init_once(struct mlx5_core_dev *dev)
{
	int err;

	/* Non-fatal: the (possibly ERR_PTR) result is kept and later handed
	 * to mlx5_devcom_unregister_device().
	 */
	dev->priv.devc = mlx5_devcom_register_device(dev);
	if (IS_ERR(dev->priv.devc))
		mlx5_core_warn(dev, "failed to register devcom device %ld\n",
			       PTR_ERR(dev->priv.devc));

	err = mlx5_query_board_id(dev);
	if (err) {
		mlx5_core_err(dev, "query board id failed\n");
		goto err_devcom;
	}

	err = mlx5_irq_table_init(dev);
	if (err) {
		mlx5_core_err(dev, "failed to initialize irq table\n");
		goto err_devcom;
	}

	err = mlx5_eq_table_init(dev);
	if (err) {
		mlx5_core_err(dev, "failed to initialize eq\n");
		goto err_irq_cleanup;
	}

	err = mlx5_events_init(dev);
	if (err) {
		mlx5_core_err(dev, "failed to initialize events\n");
		goto err_eq_cleanup;
	}

	err = mlx5_fw_reset_init(dev);
	if (err) {
		mlx5_core_err(dev, "failed to initialize fw reset events\n");
		goto err_events_cleanup;
	}

	/* The following steps report no error here; they are all undone
	 * together under the err_tables_cleanup label.
	 */
	mlx5_cq_debugfs_init(dev);

	mlx5_init_reserved_gids(dev);

	mlx5_init_clock(dev);

	dev->vxlan = mlx5_vxlan_create(dev);
	dev->geneve = mlx5_geneve_create(dev);

	err = mlx5_init_rl_table(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init rate limiting\n");
		goto err_tables_cleanup;
	}

	err = mlx5_mpfs_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init l2 table %d\n", err);
		goto err_rl_cleanup;
	}

	err = mlx5_sriov_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init sriov %d\n", err);
		goto err_mpfs_cleanup;
	}

	err = mlx5_eswitch_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init eswitch %d\n", err);
		goto err_sriov_cleanup;
	}

	err = mlx5_fpga_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init fpga device %d\n", err);
		goto err_eswitch_cleanup;
	}

	err = mlx5_vhca_event_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init vhca event notifier %d\n", err);
		goto err_fpga_cleanup;
	}

	/* NOTE: from here on the label names are shifted by one step: each
	 * label is named after the step that FAILED, while the code under it
	 * cleans up the step BEFORE it. The unwinding itself is correct.
	 */
	err = mlx5_sf_hw_table_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init SF HW table %d\n", err);
		goto err_sf_hw_table_cleanup;
	}

	err = mlx5_sf_table_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init SF table %d\n", err);
		goto err_sf_table_cleanup;
	}

	err = mlx5_fs_core_alloc(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to alloc flow steering\n");
		goto err_fs;
	}

	/* Non-fatal: only warned about. */
	dev->dm = mlx5_dm_create(dev);
	if (IS_ERR(dev->dm))
		mlx5_core_warn(dev, "Failed to init device memory %ld\n", PTR_ERR(dev->dm));

	/* Results are stored as returned; no error handling here. */
	dev->tracer = mlx5_fw_tracer_create(dev);
	dev->hv_vhca = mlx5_hv_vhca_create(dev);
	dev->rsc_dump = mlx5_rsc_dump_create(dev);

	return 0;

	/* Unwind ladder: entered at the label matching the failed step and
	 * falling through to undo all earlier steps in reverse order.
	 */
err_fs:
	mlx5_sf_table_cleanup(dev);
err_sf_table_cleanup:
	/* SF table init failed: undo the SF HW table */
	mlx5_sf_hw_table_cleanup(dev);
err_sf_hw_table_cleanup:
	/* SF HW table init failed: undo the vhca event notifier */
	mlx5_vhca_event_cleanup(dev);
err_fpga_cleanup:
	mlx5_fpga_cleanup(dev);
err_eswitch_cleanup:
	mlx5_eswitch_cleanup(dev->priv.eswitch);
err_sriov_cleanup:
	mlx5_sriov_cleanup(dev);
err_mpfs_cleanup:
	mlx5_mpfs_cleanup(dev);
err_rl_cleanup:
	mlx5_cleanup_rl_table(dev);
err_tables_cleanup:
	/* undo the unchecked steps plus fw_reset, in reverse order */
	mlx5_geneve_destroy(dev->geneve);
	mlx5_vxlan_destroy(dev->vxlan);
	mlx5_cleanup_clock(dev);
	mlx5_cleanup_reserved_gids(dev);
	mlx5_cq_debugfs_cleanup(dev);
	mlx5_fw_reset_cleanup(dev);
err_events_cleanup:
	mlx5_events_cleanup(dev);
err_eq_cleanup:
	mlx5_eq_table_cleanup(dev);
err_irq_cleanup:
	mlx5_irq_table_cleanup(dev);
err_devcom:
	mlx5_devcom_unregister_device(dev->priv.devc);

	return err;
}
1095 
1096 static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
1097 {
1098 	mlx5_rsc_dump_destroy(dev);
1099 	mlx5_hv_vhca_destroy(dev->hv_vhca);
1100 	mlx5_fw_tracer_destroy(dev->tracer);
1101 	mlx5_dm_cleanup(dev);
1102 	mlx5_fs_core_free(dev);
1103 	mlx5_sf_table_cleanup(dev);
1104 	mlx5_sf_hw_table_cleanup(dev);
1105 	mlx5_vhca_event_cleanup(dev);
1106 	mlx5_fpga_cleanup(dev);
1107 	mlx5_eswitch_cleanup(dev->priv.eswitch);
1108 	mlx5_sriov_cleanup(dev);
1109 	mlx5_mpfs_cleanup(dev);
1110 	mlx5_cleanup_rl_table(dev);
1111 	mlx5_geneve_destroy(dev->geneve);
1112 	mlx5_vxlan_destroy(dev->vxlan);
1113 	mlx5_cleanup_clock(dev);
1114 	mlx5_cleanup_reserved_gids(dev);
1115 	mlx5_cq_debugfs_cleanup(dev);
1116 	mlx5_fw_reset_cleanup(dev);
1117 	mlx5_events_cleanup(dev);
1118 	mlx5_eq_table_cleanup(dev);
1119 	mlx5_irq_table_cleanup(dev);
1120 	mlx5_devcom_unregister_device(dev->priv.devc);
1121 }
1122 
1123 static int mlx5_function_enable(struct mlx5_core_dev *dev, bool boot, u64 timeout)
1124 {
1125 	int err;
1126 
1127 	mlx5_core_info(dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
1128 		       fw_rev_min(dev), fw_rev_sub(dev));
1129 
1130 	/* Only PFs hold the relevant PCIe information for this query */
1131 	if (mlx5_core_is_pf(dev))
1132 		pcie_print_link_status(dev->pdev);
1133 
1134 	/* wait for firmware to accept initialization segments configurations
1135 	 */
1136 	err = wait_fw_init(dev, timeout,
1137 			   mlx5_tout_ms(dev, FW_PRE_INIT_WARN_MESSAGE_INTERVAL));
1138 	if (err) {
1139 		mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n",
1140 			      timeout);
1141 		return err;
1142 	}
1143 
1144 	err = mlx5_cmd_enable(dev);
1145 	if (err) {
1146 		mlx5_core_err(dev, "Failed initializing command interface, aborting\n");
1147 		return err;
1148 	}
1149 
1150 	mlx5_tout_query_iseg(dev);
1151 
1152 	err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_INIT), 0);
1153 	if (err) {
1154 		mlx5_core_err(dev, "Firmware over %llu MS in initializing state, aborting\n",
1155 			      mlx5_tout_ms(dev, FW_INIT));
1156 		goto err_cmd_cleanup;
1157 	}
1158 
1159 	dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
1160 	mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP);
1161 
1162 	mlx5_start_health_poll(dev);
1163 
1164 	err = mlx5_core_enable_hca(dev, 0);
1165 	if (err) {
1166 		mlx5_core_err(dev, "enable hca failed\n");
1167 		goto stop_health_poll;
1168 	}
1169 
1170 	err = mlx5_core_set_issi(dev);
1171 	if (err) {
1172 		mlx5_core_err(dev, "failed to set issi\n");
1173 		goto err_disable_hca;
1174 	}
1175 
1176 	err = mlx5_satisfy_startup_pages(dev, 1);
1177 	if (err) {
1178 		mlx5_core_err(dev, "failed to allocate boot pages\n");
1179 		goto err_disable_hca;
1180 	}
1181 
1182 	err = mlx5_tout_query_dtor(dev);
1183 	if (err) {
1184 		mlx5_core_err(dev, "failed to read dtor\n");
1185 		goto reclaim_boot_pages;
1186 	}
1187 
1188 	return 0;
1189 
1190 reclaim_boot_pages:
1191 	mlx5_reclaim_startup_pages(dev);
1192 err_disable_hca:
1193 	mlx5_core_disable_hca(dev, 0);
1194 stop_health_poll:
1195 	mlx5_stop_health_poll(dev, boot);
1196 err_cmd_cleanup:
1197 	mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
1198 	mlx5_cmd_disable(dev);
1199 
1200 	return err;
1201 }
1202 
/*
 * Undo mlx5_function_enable(): issues the same calls, in the same order, as
 * that function's full error unwind path.
 *
 * @dev:  core device to disable
 * @boot: forwarded unchanged to mlx5_stop_health_poll()
 */
static void mlx5_function_disable(struct mlx5_core_dev *dev, bool boot)
{
	mlx5_reclaim_startup_pages(dev);
	mlx5_core_disable_hca(dev, 0);
	mlx5_stop_health_poll(dev, boot);
	/* Mark the command interface down before disabling it */
	mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
	mlx5_cmd_disable(dev);
}
1211 
1212 static int mlx5_function_open(struct mlx5_core_dev *dev)
1213 {
1214 	int err;
1215 
1216 	err = set_hca_ctrl(dev);
1217 	if (err) {
1218 		mlx5_core_err(dev, "set_hca_ctrl failed\n");
1219 		return err;
1220 	}
1221 
1222 	err = set_hca_cap(dev);
1223 	if (err) {
1224 		mlx5_core_err(dev, "set_hca_cap failed\n");
1225 		return err;
1226 	}
1227 
1228 	err = mlx5_satisfy_startup_pages(dev, 0);
1229 	if (err) {
1230 		mlx5_core_err(dev, "failed to allocate init pages\n");
1231 		return err;
1232 	}
1233 
1234 	err = mlx5_cmd_init_hca(dev, sw_owner_id);
1235 	if (err) {
1236 		mlx5_core_err(dev, "init hca failed\n");
1237 		return err;
1238 	}
1239 
1240 	mlx5_set_driver_version(dev);
1241 
1242 	err = mlx5_query_hca_caps(dev);
1243 	if (err) {
1244 		mlx5_core_err(dev, "query hca failed\n");
1245 		return err;
1246 	}
1247 	mlx5_start_health_fw_log_up(dev);
1248 	return 0;
1249 }
1250 
/*
 * Issue the teardown HCA command.
 *
 * Returns 0 on success, or the command's error after logging it.
 */
static int mlx5_function_close(struct mlx5_core_dev *dev)
{
	int ret = mlx5_cmd_teardown_hca(dev);

	if (ret)
		mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");

	return ret;
}
1263 
1264 static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout)
1265 {
1266 	int err;
1267 
1268 	err = mlx5_function_enable(dev, boot, timeout);
1269 	if (err)
1270 		return err;
1271 
1272 	err = mlx5_function_open(dev);
1273 	if (err)
1274 		mlx5_function_disable(dev, boot);
1275 	return err;
1276 }
1277 
1278 static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
1279 {
1280 	int err = mlx5_function_close(dev);
1281 
1282 	if (!err)
1283 		mlx5_function_disable(dev, boot);
1284 	return err;
1285 }
1286 
1287 static int mlx5_load(struct mlx5_core_dev *dev)
1288 {
1289 	int err;
1290 
1291 	dev->priv.uar = mlx5_get_uars_page(dev);
1292 	if (IS_ERR(dev->priv.uar)) {
1293 		mlx5_core_err(dev, "Failed allocating uar, aborting\n");
1294 		err = PTR_ERR(dev->priv.uar);
1295 		return err;
1296 	}
1297 
1298 	mlx5_events_start(dev);
1299 	mlx5_pagealloc_start(dev);
1300 
1301 	err = mlx5_irq_table_create(dev);
1302 	if (err) {
1303 		mlx5_core_err(dev, "Failed to alloc IRQs\n");
1304 		goto err_irq_table;
1305 	}
1306 
1307 	err = mlx5_eq_table_create(dev);
1308 	if (err) {
1309 		mlx5_core_err(dev, "Failed to create EQs\n");
1310 		goto err_eq_table;
1311 	}
1312 
1313 	err = mlx5_fw_tracer_init(dev->tracer);
1314 	if (err) {
1315 		mlx5_core_err(dev, "Failed to init FW tracer %d\n", err);
1316 		mlx5_fw_tracer_destroy(dev->tracer);
1317 		dev->tracer = NULL;
1318 	}
1319 
1320 	mlx5_fw_reset_events_start(dev);
1321 	mlx5_hv_vhca_init(dev->hv_vhca);
1322 
1323 	err = mlx5_rsc_dump_init(dev);
1324 	if (err) {
1325 		mlx5_core_err(dev, "Failed to init Resource dump %d\n", err);
1326 		mlx5_rsc_dump_destroy(dev);
1327 		dev->rsc_dump = NULL;
1328 	}
1329 
1330 	err = mlx5_fpga_device_start(dev);
1331 	if (err) {
1332 		mlx5_core_err(dev, "fpga device start failed %d\n", err);
1333 		goto err_fpga_start;
1334 	}
1335 
1336 	err = mlx5_fs_core_init(dev);
1337 	if (err) {
1338 		mlx5_core_err(dev, "Failed to init flow steering\n");
1339 		goto err_fs;
1340 	}
1341 
1342 	err = mlx5_core_set_hca_defaults(dev);
1343 	if (err) {
1344 		mlx5_core_err(dev, "Failed to set hca defaults\n");
1345 		goto err_set_hca;
1346 	}
1347 
1348 	mlx5_vhca_event_start(dev);
1349 
1350 	err = mlx5_sf_hw_table_create(dev);
1351 	if (err) {
1352 		mlx5_core_err(dev, "sf table create failed %d\n", err);
1353 		goto err_vhca;
1354 	}
1355 
1356 	err = mlx5_ec_init(dev);
1357 	if (err) {
1358 		mlx5_core_err(dev, "Failed to init embedded CPU\n");
1359 		goto err_ec;
1360 	}
1361 
1362 	mlx5_lag_add_mdev(dev);
1363 	err = mlx5_sriov_attach(dev);
1364 	if (err) {
1365 		mlx5_core_err(dev, "sriov init failed %d\n", err);
1366 		goto err_sriov;
1367 	}
1368 
1369 	mlx5_sf_dev_table_create(dev);
1370 
1371 	err = mlx5_devlink_traps_register(priv_to_devlink(dev));
1372 	if (err)
1373 		goto err_traps_reg;
1374 
1375 	return 0;
1376 
1377 err_traps_reg:
1378 	mlx5_sf_dev_table_destroy(dev);
1379 	mlx5_sriov_detach(dev);
1380 err_sriov:
1381 	mlx5_lag_remove_mdev(dev);
1382 	mlx5_ec_cleanup(dev);
1383 err_ec:
1384 	mlx5_sf_hw_table_destroy(dev);
1385 err_vhca:
1386 	mlx5_vhca_event_stop(dev);
1387 err_set_hca:
1388 	mlx5_fs_core_cleanup(dev);
1389 err_fs:
1390 	mlx5_fpga_device_stop(dev);
1391 err_fpga_start:
1392 	mlx5_rsc_dump_cleanup(dev);
1393 	mlx5_hv_vhca_cleanup(dev->hv_vhca);
1394 	mlx5_fw_reset_events_stop(dev);
1395 	mlx5_fw_tracer_cleanup(dev->tracer);
1396 	mlx5_eq_table_destroy(dev);
1397 err_eq_table:
1398 	mlx5_irq_table_destroy(dev);
1399 err_irq_table:
1400 	mlx5_pagealloc_stop(dev);
1401 	mlx5_events_stop(dev);
1402 	mlx5_put_uars_page(dev, dev->priv.uar);
1403 	return err;
1404 }
1405 
1406 static void mlx5_unload(struct mlx5_core_dev *dev)
1407 {
1408 	mlx5_devlink_traps_unregister(priv_to_devlink(dev));
1409 	mlx5_sf_dev_table_destroy(dev);
1410 	mlx5_eswitch_disable(dev->priv.eswitch);
1411 	mlx5_sriov_detach(dev);
1412 	mlx5_lag_remove_mdev(dev);
1413 	mlx5_ec_cleanup(dev);
1414 	mlx5_sf_hw_table_destroy(dev);
1415 	mlx5_vhca_event_stop(dev);
1416 	mlx5_fs_core_cleanup(dev);
1417 	mlx5_fpga_device_stop(dev);
1418 	mlx5_rsc_dump_cleanup(dev);
1419 	mlx5_hv_vhca_cleanup(dev->hv_vhca);
1420 	mlx5_fw_reset_events_stop(dev);
1421 	mlx5_fw_tracer_cleanup(dev->tracer);
1422 	mlx5_eq_table_destroy(dev);
1423 	mlx5_irq_table_destroy(dev);
1424 	mlx5_pagealloc_stop(dev);
1425 	mlx5_events_stop(dev);
1426 	mlx5_put_uars_page(dev, dev->priv.uar);
1427 }
1428 
1429 int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev)
1430 {
1431 	bool light_probe = mlx5_dev_is_lightweight(dev);
1432 	int err = 0;
1433 
1434 	mutex_lock(&dev->intf_state_mutex);
1435 	dev->state = MLX5_DEVICE_STATE_UP;
1436 
1437 	err = mlx5_function_setup(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
1438 	if (err)
1439 		goto err_function;
1440 
1441 	err = mlx5_init_once(dev);
1442 	if (err) {
1443 		mlx5_core_err(dev, "sw objs init failed\n");
1444 		goto function_teardown;
1445 	}
1446 
1447 	/* In case of light_probe, mlx5_devlink is already registered.
1448 	 * Hence, don't register devlink again.
1449 	 */
1450 	if (!light_probe) {
1451 		err = mlx5_devlink_params_register(priv_to_devlink(dev));
1452 		if (err)
1453 			goto err_devlink_params_reg;
1454 	}
1455 
1456 	err = mlx5_load(dev);
1457 	if (err)
1458 		goto err_load;
1459 
1460 	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
1461 
1462 	err = mlx5_register_device(dev);
1463 	if (err)
1464 		goto err_register;
1465 
1466 	mutex_unlock(&dev->intf_state_mutex);
1467 	return 0;
1468 
1469 err_register:
1470 	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
1471 	mlx5_unload(dev);
1472 err_load:
1473 	if (!light_probe)
1474 		mlx5_devlink_params_unregister(priv_to_devlink(dev));
1475 err_devlink_params_reg:
1476 	mlx5_cleanup_once(dev);
1477 function_teardown:
1478 	mlx5_function_teardown(dev, true);
1479 err_function:
1480 	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
1481 	mutex_unlock(&dev->intf_state_mutex);
1482 	return err;
1483 }
1484 
/* Run mlx5_init_one_devl_locked() with the devlink instance lock held. */
int mlx5_init_one(struct mlx5_core_dev *dev)
{
	struct devlink *dl = priv_to_devlink(dev);
	int ret;

	devl_lock(dl);
	ret = mlx5_init_one_devl_locked(dev);
	devl_unlock(dl);

	return ret;
}
1495 
1496 void mlx5_uninit_one(struct mlx5_core_dev *dev)
1497 {
1498 	struct devlink *devlink = priv_to_devlink(dev);
1499 
1500 	devl_lock(devlink);
1501 	mutex_lock(&dev->intf_state_mutex);
1502 
1503 	mlx5_unregister_device(dev);
1504 
1505 	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
1506 		mlx5_core_warn(dev, "%s: interface is down, NOP\n",
1507 			       __func__);
1508 		mlx5_devlink_params_unregister(priv_to_devlink(dev));
1509 		mlx5_cleanup_once(dev);
1510 		goto out;
1511 	}
1512 
1513 	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
1514 	mlx5_unload(dev);
1515 	mlx5_devlink_params_unregister(priv_to_devlink(dev));
1516 	mlx5_cleanup_once(dev);
1517 	mlx5_function_teardown(dev, true);
1518 out:
1519 	mutex_unlock(&dev->intf_state_mutex);
1520 	devl_unlock(devlink);
1521 }
1522 
1523 int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery)
1524 {
1525 	int err = 0;
1526 	u64 timeout;
1527 
1528 	devl_assert_locked(priv_to_devlink(dev));
1529 	mutex_lock(&dev->intf_state_mutex);
1530 	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
1531 		mlx5_core_warn(dev, "interface is up, NOP\n");
1532 		goto out;
1533 	}
1534 	/* remove any previous indication of internal error */
1535 	dev->state = MLX5_DEVICE_STATE_UP;
1536 
1537 	if (recovery)
1538 		timeout = mlx5_tout_ms(dev, FW_PRE_INIT_ON_RECOVERY_TIMEOUT);
1539 	else
1540 		timeout = mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT);
1541 	err = mlx5_function_setup(dev, false, timeout);
1542 	if (err)
1543 		goto err_function;
1544 
1545 	err = mlx5_load(dev);
1546 	if (err)
1547 		goto err_load;
1548 
1549 	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
1550 
1551 	err = mlx5_attach_device(dev);
1552 	if (err)
1553 		goto err_attach;
1554 
1555 	mutex_unlock(&dev->intf_state_mutex);
1556 	return 0;
1557 
1558 err_attach:
1559 	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
1560 	mlx5_unload(dev);
1561 err_load:
1562 	mlx5_function_teardown(dev, false);
1563 err_function:
1564 	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
1565 out:
1566 	mutex_unlock(&dev->intf_state_mutex);
1567 	return err;
1568 }
1569 
1570 int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery)
1571 {
1572 	struct devlink *devlink = priv_to_devlink(dev);
1573 	int ret;
1574 
1575 	devl_lock(devlink);
1576 	ret = mlx5_load_one_devl_locked(dev, recovery);
1577 	devl_unlock(devlink);
1578 	return ret;
1579 }
1580 
1581 void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend)
1582 {
1583 	devl_assert_locked(priv_to_devlink(dev));
1584 	mutex_lock(&dev->intf_state_mutex);
1585 
1586 	mlx5_detach_device(dev, suspend);
1587 
1588 	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
1589 		mlx5_core_warn(dev, "%s: interface is down, NOP\n",
1590 			       __func__);
1591 		goto out;
1592 	}
1593 
1594 	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
1595 	mlx5_unload(dev);
1596 	mlx5_function_teardown(dev, false);
1597 out:
1598 	mutex_unlock(&dev->intf_state_mutex);
1599 }
1600 
1601 void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend)
1602 {
1603 	struct devlink *devlink = priv_to_devlink(dev);
1604 
1605 	devl_lock(devlink);
1606 	mlx5_unload_one_devl_locked(dev, suspend);
1607 	devl_unlock(devlink);
1608 }
1609 
1610 /* In case of light probe, we don't need a full query of hca_caps, but only the bellow caps.
1611  * A full query of hca_caps will be done when the device will reload.
1612  */
1613 static int mlx5_query_hca_caps_light(struct mlx5_core_dev *dev)
1614 {
1615 	int err;
1616 
1617 	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
1618 	if (err)
1619 		return err;
1620 
1621 	if (MLX5_CAP_GEN(dev, eth_net_offloads)) {
1622 		err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ETHERNET_OFFLOADS,
1623 					      HCA_CAP_OPMOD_GET_CUR);
1624 		if (err)
1625 			return err;
1626 	}
1627 
1628 	if (MLX5_CAP_GEN(dev, nic_flow_table) ||
1629 	    MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) {
1630 		err = mlx5_core_get_caps_mode(dev, MLX5_CAP_FLOW_TABLE,
1631 					      HCA_CAP_OPMOD_GET_CUR);
1632 		if (err)
1633 			return err;
1634 	}
1635 
1636 	if (MLX5_CAP_GEN_64(dev, general_obj_types) &
1637 		MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) {
1638 		err = mlx5_core_get_caps_mode(dev, MLX5_CAP_VDPA_EMULATION,
1639 					      HCA_CAP_OPMOD_GET_CUR);
1640 		if (err)
1641 			return err;
1642 	}
1643 
1644 	return 0;
1645 }
1646 
1647 int mlx5_init_one_light(struct mlx5_core_dev *dev)
1648 {
1649 	struct devlink *devlink = priv_to_devlink(dev);
1650 	int err;
1651 
1652 	dev->state = MLX5_DEVICE_STATE_UP;
1653 	err = mlx5_function_enable(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
1654 	if (err) {
1655 		mlx5_core_warn(dev, "mlx5_function_enable err=%d\n", err);
1656 		goto out;
1657 	}
1658 
1659 	err = mlx5_query_hca_caps_light(dev);
1660 	if (err) {
1661 		mlx5_core_warn(dev, "mlx5_query_hca_caps_light err=%d\n", err);
1662 		goto query_hca_caps_err;
1663 	}
1664 
1665 	devl_lock(devlink);
1666 	err = mlx5_devlink_params_register(priv_to_devlink(dev));
1667 	devl_unlock(devlink);
1668 	if (err) {
1669 		mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err);
1670 		goto query_hca_caps_err;
1671 	}
1672 
1673 	return 0;
1674 
1675 query_hca_caps_err:
1676 	mlx5_function_disable(dev, true);
1677 out:
1678 	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
1679 	return err;
1680 }
1681 
1682 void mlx5_uninit_one_light(struct mlx5_core_dev *dev)
1683 {
1684 	struct devlink *devlink = priv_to_devlink(dev);
1685 
1686 	devl_lock(devlink);
1687 	mlx5_devlink_params_unregister(priv_to_devlink(dev));
1688 	devl_unlock(devlink);
1689 	if (dev->state != MLX5_DEVICE_STATE_UP)
1690 		return;
1691 	mlx5_function_disable(dev, true);
1692 }
1693 
/* The xxx_light() functions are used to configure the device without a full
 * init (light init). There is no point in reloading a device into the light
 * state, hence mlx5_load_one_light() isn't needed.
 */
1698 
1699 void mlx5_unload_one_light(struct mlx5_core_dev *dev)
1700 {
1701 	if (dev->state != MLX5_DEVICE_STATE_UP)
1702 		return;
1703 	mlx5_function_disable(dev, false);
1704 	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
1705 }
1706 
/*
 * HCA capability types that get a dev->caps.hca[] entry: each one is
 * allocated by mlx5_hca_caps_alloc() and released by mlx5_hca_caps_free().
 */
static const int types[] = {
	MLX5_CAP_GENERAL,
	MLX5_CAP_GENERAL_2,
	MLX5_CAP_ETHERNET_OFFLOADS,
	MLX5_CAP_IPOIB_ENHANCED_OFFLOADS,
	MLX5_CAP_ODP,
	MLX5_CAP_ATOMIC,
	MLX5_CAP_ROCE,
	MLX5_CAP_IPOIB_OFFLOADS,
	MLX5_CAP_FLOW_TABLE,
	MLX5_CAP_ESWITCH_FLOW_TABLE,
	MLX5_CAP_ESWITCH,
	MLX5_CAP_QOS,
	MLX5_CAP_DEBUG,
	MLX5_CAP_DEV_MEM,
	MLX5_CAP_DEV_EVENT,
	MLX5_CAP_TLS,
	MLX5_CAP_VDPA_EMULATION,
	MLX5_CAP_IPSEC,
	MLX5_CAP_PORT_SELECTION,
	MLX5_CAP_MACSEC,
	MLX5_CAP_ADV_VIRTUALIZATION,
	MLX5_CAP_CRYPTO,
};
1731 
1732 static void mlx5_hca_caps_free(struct mlx5_core_dev *dev)
1733 {
1734 	int type;
1735 	int i;
1736 
1737 	for (i = 0; i < ARRAY_SIZE(types); i++) {
1738 		type = types[i];
1739 		kfree(dev->caps.hca[type]);
1740 	}
1741 }
1742 
1743 static int mlx5_hca_caps_alloc(struct mlx5_core_dev *dev)
1744 {
1745 	struct mlx5_hca_cap *cap;
1746 	int type;
1747 	int i;
1748 
1749 	for (i = 0; i < ARRAY_SIZE(types); i++) {
1750 		cap = kzalloc(sizeof(*cap), GFP_KERNEL);
1751 		if (!cap)
1752 			goto err;
1753 		type = types[i];
1754 		dev->caps.hca[type] = cap;
1755 	}
1756 
1757 	return 0;
1758 
1759 err:
1760 	mlx5_hca_caps_free(dev);
1761 	return -ENOMEM;
1762 }
1763 
1764 static int vhca_id_show(struct seq_file *file, void *priv)
1765 {
1766 	struct mlx5_core_dev *dev = file->private;
1767 
1768 	seq_printf(file, "0x%x\n", MLX5_CAP_GEN(dev, vhca_id));
1769 	return 0;
1770 }
1771 
1772 DEFINE_SHOW_ATTRIBUTE(vhca_id);
1773 
1774 int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
1775 {
1776 	struct mlx5_priv *priv = &dev->priv;
1777 	int err;
1778 
1779 	memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile));
1780 	lockdep_register_key(&dev->lock_key);
1781 	mutex_init(&dev->intf_state_mutex);
1782 	lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key);
1783 	mutex_init(&dev->mlx5e_res.uplink_netdev_lock);
1784 
1785 	mutex_init(&priv->bfregs.reg_head.lock);
1786 	mutex_init(&priv->bfregs.wc_head.lock);
1787 	INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
1788 	INIT_LIST_HEAD(&priv->bfregs.wc_head.list);
1789 
1790 	mutex_init(&priv->alloc_mutex);
1791 	mutex_init(&priv->pgdir_mutex);
1792 	INIT_LIST_HEAD(&priv->pgdir_list);
1793 
1794 	priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev));
1795 	priv->dbg.dbg_root = debugfs_create_dir(dev_name(dev->device),
1796 						mlx5_debugfs_root);
1797 	debugfs_create_file("vhca_id", 0400, priv->dbg.dbg_root, dev, &vhca_id_fops);
1798 	INIT_LIST_HEAD(&priv->traps);
1799 
1800 	err = mlx5_cmd_init(dev);
1801 	if (err) {
1802 		mlx5_core_err(dev, "Failed initializing cmdif SW structs, aborting\n");
1803 		goto err_cmd_init;
1804 	}
1805 
1806 	err = mlx5_tout_init(dev);
1807 	if (err) {
1808 		mlx5_core_err(dev, "Failed initializing timeouts, aborting\n");
1809 		goto err_timeout_init;
1810 	}
1811 
1812 	err = mlx5_health_init(dev);
1813 	if (err)
1814 		goto err_health_init;
1815 
1816 	err = mlx5_pagealloc_init(dev);
1817 	if (err)
1818 		goto err_pagealloc_init;
1819 
1820 	err = mlx5_adev_init(dev);
1821 	if (err)
1822 		goto err_adev_init;
1823 
1824 	err = mlx5_hca_caps_alloc(dev);
1825 	if (err)
1826 		goto err_hca_caps;
1827 
1828 	/* The conjunction of sw_vhca_id with sw_owner_id will be a global
1829 	 * unique id per function which uses mlx5_core.
1830 	 * Those values are supplied to FW as part of the init HCA command to
1831 	 * be used by both driver and FW when it's applicable.
1832 	 */
1833 	dev->priv.sw_vhca_id = ida_alloc_range(&sw_vhca_ida, 1,
1834 					       MAX_SW_VHCA_ID,
1835 					       GFP_KERNEL);
1836 	if (dev->priv.sw_vhca_id < 0)
1837 		mlx5_core_err(dev, "failed to allocate sw_vhca_id, err=%d\n",
1838 			      dev->priv.sw_vhca_id);
1839 
1840 	return 0;
1841 
1842 err_hca_caps:
1843 	mlx5_adev_cleanup(dev);
1844 err_adev_init:
1845 	mlx5_pagealloc_cleanup(dev);
1846 err_pagealloc_init:
1847 	mlx5_health_cleanup(dev);
1848 err_health_init:
1849 	mlx5_tout_cleanup(dev);
1850 err_timeout_init:
1851 	mlx5_cmd_cleanup(dev);
1852 err_cmd_init:
1853 	debugfs_remove(dev->priv.dbg.dbg_root);
1854 	mutex_destroy(&priv->pgdir_mutex);
1855 	mutex_destroy(&priv->alloc_mutex);
1856 	mutex_destroy(&priv->bfregs.wc_head.lock);
1857 	mutex_destroy(&priv->bfregs.reg_head.lock);
1858 	mutex_destroy(&dev->intf_state_mutex);
1859 	lockdep_unregister_key(&dev->lock_key);
1860 	return err;
1861 }
1862 
1863 void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
1864 {
1865 	struct mlx5_priv *priv = &dev->priv;
1866 
1867 	if (priv->sw_vhca_id > 0)
1868 		ida_free(&sw_vhca_ida, dev->priv.sw_vhca_id);
1869 
1870 	mlx5_hca_caps_free(dev);
1871 	mlx5_adev_cleanup(dev);
1872 	mlx5_pagealloc_cleanup(dev);
1873 	mlx5_health_cleanup(dev);
1874 	mlx5_tout_cleanup(dev);
1875 	mlx5_cmd_cleanup(dev);
1876 	debugfs_remove_recursive(dev->priv.dbg.dbg_root);
1877 	mutex_destroy(&priv->pgdir_mutex);
1878 	mutex_destroy(&priv->alloc_mutex);
1879 	mutex_destroy(&priv->bfregs.wc_head.lock);
1880 	mutex_destroy(&priv->bfregs.reg_head.lock);
1881 	mutex_destroy(&dev->mlx5e_res.uplink_netdev_lock);
1882 	mutex_destroy(&dev->intf_state_mutex);
1883 	lockdep_unregister_key(&dev->lock_key);
1884 }
1885 
1886 static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
1887 {
1888 	struct mlx5_core_dev *dev;
1889 	struct devlink *devlink;
1890 	int err;
1891 
1892 	devlink = mlx5_devlink_alloc(&pdev->dev);
1893 	if (!devlink) {
1894 		dev_err(&pdev->dev, "devlink alloc failed\n");
1895 		return -ENOMEM;
1896 	}
1897 
1898 	dev = devlink_priv(devlink);
1899 	dev->device = &pdev->dev;
1900 	dev->pdev = pdev;
1901 
1902 	dev->coredev_type = id->driver_data & MLX5_PCI_DEV_IS_VF ?
1903 			 MLX5_COREDEV_VF : MLX5_COREDEV_PF;
1904 
1905 	dev->priv.adev_idx = mlx5_adev_idx_alloc();
1906 	if (dev->priv.adev_idx < 0) {
1907 		err = dev->priv.adev_idx;
1908 		goto adev_init_err;
1909 	}
1910 
1911 	err = mlx5_mdev_init(dev, prof_sel);
1912 	if (err)
1913 		goto mdev_init_err;
1914 
1915 	err = mlx5_pci_init(dev, pdev, id);
1916 	if (err) {
1917 		mlx5_core_err(dev, "mlx5_pci_init failed with error code %d\n",
1918 			      err);
1919 		goto pci_init_err;
1920 	}
1921 
1922 	err = mlx5_init_one(dev);
1923 	if (err) {
1924 		mlx5_core_err(dev, "mlx5_init_one failed with error code %d\n",
1925 			      err);
1926 		goto err_init_one;
1927 	}
1928 
1929 	err = mlx5_crdump_enable(dev);
1930 	if (err)
1931 		dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
1932 
1933 	err = mlx5_hwmon_dev_register(dev);
1934 	if (err)
1935 		mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err);
1936 
1937 	pci_save_state(pdev);
1938 	devlink_register(devlink);
1939 	return 0;
1940 
1941 err_init_one:
1942 	mlx5_pci_close(dev);
1943 pci_init_err:
1944 	mlx5_mdev_uninit(dev);
1945 mdev_init_err:
1946 	mlx5_adev_idx_free(dev->priv.adev_idx);
1947 adev_init_err:
1948 	mlx5_devlink_free(devlink);
1949 
1950 	return err;
1951 }
1952 
/*
 * PCI remove callback: tear the device down and free it, releasing in
 * reverse order what probe_one() set up.
 */
static void remove_one(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
	struct devlink *devlink = priv_to_devlink(dev);

	set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
	/* mlx5_drain_fw_reset() and mlx5_drain_health_wq() are using
	 * devlink notify APIs.
	 * Hence, we must drain them before unregistering the devlink.
	 */
	mlx5_drain_fw_reset(dev);
	mlx5_drain_health_wq(dev);
	devlink_unregister(devlink);
	mlx5_sriov_disable(pdev, false);
	mlx5_hwmon_dev_unregister(dev);
	mlx5_crdump_disable(dev);
	mlx5_uninit_one(dev);
	mlx5_pci_close(dev);
	mlx5_mdev_uninit(dev);
	mlx5_adev_idx_free(dev->priv.adev_idx);
	mlx5_devlink_free(devlink);
}
1975 
/*
 * Log, at info level, the calling function's name together with the device
 * state, the result of mlx5_health_check_fatal_sensors() and pci_status,
 * followed by the caller-supplied format and arguments. @dev is evaluated
 * once.
 */
#define mlx5_pci_trace(dev, fmt, ...) ({ \
	struct mlx5_core_dev *__dev = (dev); \
	mlx5_core_info(__dev, "%s Device state = %d health sensors: %d pci_status: %d. " fmt, \
		       __func__, __dev->state, mlx5_health_check_fatal_sensors(__dev), \
		       __dev->pci_status, ##__VA_ARGS__); \
})
1982 
1983 static const char *result2str(enum pci_ers_result result)
1984 {
1985 	return  result == PCI_ERS_RESULT_NEED_RESET ? "need reset" :
1986 		result == PCI_ERS_RESULT_DISCONNECT ? "disconnect" :
1987 		result == PCI_ERS_RESULT_RECOVERED  ? "recovered" :
1988 		"unknown";
1989 }
1990 
1991 static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
1992 					      pci_channel_state_t state)
1993 {
1994 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1995 	enum pci_ers_result res;
1996 
1997 	mlx5_pci_trace(dev, "Enter, pci channel state = %d\n", state);
1998 
1999 	mlx5_enter_error_state(dev, false);
2000 	mlx5_error_sw_reset(dev);
2001 	mlx5_unload_one(dev, false);
2002 	mlx5_drain_health_wq(dev);
2003 	mlx5_pci_disable_device(dev);
2004 
2005 	res = state == pci_channel_io_perm_failure ?
2006 		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
2007 
2008 	mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, result = %d, %s\n",
2009 		       __func__, dev->state, dev->pci_status, res, result2str(res));
2010 	return res;
2011 }
2012 
2013 /* wait for the device to show vital signs by waiting
2014  * for the health counter to start counting.
2015  */
2016 static int wait_vital(struct pci_dev *pdev)
2017 {
2018 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
2019 	struct mlx5_core_health *health = &dev->priv.health;
2020 	const int niter = 100;
2021 	u32 last_count = 0;
2022 	u32 count;
2023 	int i;
2024 
2025 	for (i = 0; i < niter; i++) {
2026 		count = ioread32be(health->health_counter);
2027 		if (count && count != 0xffffffff) {
2028 			if (last_count && last_count != count) {
2029 				mlx5_core_info(dev,
2030 					       "wait vital counter value 0x%x after %d iterations\n",
2031 					       count, i);
2032 				return 0;
2033 			}
2034 			last_count = count;
2035 		}
2036 		msleep(50);
2037 	}
2038 
2039 	return -ETIMEDOUT;
2040 }
2041 
2042 static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
2043 {
2044 	enum pci_ers_result res = PCI_ERS_RESULT_DISCONNECT;
2045 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
2046 	int err;
2047 
2048 	mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Enter\n",
2049 		       __func__, dev->state, dev->pci_status);
2050 
2051 	err = mlx5_pci_enable_device(dev);
2052 	if (err) {
2053 		mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n",
2054 			      __func__, err);
2055 		goto out;
2056 	}
2057 
2058 	pci_set_master(pdev);
2059 	pci_restore_state(pdev);
2060 	pci_save_state(pdev);
2061 
2062 	err = wait_vital(pdev);
2063 	if (err) {
2064 		mlx5_core_err(dev, "%s: wait vital failed with error code: %d\n",
2065 			      __func__, err);
2066 		goto out;
2067 	}
2068 
2069 	res = PCI_ERS_RESULT_RECOVERED;
2070 out:
2071 	mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, err = %d, result = %d, %s\n",
2072 		       __func__, dev->state, dev->pci_status, err, res, result2str(res));
2073 	return res;
2074 }
2075 
2076 static void mlx5_pci_resume(struct pci_dev *pdev)
2077 {
2078 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
2079 	int err;
2080 
2081 	mlx5_pci_trace(dev, "Enter, loading driver..\n");
2082 
2083 	err = mlx5_load_one(dev, false);
2084 
2085 	if (!err)
2086 		devlink_health_reporter_state_update(dev->priv.health.fw_fatal_reporter,
2087 						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
2088 
2089 	mlx5_pci_trace(dev, "Done, err = %d, device %s\n", err,
2090 		       !err ? "recovered" : "Failed");
2091 }
2092 
/* PCI error handler callbacks implemented by this driver */
static const struct pci_error_handlers mlx5_err_handler = {
	.error_detected = mlx5_pci_err_detected,
	.slot_reset	= mlx5_pci_slot_reset,
	.resume		= mlx5_pci_resume
};
2098 
2099 static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
2100 {
2101 	bool fast_teardown = false, force_teardown = false;
2102 	int ret = 1;
2103 
2104 	fast_teardown = MLX5_CAP_GEN(dev, fast_teardown);
2105 	force_teardown = MLX5_CAP_GEN(dev, force_teardown);
2106 
2107 	mlx5_core_dbg(dev, "force teardown firmware support=%d\n", force_teardown);
2108 	mlx5_core_dbg(dev, "fast teardown firmware support=%d\n", fast_teardown);
2109 
2110 	if (!fast_teardown && !force_teardown)
2111 		return -EOPNOTSUPP;
2112 
2113 	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
2114 		mlx5_core_dbg(dev, "Device in internal error state, giving up\n");
2115 		return -EAGAIN;
2116 	}
2117 
2118 	/* Panic tear down fw command will stop the PCI bus communication
2119 	 * with the HCA, so the health poll is no longer needed.
2120 	 */
2121 	mlx5_drain_health_wq(dev);
2122 	mlx5_stop_health_poll(dev, false);
2123 
2124 	ret = mlx5_cmd_fast_teardown_hca(dev);
2125 	if (!ret)
2126 		goto succeed;
2127 
2128 	ret = mlx5_cmd_force_teardown_hca(dev);
2129 	if (!ret)
2130 		goto succeed;
2131 
2132 	mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret);
2133 	mlx5_start_health_poll(dev);
2134 	return ret;
2135 
2136 succeed:
2137 	mlx5_enter_error_state(dev, true);
2138 
2139 	/* Some platforms requiring freeing the IRQ's in the shutdown
2140 	 * flow. If they aren't freed they can't be allocated after
2141 	 * kexec. There is no need to cleanup the mlx5_core software
2142 	 * contexts.
2143 	 */
2144 	mlx5_core_eq_free_irqs(dev);
2145 
2146 	return 0;
2147 }
2148 
2149 static void shutdown(struct pci_dev *pdev)
2150 {
2151 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
2152 	int err;
2153 
2154 	mlx5_core_info(dev, "Shutdown was called\n");
2155 	set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
2156 	err = mlx5_try_fast_unload(dev);
2157 	if (err)
2158 		mlx5_unload_one(dev, false);
2159 	mlx5_pci_disable_device(dev);
2160 }
2161 
2162 static int mlx5_suspend(struct pci_dev *pdev, pm_message_t state)
2163 {
2164 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
2165 
2166 	mlx5_unload_one(dev, true);
2167 
2168 	return 0;
2169 }
2170 
2171 static int mlx5_resume(struct pci_dev *pdev)
2172 {
2173 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
2174 
2175 	return mlx5_load_one(dev, false);
2176 }
2177 
2178 static const struct pci_device_id mlx5_core_pci_table[] = {
2179 	{ PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTIB) },
2180 	{ PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF},	/* Connect-IB VF */
2181 	{ PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4) },
2182 	{ PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF},	/* ConnectX-4 VF */
2183 	{ PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX) },
2184 	{ PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF},	/* ConnectX-4LX VF */
2185 	{ PCI_VDEVICE(MELLANOX, 0x1017) },			/* ConnectX-5, PCIe 3.0 */
2186 	{ PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF},	/* ConnectX-5 VF */
2187 	{ PCI_VDEVICE(MELLANOX, 0x1019) },			/* ConnectX-5 Ex */
2188 	{ PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF},	/* ConnectX-5 Ex VF */
2189 	{ PCI_VDEVICE(MELLANOX, 0x101b) },			/* ConnectX-6 */
2190 	{ PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF},	/* ConnectX-6 VF */
2191 	{ PCI_VDEVICE(MELLANOX, 0x101d) },			/* ConnectX-6 Dx */
2192 	{ PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF},	/* ConnectX Family mlx5Gen Virtual Function */
2193 	{ PCI_VDEVICE(MELLANOX, 0x101f) },			/* ConnectX-6 LX */
2194 	{ PCI_VDEVICE(MELLANOX, 0x1021) },			/* ConnectX-7 */
2195 	{ PCI_VDEVICE(MELLANOX, 0x1023) },			/* ConnectX-8 */
2196 	{ PCI_VDEVICE(MELLANOX, 0xa2d2) },			/* BlueField integrated ConnectX-5 network controller */
2197 	{ PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF},	/* BlueField integrated ConnectX-5 network controller VF */
2198 	{ PCI_VDEVICE(MELLANOX, 0xa2d6) },			/* BlueField-2 integrated ConnectX-6 Dx network controller */
2199 	{ PCI_VDEVICE(MELLANOX, 0xa2dc) },			/* BlueField-3 integrated ConnectX-7 network controller */
2200 	{ PCI_VDEVICE(MELLANOX, 0xa2df) },			/* BlueField-4 integrated ConnectX-8 network controller */
2201 	{ 0, }
2202 };
2203 
2204 MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);
2205 
2206 void mlx5_disable_device(struct mlx5_core_dev *dev)
2207 {
2208 	mlx5_error_sw_reset(dev);
2209 	mlx5_unload_one_devl_locked(dev, false);
2210 }
2211 
2212 int mlx5_recover_device(struct mlx5_core_dev *dev)
2213 {
2214 	if (!mlx5_core_is_sf(dev)) {
2215 		mlx5_pci_disable_device(dev);
2216 		if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED)
2217 			return -EIO;
2218 	}
2219 
2220 	return mlx5_load_one_devl_locked(dev, true);
2221 }
2222 
2223 static struct pci_driver mlx5_core_driver = {
2224 	.name           = KBUILD_MODNAME,
2225 	.id_table       = mlx5_core_pci_table,
2226 	.probe          = probe_one,
2227 	.remove         = remove_one,
2228 	.suspend        = mlx5_suspend,
2229 	.resume         = mlx5_resume,
2230 	.shutdown	= shutdown,
2231 	.err_handler	= &mlx5_err_handler,
2232 	.sriov_configure   = mlx5_core_sriov_configure,
2233 	.sriov_get_vf_total_msix = mlx5_sriov_get_vf_total_msix,
2234 	.sriov_set_msix_vec_count = mlx5_core_sriov_set_msix_vec_count,
2235 };
2236 
2237 /**
2238  * mlx5_vf_get_core_dev - Get the mlx5 core device from a given VF PCI device if
2239  *                     mlx5_core is its driver.
2240  * @pdev: The associated PCI device.
2241  *
2242  * Upon return the interface state lock stay held to let caller uses it safely.
2243  * Caller must ensure to use the returned mlx5 device for a narrow window
2244  * and put it back with mlx5_vf_put_core_dev() immediately once usage was over.
2245  *
2246  * Return: Pointer to the associated mlx5_core_dev or NULL.
2247  */
2248 struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev)
2249 {
2250 	struct mlx5_core_dev *mdev;
2251 
2252 	mdev = pci_iov_get_pf_drvdata(pdev, &mlx5_core_driver);
2253 	if (IS_ERR(mdev))
2254 		return NULL;
2255 
2256 	mutex_lock(&mdev->intf_state_mutex);
2257 	if (!test_bit(MLX5_INTERFACE_STATE_UP, &mdev->intf_state)) {
2258 		mutex_unlock(&mdev->intf_state_mutex);
2259 		return NULL;
2260 	}
2261 
2262 	return mdev;
2263 }
2264 EXPORT_SYMBOL(mlx5_vf_get_core_dev);
2265 
2266 /**
2267  * mlx5_vf_put_core_dev - Put the mlx5 core device back.
2268  * @mdev: The mlx5 core device.
2269  *
2270  * Upon return the interface state lock is unlocked and caller should not
2271  * access the mdev any more.
2272  */
2273 void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev)
2274 {
2275 	mutex_unlock(&mdev->intf_state_mutex);
2276 }
2277 EXPORT_SYMBOL(mlx5_vf_put_core_dev);
2278 
2279 static void mlx5_core_verify_params(void)
2280 {
2281 	if (prof_sel >= ARRAY_SIZE(profile)) {
2282 		pr_warn("mlx5_core: WARNING: Invalid module parameter prof_sel %d, valid range 0-%zu, changing back to default(%d)\n",
2283 			prof_sel,
2284 			ARRAY_SIZE(profile) - 1,
2285 			MLX5_DEFAULT_PROF);
2286 		prof_sel = MLX5_DEFAULT_PROF;
2287 	}
2288 }
2289 
2290 static int __init mlx5_init(void)
2291 {
2292 	int err;
2293 
2294 	WARN_ONCE(strcmp(MLX5_ADEV_NAME, KBUILD_MODNAME),
2295 		  "mlx5_core name not in sync with kernel module name");
2296 
2297 	get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));
2298 
2299 	mlx5_core_verify_params();
2300 	mlx5_register_debugfs();
2301 
2302 	err = mlx5e_init();
2303 	if (err)
2304 		goto err_debug;
2305 
2306 	err = mlx5_sf_driver_register();
2307 	if (err)
2308 		goto err_sf;
2309 
2310 	err = pci_register_driver(&mlx5_core_driver);
2311 	if (err)
2312 		goto err_pci;
2313 
2314 	return 0;
2315 
2316 err_pci:
2317 	mlx5_sf_driver_unregister();
2318 err_sf:
2319 	mlx5e_cleanup();
2320 err_debug:
2321 	mlx5_unregister_debugfs();
2322 	return err;
2323 }
2324 
2325 static void __exit mlx5_cleanup(void)
2326 {
2327 	pci_unregister_driver(&mlx5_core_driver);
2328 	mlx5_sf_driver_unregister();
2329 	mlx5e_cleanup();
2330 	mlx5_unregister_debugfs();
2331 }
2332 
2333 module_init(mlx5_init);
2334 module_exit(mlx5_cleanup);
2335