xref: /linux/drivers/net/ethernet/mellanox/mlx5/core/main.c (revision fbc872c38c8fed31948c85683b5326ee5ab9fccc)
1 /*
2  * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/highmem.h>
34 #include <linux/module.h>
35 #include <linux/init.h>
36 #include <linux/errno.h>
37 #include <linux/pci.h>
38 #include <linux/dma-mapping.h>
39 #include <linux/slab.h>
40 #include <linux/io-mapping.h>
41 #include <linux/interrupt.h>
42 #include <linux/delay.h>
43 #include <linux/mlx5/driver.h>
44 #include <linux/mlx5/cq.h>
45 #include <linux/mlx5/qp.h>
46 #include <linux/mlx5/srq.h>
47 #include <linux/debugfs.h>
48 #include <linux/kmod.h>
50 #include <linux/mlx5/mlx5_ifc.h>
51 #ifdef CONFIG_RFS_ACCEL
52 #include <linux/cpu_rmap.h>
53 #endif
54 #include "mlx5_core.h"
55 #include "fs_core.h"
56 #ifdef CONFIG_MLX5_CORE_EN
57 #include "eswitch.h"
58 #endif
59 
60 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
61 MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
62 MODULE_LICENSE("Dual BSD/GPL");
63 MODULE_VERSION(DRIVER_VERSION);
64 
65 int mlx5_core_debug_mask;
66 module_param_named(debug_mask, mlx5_core_debug_mask, int, 0644);
67 MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");
68 
69 #define MLX5_DEFAULT_PROF	2
70 static int prof_sel = MLX5_DEFAULT_PROF;
71 module_param_named(prof_sel, prof_sel, int, 0444);
72 MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
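/* Example: loading the module with
 *   modprobe mlx5_core prof_sel=1 debug_mask=3
 * selects profile[1] below (log_max_qp = 12, no MR cache sizing) and dumps
 * both command data and command execution times.
 */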
73 
74 static LIST_HEAD(intf_list);
75 static LIST_HEAD(dev_list);
76 static DEFINE_MUTEX(intf_mutex);
77 
78 struct mlx5_device_context {
79 	struct list_head	list;
80 	struct mlx5_interface  *intf;
81 	void		       *context;
82 };
83 
84 enum {
85 	MLX5_ATOMIC_REQ_MODE_BE = 0x0,
86 	MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
87 };
88 
89 static struct mlx5_profile profile[] = {
90 	[0] = {
91 		.mask           = 0,
92 	},
93 	[1] = {
94 		.mask		= MLX5_PROF_MASK_QP_SIZE,
95 		.log_max_qp	= 12,
96 	},
97 	[2] = {
98 		.mask		= MLX5_PROF_MASK_QP_SIZE |
99 				  MLX5_PROF_MASK_MR_CACHE,
100 		.log_max_qp	= 17,
101 		.mr_cache[0]	= {
102 			.size	= 500,
103 			.limit	= 250
104 		},
105 		.mr_cache[1]	= {
106 			.size	= 500,
107 			.limit	= 250
108 		},
109 		.mr_cache[2]	= {
110 			.size	= 500,
111 			.limit	= 250
112 		},
113 		.mr_cache[3]	= {
114 			.size	= 500,
115 			.limit	= 250
116 		},
117 		.mr_cache[4]	= {
118 			.size	= 500,
119 			.limit	= 250
120 		},
121 		.mr_cache[5]	= {
122 			.size	= 500,
123 			.limit	= 250
124 		},
125 		.mr_cache[6]	= {
126 			.size	= 500,
127 			.limit	= 250
128 		},
129 		.mr_cache[7]	= {
130 			.size	= 500,
131 			.limit	= 250
132 		},
133 		.mr_cache[8]	= {
134 			.size	= 500,
135 			.limit	= 250
136 		},
137 		.mr_cache[9]	= {
138 			.size	= 500,
139 			.limit	= 250
140 		},
141 		.mr_cache[10]	= {
142 			.size	= 500,
143 			.limit	= 250
144 		},
145 		.mr_cache[11]	= {
146 			.size	= 500,
147 			.limit	= 250
148 		},
149 		.mr_cache[12]	= {
150 			.size	= 64,
151 			.limit	= 32
152 		},
153 		.mr_cache[13]	= {
154 			.size	= 32,
155 			.limit	= 16
156 		},
157 		.mr_cache[14]	= {
158 			.size	= 16,
159 			.limit	= 8
160 		},
161 		.mr_cache[15]	= {
162 			.size	= 8,
163 			.limit	= 4
164 		},
165 	},
166 };
167 
168 #define FW_INIT_TIMEOUT_MILI	2000
169 #define FW_INIT_WAIT_MS		2
170 
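/* Busy-wait, in FW_INIT_WAIT_MS steps, until firmware reports it has
 * finished initializing or max_wait_mili milliseconds elapse; returns
 * -EBUSY on timeout.
 */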
171 static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
172 {
173 	unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
174 	int err = 0;
175 
176 	while (fw_initializing(dev)) {
177 		if (time_after(jiffies, end)) {
178 			err = -EBUSY;
179 			break;
180 		}
181 		msleep(FW_INIT_WAIT_MS);
182 	}
183 
184 	return err;
185 }
186 
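/* Prefer 64-bit streaming and coherent DMA masks, falling back to 32-bit
 * when the platform cannot provide them, and cap DMA segments at 2GB.
 */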
187 static int set_dma_caps(struct pci_dev *pdev)
188 {
189 	int err;
190 
191 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
192 	if (err) {
193 		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
194 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
195 		if (err) {
196 			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
197 			return err;
198 		}
199 	}
200 
201 	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
202 	if (err) {
203 		dev_warn(&pdev->dev,
204 			 "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
205 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
206 		if (err) {
207 			dev_err(&pdev->dev,
208 				"Can't set consistent PCI DMA mask, aborting\n");
209 			return err;
210 		}
211 	}
212 
213 	dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
214 	return err;
215 }
216 
217 static int mlx5_pci_enable_device(struct mlx5_core_dev *dev)
218 {
219 	struct pci_dev *pdev = dev->pdev;
220 	int err = 0;
221 
222 	mutex_lock(&dev->pci_status_mutex);
223 	if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) {
224 		err = pci_enable_device(pdev);
225 		if (!err)
226 			dev->pci_status = MLX5_PCI_STATUS_ENABLED;
227 	}
228 	mutex_unlock(&dev->pci_status_mutex);
229 
230 	return err;
231 }
232 
233 static void mlx5_pci_disable_device(struct mlx5_core_dev *dev)
234 {
235 	struct pci_dev *pdev = dev->pdev;
236 
237 	mutex_lock(&dev->pci_status_mutex);
238 	if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) {
239 		pci_disable_device(pdev);
240 		dev->pci_status = MLX5_PCI_STATUS_DISABLED;
241 	}
242 	mutex_unlock(&dev->pci_status_mutex);
243 }
244 
245 static int request_bar(struct pci_dev *pdev)
246 {
247 	int err = 0;
248 
249 	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
250 		dev_err(&pdev->dev, "Missing registers BAR, aborting\n");
251 		return -ENODEV;
252 	}
253 
254 	err = pci_request_regions(pdev, DRIVER_NAME);
255 	if (err)
256 		dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
257 
258 	return err;
259 }
260 
261 static void release_bar(struct pci_dev *pdev)
262 {
263 	pci_release_regions(pdev);
264 }
265 
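/* Request one MSI-X vector per online CPU per port for completion EQs plus
 * MLX5_EQ_VEC_COMP_BASE control vectors, capped by the device's log_max_eq;
 * on success record the number of completion vectors actually granted.
 */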
266 static int mlx5_enable_msix(struct mlx5_core_dev *dev)
267 {
268 	struct mlx5_priv *priv = &dev->priv;
269 	struct mlx5_eq_table *table = &priv->eq_table;
270 	int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
271 	int nvec;
272 	int i;
273 
274 	nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
275 	       MLX5_EQ_VEC_COMP_BASE;
276 	nvec = min_t(int, nvec, num_eqs);
277 	if (nvec <= MLX5_EQ_VEC_COMP_BASE)
278 		return -ENOMEM;
279 
280 	priv->msix_arr = kcalloc(nvec, sizeof(*priv->msix_arr), GFP_KERNEL);
281 
282 	priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL);
283 	if (!priv->msix_arr || !priv->irq_info)
284 		goto err_free_msix;
285 
286 	for (i = 0; i < nvec; i++)
287 		priv->msix_arr[i].entry = i;
288 
289 	nvec = pci_enable_msix_range(dev->pdev, priv->msix_arr,
290 				     MLX5_EQ_VEC_COMP_BASE + 1, nvec);
291 	if (nvec < 0) {
		kfree(priv->irq_info);
		kfree(priv->msix_arr);
		return nvec;
292 	}
293 
294 	table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
295 
296 	return 0;
297 
298 err_free_msix:
299 	kfree(priv->irq_info);
300 	kfree(priv->msix_arr);
301 	return -ENOMEM;
302 }
303 
304 static void mlx5_disable_msix(struct mlx5_core_dev *dev)
305 {
306 	struct mlx5_priv *priv = &dev->priv;
307 
308 	pci_disable_msix(dev->pdev);
309 	kfree(priv->irq_info);
310 	kfree(priv->msix_arr);
311 }
312 
313 struct mlx5_reg_host_endianess {
314 	u8	he;
315 	u8      rsvd[15];
316 };
317 
318 
319 #define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))
320 
321 enum {
322 	MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
323 				MLX5_DEV_CAP_FLAG_DCT,
324 };
325 
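/* Translate a P_Key table size in entries into the firmware encoding,
 * log2(size / 128); unrecognized sizes warn and fall back to the 128-entry
 * encoding.
 */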
326 static u16 to_fw_pkey_sz(u32 size)
327 {
328 	switch (size) {
329 	case 128:
330 		return 0;
331 	case 256:
332 		return 1;
333 	case 512:
334 		return 2;
335 	case 1024:
336 		return 3;
337 	case 2048:
338 		return 4;
339 	case 4096:
340 		return 5;
341 	default:
342 		pr_warn("invalid pkey table size %d\n", size);
343 		return 0;
344 	}
345 }
346 
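/* Issue QUERY_HCA_CAP for the given capability type in the requested mode
 * (current or maximum values) and cache the result in hca_caps_cur[] or
 * hca_caps_max[] respectively.
 */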
347 static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
348 				   enum mlx5_cap_type cap_type,
349 				   enum mlx5_cap_mode cap_mode)
350 {
351 	u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
352 	int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
353 	void *out, *hca_caps;
354 	u16 opmod = (cap_type << 1) | (cap_mode & 0x01);
355 	int err;
356 
357 	memset(in, 0, sizeof(in));
358 	out = kzalloc(out_sz, GFP_KERNEL);
359 	if (!out)
360 		return -ENOMEM;
361 
362 	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
363 	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
364 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
365 	if (err)
366 		goto query_ex;
367 
368 	err = mlx5_cmd_status_to_err_v2(out);
369 	if (err) {
370 		mlx5_core_warn(dev,
371 			       "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
372 			       cap_type, cap_mode, err);
373 		goto query_ex;
374 	}
375 
376 	hca_caps =  MLX5_ADDR_OF(query_hca_cap_out, out, capability);
377 
378 	switch (cap_mode) {
379 	case HCA_CAP_OPMOD_GET_MAX:
380 		memcpy(dev->hca_caps_max[cap_type], hca_caps,
381 		       MLX5_UN_SZ_BYTES(hca_cap_union));
382 		break;
383 	case HCA_CAP_OPMOD_GET_CUR:
384 		memcpy(dev->hca_caps_cur[cap_type], hca_caps,
385 		       MLX5_UN_SZ_BYTES(hca_cap_union));
386 		break;
387 	default:
388 		mlx5_core_warn(dev,
389 			       "Tried to query dev cap type(%x) with wrong opmode(%x)\n",
390 			       cap_type, cap_mode);
391 		err = -EINVAL;
392 		break;
393 	}
394 query_ex:
395 	kfree(out);
396 	return err;
397 }
398 
399 int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
400 {
401 	int ret;
402 
403 	ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR);
404 	if (ret)
405 		return ret;
406 	return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
407 }
408 
409 static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
410 {
411 	u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)];
412 	int err;
413 
414 	memset(out, 0, sizeof(out));
415 
416 	MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
417 	MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
418 	err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
419 	if (err)
420 		return err;
421 
422 	err = mlx5_cmd_status_to_err_v2(out);
423 
424 	return err;
425 }
426 
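/* When the device exposes atomic capabilities and offers host-endianness
 * mode for 8-byte atomic requests, switch the requester's 8-byte atomic
 * endianness mode to host endianness via SET_HCA_CAP.
 */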
427 static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
428 {
429 	void *set_ctx;
430 	void *set_hca_cap;
431 	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
432 	int req_endianness;
433 	int err;
434 
435 	if (MLX5_CAP_GEN(dev, atomic)) {
436 		err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
437 		if (err)
438 			return err;
439 	} else {
440 		return 0;
441 	}
442 
443 	req_endianness =
444 		MLX5_CAP_ATOMIC(dev,
445 				supported_atomic_req_8B_endianess_mode_1);
446 
447 	if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
448 		return 0;
449 
450 	set_ctx = kzalloc(set_sz, GFP_KERNEL);
451 	if (!set_ctx)
452 		return -ENOMEM;
453 
454 	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
455 
456 	/* Set requestor to host endianness */
457 	MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode,
458 		 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
459 
460 	err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
461 
462 	kfree(set_ctx);
463 	return err;
464 }
465 
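/* Copy the current general HCA caps, then limit the P_Key table to 128
 * entries, apply the profile's log_max_qp, disable cmdif checksums and set
 * the UAR page size before writing the result back with SET_HCA_CAP.
 */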
466 static int handle_hca_cap(struct mlx5_core_dev *dev)
467 {
468 	void *set_ctx = NULL;
469 	struct mlx5_profile *prof = dev->profile;
470 	int err = -ENOMEM;
471 	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
472 	void *set_hca_cap;
473 
474 	set_ctx = kzalloc(set_sz, GFP_KERNEL);
475 	if (!set_ctx)
476 		goto query_ex;
477 
478 	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
479 	if (err)
480 		goto query_ex;
481 
482 	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
483 				   capability);
484 	memcpy(set_hca_cap, dev->hca_caps_cur[MLX5_CAP_GENERAL],
485 	       MLX5_ST_SZ_BYTES(cmd_hca_cap));
486 
487 	mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n",
488 		      mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)),
489 		      128);
490 	/* we limit the size of the pkey table to 128 entries for now */
491 	MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
492 		 to_fw_pkey_sz(128));
493 
494 	if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
495 		MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
496 			 prof->log_max_qp);
497 
498 	/* disable cmdif checksum */
499 	MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);
500 
501 	MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
502 
503 	err = set_caps(dev, set_ctx, set_sz,
504 		       MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
505 
506 query_ex:
507 	kfree(set_ctx);
508 	return err;
509 }
510 
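/* Report the host endianness to the device through the HOST_ENDIANNESS
 * access register; only the PF performs this step.
 */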
511 static int set_hca_ctrl(struct mlx5_core_dev *dev)
512 {
513 	struct mlx5_reg_host_endianess he_in;
514 	struct mlx5_reg_host_endianess he_out;
515 	int err;
516 
517 	if (!mlx5_core_is_pf(dev))
518 		return 0;
519 
520 	memset(&he_in, 0, sizeof(he_in));
521 	he_in.he = MLX5_SET_HOST_ENDIANNESS;
522 	err = mlx5_core_access_reg(dev, &he_in,  sizeof(he_in),
523 					&he_out, sizeof(he_out),
524 					MLX5_REG_HOST_ENDIANNESS, 0, 1);
525 	return err;
526 }
527 
528 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
529 {
530 	u32 out[MLX5_ST_SZ_DW(enable_hca_out)];
531 	u32 in[MLX5_ST_SZ_DW(enable_hca_in)];
532 	int err;
533 
534 	memset(in, 0, sizeof(in));
535 	MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
536 	MLX5_SET(enable_hca_in, in, function_id, func_id);
537 	memset(out, 0, sizeof(out));
538 
539 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
540 	if (err)
541 		return err;
542 
543 	return mlx5_cmd_status_to_err_v2(out);
544 }
545 
546 int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
547 {
548 	u32 out[MLX5_ST_SZ_DW(disable_hca_out)];
549 	u32 in[MLX5_ST_SZ_DW(disable_hca_in)];
550 	int err;
551 
552 	memset(in, 0, sizeof(in));
553 	MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
554 	MLX5_SET(disable_hca_in, in, function_id, func_id);
555 	memset(out, 0, sizeof(out));
556 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
557 	if (err)
558 		return err;
559 
560 	return mlx5_cmd_status_to_err_v2(out);
561 }
562 
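/* Read the device's 64-bit internal timer using a high/low/high sequence so
 * that a low-word wrap between the two reads is detected and re-read.
 */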
563 cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev)
564 {
565 	u32 timer_h, timer_h1, timer_l;
566 
567 	timer_h = ioread32be(&dev->iseg->internal_timer_h);
568 	timer_l = ioread32be(&dev->iseg->internal_timer_l);
569 	timer_h1 = ioread32be(&dev->iseg->internal_timer_h);
570 	if (timer_h != timer_h1) /* wrap around */
571 		timer_l = ioread32be(&dev->iseg->internal_timer_l);
572 
573 	return (cycle_t)timer_l | (cycle_t)timer_h1 << 32;
574 }
575 
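/* Hint the affinity of completion vector i to a CPU chosen by
 * cpumask_local_spread() from the device's NUMA node, spreading completion
 * interrupts close to the hardware.
 */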
576 static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
577 {
578 	struct mlx5_priv *priv  = &mdev->priv;
579 	struct msix_entry *msix = priv->msix_arr;
580 	int irq                 = msix[i + MLX5_EQ_VEC_COMP_BASE].vector;
581 	int numa_node           = priv->numa_node;
582 	int err;
583 
584 	if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
585 		mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
586 		return -ENOMEM;
587 	}
588 
589 	cpumask_set_cpu(cpumask_local_spread(i, numa_node),
590 			priv->irq_info[i].mask);
591 
592 	err = irq_set_affinity_hint(irq, priv->irq_info[i].mask);
593 	if (err) {
594 		mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x",
595 			       irq);
596 		goto err_clear_mask;
597 	}
598 
599 	return 0;
600 
601 err_clear_mask:
602 	free_cpumask_var(priv->irq_info[i].mask);
603 	return err;
604 }
605 
606 static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
607 {
608 	struct mlx5_priv *priv  = &mdev->priv;
609 	struct msix_entry *msix = priv->msix_arr;
610 	int irq                 = msix[i + MLX5_EQ_VEC_COMP_BASE].vector;
611 
612 	irq_set_affinity_hint(irq, NULL);
613 	free_cpumask_var(priv->irq_info[i].mask);
614 }
615 
616 static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
617 {
618 	int err;
619 	int i;
620 
621 	for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
622 		err = mlx5_irq_set_affinity_hint(mdev, i);
623 		if (err)
624 			goto err_out;
625 	}
626 
627 	return 0;
628 
629 err_out:
630 	for (i--; i >= 0; i--)
631 		mlx5_irq_clear_affinity_hint(mdev, i);
632 
633 	return err;
634 }
635 
636 static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
637 {
638 	int i;
639 
640 	for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
641 		mlx5_irq_clear_affinity_hint(mdev, i);
642 }
643 
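/* Look up completion vector @vector in the completion EQ list and return its
 * EQ number and IRQ number; -ENOENT if no EQ has that index.
 */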
644 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
645 		    unsigned int *irqn)
646 {
647 	struct mlx5_eq_table *table = &dev->priv.eq_table;
648 	struct mlx5_eq *eq, *n;
649 	int err = -ENOENT;
650 
651 	spin_lock(&table->lock);
652 	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
653 		if (eq->index == vector) {
654 			*eqn = eq->eqn;
655 			*irqn = eq->irqn;
656 			err = 0;
657 			break;
658 		}
659 	}
660 	spin_unlock(&table->lock);
661 
662 	return err;
663 }
664 EXPORT_SYMBOL(mlx5_vector2eqn);
665 
666 struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn)
667 {
668 	struct mlx5_eq_table *table = &dev->priv.eq_table;
669 	struct mlx5_eq *eq;
670 
671 	spin_lock(&table->lock);
672 	list_for_each_entry(eq, &table->comp_eqs_list, list)
673 		if (eq->eqn == eqn) {
674 			spin_unlock(&table->lock);
675 			return eq;
676 		}
677 
678 	spin_unlock(&table->lock);
679 
680 	return ERR_PTR(-ENOENT);
681 }
682 
683 static void free_comp_eqs(struct mlx5_core_dev *dev)
684 {
685 	struct mlx5_eq_table *table = &dev->priv.eq_table;
686 	struct mlx5_eq *eq, *n;
687 
688 #ifdef CONFIG_RFS_ACCEL
689 	if (dev->rmap) {
690 		free_irq_cpu_rmap(dev->rmap);
691 		dev->rmap = NULL;
692 	}
693 #endif
694 	spin_lock(&table->lock);
695 	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
696 		list_del(&eq->list);
697 		spin_unlock(&table->lock);
698 		if (mlx5_destroy_unmap_eq(dev, eq))
699 			mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n",
700 				       eq->eqn);
701 		kfree(eq);
702 		spin_lock(&table->lock);
703 	}
704 	spin_unlock(&table->lock);
705 }
706 
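/* Create one completion EQ per MSI-X completion vector, name each one
 * mlx5_comp<i>, register its IRQ with the aRFS CPU rmap when
 * CONFIG_RFS_ACCEL is enabled, and link it into the completion EQ list.
 */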
707 static int alloc_comp_eqs(struct mlx5_core_dev *dev)
708 {
709 	struct mlx5_eq_table *table = &dev->priv.eq_table;
710 	char name[MLX5_MAX_IRQ_NAME];
711 	struct mlx5_eq *eq;
712 	int ncomp_vec;
713 	int nent;
714 	int err;
715 	int i;
716 
717 	INIT_LIST_HEAD(&table->comp_eqs_list);
718 	ncomp_vec = table->num_comp_vectors;
719 	nent = MLX5_COMP_EQ_SIZE;
720 #ifdef CONFIG_RFS_ACCEL
721 	dev->rmap = alloc_irq_cpu_rmap(ncomp_vec);
722 	if (!dev->rmap)
723 		return -ENOMEM;
724 #endif
725 	for (i = 0; i < ncomp_vec; i++) {
726 		eq = kzalloc(sizeof(*eq), GFP_KERNEL);
727 		if (!eq) {
728 			err = -ENOMEM;
729 			goto clean;
730 		}
731 
732 #ifdef CONFIG_RFS_ACCEL
733 		irq_cpu_rmap_add(dev->rmap,
734 				 dev->priv.msix_arr[i + MLX5_EQ_VEC_COMP_BASE].vector);
735 #endif
736 		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
737 		err = mlx5_create_map_eq(dev, eq,
738 					 i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
739 					 name, &dev->priv.uuari.uars[0]);
740 		if (err) {
741 			kfree(eq);
742 			goto clean;
743 		}
744 		mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
745 		eq->index = i;
746 		spin_lock(&table->lock);
747 		list_add_tail(&eq->list, &table->comp_eqs_list);
748 		spin_unlock(&table->lock);
749 	}
750 
751 	return 0;
752 
753 clean:
754 	free_comp_eqs(dev);
755 	return err;
756 }
757 
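/* Negotiate the ISSI: query the ISSI values firmware supports and move the
 * device to ISSI 1 when offered; a BAD_OP_ERR status on the query means only
 * ISSI 0 exists and is treated as success.
 */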
758 static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
759 {
760 	u32 query_in[MLX5_ST_SZ_DW(query_issi_in)];
761 	u32 query_out[MLX5_ST_SZ_DW(query_issi_out)];
762 	u32 set_in[MLX5_ST_SZ_DW(set_issi_in)];
763 	u32 set_out[MLX5_ST_SZ_DW(set_issi_out)];
764 	int err;
765 	u32 sup_issi;
766 
767 	memset(query_in, 0, sizeof(query_in));
768 	memset(query_out, 0, sizeof(query_out));
769 
770 	MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);
771 
772 	err = mlx5_cmd_exec_check_status(dev, query_in, sizeof(query_in),
773 					 query_out, sizeof(query_out));
774 	if (err) {
775 		if (((struct mlx5_outbox_hdr *)query_out)->status ==
776 		    MLX5_CMD_STAT_BAD_OP_ERR) {
777 			pr_debug("Only ISSI 0 is supported\n");
778 			return 0;
779 		}
780 
781 		pr_err("failed to query ISSI\n");
782 		return err;
783 	}
784 
785 	sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);
786 
787 	if (sup_issi & (1 << 1)) {
788 		memset(set_in, 0, sizeof(set_in));
789 		memset(set_out, 0, sizeof(set_out));
790 
791 		MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
792 		MLX5_SET(set_issi_in, set_in, current_issi, 1);
793 
794 		err = mlx5_cmd_exec_check_status(dev, set_in, sizeof(set_in),
795 						 set_out, sizeof(set_out));
796 		if (err) {
797 			pr_err("failed to set ISSI=1\n");
798 			return err;
799 		}
800 
801 		dev->issi = 1;
802 
803 		return 0;
804 	} else if (sup_issi & (1 << 0) || !sup_issi) {
805 		return 0;
806 	}
807 
808 	return -ENOTSUPP;
809 }
810 
811 static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
812 {
813 	struct mlx5_device_context *dev_ctx;
814 	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
815 
816 	dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL);
817 	if (!dev_ctx)
818 		return;
819 
820 	dev_ctx->intf    = intf;
821 	dev_ctx->context = intf->add(dev);
822 
823 	if (dev_ctx->context) {
824 		spin_lock_irq(&priv->ctx_lock);
825 		list_add_tail(&dev_ctx->list, &priv->ctx_list);
826 		spin_unlock_irq(&priv->ctx_lock);
827 	} else {
828 		kfree(dev_ctx);
829 	}
830 }
831 
832 static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
833 {
834 	struct mlx5_device_context *dev_ctx;
835 	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
836 
837 	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
838 		if (dev_ctx->intf == intf) {
839 			spin_lock_irq(&priv->ctx_lock);
840 			list_del(&dev_ctx->list);
841 			spin_unlock_irq(&priv->ctx_lock);
842 
843 			intf->remove(dev, dev_ctx->context);
844 			kfree(dev_ctx);
845 			return;
846 		}
847 }
848 
849 static int mlx5_register_device(struct mlx5_core_dev *dev)
850 {
851 	struct mlx5_priv *priv = &dev->priv;
852 	struct mlx5_interface *intf;
853 
854 	mutex_lock(&intf_mutex);
855 	list_add_tail(&priv->dev_list, &dev_list);
856 	list_for_each_entry(intf, &intf_list, list)
857 		mlx5_add_device(intf, priv);
858 	mutex_unlock(&intf_mutex);
859 
860 	return 0;
861 }
862 
863 static void mlx5_unregister_device(struct mlx5_core_dev *dev)
864 {
865 	struct mlx5_priv *priv = &dev->priv;
866 	struct mlx5_interface *intf;
867 
868 	mutex_lock(&intf_mutex);
869 	list_for_each_entry(intf, &intf_list, list)
870 		mlx5_remove_device(intf, priv);
871 	list_del(&priv->dev_list);
872 	mutex_unlock(&intf_mutex);
873 }
874 
875 int mlx5_register_interface(struct mlx5_interface *intf)
876 {
877 	struct mlx5_priv *priv;
878 
879 	if (!intf->add || !intf->remove)
880 		return -EINVAL;
881 
882 	mutex_lock(&intf_mutex);
883 	list_add_tail(&intf->list, &intf_list);
884 	list_for_each_entry(priv, &dev_list, dev_list)
885 		mlx5_add_device(intf, priv);
886 	mutex_unlock(&intf_mutex);
887 
888 	return 0;
889 }
890 EXPORT_SYMBOL(mlx5_register_interface);
891 
892 void mlx5_unregister_interface(struct mlx5_interface *intf)
893 {
894 	struct mlx5_priv *priv;
895 
896 	mutex_lock(&intf_mutex);
897 	list_for_each_entry(priv, &dev_list, dev_list)
898 		mlx5_remove_device(intf, priv);
899 	list_del(&intf->list);
900 	mutex_unlock(&intf_mutex);
901 }
902 EXPORT_SYMBOL(mlx5_unregister_interface);
903 
904 void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
905 {
906 	struct mlx5_priv *priv = &mdev->priv;
907 	struct mlx5_device_context *dev_ctx;
908 	unsigned long flags;
909 	void *result = NULL;
910 
911 	spin_lock_irqsave(&priv->ctx_lock, flags);
912 
913 	list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
914 		if ((dev_ctx->intf->protocol == protocol) &&
915 		    dev_ctx->intf->get_dev) {
916 			result = dev_ctx->intf->get_dev(dev_ctx->context);
917 			break;
918 		}
919 
920 	spin_unlock_irqrestore(&priv->ctx_lock, flags);
921 
922 	return result;
923 }
924 EXPORT_SYMBOL(mlx5_get_protocol_dev);
925 
926 static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
927 {
928 	struct pci_dev *pdev = dev->pdev;
929 	int err = 0;
930 
931 	pci_set_drvdata(dev->pdev, dev);
932 	strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN);
933 	priv->name[MLX5_MAX_NAME_LEN - 1] = 0;
934 
935 	mutex_init(&priv->pgdir_mutex);
936 	INIT_LIST_HEAD(&priv->pgdir_list);
937 	spin_lock_init(&priv->mkey_lock);
938 
939 	mutex_init(&priv->alloc_mutex);
940 
941 	priv->numa_node = dev_to_node(&dev->pdev->dev);
942 
943 	priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root);
944 	if (!priv->dbg_root)
945 		return -ENOMEM;
946 
947 	err = mlx5_pci_enable_device(dev);
948 	if (err) {
949 		dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
950 		goto err_dbg;
951 	}
952 
953 	err = request_bar(pdev);
954 	if (err) {
955 		dev_err(&pdev->dev, "error requesting BARs, aborting\n");
956 		goto err_disable;
957 	}
958 
959 	pci_set_master(pdev);
960 
961 	err = set_dma_caps(pdev);
962 	if (err) {
963 		dev_err(&pdev->dev, "Failed setting DMA capabilities mask, aborting\n");
964 		goto err_clr_master;
965 	}
966 
967 	dev->iseg_base = pci_resource_start(dev->pdev, 0);
968 	dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
969 	if (!dev->iseg) {
970 		err = -ENOMEM;
971 		dev_err(&pdev->dev, "Failed mapping initialization segment, aborting\n");
972 		goto err_clr_master;
973 	}
974 
975 	return 0;
976 
977 err_clr_master:
978 	pci_clear_master(dev->pdev);
979 	release_bar(dev->pdev);
980 err_disable:
981 	mlx5_pci_disable_device(dev);
982 
983 err_dbg:
984 	debugfs_remove(priv->dbg_root);
985 	return err;
986 }
987 
988 static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
989 {
990 	iounmap(dev->iseg);
991 	pci_clear_master(dev->pdev);
992 	release_bar(dev->pdev);
993 	mlx5_pci_disable_device(dev);
994 	debugfs_remove(priv->dbg_root);
995 }
996 
997 #define MLX5_IB_MOD "mlx5_ib"
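/* Bring-up sequence for one device: initialize the command interface, wait
 * for firmware, enable the HCA, negotiate ISSI, supply boot/init pages,
 * program HCA capabilities, then set up EQs, MSI-X, UARs, flow steering,
 * eswitch and SR-IOV, and finally register with mlx5 interface clients and
 * request the mlx5_ib module.
 */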
998 static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
999 {
1000 	struct pci_dev *pdev = dev->pdev;
1001 	int err;
1002 
1003 	mutex_lock(&dev->intf_state_mutex);
1004 	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
1005 		dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n",
1006 			 __func__);
1007 		goto out;
1008 	}
1009 
1010 	dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
1011 		 fw_rev_min(dev), fw_rev_sub(dev));
1012 
1013 	/* On load, clear any previous indication of internal error; the device
1014 	 * is coming up.
1015 	 */
1016 	dev->state = MLX5_DEVICE_STATE_UP;
1017 
1018 	err = mlx5_cmd_init(dev);
1019 	if (err) {
1020 		dev_err(&pdev->dev, "Failed initializing command interface, aborting\n");
1021 		goto out_err;
1022 	}
1023 
1024 	err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI);
1025 	if (err) {
1026 		dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n",
1027 			FW_INIT_TIMEOUT_MILI);
1028 		goto out_err;
1029 	}
1030 
1031 	mlx5_pagealloc_init(dev);
1032 
1033 	err = mlx5_core_enable_hca(dev, 0);
1034 	if (err) {
1035 		dev_err(&pdev->dev, "enable hca failed\n");
1036 		goto err_pagealloc_cleanup;
1037 	}
1038 
1039 	err = mlx5_core_set_issi(dev);
1040 	if (err) {
1041 		dev_err(&pdev->dev, "failed to set issi\n");
1042 		goto err_disable_hca;
1043 	}
1044 
1045 	err = mlx5_satisfy_startup_pages(dev, 1);
1046 	if (err) {
1047 		dev_err(&pdev->dev, "failed to allocate boot pages\n");
1048 		goto err_disable_hca;
1049 	}
1050 
1051 	err = set_hca_ctrl(dev);
1052 	if (err) {
1053 		dev_err(&pdev->dev, "set_hca_ctrl failed\n");
1054 		goto reclaim_boot_pages;
1055 	}
1056 
1057 	err = handle_hca_cap(dev);
1058 	if (err) {
1059 		dev_err(&pdev->dev, "handle_hca_cap failed\n");
1060 		goto reclaim_boot_pages;
1061 	}
1062 
1063 	err = handle_hca_cap_atomic(dev);
1064 	if (err) {
1065 		dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
1066 		goto reclaim_boot_pages;
1067 	}
1068 
1069 	err = mlx5_satisfy_startup_pages(dev, 0);
1070 	if (err) {
1071 		dev_err(&pdev->dev, "failed to allocate init pages\n");
1072 		goto reclaim_boot_pages;
1073 	}
1074 
1075 	err = mlx5_pagealloc_start(dev);
1076 	if (err) {
1077 		dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n");
1078 		goto reclaim_boot_pages;
1079 	}
1080 
1081 	err = mlx5_cmd_init_hca(dev);
1082 	if (err) {
1083 		dev_err(&pdev->dev, "init hca failed\n");
1084 		goto err_pagealloc_stop;
1085 	}
1086 
1087 	mlx5_start_health_poll(dev);
1088 
1089 	err = mlx5_query_hca_caps(dev);
1090 	if (err) {
1091 		dev_err(&pdev->dev, "query hca failed\n");
1092 		goto err_stop_poll;
1093 	}
1094 
1095 	err = mlx5_query_board_id(dev);
1096 	if (err) {
1097 		dev_err(&pdev->dev, "query board id failed\n");
1098 		goto err_stop_poll;
1099 	}
1100 
1101 	err = mlx5_enable_msix(dev);
1102 	if (err) {
1103 		dev_err(&pdev->dev, "enable msix failed\n");
1104 		goto err_stop_poll;
1105 	}
1106 
1107 	err = mlx5_eq_init(dev);
1108 	if (err) {
1109 		dev_err(&pdev->dev, "failed to initialize eq\n");
1110 		goto disable_msix;
1111 	}
1112 
1113 	err = mlx5_alloc_uuars(dev, &priv->uuari);
1114 	if (err) {
1115 		dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
1116 		goto err_eq_cleanup;
1117 	}
1118 
1119 	err = mlx5_start_eqs(dev);
1120 	if (err) {
1121 		dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
1122 		goto err_free_uar;
1123 	}
1124 
1125 	err = alloc_comp_eqs(dev);
1126 	if (err) {
1127 		dev_err(&pdev->dev, "Failed to alloc completion EQs\n");
1128 		goto err_stop_eqs;
1129 	}
1130 
1131 	err = mlx5_irq_set_affinity_hints(dev);
1132 	if (err)
1133 		dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
1134 
1135 	MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);
1136 
1137 	mlx5_init_cq_table(dev);
1138 	mlx5_init_qp_table(dev);
1139 	mlx5_init_srq_table(dev);
1140 	mlx5_init_mkey_table(dev);
1141 
1142 	err = mlx5_init_fs(dev);
1143 	if (err) {
1144 		dev_err(&pdev->dev, "Failed to init flow steering\n");
1145 		goto err_fs;
1146 	}
1147 #ifdef CONFIG_MLX5_CORE_EN
1148 	err = mlx5_eswitch_init(dev);
1149 	if (err) {
1150 		dev_err(&pdev->dev, "eswitch init failed %d\n", err);
1151 		goto err_reg_dev;
1152 	}
1153 #endif
1154 
1155 	err = mlx5_sriov_init(dev);
1156 	if (err) {
1157 		dev_err(&pdev->dev, "sriov init failed %d\n", err);
1158 		goto err_sriov;
1159 	}
1160 
1161 	err = mlx5_register_device(dev);
1162 	if (err) {
1163 		dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
1164 		goto err_reg_dev;
1165 	}
1166 
1167 	err = request_module_nowait(MLX5_IB_MOD);
1168 	if (err)
1169 		pr_info("failed to request module %s\n", MLX5_IB_MOD);
1170 
1171 	clear_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
1172 	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
1173 out:
1174 	mutex_unlock(&dev->intf_state_mutex);
1175 
1176 	return 0;
1177 
1178 err_sriov:
1179 	if (mlx5_sriov_cleanup(dev))
1180 		dev_err(&dev->pdev->dev, "sriov cleanup failed\n");
1181 
1182 #ifdef CONFIG_MLX5_CORE_EN
1183 	mlx5_eswitch_cleanup(dev->priv.eswitch);
1184 #endif
1185 err_reg_dev:
1186 	mlx5_cleanup_fs(dev);
1187 err_fs:
1188 	mlx5_cleanup_mkey_table(dev);
1189 	mlx5_cleanup_srq_table(dev);
1190 	mlx5_cleanup_qp_table(dev);
1191 	mlx5_cleanup_cq_table(dev);
1192 	mlx5_irq_clear_affinity_hints(dev);
1193 	free_comp_eqs(dev);
1194 
1195 err_stop_eqs:
1196 	mlx5_stop_eqs(dev);
1197 
1198 err_free_uar:
1199 	mlx5_free_uuars(dev, &priv->uuari);
1200 
1201 err_eq_cleanup:
1202 	mlx5_eq_cleanup(dev);
1203 
1204 disable_msix:
1205 	mlx5_disable_msix(dev);
1206 
1207 err_stop_poll:
1208 	mlx5_stop_health_poll(dev);
1209 	if (mlx5_cmd_teardown_hca(dev)) {
1210 		dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
1211 		goto out_err;
1212 	}
1213 
1214 err_pagealloc_stop:
1215 	mlx5_pagealloc_stop(dev);
1216 
1217 reclaim_boot_pages:
1218 	mlx5_reclaim_startup_pages(dev);
1219 
1220 err_disable_hca:
1221 	mlx5_core_disable_hca(dev, 0);
1222 
1223 err_pagealloc_cleanup:
1224 	mlx5_pagealloc_cleanup(dev);
1225 	mlx5_cmd_cleanup(dev);
1226 
1227 out_err:
1228 	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
1229 	mutex_unlock(&dev->intf_state_mutex);
1230 
1231 	return err;
1232 }
1233 
1234 static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
1235 {
1236 	int err = 0;
1237 
1238 	err = mlx5_sriov_cleanup(dev);
1239 	if (err) {
1240 		dev_warn(&dev->pdev->dev, "%s: sriov cleanup failed - abort\n",
1241 			 __func__);
1242 		return err;
1243 	}
1244 
1245 	mutex_lock(&dev->intf_state_mutex);
1246 	if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) {
1247 		dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
1248 			 __func__);
1249 		goto out;
1250 	}
1251 	mlx5_unregister_device(dev);
1252 #ifdef CONFIG_MLX5_CORE_EN
1253 	mlx5_eswitch_cleanup(dev->priv.eswitch);
1254 #endif
1255 
1256 	mlx5_cleanup_fs(dev);
1257 	mlx5_cleanup_mkey_table(dev);
1258 	mlx5_cleanup_srq_table(dev);
1259 	mlx5_cleanup_qp_table(dev);
1260 	mlx5_cleanup_cq_table(dev);
1261 	mlx5_irq_clear_affinity_hints(dev);
1262 	free_comp_eqs(dev);
1263 	mlx5_stop_eqs(dev);
1264 	mlx5_free_uuars(dev, &priv->uuari);
1265 	mlx5_eq_cleanup(dev);
1266 	mlx5_disable_msix(dev);
1267 	mlx5_stop_health_poll(dev);
1268 	err = mlx5_cmd_teardown_hca(dev);
1269 	if (err) {
1270 		dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
1271 		goto out;
1272 	}
1273 	mlx5_pagealloc_stop(dev);
1274 	mlx5_reclaim_startup_pages(dev);
1275 	mlx5_core_disable_hca(dev, 0);
1276 	mlx5_pagealloc_cleanup(dev);
1277 	mlx5_cmd_cleanup(dev);
1278 
1279 out:
1280 	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
1281 	set_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
1282 	mutex_unlock(&dev->intf_state_mutex);
1283 	return err;
1284 }
1285 
1286 void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
1287 		     unsigned long param)
1288 {
1289 	struct mlx5_priv *priv = &dev->priv;
1290 	struct mlx5_device_context *dev_ctx;
1291 	unsigned long flags;
1292 
1293 	spin_lock_irqsave(&priv->ctx_lock, flags);
1294 
1295 	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
1296 		if (dev_ctx->intf->event)
1297 			dev_ctx->intf->event(dev, dev_ctx->context, event, param);
1298 
1299 	spin_unlock_irqrestore(&priv->ctx_lock, flags);
1300 }
1301 
1302 struct mlx5_core_event_handler {
1303 	void (*event)(struct mlx5_core_dev *dev,
1304 		      enum mlx5_dev_event event,
1305 		      void *data);
1306 };
1307 
1308 
1309 static int init_one(struct pci_dev *pdev,
1310 		    const struct pci_device_id *id)
1311 {
1312 	struct mlx5_core_dev *dev;
1313 	struct mlx5_priv *priv;
1314 	int err;
1315 
1316 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1317 	if (!dev) {
1318 		dev_err(&pdev->dev, "kzalloc failed\n");
1319 		return -ENOMEM;
1320 	}
1321 	priv = &dev->priv;
1322 	priv->pci_dev_data = id->driver_data;
1323 
1324 	pci_set_drvdata(pdev, dev);
1325 
1326 	if (prof_sel < 0 || prof_sel >= ARRAY_SIZE(profile)) {
1327 		pr_warn("selected profile out of range, selecting default (%d)\n",
1328 			MLX5_DEFAULT_PROF);
1329 		prof_sel = MLX5_DEFAULT_PROF;
1330 	}
1331 	dev->profile = &profile[prof_sel];
1332 	dev->pdev = pdev;
1333 	dev->event = mlx5_core_event;
1334 
1335 	INIT_LIST_HEAD(&priv->ctx_list);
1336 	spin_lock_init(&priv->ctx_lock);
1337 	mutex_init(&dev->pci_status_mutex);
1338 	mutex_init(&dev->intf_state_mutex);
1339 	err = mlx5_pci_init(dev, priv);
1340 	if (err) {
1341 		dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
1342 		goto clean_dev;
1343 	}
1344 
1345 	err = mlx5_health_init(dev);
1346 	if (err) {
1347 		dev_err(&pdev->dev, "mlx5_health_init failed with error code %d\n", err);
1348 		goto close_pci;
1349 	}
1350 
1351 	err = mlx5_load_one(dev, priv);
1352 	if (err) {
1353 		dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err);
1354 		goto clean_health;
1355 	}
1356 
1357 	return 0;
1358 
1359 clean_health:
1360 	mlx5_health_cleanup(dev);
1361 close_pci:
1362 	mlx5_pci_close(dev, priv);
1363 clean_dev:
1364 	pci_set_drvdata(pdev, NULL);
1365 	kfree(dev);
1366 
1367 	return err;
1368 }
1369 
1370 static void remove_one(struct pci_dev *pdev)
1371 {
1372 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
1373 	struct mlx5_priv *priv = &dev->priv;
1374 
1375 	if (mlx5_unload_one(dev, priv)) {
1376 		dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n");
1377 		mlx5_health_cleanup(dev);
1378 		return;
1379 	}
1380 	mlx5_health_cleanup(dev);
1381 	mlx5_pci_close(dev, priv);
1382 	pci_set_drvdata(pdev, NULL);
1383 	kfree(dev);
1384 }
1385 
1386 static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
1387 					      pci_channel_state_t state)
1388 {
1389 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1390 	struct mlx5_priv *priv = &dev->priv;
1391 
1392 	dev_info(&pdev->dev, "%s was called\n", __func__);
1393 	mlx5_enter_error_state(dev);
1394 	mlx5_unload_one(dev, priv);
1395 	mlx5_pci_disable_device(dev);
1396 	return state == pci_channel_io_perm_failure ?
1397 		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
1398 }
1399 
1400 static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
1401 {
1402 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1403 	int err = 0;
1404 
1405 	dev_info(&pdev->dev, "%s was called\n", __func__);
1406 
1407 	err = mlx5_pci_enable_device(dev);
1408 	if (err) {
1409 		dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
1410 			, __func__, err);
1411 		return PCI_ERS_RESULT_DISCONNECT;
1412 	}
1413 	pci_set_master(pdev);
1414 	pci_set_power_state(pdev, PCI_D0);
1415 	pci_restore_state(pdev);
1416 
1417 	return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
1418 }
1419 
1420 void mlx5_disable_device(struct mlx5_core_dev *dev)
1421 {
1422 	mlx5_pci_err_detected(dev->pdev, 0);
1423 }
1424 
1425 /* Wait for the device to show vital signs: check that we can read the
1426  * device ID and that the health counter shows a non-zero value other
1427  * than 0xffffffff.
1428  */
1429 static void wait_vital(struct pci_dev *pdev)
1430 {
1431 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1432 	struct mlx5_core_health *health = &dev->priv.health;
1433 	const int niter = 100;
1434 	u32 count;
1435 	u16 did;
1436 	int i;
1437 
1438 	/* Wait for firmware to be ready after reset */
1439 	msleep(1000);
1440 	for (i = 0; i < niter; i++) {
1441 		if (pci_read_config_word(pdev, 2, &did)) {
1442 			dev_warn(&pdev->dev, "failed reading config word\n");
1443 			break;
1444 		}
1445 		if (did == pdev->device) {
1446 			dev_info(&pdev->dev, "device ID correctly read after %d iterations\n", i);
1447 			break;
1448 		}
1449 		msleep(50);
1450 	}
1451 	if (i == niter)
1452 		dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__);
1453 
1454 	for (i = 0; i < niter; i++) {
1455 		count = ioread32be(health->health_counter);
1456 		if (count && count != 0xffffffff) {
1457 			dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
1458 			break;
1459 		}
1460 		msleep(50);
1461 	}
1462 
1463 	if (i == niter)
1464 		dev_warn(&pdev->dev, "%s-%d: could not read the health counter\n", __func__, __LINE__);
1465 }
1466 
1467 static void mlx5_pci_resume(struct pci_dev *pdev)
1468 {
1469 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1470 	struct mlx5_priv *priv = &dev->priv;
1471 	int err;
1472 
1473 	dev_info(&pdev->dev, "%s was called\n", __func__);
1474 
1475 	pci_save_state(pdev);
1476 	wait_vital(pdev);
1477 
1478 	err = mlx5_load_one(dev, priv);
1479 	if (err)
1480 		dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
1481 			, __func__, err);
1482 	else
1483 		dev_info(&pdev->dev, "%s: device recovered\n", __func__);
1484 }
1485 
1486 static const struct pci_error_handlers mlx5_err_handler = {
1487 	.error_detected = mlx5_pci_err_detected,
1488 	.slot_reset	= mlx5_pci_slot_reset,
1489 	.resume		= mlx5_pci_resume
1490 };
1491 
1492 static void shutdown(struct pci_dev *pdev)
1493 {
1494 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
1495 	struct mlx5_priv *priv = &dev->priv;
1496 
1497 	dev_info(&pdev->dev, "Shutdown was called\n");
1498 	/* Notify mlx5 clients that the kernel is being shut down */
1499 	set_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &dev->intf_state);
1500 	mlx5_unload_one(dev, priv);
1501 	mlx5_pci_disable_device(dev);
1502 }
1503 
1504 static const struct pci_device_id mlx5_core_pci_table[] = {
1505 	{ PCI_VDEVICE(MELLANOX, 0x1011) },			/* Connect-IB */
1506 	{ PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF},	/* Connect-IB VF */
1507 	{ PCI_VDEVICE(MELLANOX, 0x1013) },			/* ConnectX-4 */
1508 	{ PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF},	/* ConnectX-4 VF */
1509 	{ PCI_VDEVICE(MELLANOX, 0x1015) },			/* ConnectX-4LX */
1510 	{ PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF},	/* ConnectX-4LX VF */
1511 	{ PCI_VDEVICE(MELLANOX, 0x1017) },			/* ConnectX-5, PCIe 3.0 */
1512 	{ PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF},	/* ConnectX-5 VF */
1513 	{ PCI_VDEVICE(MELLANOX, 0x1019) },			/* ConnectX-5, PCIe 4.0 */
1514 	{ 0, }
1515 };
1516 
1517 MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);
1518 
1519 static struct pci_driver mlx5_core_driver = {
1520 	.name           = DRIVER_NAME,
1521 	.id_table       = mlx5_core_pci_table,
1522 	.probe          = init_one,
1523 	.remove         = remove_one,
1524 	.shutdown	= shutdown,
1525 	.err_handler	= &mlx5_err_handler,
1526 	.sriov_configure   = mlx5_core_sriov_configure,
1527 };
1528 
1529 static int __init init(void)
1530 {
1531 	int err;
1532 
1533 	mlx5_register_debugfs();
1534 
1535 	err = pci_register_driver(&mlx5_core_driver);
1536 	if (err)
1537 		goto err_debug;
1538 
1539 #ifdef CONFIG_MLX5_CORE_EN
1540 	mlx5e_init();
1541 #endif
1542 
1543 	return 0;
1544 
1545 err_debug:
1546 	mlx5_unregister_debugfs();
1547 	return err;
1548 }
1549 
1550 static void __exit cleanup(void)
1551 {
1552 #ifdef CONFIG_MLX5_CORE_EN
1553 	mlx5e_cleanup();
1554 #endif
1555 	pci_unregister_driver(&mlx5_core_driver);
1556 	mlx5_unregister_debugfs();
1557 }
1558 
1559 module_init(init);
1560 module_exit(cleanup);
1561