xref: /linux/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c (revision e5a52fd2b8cdb700b3c07b030e050a49ef3156b9)
1 /*
2  * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/module.h>
34 #include <linux/etherdevice.h>
35 #include <linux/mlx5/driver.h>
36 
37 #include "mlx5_core.h"
38 #include "lib/mlx5.h"
39 #include "lib/eq.h"
40 #include "fpga/core.h"
41 #include "fpga/conn.h"
42 
/*
 * Human-readable descriptions of FPGA error event syndromes.
 * The array index is the syndrome value reported in the event.
 */
static const char *const mlx5_fpga_error_strings[] = {
	[0] = "Null Syndrome",
	[1] = "Corrupted DDR",
	[2] = "Flash Timeout",
	[3] = "Internal Link Error",
	[4] = "Watchdog HW Failure",
	[5] = "I2C Failure",
	[6] = "Image Changed",
	[7] = "Temperature Critical",
};
53 
/*
 * Human-readable descriptions of FPGA QP error event syndromes.
 * The array index is the syndrome value reported in the event.
 */
static const char *const mlx5_fpga_qp_error_strings[] = {
	[0] = "Null Syndrome",
	[1] = "Retry Counter Expired",
	[2] = "RNR Expired",
};
59 static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void)
60 {
61 	struct mlx5_fpga_device *fdev = NULL;
62 
63 	fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
64 	if (!fdev)
65 		return NULL;
66 
67 	spin_lock_init(&fdev->state_lock);
68 	fdev->state = MLX5_FPGA_STATUS_NONE;
69 	return fdev;
70 }
71 
72 static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image)
73 {
74 	switch (image) {
75 	case MLX5_FPGA_IMAGE_USER:
76 		return "user";
77 	case MLX5_FPGA_IMAGE_FACTORY:
78 		return "factory";
79 	default:
80 		return "unknown";
81 	}
82 }
83 
84 static const char *mlx5_fpga_name(u32 fpga_id)
85 {
86 	static char ret[32];
87 
88 	switch (fpga_id) {
89 	case MLX5_FPGA_NEWTON:
90 		return "Newton";
91 	case MLX5_FPGA_EDISON:
92 		return "Edison";
93 	case MLX5_FPGA_MORSE:
94 		return "Morse";
95 	case MLX5_FPGA_MORSEQ:
96 		return "MorseQ";
97 	}
98 
99 	snprintf(ret, sizeof(ret), "Unknown %d", fpga_id);
100 	return ret;
101 }
102 
103 static int mlx5_is_fpga_lookaside(u32 fpga_id)
104 {
105 	return fpga_id != MLX5_FPGA_NEWTON && fpga_id != MLX5_FPGA_EDISON;
106 }
107 
108 static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
109 {
110 	struct mlx5_fpga_query query;
111 	int err;
112 
113 	err = mlx5_fpga_query(fdev->mdev, &query);
114 	if (err) {
115 		mlx5_fpga_err(fdev, "Failed to query status: %d\n", err);
116 		return err;
117 	}
118 
119 	fdev->last_admin_image = query.admin_image;
120 	fdev->last_oper_image = query.oper_image;
121 
122 	mlx5_fpga_info(fdev, "Status %u; Admin image %u; Oper image %u\n",
123 		       query.status, query.admin_image, query.oper_image);
124 
125 	/* for FPGA lookaside projects FPGA load status is not important */
126 	if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id)))
127 		return 0;
128 
129 	if (query.status != MLX5_FPGA_STATUS_SUCCESS) {
130 		mlx5_fpga_err(fdev, "%s image failed to load; status %u\n",
131 			      mlx5_fpga_image_name(fdev->last_oper_image),
132 			      query.status);
133 		return -EIO;
134 	}
135 
136 	return 0;
137 }
138 
139 static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
140 {
141 	int err;
142 	struct mlx5_core_dev *mdev = fdev->mdev;
143 
144 	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
145 	if (err) {
146 		mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
147 		return err;
148 	}
149 	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
150 	if (err) {
151 		mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
152 		return err;
153 	}
154 	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
155 	if (err) {
156 		mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
157 		return err;
158 	}
159 	return 0;
160 }
161 
/*
 * Forward declaration: the notifier callbacks that follow dispatch to this
 * common event handler, which is defined further down in this file.
 */
static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *);
163 
164 static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
165 {
166 	struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb);
167 
168 	return mlx5_fpga_event(fdev, event, eqe);
169 }
170 
171 static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
172 {
173 	struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb);
174 
175 	return mlx5_fpga_event(fdev, event, eqe);
176 }
177 
178 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
179 {
180 	struct mlx5_fpga_device *fdev = mdev->fpga;
181 	unsigned int max_num_qps;
182 	unsigned long flags;
183 	u32 fpga_id;
184 	int err;
185 
186 	if (!fdev)
187 		return 0;
188 
189 	err = mlx5_fpga_caps(fdev->mdev);
190 	if (err)
191 		goto out;
192 
193 	err = mlx5_fpga_device_load_check(fdev);
194 	if (err)
195 		goto out;
196 
197 	fpga_id = MLX5_CAP_FPGA(fdev->mdev, fpga_id);
198 	mlx5_fpga_info(fdev, "FPGA card %s:%u\n", mlx5_fpga_name(fpga_id), fpga_id);
199 
200 	/* No QPs if FPGA does not participate in net processing */
201 	if (mlx5_is_fpga_lookaside(fpga_id))
202 		goto out;
203 
204 	mlx5_fpga_info(fdev, "%s(%d): image, version %u; SBU %06x:%04x version %d\n",
205 		       mlx5_fpga_image_name(fdev->last_oper_image),
206 		       fdev->last_oper_image,
207 		       MLX5_CAP_FPGA(fdev->mdev, image_version),
208 		       MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id),
209 		       MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id),
210 		       MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version));
211 
212 	max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
213 	if (!max_num_qps) {
214 		mlx5_fpga_err(fdev, "FPGA reports 0 QPs in SHELL_CAPS\n");
215 		err = -ENOTSUPP;
216 		goto out;
217 	}
218 
219 	err = mlx5_core_reserve_gids(mdev, max_num_qps);
220 	if (err)
221 		goto out;
222 
223 	MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR);
224 	MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR);
225 	mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb);
226 	mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb);
227 
228 	err = mlx5_fpga_conn_device_init(fdev);
229 	if (err)
230 		goto err_rsvd_gid;
231 
232 	if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
233 		err = mlx5_fpga_device_brb(fdev);
234 		if (err)
235 			goto err_conn_init;
236 	}
237 
238 	goto out;
239 
240 err_conn_init:
241 	mlx5_fpga_conn_device_cleanup(fdev);
242 
243 err_rsvd_gid:
244 	mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
245 	mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
246 	mlx5_core_unreserve_gids(mdev, max_num_qps);
247 out:
248 	spin_lock_irqsave(&fdev->state_lock, flags);
249 	fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
250 	spin_unlock_irqrestore(&fdev->state_lock, flags);
251 	return err;
252 }
253 
254 int mlx5_fpga_init(struct mlx5_core_dev *mdev)
255 {
256 	struct mlx5_fpga_device *fdev = NULL;
257 
258 	if (!MLX5_CAP_GEN(mdev, fpga)) {
259 		mlx5_core_dbg(mdev, "FPGA capability not present\n");
260 		return 0;
261 	}
262 
263 	mlx5_core_dbg(mdev, "Initializing FPGA\n");
264 
265 	fdev = mlx5_fpga_device_alloc();
266 	if (!fdev)
267 		return -ENOMEM;
268 
269 	fdev->mdev = mdev;
270 	mdev->fpga = fdev;
271 
272 	return 0;
273 }
274 
275 void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
276 {
277 	struct mlx5_fpga_device *fdev = mdev->fpga;
278 	unsigned int max_num_qps;
279 	unsigned long flags;
280 	int err;
281 
282 	if (!fdev)
283 		return;
284 
285 	if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id)))
286 		return;
287 
288 	spin_lock_irqsave(&fdev->state_lock, flags);
289 	if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
290 		spin_unlock_irqrestore(&fdev->state_lock, flags);
291 		return;
292 	}
293 	fdev->state = MLX5_FPGA_STATUS_NONE;
294 	spin_unlock_irqrestore(&fdev->state_lock, flags);
295 
296 	if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
297 		err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
298 		if (err)
299 			mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
300 				      err);
301 	}
302 
303 	mlx5_fpga_conn_device_cleanup(fdev);
304 	mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
305 	mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
306 
307 	max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
308 	mlx5_core_unreserve_gids(mdev, max_num_qps);
309 }
310 
311 void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
312 {
313 	struct mlx5_fpga_device *fdev = mdev->fpga;
314 
315 	mlx5_fpga_device_stop(mdev);
316 	kfree(fdev);
317 	mdev->fpga = NULL;
318 }
319 
320 static const char *mlx5_fpga_syndrome_to_string(u8 syndrome)
321 {
322 	if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings))
323 		return mlx5_fpga_error_strings[syndrome];
324 	return "Unknown";
325 }
326 
327 static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
328 {
329 	if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings))
330 		return mlx5_fpga_qp_error_strings[syndrome];
331 	return "Unknown";
332 }
333 
334 static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
335 			   unsigned long event, void *eqe)
336 {
337 	void *data = ((struct mlx5_eqe *)eqe)->data.raw;
338 	const char *event_name;
339 	bool teardown = false;
340 	unsigned long flags;
341 	u8 syndrome;
342 
343 	switch (event) {
344 	case MLX5_EVENT_TYPE_FPGA_ERROR:
345 		syndrome = MLX5_GET(fpga_error_event, data, syndrome);
346 		event_name = mlx5_fpga_syndrome_to_string(syndrome);
347 		break;
348 	case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
349 		syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome);
350 		event_name = mlx5_fpga_qp_syndrome_to_string(syndrome);
351 		break;
352 	default:
353 		return NOTIFY_DONE;
354 	}
355 
356 	spin_lock_irqsave(&fdev->state_lock, flags);
357 	switch (fdev->state) {
358 	case MLX5_FPGA_STATUS_SUCCESS:
359 		mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name);
360 		teardown = true;
361 		break;
362 	default:
363 		mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n",
364 					   syndrome, event_name);
365 	}
366 	spin_unlock_irqrestore(&fdev->state_lock, flags);
367 	/* We tear-down the card's interfaces and functionality because
368 	 * the FPGA bump-on-the-wire is misbehaving and we lose ability
369 	 * to communicate with the network. User may still be able to
370 	 * recover by re-programming or debugging the FPGA
371 	 */
372 	if (teardown)
373 		mlx5_trigger_health_work(fdev->mdev);
374 
375 	return NOTIFY_OK;
376 }
377