1 /* 2 * Copyright (c) 2017, Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #include <linux/module.h> 34 #include <linux/etherdevice.h> 35 #include <linux/mlx5/driver.h> 36 37 #include "mlx5_core.h" 38 #include "lib/mlx5.h" 39 #include "lib/eq.h" 40 #include "fpga/core.h" 41 #include "fpga/conn.h" 42 43 static const char *const mlx5_fpga_error_strings[] = { 44 "Null Syndrome", 45 "Corrupted DDR", 46 "Flash Timeout", 47 "Internal Link Error", 48 "Watchdog HW Failure", 49 "I2C Failure", 50 "Image Changed", 51 "Temperature Critical", 52 }; 53 54 static const char * const mlx5_fpga_qp_error_strings[] = { 55 "Null Syndrome", 56 "Retry Counter Expired", 57 "RNR Expired", 58 }; 59 static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void) 60 { 61 struct mlx5_fpga_device *fdev = NULL; 62 63 fdev = kzalloc(sizeof(*fdev), GFP_KERNEL); 64 if (!fdev) 65 return NULL; 66 67 spin_lock_init(&fdev->state_lock); 68 fdev->state = MLX5_FPGA_STATUS_NONE; 69 return fdev; 70 } 71 72 static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image) 73 { 74 switch (image) { 75 case MLX5_FPGA_IMAGE_USER: 76 return "user"; 77 case MLX5_FPGA_IMAGE_FACTORY: 78 return "factory"; 79 default: 80 return "unknown"; 81 } 82 } 83 84 static const char *mlx5_fpga_name(u32 fpga_id) 85 { 86 static char ret[32]; 87 88 switch (fpga_id) { 89 case MLX5_FPGA_NEWTON: 90 return "Newton"; 91 case MLX5_FPGA_EDISON: 92 return "Edison"; 93 case MLX5_FPGA_MORSE: 94 return "Morse"; 95 case MLX5_FPGA_MORSEQ: 96 return "MorseQ"; 97 } 98 99 snprintf(ret, sizeof(ret), "Unknown %d", fpga_id); 100 return ret; 101 } 102 103 static int mlx5_is_fpga_lookaside(u32 fpga_id) 104 { 105 return fpga_id != MLX5_FPGA_NEWTON && fpga_id != MLX5_FPGA_EDISON; 106 } 107 108 static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev) 109 { 110 struct mlx5_fpga_query query; 111 int err; 112 113 err = mlx5_fpga_query(fdev->mdev, &query); 114 if (err) { 115 mlx5_fpga_err(fdev, "Failed to query status: %d\n", err); 116 return err; 117 } 118 119 fdev->last_admin_image = query.admin_image; 120 fdev->last_oper_image = query.oper_image; 121 122 mlx5_fpga_info(fdev, "Status %u; Admin image %u; Oper image %u\n", 123 query.status, query.admin_image, query.oper_image); 124 125 /* for FPGA lookaside projects FPGA load status is not important */ 126 if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id))) 127 return 0; 128 129 if (query.status != MLX5_FPGA_STATUS_SUCCESS) { 130 mlx5_fpga_err(fdev, "%s image failed to load; status %u\n", 131 mlx5_fpga_image_name(fdev->last_oper_image), 132 query.status); 133 return -EIO; 134 } 135 136 return 0; 137 } 138 139 static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev) 140 { 141 int err; 142 struct mlx5_core_dev *mdev = fdev->mdev; 143 144 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON); 145 if (err) { 146 mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err); 147 return err; 148 } 149 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX); 150 if (err) { 151 mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err); 152 return err; 153 } 154 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF); 155 if (err) { 156 mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err); 157 return err; 158 } 159 return 0; 160 } 161 162 static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *); 163 164 static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe) 165 { 166 struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb); 167 168 return mlx5_fpga_event(fdev, event, eqe); 169 } 170 171 static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe) 172 { 173 struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb); 174 175 return mlx5_fpga_event(fdev, event, eqe); 176 } 177 178 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) 179 { 180 struct mlx5_fpga_device *fdev = mdev->fpga; 181 unsigned int max_num_qps; 182 unsigned long flags; 183 u32 fpga_id; 184 int err; 185 186 if (!fdev) 187 return 0; 188 189 err = mlx5_fpga_caps(fdev->mdev); 190 if (err) 191 goto out; 192 193 err = mlx5_fpga_device_load_check(fdev); 194 if (err) 195 goto out; 196 197 fpga_id = MLX5_CAP_FPGA(fdev->mdev, fpga_id); 198 mlx5_fpga_info(fdev, "FPGA card %s:%u\n", mlx5_fpga_name(fpga_id), fpga_id); 199 200 /* No QPs if FPGA does not participate in net processing */ 201 if (mlx5_is_fpga_lookaside(fpga_id)) 202 goto out; 203 204 mlx5_fpga_info(fdev, "%s(%d): image, version %u; SBU %06x:%04x version %d\n", 205 mlx5_fpga_image_name(fdev->last_oper_image), 206 fdev->last_oper_image, 207 MLX5_CAP_FPGA(fdev->mdev, image_version), 208 MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id), 209 MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id), 210 MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version)); 211 212 max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps); 213 if (!max_num_qps) { 214 mlx5_fpga_err(fdev, "FPGA reports 0 QPs in SHELL_CAPS\n"); 215 err = -ENOTSUPP; 216 goto out; 217 } 218 219 err = mlx5_core_reserve_gids(mdev, max_num_qps); 220 if (err) 221 goto out; 222 223 MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR); 224 MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR); 225 mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb); 226 mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb); 227 228 err = mlx5_fpga_conn_device_init(fdev); 229 if (err) 230 goto err_rsvd_gid; 231 232 if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) { 233 err = mlx5_fpga_device_brb(fdev); 234 if (err) 235 goto err_conn_init; 236 } 237 238 goto out; 239 240 err_conn_init: 241 mlx5_fpga_conn_device_cleanup(fdev); 242 243 err_rsvd_gid: 244 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb); 245 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb); 246 mlx5_core_unreserve_gids(mdev, max_num_qps); 247 out: 248 spin_lock_irqsave(&fdev->state_lock, flags); 249 fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS; 250 spin_unlock_irqrestore(&fdev->state_lock, flags); 251 return err; 252 } 253 254 int mlx5_fpga_init(struct mlx5_core_dev *mdev) 255 { 256 struct mlx5_fpga_device *fdev = NULL; 257 258 if (!MLX5_CAP_GEN(mdev, fpga)) { 259 mlx5_core_dbg(mdev, "FPGA capability not present\n"); 260 return 0; 261 } 262 263 mlx5_core_dbg(mdev, "Initializing FPGA\n"); 264 265 fdev = mlx5_fpga_device_alloc(); 266 if (!fdev) 267 return -ENOMEM; 268 269 fdev->mdev = mdev; 270 mdev->fpga = fdev; 271 272 return 0; 273 } 274 275 void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) 276 { 277 struct mlx5_fpga_device *fdev = mdev->fpga; 278 unsigned int max_num_qps; 279 unsigned long flags; 280 int err; 281 282 if (!fdev) 283 return; 284 285 if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id))) 286 return; 287 288 spin_lock_irqsave(&fdev->state_lock, flags); 289 if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) { 290 spin_unlock_irqrestore(&fdev->state_lock, flags); 291 return; 292 } 293 fdev->state = MLX5_FPGA_STATUS_NONE; 294 spin_unlock_irqrestore(&fdev->state_lock, flags); 295 296 if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) { 297 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON); 298 if (err) 299 mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n", 300 err); 301 } 302 303 mlx5_fpga_conn_device_cleanup(fdev); 304 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb); 305 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb); 306 307 max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps); 308 mlx5_core_unreserve_gids(mdev, max_num_qps); 309 } 310 311 void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev) 312 { 313 struct mlx5_fpga_device *fdev = mdev->fpga; 314 315 mlx5_fpga_device_stop(mdev); 316 kfree(fdev); 317 mdev->fpga = NULL; 318 } 319 320 static const char *mlx5_fpga_syndrome_to_string(u8 syndrome) 321 { 322 if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings)) 323 return mlx5_fpga_error_strings[syndrome]; 324 return "Unknown"; 325 } 326 327 static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome) 328 { 329 if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings)) 330 return mlx5_fpga_qp_error_strings[syndrome]; 331 return "Unknown"; 332 } 333 334 static int mlx5_fpga_event(struct mlx5_fpga_device *fdev, 335 unsigned long event, void *eqe) 336 { 337 void *data = ((struct mlx5_eqe *)eqe)->data.raw; 338 const char *event_name; 339 bool teardown = false; 340 unsigned long flags; 341 u8 syndrome; 342 343 switch (event) { 344 case MLX5_EVENT_TYPE_FPGA_ERROR: 345 syndrome = MLX5_GET(fpga_error_event, data, syndrome); 346 event_name = mlx5_fpga_syndrome_to_string(syndrome); 347 break; 348 case MLX5_EVENT_TYPE_FPGA_QP_ERROR: 349 syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome); 350 event_name = mlx5_fpga_qp_syndrome_to_string(syndrome); 351 break; 352 default: 353 return NOTIFY_DONE; 354 } 355 356 spin_lock_irqsave(&fdev->state_lock, flags); 357 switch (fdev->state) { 358 case MLX5_FPGA_STATUS_SUCCESS: 359 mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name); 360 teardown = true; 361 break; 362 default: 363 mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n", 364 syndrome, event_name); 365 } 366 spin_unlock_irqrestore(&fdev->state_lock, flags); 367 /* We tear-down the card's interfaces and functionality because 368 * the FPGA bump-on-the-wire is misbehaving and we lose ability 369 * to communicate with the network. User may still be able to 370 * recover by re-programming or debugging the FPGA 371 */ 372 if (teardown) 373 mlx5_trigger_health_work(fdev->mdev); 374 375 return NOTIFY_OK; 376 } 377