1 // SPDX-License-Identifier: GPL-2.0 2 // Copyright (c) 2019 Mellanox Technologies. 3 4 #include "health.h" 5 #include "lib/eq.h" 6 #include "lib/mlx5.h" 7 8 int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name) 9 { 10 int err; 11 12 err = devlink_fmsg_pair_nest_start(fmsg, name); 13 if (err) 14 return err; 15 16 err = devlink_fmsg_obj_nest_start(fmsg); 17 if (err) 18 return err; 19 20 return 0; 21 } 22 23 int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg) 24 { 25 int err; 26 27 err = devlink_fmsg_obj_nest_end(fmsg); 28 if (err) 29 return err; 30 31 err = devlink_fmsg_pair_nest_end(fmsg); 32 if (err) 33 return err; 34 35 return 0; 36 } 37 38 int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) 39 { 40 struct mlx5e_priv *priv = cq->channel->priv; 41 u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {}; 42 u8 hw_status; 43 void *cqc; 44 int err; 45 46 err = mlx5_core_query_cq(priv->mdev, &cq->mcq, out); 47 if (err) 48 return err; 49 50 cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context); 51 hw_status = MLX5_GET(cqc, cqc, status); 52 53 err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ"); 54 if (err) 55 return err; 56 57 err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn); 58 if (err) 59 return err; 60 61 err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status); 62 if (err) 63 return err; 64 65 err = mlx5e_reporter_named_obj_nest_end(fmsg); 66 if (err) 67 return err; 68 69 return 0; 70 } 71 72 int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) 73 { 74 u8 cq_log_stride; 75 u32 cq_sz; 76 int err; 77 78 cq_sz = mlx5_cqwq_get_size(&cq->wq); 79 cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq); 80 81 err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ"); 82 if (err) 83 return err; 84 85 err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride)); 86 if (err) 87 return err; 88 89 err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz); 90 if (err) 91 return err; 92 93 err = mlx5e_reporter_named_obj_nest_end(fmsg); 94 if (err) 95 return err; 96 97 return 0; 98 } 99 100 int mlx5e_health_create_reporters(struct mlx5e_priv *priv) 101 { 102 int err; 103 104 err = mlx5e_reporter_tx_create(priv); 105 if (err) 106 return err; 107 108 err = mlx5e_reporter_rx_create(priv); 109 if (err) 110 return err; 111 112 return 0; 113 } 114 115 void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv) 116 { 117 mlx5e_reporter_rx_destroy(priv); 118 mlx5e_reporter_tx_destroy(priv); 119 } 120 121 void mlx5e_health_channels_update(struct mlx5e_priv *priv) 122 { 123 if (priv->tx_reporter) 124 devlink_health_reporter_state_update(priv->tx_reporter, 125 DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); 126 if (priv->rx_reporter) 127 devlink_health_reporter_state_update(priv->rx_reporter, 128 DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); 129 } 130 131 int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn) 132 { 133 struct mlx5_core_dev *mdev = channel->mdev; 134 struct net_device *dev = channel->netdev; 135 struct mlx5e_modify_sq_param msp = {}; 136 int err; 137 138 msp.curr_state = MLX5_SQC_STATE_ERR; 139 msp.next_state = MLX5_SQC_STATE_RST; 140 141 err = mlx5e_modify_sq(mdev, sqn, &msp); 142 if (err) { 143 netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn); 144 return err; 145 } 146 147 memset(&msp, 0, sizeof(msp)); 148 msp.curr_state = MLX5_SQC_STATE_RST; 149 msp.next_state = MLX5_SQC_STATE_RDY; 150 151 err = mlx5e_modify_sq(mdev, sqn, &msp); 152 if (err) { 153 netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn); 154 return err; 155 } 156 157 return 0; 158 } 159 160 int mlx5e_health_recover_channels(struct mlx5e_priv *priv) 161 { 162 int err = 0; 163 164 rtnl_lock(); 165 mutex_lock(&priv->state_lock); 166 167 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 168 goto out; 169 170 err = mlx5e_safe_reopen_channels(priv); 171 172 out: 173 mutex_unlock(&priv->state_lock); 174 rtnl_unlock(); 175 176 return err; 177 } 178 179 int mlx5e_health_channel_eq_recover(struct mlx5_eq_comp *eq, struct mlx5e_channel *channel) 180 { 181 u32 eqe_count; 182 183 netdev_err(channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", 184 eq->core.eqn, eq->core.cons_index, eq->core.irqn); 185 186 eqe_count = mlx5_eq_poll_irq_disabled(eq); 187 if (!eqe_count) 188 return -EIO; 189 190 netdev_err(channel->netdev, "Recovered %d eqes on EQ 0x%x\n", 191 eqe_count, eq->core.eqn); 192 193 channel->stats->eq_rearm++; 194 return 0; 195 } 196 197 int mlx5e_health_report(struct mlx5e_priv *priv, 198 struct devlink_health_reporter *reporter, char *err_str, 199 struct mlx5e_err_ctx *err_ctx) 200 { 201 netdev_err(priv->netdev, "%s\n", err_str); 202 203 if (!reporter) 204 return err_ctx->recover(err_ctx->ctx); 205 206 return devlink_health_report(reporter, err_str, err_ctx); 207 } 208 209 #define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024 210 static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg, 211 const void *value, u32 value_len) 212 213 { 214 u32 data_size; 215 u32 offset; 216 int err; 217 218 for (offset = 0; offset < value_len; offset += data_size) { 219 data_size = value_len - offset; 220 if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE) 221 data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE; 222 err = devlink_fmsg_binary_put(fmsg, value + offset, data_size); 223 if (err) 224 break; 225 } 226 return err; 227 } 228 229 int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key, 230 struct devlink_fmsg *fmsg) 231 { 232 struct mlx5_core_dev *mdev = priv->mdev; 233 struct mlx5_rsc_dump_cmd *cmd; 234 struct page *page; 235 int cmd_err, err; 236 int end_err; 237 int size; 238 239 if (IS_ERR_OR_NULL(mdev->rsc_dump)) 240 return -EOPNOTSUPP; 241 242 page = alloc_page(GFP_KERNEL); 243 if (!page) 244 return -ENOMEM; 245 246 err = devlink_fmsg_binary_pair_nest_start(fmsg, "data"); 247 if (err) 248 return err; 249 250 cmd = mlx5_rsc_dump_cmd_create(mdev, key); 251 if (IS_ERR(cmd)) { 252 err = PTR_ERR(cmd); 253 goto free_page; 254 } 255 256 do { 257 cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size); 258 if (cmd_err < 0) { 259 err = cmd_err; 260 goto destroy_cmd; 261 } 262 263 err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size); 264 if (err) 265 goto destroy_cmd; 266 267 } while (cmd_err > 0); 268 269 destroy_cmd: 270 mlx5_rsc_dump_cmd_destroy(cmd); 271 end_err = devlink_fmsg_binary_pair_nest_end(fmsg); 272 if (end_err) 273 err = end_err; 274 free_page: 275 __free_page(page); 276 return err; 277 } 278 279 int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, 280 int queue_idx, char *lbl) 281 { 282 struct mlx5_rsc_key key = {}; 283 int err; 284 285 key.rsc = MLX5_SGMT_TYPE_FULL_QPC; 286 key.index1 = queue_idx; 287 key.size = PAGE_SIZE; 288 key.num_of_obj1 = 1; 289 290 err = devlink_fmsg_obj_nest_start(fmsg); 291 if (err) 292 return err; 293 294 err = mlx5e_reporter_named_obj_nest_start(fmsg, lbl); 295 if (err) 296 return err; 297 298 err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx); 299 if (err) 300 return err; 301 302 err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 303 if (err) 304 return err; 305 306 err = mlx5e_reporter_named_obj_nest_end(fmsg); 307 if (err) 308 return err; 309 310 return devlink_fmsg_obj_nest_end(fmsg); 311 } 312