// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies. */

#include <linux/refcount.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rtnetlink.h>
#include <linux/workqueue.h>
#include <linux/rwlock.h>
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <net/netevent.h>
#include "neigh.h"
#include "tc.h"
#include "en_rep.h"
#include "fs_core.h"
#include "diag/en_rep_tracepoint.h"

/* Return the IPv6 neighbour table's DELAY_PROBE_TIME, or ~0UL ("no
 * constraint") when IPv6 is disabled or the nd table is unavailable.
 */
static unsigned long mlx5e_rep_ipv6_interval(void)
{
	if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
		return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);

	return ~0UL;
}

/* Initialize the neigh update min_interval as the smaller of the IPv4 and
 * IPv6 DELAY_PROBE_TIME values, and propagate it as the flow-counter
 * sampling interval so counter updates keep pace with neigh probing.
 */
static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
{
	unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
	unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);

	rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
	mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
}

/* (Re)arm the periodic neigh stats work after min_interval jiffies. */
void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;

	mlx5_fc_queue_stats_work(priv->mdev,
				 &neigh_update->neigh_stats_work,
				 neigh_update->min_interval);
}

/* Try to take a reference on @nhe; fails (returns false) if the entry is
 * already being released (refcount has dropped to zero).
 */
static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
{
	return refcount_inc_not_zero(&nhe->refcnt);
}

static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);

/* Drop a reference on @nhe; on the last put, unlink it from the hash table
 * and list, then free it after an RCU grace period (lookups are done under
 * RCU, see mlx5e_rep_neigh_entry_lookup()).
 */
void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
{
	if (refcount_dec_and_test(&nhe->refcnt)) {
		mlx5e_rep_neigh_entry_remove(nhe);
		kfree_rcu(nhe, rcu);
	}
}

/* Iterator helper over the RCU neigh list: returns the first entry after
 * @nhe (or the list head when @nhe is NULL) on which a reference could be
 * taken, skipping entries whose refcount already hit zero. The reference
 * held on the incoming @nhe is dropped before returning, so callers can
 * walk the whole list by repeatedly feeding the previous return value back
 * in until NULL.
 */
static struct mlx5e_neigh_hash_entry *
mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
		   struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh_hash_entry *next = NULL;

	rcu_read_lock();

	for (next = nhe ?
		     list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
					   &nhe->neigh_list,
					   struct mlx5e_neigh_hash_entry,
					   neigh_list) :
		     list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
					    struct mlx5e_neigh_hash_entry,
					    neigh_list);
	     next;
	     next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
					  &next->neigh_list,
					  struct mlx5e_neigh_hash_entry,
					  neigh_list))
		if (mlx5e_rep_neigh_entry_hold(next))
			break;

	rcu_read_unlock();

	if (nhe)
		mlx5e_rep_neigh_entry_release(nhe);

	return next;
}

/* Delayed work: refresh the "used" state of every tracked neigh entry from
 * the hardware flow counters, and re-arm itself as long as there are
 * entries to track. Runs under RTNL.
 */
static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
{
	struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
						    neigh_update.neigh_stats_work.work);
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);
	struct mlx5e_neigh_hash_entry *nhe = NULL;

	rtnl_lock();
	if (!list_empty(&rpriv->neigh_update.neigh_list))
		mlx5e_rep_queue_neigh_stats_work(priv);

	/* mlx5e_get_next_nhe() holds a reference on the returned entry and
	 * releases the previous one, so entries stay valid while updated.
	 */
	while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
		mlx5e_tc_update_neigh_used_value(nhe);

	rtnl_unlock();
}

/* Work handler for a single neighbour update event: snapshot the neigh
 * state and hw address, then update the offloaded flows of every encap
 * entry hanging off this nhe accordingly. Drops the nhe and neighbour
 * references taken in mlx5e_rep_queue_neigh_update_work().
 */
static void mlx5e_rep_neigh_update(struct work_struct *work)
{
	struct mlx5e_neigh_hash_entry *nhe =
		container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
	struct neighbour *n = nhe->n;
	struct mlx5e_encap_entry *e;
	unsigned char ha[ETH_ALEN];
	struct mlx5e_priv *priv;
	bool neigh_connected;
	u8 nud_state, dead;

	rtnl_lock();

	/* If these parameters are changed after we release the lock,
	 * we'll receive another event letting us know about it.
	 * We use this lock to avoid inconsistency between the neigh validity
	 * and its hw address.
	 */
	read_lock_bh(&n->lock);
	memcpy(ha, n->ha, ETH_ALEN);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);

	neigh_connected = (nud_state & NUD_VALID) && !dead;

	trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);

	list_for_each_entry(e, &nhe->encap_list, encap_list) {
		/* Skip encap entries that are already going away. */
		if (!mlx5e_encap_take(e))
			continue;

		priv = netdev_priv(e->out_dev);
		mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
		mlx5e_encap_put(priv, e);
	}
	mlx5e_rep_neigh_entry_release(nhe);
	rtnl_unlock();
	neigh_release(n);
}

/* Queue nhe->neigh_update_work to process a neigh event for @n. The caller
 * passes in a reference on @nhe; on queueing failure both the nhe and
 * neighbour references are dropped here, otherwise the work handler drops
 * them.
 */
static void mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
					      struct mlx5e_neigh_hash_entry *nhe,
					      struct neighbour *n)
{
	/* Take a reference to ensure the neighbour and mlx5 encap
	 * entry won't be destructed until we drop the reference in
	 * delayed work.
	 */
	neigh_hold(n);

	/* This assignment is valid as long as the neigh reference
	 * is taken
	 */
	nhe->n = n;

	if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
		mlx5e_rep_neigh_entry_release(nhe);
		neigh_release(n);
	}
}

/* netevent notifier: reacts to neighbour state changes (by queueing update
 * work for tracked entries) and to per-device DELAY_PROBE_TIME changes (by
 * shrinking the flow-counter sampling interval). May run in atomic context,
 * hence the RCU-only lookups and deferral to the workqueue.
 */
static int mlx5e_rep_netevent_event(struct notifier_block *nb,
				    unsigned long event, void *ptr)
{
	struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
						    neigh_update.netevent_nb);
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);
	struct mlx5e_neigh_hash_entry *nhe = NULL;
	struct mlx5e_neigh m_neigh = {};
	struct neigh_parms *p;
	struct neighbour *n;
	bool found = false;

	switch (event) {
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;
		/* Only ARP and (when enabled) IPv6 ND tables are of interest. */
#if IS_ENABLED(CONFIG_IPV6)
		if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
#else
		if (n->tbl != &arp_tbl)
#endif
			return NOTIFY_DONE;

		m_neigh.dev = n->dev;
		m_neigh.family = n->ops->family;
		memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);

		/* Lookup takes a reference on success; released by the
		 * queued work (or immediately if queueing fails).
		 */
		rcu_read_lock();
		nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
		rcu_read_unlock();
		if (!nhe)
			return NOTIFY_DONE;

		mlx5e_rep_queue_neigh_update_work(priv, nhe, n);
		break;

	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* We check the device is present since we don't care about
		 * changes in the default table, we only care about changes
		 * done per device delay probe time parameter.
		 */
#if IS_ENABLED(CONFIG_IPV6)
		if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
#else
		if (!p->dev || p->tbl != &arp_tbl)
#endif
			return NOTIFY_DONE;

		/* Only react if we actually track a neigh on this device. */
		rcu_read_lock();
		list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
					neigh_list) {
			if (p->dev == nhe->m_neigh.dev) {
				found = true;
				break;
			}
		}
		rcu_read_unlock();
		if (!found)
			return NOTIFY_DONE;

		/* min_interval only ever shrinks here; it is re-derived from
		 * scratch in mlx5e_rep_neigh_update_init_interval() at init.
		 */
		neigh_update->min_interval = min_t(unsigned long,
						   NEIGH_VAR(p, DELAY_PROBE_TIME),
						   neigh_update->min_interval);
		mlx5_fc_update_sampling_interval(priv->mdev,
						 neigh_update->min_interval);
		break;
	}
	return NOTIFY_DONE;
}

/* Hash table of nhe entries keyed by the full mlx5e_neigh tuple. */
static const struct rhashtable_params mlx5e_neigh_ht_params = {
	.head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
	.key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
	.key_len = sizeof(struct mlx5e_neigh),
	.automatic_shrinking = true,
};

/* Set up the per-representor neigh update machinery: hash table, RCU list,
 * stats work, sampling interval and the netevent notifier. Returns 0 or a
 * negative errno; on failure nothing is left registered.
 */
int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	int err;

	err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
	if (err)
		return err;

	INIT_LIST_HEAD(&neigh_update->neigh_list);
	mutex_init(&neigh_update->encap_lock);
	INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
			  mlx5e_rep_neigh_stats_work);
	mlx5e_rep_neigh_update_init_interval(rpriv);

	rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
	err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
	if (err)
		goto out_err;
	return 0;

out_err:
	rhashtable_destroy(&neigh_update->neigh_ht);
	return err;
}

/* Tear down in reverse order of mlx5e_rep_neigh_init(): stop new events
 * first, then drain in-flight work before destroying the lock and table.
 */
void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);

	unregister_netevent_notifier(&neigh_update->netevent_nb);

	flush_workqueue(priv->wq); /* flush neigh update works */

	cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);

	mutex_destroy(&neigh_update->encap_lock);
	rhashtable_destroy(&neigh_update->neigh_ht);
}

/* Insert @nhe into the hash table and, on success, the RCU list used by
 * the stats work and the notifier. Returns 0 or a negative errno.
 */
static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
					struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	int err;

	err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
				     &nhe->rhash_node,
				     mlx5e_neigh_ht_params);
	if (err)
		return err;

	list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);

	return err;
}

/* Unlink @nhe from both the RCU list and the hash table under encap_lock;
 * called only from the final mlx5e_rep_neigh_entry_release(). The actual
 * free is deferred via kfree_rcu() by the caller.
 */
static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;

	mutex_lock(&rpriv->neigh_update.encap_lock);

	list_del_rcu(&nhe->neigh_list);

	rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
			       &nhe->rhash_node,
			       mlx5e_neigh_ht_params);
	mutex_unlock(&rpriv->neigh_update.encap_lock);
}

/* This function must only be called under the representor's encap_lock or
 * inside rcu read lock section.
 *
 * Returns the matching entry with a reference held, or NULL when there is
 * no match or the entry is already being released.
 */
struct mlx5e_neigh_hash_entry *
mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
			     struct mlx5e_neigh *m_neigh)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct mlx5e_neigh_hash_entry *nhe;

	nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
				     mlx5e_neigh_ht_params);
	return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
}

/* Allocate and register a new nhe keyed by @e's neigh tuple; the new entry
 * starts with refcount 1 owned by the caller. Returns 0 with *@nhe set, or
 * a negative errno (-ENOMEM, or the insert failure) with nothing allocated.
 */
int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
				 struct mlx5e_encap_entry *e,
				 struct mlx5e_neigh_hash_entry **nhe)
{
	int err;

	*nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
	if (!*nhe)
		return -ENOMEM;

	(*nhe)->priv = priv;
	memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
	INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
	spin_lock_init(&(*nhe)->encap_list_lock);
	INIT_LIST_HEAD(&(*nhe)->encap_list);
	refcount_set(&(*nhe)->refcnt, 1);

	err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
	if (err)
		goto out_free;
	return 0;

out_free:
	kfree(*nhe);
	return err;
}