// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies. */

#include <linux/refcount.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rtnetlink.h>
#include <linux/workqueue.h>
#include <linux/rwlock.h>
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <net/netevent.h>
#include "neigh.h"
#include "tc.h"
#include "en_rep.h"
#include "fs_core.h"
#include "diag/en_rep_tracepoint.h"

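/* Return the DELAY_PROBE_TIME of the IPv6 neighbour table, or ~0UL when
 * IPv6 is not available, so that the IPv4 interval wins the min_t() in
 * mlx5e_rep_neigh_update_init_interval().
 */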
static unsigned long mlx5e_rep_ipv6_interval(void)
{
	if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
		return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);

	return ~0UL;
}

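/* Take the shorter of the IPv4 and IPv6 DELAY_PROBE_TIME values and
 * propagate it to the device as the flow counter sampling interval.
 */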
static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
{
	unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
	unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);

	rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
	mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
}

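/* (Re)arm the delayed neigh stats work with the current minimal
 * DELAY_PROBE_TIME interval.
 */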
void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;

	mlx5_fc_queue_stats_work(priv->mdev,
				 &neigh_update->neigh_stats_work,
				 neigh_update->min_interval);
}

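/* Take a reference unless the entry is already being released; returns
 * false when the refcount has already dropped to zero.
 */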
static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
{
	return refcount_inc_not_zero(&nhe->refcnt);
}

static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);

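/* Release a reference; the final put unlinks the entry from the hash
 * table and list, then frees it after an RCU grace period so concurrent
 * RCU readers stay safe.
 */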
void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
{
	if (refcount_dec_and_test(&nhe->refcnt)) {
		mlx5e_rep_neigh_entry_remove(nhe);
		kfree_rcu(nhe, rcu);
	}
}

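/* Iterate the neigh list under RCU: return the next entry after @nhe (or
 * the first entry when @nhe is NULL) on which a reference could be taken,
 * skipping entries already going away, and release the reference on @nhe.
 */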
static struct mlx5e_neigh_hash_entry *
mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
		   struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh_hash_entry *next = NULL;

	rcu_read_lock();

	for (next = nhe ?
		     list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
					   &nhe->neigh_list,
					   struct mlx5e_neigh_hash_entry,
					   neigh_list) :
		     list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
					    struct mlx5e_neigh_hash_entry,
					    neigh_list);
	     next;
	     next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
					  &next->neigh_list,
					  struct mlx5e_neigh_hash_entry,
					  neigh_list))
		if (mlx5e_rep_neigh_entry_hold(next))
			break;

	rcu_read_unlock();

	if (nhe)
		mlx5e_rep_neigh_entry_release(nhe);

	return next;
}

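/* Self-rearming work: under RTNL, re-queue the stats work while entries
 * exist, then walk all neigh entries and update their "used" value via
 * mlx5e_tc_update_neigh_used_value(), holding a reference per entry so it
 * cannot go away mid-update.
 */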
static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
{
	struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
						    neigh_update.neigh_stats_work.work);
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);
	struct mlx5e_neigh_hash_entry *nhe = NULL;

	rtnl_lock();
	if (!list_empty(&rpriv->neigh_update.neigh_list))
		mlx5e_rep_queue_neigh_stats_work(priv);

	while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
		mlx5e_tc_update_neigh_used_value(nhe);

	rtnl_unlock();
}

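/* Work handler for a neighbour state change: snapshot the nud_state and
 * hw address under the neigh lock, update the offloaded flows of every
 * encap entry attached to this nhe, then drop the references taken when
 * the work was queued.
 */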
static void mlx5e_rep_neigh_update(struct work_struct *work)
{
	struct mlx5e_neigh_hash_entry *nhe =
		container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
	struct neighbour *n = nhe->n;
	struct mlx5e_encap_entry *e;
	unsigned char ha[ETH_ALEN];
	struct mlx5e_priv *priv;
	bool neigh_connected;
	u8 nud_state, dead;

	rtnl_lock();

	/* If these parameters are changed after we release the lock,
	 * we'll receive another event letting us know about it.
	 * We use this lock to avoid inconsistency between the neigh validity
	 * and its hw address.
	 */
	read_lock_bh(&n->lock);
	memcpy(ha, n->ha, ETH_ALEN);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);

	neigh_connected = (nud_state & NUD_VALID) && !dead;

	trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);

	list_for_each_entry(e, &nhe->encap_list, encap_list) {
		if (!mlx5e_encap_take(e))
			continue;

		priv = netdev_priv(e->out_dev);
		mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
		mlx5e_encap_put(priv, e);
	}
	mlx5e_rep_neigh_entry_release(nhe);
	rtnl_unlock();
	neigh_release(n);
}

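/* Queue the nhe update work. queue_work() returns false when the work is
 * already pending; in that case drop the references taken for it so they
 * are not leaked.
 */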
static void mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
					      struct mlx5e_neigh_hash_entry *nhe,
					      struct neighbour *n)
{
	/* Take a reference to ensure the neighbour and mlx5 encap
	 * entry won't be destructed until we drop the reference in
	 * delayed work.
	 */
	neigh_hold(n);

	/* This assignment is valid as long as the neigh reference
	 * is held.
	 */
	nhe->n = n;

	if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
		mlx5e_rep_neigh_entry_release(nhe);
		neigh_release(n);
	}
}

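/* Netevent handler: NEIGH_UPDATE events on the ARP/ND tables are
 * dispatched to per-entry update work; DELAY_PROBE_TIME updates shrink
 * the flow counter sampling interval when a tracked device's new value
 * is smaller.
 */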
static int mlx5e_rep_netevent_event(struct notifier_block *nb,
				    unsigned long event, void *ptr)
{
	struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
						    neigh_update.netevent_nb);
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);
	struct mlx5e_neigh_hash_entry *nhe = NULL;
	struct mlx5e_neigh m_neigh = {};
	struct neigh_parms *p;
	struct neighbour *n;
	bool found = false;

	switch (event) {
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;
#if IS_ENABLED(CONFIG_IPV6)
		if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
#else
		if (n->tbl != &arp_tbl)
#endif
			return NOTIFY_DONE;

		m_neigh.dev = n->dev;
		m_neigh.family = n->ops->family;
		memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);

		rcu_read_lock();
		nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
		rcu_read_unlock();
		if (!nhe)
			return NOTIFY_DONE;

		mlx5e_rep_queue_neigh_update_work(priv, nhe, n);
		break;

	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* Check that the device is present, since we don't care
		 * about changes in the default table; we only care about
		 * changes to the per-device DELAY_PROBE_TIME parameter.
		 */
#if IS_ENABLED(CONFIG_IPV6)
		if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
#else
		if (!p->dev || p->tbl != &arp_tbl)
#endif
			return NOTIFY_DONE;

		rcu_read_lock();
		list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
					neigh_list) {
			if (p->dev == nhe->m_neigh.dev) {
				found = true;
				break;
			}
		}
		rcu_read_unlock();
		if (!found)
			return NOTIFY_DONE;

		neigh_update->min_interval = min_t(unsigned long,
						   NEIGH_VAR(p, DELAY_PROBE_TIME),
						   neigh_update->min_interval);
		mlx5_fc_update_sampling_interval(priv->mdev,
						 neigh_update->min_interval);
		break;
	}
	return NOTIFY_DONE;
}

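/* Neigh entries are hashed on the whole struct mlx5e_neigh key (netdev,
 * address family and destination IP).
 */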
static const struct rhashtable_params mlx5e_neigh_ht_params = {
	.head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
	.key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
	.key_len = sizeof(struct mlx5e_neigh),
	.automatic_shrinking = true,
};

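/* Set up the per-representor neigh update machinery: hash table, entry
 * list, stats work and the netevent notifier that feeds it.
 */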
int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	int err;

	err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
	if (err)
		return err;

	INIT_LIST_HEAD(&neigh_update->neigh_list);
	mutex_init(&neigh_update->encap_lock);
	INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
			  mlx5e_rep_neigh_stats_work);
	mlx5e_rep_neigh_update_init_interval(rpriv);

	rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
	err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
	if (err)
		goto out_err;
	return 0;

out_err:
	rhashtable_destroy(&neigh_update->neigh_ht);
	return err;
}

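/* Tear-down mirrors init: stop delivery of new netevents, flush any
 * queued neigh update works, cancel the periodic stats work, then destroy
 * the lock and hash table.
 */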
void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);

	unregister_netevent_notifier(&neigh_update->netevent_nb);

	flush_workqueue(priv->wq); /* flush neigh update works */

	cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);

	mutex_destroy(&neigh_update->encap_lock);
	rhashtable_destroy(&neigh_update->neigh_ht);
}

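/* Publish in the hash table first and only then on the RCU list that the
 * stats work iterates.
 */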
static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
					struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	int err;

	err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
				     &nhe->rhash_node,
				     mlx5e_neigh_ht_params);
	if (err)
		return err;

	list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);

	return err;
}

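/* Unlink under encap_lock so lookups serialized by the lock cannot race
 * with the removal; RCU readers may still see the entry until the grace
 * period that precedes kfree_rcu() in the release path.
 */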
static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;

	mutex_lock(&rpriv->neigh_update.encap_lock);

	list_del_rcu(&nhe->neigh_list);

	rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
			       &nhe->rhash_node,
			       mlx5e_neigh_ht_params);
	mutex_unlock(&rpriv->neigh_update.encap_lock);
}

/* This function must only be called under the representor's encap_lock or
 * inside an RCU read-side critical section.
 */
struct mlx5e_neigh_hash_entry *
mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
			     struct mlx5e_neigh *m_neigh)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct mlx5e_neigh_hash_entry *nhe;

	nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
				     mlx5e_neigh_ht_params);
	return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
}

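/* Allocate and initialize a neigh hash entry keyed by the encap entry's
 * neighbour, then publish it. The entry is returned with an initial
 * reference owned by the caller.
 */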
int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
				 struct mlx5e_encap_entry *e,
				 struct mlx5e_neigh_hash_entry **nhe)
{
	int err;

	*nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
	if (!*nhe)
		return -ENOMEM;

	(*nhe)->priv = priv;
	memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
	INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
	spin_lock_init(&(*nhe)->encap_list_lock);
	INIT_LIST_HEAD(&(*nhe)->encap_list);
	refcount_set(&(*nhe)->refcnt, 1);

	err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
	if (err)
		goto out_free;
	return 0;

out_free:
	kfree(*nhe);
	return err;
}
369