xref: /linux/drivers/infiniband/core/cache.c (revision 975ef7ff81bb000af6e6c8e63e81f89f3468dcf7)
1 /*
2  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Intel Corporation. All rights reserved.
4  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
5  * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35 
36 #include <linux/module.h>
37 #include <linux/errno.h>
38 #include <linux/slab.h>
39 #include <linux/workqueue.h>
40 #include <linux/netdevice.h>
41 #include <net/addrconf.h>
42 
43 #include <rdma/ib_cache.h>
44 
45 #include "core_priv.h"
46 
47 struct ib_pkey_cache {
48 	int             table_len;
49 	u16             table[0];
50 };
51 
52 struct ib_update_work {
53 	struct work_struct work;
54 	struct ib_device  *device;
55 	u8                 port_num;
56 	bool		   enforce_security;
57 };
58 
59 union ib_gid zgid;
60 EXPORT_SYMBOL(zgid);
61 
62 enum gid_attr_find_mask {
63 	GID_ATTR_FIND_MASK_GID          = 1UL << 0,
64 	GID_ATTR_FIND_MASK_NETDEV	= 1UL << 1,
65 	GID_ATTR_FIND_MASK_DEFAULT	= 1UL << 2,
66 	GID_ATTR_FIND_MASK_GID_TYPE	= 1UL << 3,
67 };
68 
69 enum gid_table_entry_props {
70 	GID_TABLE_ENTRY_INVALID		= 1UL << 0,
71 	GID_TABLE_ENTRY_DEFAULT		= 1UL << 1,
72 };
73 
74 struct ib_gid_table_entry {
75 	unsigned long	    props;
76 	union ib_gid        gid;
77 	struct ib_gid_attr  attr;
78 	void		   *context;
79 };
80 
81 struct ib_gid_table {
82 	int                  sz;
83 	/* In RoCE, adding a GID to the table requires:
84 	 * (a) Check whether this GID already exists.
85 	 * (b) Find a free slot.
86 	 * (c) Write the new GID.
87 	 *
88 	 * Deletion requires a different set of operations:
89 	 * (a) Find the GID.
90 	 * (b) Delete it.
91 	 *
92 	 */
93 	/* Any writer to data_vec must hold this lock and the write side of
94 	 * rwlock. Readers must hold only rwlock. All writers must be in a
95 	 * sleepable context.
96 	 */
97 	struct mutex         lock;
98 	/* rwlock protects data_vec[ix]->props. */
99 	rwlock_t	     rwlock;
100 	struct ib_gid_table_entry *data_vec;
101 };
102 
103 static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
104 {
105 	struct ib_event event;
106 
107 	event.device		= ib_dev;
108 	event.element.port_num	= port;
109 	event.event		= IB_EVENT_GID_CHANGE;
110 
111 	ib_dispatch_event(&event);
112 }
113 
114 static const char * const gid_type_str[] = {
115 	[IB_GID_TYPE_IB]	= "IB/RoCE v1",
116 	[IB_GID_TYPE_ROCE_UDP_ENCAP]	= "RoCE v2",
117 };
118 
119 const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
120 {
121 	if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
122 		return gid_type_str[gid_type];
123 
124 	return "Invalid GID type";
125 }
126 EXPORT_SYMBOL(ib_cache_gid_type_str);
127 
128 /** rdma_is_zero_gid - Check whether the given GID is zero.
129  * @gid:	GID to check
130  * Returns true if the given GID is zero, false otherwise.
131  */
132 bool rdma_is_zero_gid(const union ib_gid *gid)
133 {
134 	return !memcmp(gid, &zgid, sizeof(*gid));
135 }
136 EXPORT_SYMBOL(rdma_is_zero_gid);
137 
138 int ib_cache_gid_parse_type_str(const char *buf)
139 {
140 	unsigned int i;
141 	size_t len;
142 	int err = -EINVAL;
143 
144 	len = strlen(buf);
145 	if (len == 0)
146 		return -EINVAL;
147 
148 	if (buf[len - 1] == '\n')
149 		len--;
150 
151 	for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
152 		if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
153 		    len == strlen(gid_type_str[i])) {
154 			err = i;
155 			break;
156 		}
157 
158 	return err;
159 }
160 EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
161 
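/* Return the cached GID table of @device for port number @port. */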
162 static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
163 {
164 	return device->cache.ports[port - rdma_start_port(device)].gid;
165 }
166 
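/*
 * Release the resources held by a RoCE GID entry: call the driver's
 * del_gid() when the HCA maintains its own GID table, then drop the
 * reference on the associated net_device.
 */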
167 static void del_roce_gid(struct ib_device *device, u8 port_num,
168 			 struct ib_gid_table *table, int ix)
169 {
170 	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
171 		 device->name, port_num, ix,
172 		 table->data_vec[ix].gid.raw);
173 
174 	if (rdma_cap_roce_gid_table(device, port_num))
175 		device->del_gid(&table->data_vec[ix].attr,
176 				&table->data_vec[ix].context);
177 	dev_put(table->data_vec[ix].attr.ndev);
178 }
179 
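/*
 * Program a RoCE GID at attr->index: validate that a netdev is attached,
 * call the driver's add_gid() when the HCA maintains its own GID table,
 * and take a reference on the netdev on success.
 */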
180 static int add_roce_gid(struct ib_gid_table *table,
181 			const union ib_gid *gid,
182 			const struct ib_gid_attr *attr)
183 {
184 	struct ib_gid_table_entry *entry;
185 	int ix = attr->index;
186 	int ret = 0;
187 
188 	if (!attr->ndev) {
189 		pr_err("%s NULL netdev device=%s port=%d index=%d\n",
190 		       __func__, attr->device->name, attr->port_num,
191 		       attr->index);
192 		return -EINVAL;
193 	}
194 
195 	entry = &table->data_vec[ix];
196 	if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) {
197 		WARN(1, "GID table corruption device=%s port=%d index=%d\n",
198 		     attr->device->name, attr->port_num,
199 		     attr->index);
200 		return -EINVAL;
201 	}
202 
203 	if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
204 		ret = attr->device->add_gid(gid, attr, &entry->context);
205 		if (ret) {
206 			pr_err("%s GID add failed device=%s port=%d index=%d\n",
207 			       __func__, attr->device->name, attr->port_num,
208 			       attr->index);
209 			goto add_err;
210 		}
211 	}
212 	dev_hold(attr->ndev);
213 
214 add_err:
215 	if (!ret)
216 		pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
217 			 attr->device->name, attr->port_num, ix, gid->raw);
218 	return ret;
219 }
220 
221 /**
222  * add_modify_gid - Add or modify GID table entry
223  *
224  * @table:	GID table in which the GID is to be added or modified
225  * @gid:	GID content
226  * @attr:	Attributes of the GID
227  *
228  * Returns 0 on success or an appropriate error code. It accepts zero
229  * GID addition for non-RoCE ports of HCAs that report such entries as
230  * valid GIDs. However, such zero GIDs are not added to the cache.
231  */
232 static int add_modify_gid(struct ib_gid_table *table,
233 			  const union ib_gid *gid,
234 			  const struct ib_gid_attr *attr)
235 {
236 	int ret;
237 
238 	if (rdma_protocol_roce(attr->device, attr->port_num)) {
239 		ret = add_roce_gid(table, gid, attr);
240 		if (ret)
241 			return ret;
242 	} else {
243 		/*
244 		 * Some HCAs report multiple GID entries with only one
245 		 * valid GID; the remaining entries are zero GIDs.
246 		 * Tolerate this behavior on the IB link layer and don't
247 		 * fail the call, but don't add such entries to the GID cache.
248 		 */
249 		if (rdma_is_zero_gid(gid))
250 			return 0;
251 	}
252 
253 	lockdep_assert_held(&table->lock);
254 	memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid));
255 	memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr));
256 
257 	write_lock_irq(&table->rwlock);
258 	table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID;
259 	write_unlock_irq(&table->rwlock);
260 	return 0;
261 }
262 
263 /**
264  * del_gid - Delete GID table entry
265  *
266  * @ib_dev:	IB device whose GID entry is to be deleted
267  * @port:	Port number of the IB device
268  * @table:	GID table of the IB device for a port
269  * @ix:		GID entry index to delete
270  *
271  */
272 static void del_gid(struct ib_device *ib_dev, u8 port,
273 		    struct ib_gid_table *table, int ix)
274 {
275 	lockdep_assert_held(&table->lock);
276 	write_lock_irq(&table->rwlock);
277 	table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
278 	write_unlock_irq(&table->rwlock);
279 
280 	if (rdma_protocol_roce(ib_dev, port))
281 		del_roce_gid(ib_dev, port, table, ix);
282 	memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid));
283 	memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
284 	table->data_vec[ix].context = NULL;
285 }
286 
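/*
 * Scan @table for an entry that matches @gid/@val under @mask. Returns the
 * index of the first valid matching entry, or -1 if no match is found.
 * When @pempty is non-NULL, *pempty additionally receives the index of a
 * free slot of the requested kind (default or non-default), or -1 if no
 * such slot exists.
 */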
287 /* rwlock must be read-locked, or the table mutex (lock) must be held */
288 static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
289 		    const struct ib_gid_attr *val, bool default_gid,
290 		    unsigned long mask, int *pempty)
291 {
292 	int i = 0;
293 	int found = -1;
294 	int empty = pempty ? -1 : 0;
295 
296 	while (i < table->sz && (found < 0 || empty < 0)) {
297 		struct ib_gid_table_entry *data = &table->data_vec[i];
298 		struct ib_gid_attr *attr = &data->attr;
299 		int curr_index = i;
300 
301 		i++;
302 
303 		/* find_gid() is used during GID addition, where it is expected
304 		 * to return a free entry slot that is not a duplicate.
305 		 * A free slot is requested and returned only when pempty is
306 		 * set, so look for a free slot only if requested.
307 		 */
308 		if (pempty && empty < 0) {
309 			if (data->props & GID_TABLE_ENTRY_INVALID &&
310 			    (default_gid ==
311 			     !!(data->props & GID_TABLE_ENTRY_DEFAULT))) {
312 				/*
313 				 * If a default GID is requested, then the
314 				 * If default GID is requested, then our
315 				 * found slot must be one of the DEFAULT
316 				 * reserved slots or we fail.
317 				 * This ensures that only DEFAULT reserved
318 				 * slots are used for default property GIDs.
319 				 */
320 				empty = curr_index;
321 			}
322 		}
323 
324 		/*
325 		 * Additionally, find_gid() is used to find a valid entry during
326 		 * a lookup operation, where validity must be checked. The free
327 		 * slot search above has already handled this entry, so simply
328 		 * skip entries marked INVALID here.
329 		 */
330 		if (data->props & GID_TABLE_ENTRY_INVALID)
331 			continue;
332 
333 		if (found >= 0)
334 			continue;
335 
336 		if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
337 		    attr->gid_type != val->gid_type)
338 			continue;
339 
340 		if (mask & GID_ATTR_FIND_MASK_GID &&
341 		    memcmp(gid, &data->gid, sizeof(*gid)))
342 			continue;
343 
344 		if (mask & GID_ATTR_FIND_MASK_NETDEV &&
345 		    attr->ndev != val->ndev)
346 			continue;
347 
348 		if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
349 		    !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
350 		    default_gid)
351 			continue;
352 
353 		found = curr_index;
354 	}
355 
356 	if (pempty)
357 		*pempty = empty;
358 
359 	return found;
360 }
361 
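/*
 * Build the link-local default GID (fe80::/64) for @dev, with the interface
 * identifier derived from the device's MAC address via addrconf_ifid_eui48().
 */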
362 static void make_default_gid(struct  net_device *dev, union ib_gid *gid)
363 {
364 	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
365 	addrconf_ifid_eui48(&gid->raw[8], dev);
366 }
367 
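/*
 * Common worker for GID addition: reject zero GIDs, look for a duplicate or
 * a free slot under the table mutex, install the GID through
 * add_modify_gid() and dispatch a GID change event on success.
 */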
368 static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
369 			      union ib_gid *gid, struct ib_gid_attr *attr,
370 			      unsigned long mask, bool default_gid)
371 {
372 	struct ib_gid_table *table;
373 	int ret = 0;
374 	int empty;
375 	int ix;
376 
377 	/* Do not allow adding a zero GID, as required by
378 	 * IB spec version 1.3, section 4.1.1 point (6),
379 	 * section 12.7.10 and section 12.7.20.
380 	 */
381 	if (rdma_is_zero_gid(gid))
382 		return -EINVAL;
383 
384 	table = rdma_gid_table(ib_dev, port);
385 
386 	mutex_lock(&table->lock);
387 
388 	ix = find_gid(table, gid, attr, default_gid, mask, &empty);
389 	if (ix >= 0)
390 		goto out_unlock;
391 
392 	if (empty < 0) {
393 		ret = -ENOSPC;
394 		goto out_unlock;
395 	}
396 	attr->device = ib_dev;
397 	attr->index = empty;
398 	attr->port_num = port;
399 	ret = add_modify_gid(table, gid, attr);
400 	if (!ret)
401 		dispatch_gid_change_event(ib_dev, port);
402 
403 out_unlock:
404 	mutex_unlock(&table->lock);
405 	if (ret)
406 		pr_warn("%s: unable to add gid %pI6 error=%d\n",
407 			__func__, gid->raw, ret);
408 	return ret;
409 }
410 
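/*
 * Add @gid to the GID cache of @port. Adding a GID that equals the default
 * GID derived from the port's own net_device, but on behalf of a different
 * net_device, is rejected with -EPERM.
 */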
411 int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
412 		     union ib_gid *gid, struct ib_gid_attr *attr)
413 {
414 	struct net_device *idev;
415 	unsigned long mask;
416 	int ret;
417 
418 	if (ib_dev->get_netdev) {
419 		idev = ib_dev->get_netdev(ib_dev, port);
420 		if (idev && attr->ndev != idev) {
421 			union ib_gid default_gid;
422 
423 			/* Adding default GIDs is not permitted */
424 			make_default_gid(idev, &default_gid);
425 			if (!memcmp(gid, &default_gid, sizeof(*gid))) {
426 				dev_put(idev);
427 				return -EPERM;
428 			}
429 		}
430 		if (idev)
431 			dev_put(idev);
432 	}
433 
434 	mask = GID_ATTR_FIND_MASK_GID |
435 	       GID_ATTR_FIND_MASK_GID_TYPE |
436 	       GID_ATTR_FIND_MASK_NETDEV;
437 
438 	ret = __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
439 	return ret;
440 }
441 
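/*
 * Common worker for GID deletion: look up the entry under the table mutex,
 * invalidate it and dispatch a GID change event on success.
 */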
442 static int
443 _ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
444 		  union ib_gid *gid, struct ib_gid_attr *attr,
445 		  unsigned long mask, bool default_gid)
446 {
447 	struct ib_gid_table *table;
448 	int ret = 0;
449 	int ix;
450 
451 	table = rdma_gid_table(ib_dev, port);
452 
453 	mutex_lock(&table->lock);
454 
455 	ix = find_gid(table, gid, attr, default_gid, mask, NULL);
456 	if (ix < 0) {
457 		ret = -EINVAL;
458 		goto out_unlock;
459 	}
460 
461 	del_gid(ib_dev, port, table, ix);
462 	dispatch_gid_change_event(ib_dev, port);
463 
464 out_unlock:
465 	mutex_unlock(&table->lock);
466 	if (ret)
467 		pr_debug("%s: can't delete gid %pI6 error=%d\n",
468 			 __func__, gid->raw, ret);
469 	return ret;
470 }
471 
472 int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
473 		     union ib_gid *gid, struct ib_gid_attr *attr)
474 {
475 	unsigned long mask = GID_ATTR_FIND_MASK_GID	  |
476 			     GID_ATTR_FIND_MASK_GID_TYPE |
477 			     GID_ATTR_FIND_MASK_DEFAULT  |
478 			     GID_ATTR_FIND_MASK_NETDEV;
479 
480 	return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false);
481 }
482 
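/*
 * Delete every GID entry on @port that references @ndev, typically when the
 * net_device is going away.
 */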
483 int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
484 				     struct net_device *ndev)
485 {
486 	struct ib_gid_table *table;
487 	int ix;
488 	bool deleted = false;
489 
490 	table = rdma_gid_table(ib_dev, port);
491 
492 	mutex_lock(&table->lock);
493 
494 	for (ix = 0; ix < table->sz; ix++) {
495 		if (table->data_vec[ix].attr.ndev == ndev) {
496 			del_gid(ib_dev, port, table, ix);
497 			deleted = true;
498 		}
499 	}
500 
501 	mutex_unlock(&table->lock);
502 
503 	if (deleted)
504 		dispatch_gid_change_event(ib_dev, port);
505 
506 	return 0;
507 }
508 
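/*
 * Copy the GID (and optionally its attributes) at @index out of the table.
 * The caller must hold table->rwlock for reading; a reference is taken on
 * the returned attr->ndev, if any.
 */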
509 static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
510 			      union ib_gid *gid, struct ib_gid_attr *attr)
511 {
512 	struct ib_gid_table *table;
513 
514 	table = rdma_gid_table(ib_dev, port);
515 
516 	if (index < 0 || index >= table->sz)
517 		return -EINVAL;
518 
519 	if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
520 		return -EINVAL;
521 
522 	memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
523 	if (attr) {
524 		memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
525 		if (attr->ndev)
526 			dev_hold(attr->ndev);
527 	}
528 
529 	return 0;
530 }
531 
532 static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
533 				    const union ib_gid *gid,
534 				    const struct ib_gid_attr *val,
535 				    unsigned long mask,
536 				    u8 *port, u16 *index)
537 {
538 	struct ib_gid_table *table;
539 	u8 p;
540 	int local_index;
541 	unsigned long flags;
542 
543 	for (p = 0; p < ib_dev->phys_port_cnt; p++) {
544 		table = ib_dev->cache.ports[p].gid;
545 		read_lock_irqsave(&table->rwlock, flags);
546 		local_index = find_gid(table, gid, val, false, mask, NULL);
547 		if (local_index >= 0) {
548 			if (index)
549 				*index = local_index;
550 			if (port)
551 				*port = p + rdma_start_port(ib_dev);
552 			read_unlock_irqrestore(&table->rwlock, flags);
553 			return 0;
554 		}
555 		read_unlock_irqrestore(&table->rwlock, flags);
556 	}
557 
558 	return -ENOENT;
559 }
560 
561 static int ib_cache_gid_find(struct ib_device *ib_dev,
562 			     const union ib_gid *gid,
563 			     enum ib_gid_type gid_type,
564 			     struct net_device *ndev, u8 *port,
565 			     u16 *index)
566 {
567 	unsigned long mask = GID_ATTR_FIND_MASK_GID |
568 			     GID_ATTR_FIND_MASK_GID_TYPE;
569 	struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
570 
571 	if (ndev)
572 		mask |= GID_ATTR_FIND_MASK_NETDEV;
573 
574 	return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
575 					mask, port, index);
576 }
577 
578 /**
579  * ib_find_cached_gid_by_port - Returns the GID table index where a specified
580  * GID value occurs. It searches for the specified GID value in the local
581  * software cache.
582  * @ib_dev: The device to query.
583  * @gid: The GID value to search for.
584  * @gid_type: The GID type to search for.
585  * @port: The port number of the device where the GID value is to be
586  *   searched.
587  * @ndev: In RoCE, the net device of the device. NULL means ignore.
588  * @index: The index into the cached GID table where the GID was found. This
589  *   parameter may be NULL.
590  */
591 int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
592 			       const union ib_gid *gid,
593 			       enum ib_gid_type gid_type,
594 			       u8 port, struct net_device *ndev,
595 			       u16 *index)
596 {
597 	int local_index;
598 	struct ib_gid_table *table;
599 	unsigned long mask = GID_ATTR_FIND_MASK_GID |
600 			     GID_ATTR_FIND_MASK_GID_TYPE;
601 	struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
602 	unsigned long flags;
603 
604 	if (!rdma_is_port_valid(ib_dev, port))
605 		return -ENOENT;
606 
607 	table = rdma_gid_table(ib_dev, port);
608 
609 	if (ndev)
610 		mask |= GID_ATTR_FIND_MASK_NETDEV;
611 
612 	read_lock_irqsave(&table->rwlock, flags);
613 	local_index = find_gid(table, gid, &val, false, mask, NULL);
614 	if (local_index >= 0) {
615 		if (index)
616 			*index = local_index;
617 		read_unlock_irqrestore(&table->rwlock, flags);
618 		return 0;
619 	}
620 
621 	read_unlock_irqrestore(&table->rwlock, flags);
622 	return -ENOENT;
623 }
624 EXPORT_SYMBOL(ib_find_cached_gid_by_port);
625 
626 /**
627  * ib_cache_gid_find_by_filter - Returns the GID table index where a specified
628  * GID value occurs
629  * @ib_dev: The device to query.
630  * @gid: The GID value to search for.
631  * @port: The port number of the device where the GID value is to be
632  *   searched.
633  * @filter: The filter function is executed on any matching GID in the table.
634  *   If the filter function returns true, the corresponding index is returned,
635  *   otherwise, we continue searching the GID table. It's guaranteed that
636  *   while filter is executed, the ndev field is valid and the structure won't
637  *   change. filter is executed in an atomic context. filter must not be NULL.
638  * @index: The index into the cached GID table where the GID was found. This
639  *   parameter may be NULL.
640  *
641  * ib_cache_gid_find_by_filter() searches for the specified GID value
642  * of which the filter function returns true in the port's GID table.
643  * This function is only supported on RoCE ports.
644  *
645  */
646 static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
647 				       const union ib_gid *gid,
648 				       u8 port,
649 				       bool (*filter)(const union ib_gid *,
650 						      const struct ib_gid_attr *,
651 						      void *),
652 				       void *context,
653 				       u16 *index)
654 {
655 	struct ib_gid_table *table;
656 	unsigned int i;
657 	unsigned long flags;
658 	bool found = false;
659 
660 
661 	if (!rdma_is_port_valid(ib_dev, port) ||
662 	    !rdma_protocol_roce(ib_dev, port))
663 		return -EPROTONOSUPPORT;
664 
665 	table = rdma_gid_table(ib_dev, port);
666 
667 	read_lock_irqsave(&table->rwlock, flags);
668 	for (i = 0; i < table->sz; i++) {
669 		struct ib_gid_attr attr;
670 
671 		if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
672 			continue;
673 
674 		if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
675 			continue;
676 
677 		memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
678 
679 		if (filter(gid, &attr, context)) {
680 			found = true;
681 			if (index)
682 				*index = i;
683 			break;
684 		}
685 	}
686 	read_unlock_irqrestore(&table->rwlock, flags);
687 
688 	if (!found)
689 		return -ENOENT;
690 	return 0;
691 }
692 
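/* Allocate a GID table with @sz entries, all initially marked invalid (free). */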
693 static struct ib_gid_table *alloc_gid_table(int sz)
694 {
695 	struct ib_gid_table *table =
696 		kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
697 	int i;
698 
699 	if (!table)
700 		return NULL;
701 
702 	table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
703 	if (!table->data_vec)
704 		goto err_free_table;
705 
706 	mutex_init(&table->lock);
707 
708 	table->sz = sz;
709 	rwlock_init(&table->rwlock);
710 
711 	/* Mark all entries as invalid so that the allocator can hand out
712 	 * one of the invalid (free) entries.
713 	 */
714 	for (i = 0; i < sz; i++)
715 		table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID;
716 	return table;
717 
718 err_free_table:
719 	kfree(table);
720 	return NULL;
721 }
722 
723 static void release_gid_table(struct ib_gid_table *table)
724 {
725 	if (table) {
726 		kfree(table->data_vec);
727 		kfree(table);
728 	}
729 }
730 
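/*
 * Delete every non-zero GID entry of @port and dispatch a single GID change
 * event if at least one entry was removed.
 */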
731 static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
732 				   struct ib_gid_table *table)
733 {
734 	int i;
735 	bool deleted = false;
736 
737 	if (!table)
738 		return;
739 
740 	mutex_lock(&table->lock);
741 	for (i = 0; i < table->sz; ++i) {
742 		if (!rdma_is_zero_gid(&table->data_vec[i].gid)) {
743 			del_gid(ib_dev, port, table, i);
744 			deleted = true;
745 		}
746 	}
747 	mutex_unlock(&table->lock);
748 
749 	if (deleted)
750 		dispatch_gid_change_event(ib_dev, port);
751 }
752 
753 void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
754 				  struct net_device *ndev,
755 				  unsigned long gid_type_mask,
756 				  enum ib_cache_gid_default_mode mode)
757 {
758 	union ib_gid gid = { };
759 	struct ib_gid_attr gid_attr;
760 	struct ib_gid_table *table;
761 	unsigned int gid_type;
762 	unsigned long mask;
763 
764 	table = rdma_gid_table(ib_dev, port);
765 
766 	mask = GID_ATTR_FIND_MASK_GID_TYPE |
767 	       GID_ATTR_FIND_MASK_DEFAULT |
768 	       GID_ATTR_FIND_MASK_NETDEV;
769 	memset(&gid_attr, 0, sizeof(gid_attr));
770 	gid_attr.ndev = ndev;
771 
772 	for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
773 		if (1UL << gid_type & ~gid_type_mask)
774 			continue;
775 
776 		gid_attr.gid_type = gid_type;
777 
778 		if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
779 			make_default_gid(ndev, &gid);
780 			__ib_cache_gid_add(ib_dev, port, &gid,
781 					   &gid_attr, mask, true);
782 		} else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) {
783 			_ib_cache_gid_del(ib_dev, port, &gid,
784 					  &gid_attr, mask, true);
785 		}
786 	}
787 }
788 
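/*
 * Reserve the first table entries for default GIDs: one slot per RoCE GID
 * type supported on this port, each pre-assigned its GID type.
 */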
789 static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
790 				      struct ib_gid_table *table)
791 {
792 	unsigned int i;
793 	unsigned long roce_gid_type_mask;
794 	unsigned int num_default_gids;
795 	unsigned int current_gid = 0;
796 
797 	roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
798 	num_default_gids = hweight_long(roce_gid_type_mask);
799 	for (i = 0; i < num_default_gids && i < table->sz; i++) {
800 		struct ib_gid_table_entry *entry = &table->data_vec[i];
801 
802 		entry->props |= GID_TABLE_ENTRY_DEFAULT;
803 		current_gid = find_next_bit(&roce_gid_type_mask,
804 					    BITS_PER_LONG,
805 					    current_gid);
806 		entry->attr.gid_type = current_gid++;
807 	}
808 }
809 
810 
811 static void gid_table_release_one(struct ib_device *ib_dev)
812 {
813 	struct ib_gid_table *table;
814 	u8 port;
815 
816 	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
817 		table = ib_dev->cache.ports[port].gid;
818 		release_gid_table(table);
819 		ib_dev->cache.ports[port].gid = NULL;
820 	}
821 }
822 
823 static int _gid_table_setup_one(struct ib_device *ib_dev)
824 {
825 	u8 port;
826 	struct ib_gid_table *table;
827 
828 	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
829 		u8 rdma_port = port + rdma_start_port(ib_dev);
830 
831 		table =	alloc_gid_table(
832 				ib_dev->port_immutable[rdma_port].gid_tbl_len);
833 		if (!table)
834 			goto rollback_table_setup;
835 
836 		gid_table_reserve_default(ib_dev, rdma_port, table);
837 		ib_dev->cache.ports[port].gid = table;
838 	}
839 	return 0;
840 
841 rollback_table_setup:
842 	gid_table_release_one(ib_dev);
843 	return -ENOMEM;
844 }
845 
846 static void gid_table_cleanup_one(struct ib_device *ib_dev)
847 {
848 	struct ib_gid_table *table;
849 	u8 port;
850 
851 	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
852 		table = ib_dev->cache.ports[port].gid;
853 		cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
854 				       table);
855 	}
856 }
857 
858 static int gid_table_setup_one(struct ib_device *ib_dev)
859 {
860 	int err;
861 
862 	err = _gid_table_setup_one(ib_dev);
863 
864 	if (err)
865 		return err;
866 
867 	rdma_roce_rescan_device(ib_dev);
868 
869 	return err;
870 }
871 
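/*
 * Read the cached GID at @index for @port_num. When @gid_attr is non-NULL,
 * a reference is taken on the returned attr->ndev which the caller must
 * release with dev_put().
 *
 * Usage sketch (illustrative only, not taken from an in-tree caller):
 *
 *	union ib_gid gid;
 *	struct ib_gid_attr attr;
 *
 *	if (!ib_get_cached_gid(device, port, 0, &gid, &attr)) {
 *		... use gid and attr ...
 *		if (attr.ndev)
 *			dev_put(attr.ndev);
 *	}
 */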
872 int ib_get_cached_gid(struct ib_device *device,
873 		      u8                port_num,
874 		      int               index,
875 		      union ib_gid     *gid,
876 		      struct ib_gid_attr *gid_attr)
877 {
878 	int res;
879 	unsigned long flags;
880 	struct ib_gid_table *table;
881 
882 	if (!rdma_is_port_valid(device, port_num))
883 		return -EINVAL;
884 
885 	table = rdma_gid_table(device, port_num);
886 	read_lock_irqsave(&table->rwlock, flags);
887 	res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
888 	read_unlock_irqrestore(&table->rwlock, flags);
889 
890 	return res;
891 }
892 EXPORT_SYMBOL(ib_get_cached_gid);
893 
894 /**
895  * ib_find_cached_gid - Returns the port number and GID table index where
896  *   a specified GID value occurs.
897  * @device: The device to query.
898  * @gid: The GID value to search for.
899  * @gid_type: The GID type to search for.
900  * @ndev: In RoCE, the net device of the device. NULL means ignore.
901  * @port_num: The port number of the device where the GID value was found.
902  * @index: The index into the cached GID table where the GID was found.  This
903  *   parameter may be NULL.
904  *
905  * ib_find_cached_gid() searches for the specified GID value in
906  * the local software cache.
907  */
908 int ib_find_cached_gid(struct ib_device *device,
909 		       const union ib_gid *gid,
910 		       enum ib_gid_type gid_type,
911 		       struct net_device *ndev,
912 		       u8               *port_num,
913 		       u16              *index)
914 {
915 	return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
916 }
917 EXPORT_SYMBOL(ib_find_cached_gid);
918 
919 int ib_find_gid_by_filter(struct ib_device *device,
920 			  const union ib_gid *gid,
921 			  u8 port_num,
922 			  bool (*filter)(const union ib_gid *gid,
923 					 const struct ib_gid_attr *,
924 					 void *),
925 			  void *context, u16 *index)
926 {
927 	/* Only the RoCE GID table supports a filter function */
928 	if (!rdma_protocol_roce(device, port_num) && filter)
929 		return -EPROTONOSUPPORT;
930 
931 	return ib_cache_gid_find_by_filter(device, gid,
932 					   port_num, filter,
933 					   context, index);
934 }
935 
936 int ib_get_cached_pkey(struct ib_device *device,
937 		       u8                port_num,
938 		       int               index,
939 		       u16              *pkey)
940 {
941 	struct ib_pkey_cache *cache;
942 	unsigned long flags;
943 	int ret = 0;
944 
945 	if (!rdma_is_port_valid(device, port_num))
946 		return -EINVAL;
947 
948 	read_lock_irqsave(&device->cache.lock, flags);
949 
950 	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
951 
952 	if (index < 0 || index >= cache->table_len)
953 		ret = -EINVAL;
954 	else
955 		*pkey = cache->table[index];
956 
957 	read_unlock_irqrestore(&device->cache.lock, flags);
958 
959 	return ret;
960 }
961 EXPORT_SYMBOL(ib_get_cached_pkey);
962 
963 int ib_get_cached_subnet_prefix(struct ib_device *device,
964 				u8                port_num,
965 				u64              *sn_pfx)
966 {
967 	unsigned long flags;
968 	int p;
969 
970 	if (!rdma_is_port_valid(device, port_num))
971 		return -EINVAL;
972 
973 	p = port_num - rdma_start_port(device);
974 	read_lock_irqsave(&device->cache.lock, flags);
975 	*sn_pfx = device->cache.ports[p].subnet_prefix;
976 	read_unlock_irqrestore(&device->cache.lock, flags);
977 
978 	return 0;
979 }
980 EXPORT_SYMBOL(ib_get_cached_subnet_prefix);
981 
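/*
 * Search the cached P_Key table of @port_num for @pkey, ignoring the
 * membership bit. Full-membership entries (bit 15 set) are preferred; a
 * limited-membership match is returned only when no full-membership entry
 * exists.
 */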
982 int ib_find_cached_pkey(struct ib_device *device,
983 			u8                port_num,
984 			u16               pkey,
985 			u16              *index)
986 {
987 	struct ib_pkey_cache *cache;
988 	unsigned long flags;
989 	int i;
990 	int ret = -ENOENT;
991 	int partial_ix = -1;
992 
993 	if (!rdma_is_port_valid(device, port_num))
994 		return -EINVAL;
995 
996 	read_lock_irqsave(&device->cache.lock, flags);
997 
998 	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
999 
1000 	*index = -1;
1001 
1002 	for (i = 0; i < cache->table_len; ++i)
1003 		if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
1004 			if (cache->table[i] & 0x8000) {
1005 				*index = i;
1006 				ret = 0;
1007 				break;
1008 			} else
1009 				partial_ix = i;
1010 		}
1011 
1012 	if (ret && partial_ix >= 0) {
1013 		*index = partial_ix;
1014 		ret = 0;
1015 	}
1016 
1017 	read_unlock_irqrestore(&device->cache.lock, flags);
1018 
1019 	return ret;
1020 }
1021 EXPORT_SYMBOL(ib_find_cached_pkey);
1022 
1023 int ib_find_exact_cached_pkey(struct ib_device *device,
1024 			      u8                port_num,
1025 			      u16               pkey,
1026 			      u16              *index)
1027 {
1028 	struct ib_pkey_cache *cache;
1029 	unsigned long flags;
1030 	int i;
1031 	int ret = -ENOENT;
1032 
1033 	if (!rdma_is_port_valid(device, port_num))
1034 		return -EINVAL;
1035 
1036 	read_lock_irqsave(&device->cache.lock, flags);
1037 
1038 	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
1039 
1040 	*index = -1;
1041 
1042 	for (i = 0; i < cache->table_len; ++i)
1043 		if (cache->table[i] == pkey) {
1044 			*index = i;
1045 			ret = 0;
1046 			break;
1047 		}
1048 
1049 	read_unlock_irqrestore(&device->cache.lock, flags);
1050 
1051 	return ret;
1052 }
1053 EXPORT_SYMBOL(ib_find_exact_cached_pkey);
1054 
1055 int ib_get_cached_lmc(struct ib_device *device,
1056 		      u8                port_num,
1057 		      u8                *lmc)
1058 {
1059 	unsigned long flags;
1060 	int ret = 0;
1061 
1062 	if (!rdma_is_port_valid(device, port_num))
1063 		return -EINVAL;
1064 
1065 	read_lock_irqsave(&device->cache.lock, flags);
1066 	*lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc;
1067 	read_unlock_irqrestore(&device->cache.lock, flags);
1068 
1069 	return ret;
1070 }
1071 EXPORT_SYMBOL(ib_get_cached_lmc);
1072 
1073 int ib_get_cached_port_state(struct ib_device   *device,
1074 			     u8                  port_num,
1075 			     enum ib_port_state *port_state)
1076 {
1077 	unsigned long flags;
1078 	int ret = 0;
1079 
1080 	if (!rdma_is_port_valid(device, port_num))
1081 		return -EINVAL;
1082 
1083 	read_lock_irqsave(&device->cache.lock, flags);
1084 	*port_state = device->cache.ports[port_num
1085 		- rdma_start_port(device)].port_state;
1086 	read_unlock_irqrestore(&device->cache.lock, flags);
1087 
1088 	return ret;
1089 }
1090 EXPORT_SYMBOL(ib_get_cached_port_state);
1091 
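/*
 * For ports that do not run RoCE, mirror the device's GID table into the
 * software cache by calling the driver's query_gid() for each index.
 */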
1092 static int config_non_roce_gid_cache(struct ib_device *device,
1093 				     u8 port, int gid_tbl_len)
1094 {
1095 	struct ib_gid_attr gid_attr = {};
1096 	struct ib_gid_table *table;
1097 	union ib_gid gid;
1098 	int ret = 0;
1099 	int i;
1100 
1101 	gid_attr.device = device;
1102 	gid_attr.port_num = port;
1103 	table = rdma_gid_table(device, port);
1104 
1105 	mutex_lock(&table->lock);
1106 	for (i = 0; i < gid_tbl_len; ++i) {
1107 		if (!device->query_gid)
1108 			continue;
1109 		ret = device->query_gid(device, port, i, &gid);
1110 		if (ret) {
1111 			pr_warn("query_gid failed (%d) for %s (index %d)\n",
1112 				ret, device->name, i);
1113 			goto err;
1114 		}
1115 		gid_attr.index = i;
1116 		add_modify_gid(table, &gid, &gid_attr);
1117 	}
1118 err:
1119 	mutex_unlock(&table->lock);
1120 	return ret;
1121 }
1122 
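/*
 * Re-read the port attributes and rebuild the cached state for @port:
 * the GID table (for non-RoCE ports), the P_Key table, LMC, port state
 * and subnet prefix. When @enforce_security is set, notify the security
 * layer so QP security state can be revalidated.
 */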
1123 static void ib_cache_update(struct ib_device *device,
1124 			    u8                port,
1125 			    bool	      enforce_security)
1126 {
1127 	struct ib_port_attr       *tprops = NULL;
1128 	struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
1129 	int                        i;
1130 	int                        ret;
1131 	struct ib_gid_table	  *table;
1132 
1133 	if (!rdma_is_port_valid(device, port))
1134 		return;
1135 
1136 	table = rdma_gid_table(device, port);
1137 
1138 	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
1139 	if (!tprops)
1140 		return;
1141 
1142 	ret = ib_query_port(device, port, tprops);
1143 	if (ret) {
1144 		pr_warn("ib_query_port failed (%d) for %s\n",
1145 			ret, device->name);
1146 		goto err;
1147 	}
1148 
1149 	if (!rdma_protocol_roce(device, port)) {
1150 		ret = config_non_roce_gid_cache(device, port,
1151 						tprops->gid_tbl_len);
1152 		if (ret)
1153 			goto err;
1154 	}
1155 
1156 	pkey_cache = kmalloc(struct_size(pkey_cache, table,
1157 					 tprops->pkey_tbl_len),
1158 			     GFP_KERNEL);
1159 	if (!pkey_cache)
1160 		goto err;
1161 
1162 	pkey_cache->table_len = tprops->pkey_tbl_len;
1163 
1164 	for (i = 0; i < pkey_cache->table_len; ++i) {
1165 		ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
1166 		if (ret) {
1167 			pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
1168 				ret, device->name, i);
1169 			goto err;
1170 		}
1171 	}
1172 
1173 	write_lock_irq(&device->cache.lock);
1174 
1175 	old_pkey_cache = device->cache.ports[port -
1176 		rdma_start_port(device)].pkey;
1177 
1178 	device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
1179 	device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
1180 	device->cache.ports[port - rdma_start_port(device)].port_state =
1181 		tprops->state;
1182 
1183 	device->cache.ports[port - rdma_start_port(device)].subnet_prefix =
1184 							tprops->subnet_prefix;
1185 	write_unlock_irq(&device->cache.lock);
1186 
1187 	if (enforce_security)
1188 		ib_security_cache_change(device,
1189 					 port,
1190 					 tprops->subnet_prefix);
1191 
1192 	kfree(old_pkey_cache);
1193 	kfree(tprops);
1194 	return;
1195 
1196 err:
1197 	kfree(pkey_cache);
1198 	kfree(tprops);
1199 }
1200 
1201 static void ib_cache_task(struct work_struct *_work)
1202 {
1203 	struct ib_update_work *work =
1204 		container_of(_work, struct ib_update_work, work);
1205 
1206 	ib_cache_update(work->device,
1207 			work->port_num,
1208 			work->enforce_security);
1209 	kfree(work);
1210 }
1211 
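/*
 * Device event handler: for events that can invalidate cached data
 * (port state, LID, P_Key, SM, client re-register or GID changes),
 * schedule ib_cache_task() on ib_wq to refresh the port's cache.
 * P_Key and GID changes additionally request security re-enforcement.
 */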
1212 static void ib_cache_event(struct ib_event_handler *handler,
1213 			   struct ib_event *event)
1214 {
1215 	struct ib_update_work *work;
1216 
1217 	if (event->event == IB_EVENT_PORT_ERR    ||
1218 	    event->event == IB_EVENT_PORT_ACTIVE ||
1219 	    event->event == IB_EVENT_LID_CHANGE  ||
1220 	    event->event == IB_EVENT_PKEY_CHANGE ||
1221 	    event->event == IB_EVENT_SM_CHANGE   ||
1222 	    event->event == IB_EVENT_CLIENT_REREGISTER ||
1223 	    event->event == IB_EVENT_GID_CHANGE) {
1224 		work = kmalloc(sizeof *work, GFP_ATOMIC);
1225 		if (work) {
1226 			INIT_WORK(&work->work, ib_cache_task);
1227 			work->device   = event->device;
1228 			work->port_num = event->element.port_num;
1229 			if (event->event == IB_EVENT_PKEY_CHANGE ||
1230 			    event->event == IB_EVENT_GID_CHANGE)
1231 				work->enforce_security = true;
1232 			else
1233 				work->enforce_security = false;
1234 
1235 			queue_work(ib_wq, &work->work);
1236 		}
1237 	}
1238 }
1239 
1240 int ib_cache_setup_one(struct ib_device *device)
1241 {
1242 	int p;
1243 	int err;
1244 
1245 	rwlock_init(&device->cache.lock);
1246 
1247 	device->cache.ports =
1248 		kcalloc(rdma_end_port(device) - rdma_start_port(device) + 1,
1249 			sizeof(*device->cache.ports),
1250 			GFP_KERNEL);
1251 	if (!device->cache.ports)
1252 		return -ENOMEM;
1253 
1254 	err = gid_table_setup_one(device);
1255 	if (err) {
1256 		kfree(device->cache.ports);
1257 		device->cache.ports = NULL;
1258 		return err;
1259 	}
1260 
1261 	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
1262 		ib_cache_update(device, p + rdma_start_port(device), true);
1263 
1264 	INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
1265 			      device, ib_cache_event);
1266 	ib_register_event_handler(&device->cache.event_handler);
1267 	return 0;
1268 }
1269 
1270 void ib_cache_release_one(struct ib_device *device)
1271 {
1272 	int p;
1273 
1274 	/*
1275 	 * The release function frees all the cache elements.
1276 	 * This function should be called as part of freeing
1277 	 * all the device's resources, once the cache can no
1278 	 * longer be accessed.
1279 	 */
1280 	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
1281 		kfree(device->cache.ports[p].pkey);
1282 
1283 	gid_table_release_one(device);
1284 	kfree(device->cache.ports);
1285 }
1286 
1287 void ib_cache_cleanup_one(struct ib_device *device)
1288 {
1289 	/* The cleanup function unregisters the event handler,
1290 	 * waits for all in-progress workqueue elements and cleans
1291 	 * up the GID cache. This function should be called after
1292 	 * the device was removed from the devices list and all
1293 	 * clients were removed, so the cache exists but is
1294 	 * non-functional and shouldn't be updated anymore.
1295 	 */
1296 	ib_unregister_event_handler(&device->cache.event_handler);
1297 	flush_workqueue(ib_wq);
1298 	gid_table_cleanup_one(device);
1299 }
1300