xref: /illumos-gate/usr/src/uts/common/sys/ib/adapters/tavor/tavor.h (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_SYS_IB_ADAPTERS_TAVOR_H
28 #define	_SYS_IB_ADAPTERS_TAVOR_H
29 
30 
31 /*
32  * tavor.h
33  *    Contains the #defines and typedefs necessary for the Tavor softstate
34  *    structure and for proper attach() and detach() processing.  Also
35  *    includes all the other Tavor header files (and so is the only header
36  *    file that is directly included by the Tavor source files).
37  *    Additionally, this file contains some defines and macros used by
38  *    Tavor TNF tracing mechanism.
39  *    Lastly, this file includes everything necessary for implementing the
40  *    devmap interface and for maintaining the "mapped resource database".
41  */
42 
43 #include <sys/types.h>
44 #include <sys/conf.h>
45 #include <sys/ddi.h>
46 #include <sys/sunddi.h>
47 #include <sys/tnf_probe.h>
48 #include <sys/taskq.h>
49 
50 #include <sys/ib/ibtl/ibci.h>
51 #include <sys/ib/adapters/mlnx_umap.h>
52 
/*
 * First include all the Tavor typedefs, then include all the other Tavor
 * specific headers (many of which depend on the typedefs having already
 * been defined).
 */
58 #include <sys/ib/adapters/tavor/tavor_typedef.h>
59 
60 #include <sys/ib/adapters/tavor/tavor_agents.h>
61 #include <sys/ib/adapters/tavor/tavor_cfg.h>
62 #include <sys/ib/adapters/tavor/tavor_cmd.h>
63 #include <sys/ib/adapters/tavor/tavor_cq.h>
64 #include <sys/ib/adapters/tavor/tavor_event.h>
65 #include <sys/ib/adapters/tavor/tavor_hw.h>
66 #include <sys/ib/adapters/tavor/tavor_ioctl.h>
67 #include <sys/ib/adapters/tavor/tavor_misc.h>
68 #include <sys/ib/adapters/tavor/tavor_mr.h>
69 #include <sys/ib/adapters/tavor/tavor_qp.h>
70 #include <sys/ib/adapters/tavor/tavor_srq.h>
71 #include <sys/ib/adapters/tavor/tavor_rsrc.h>
72 #include <sys/ib/adapters/tavor/tavor_wr.h>
73 
74 #ifdef __cplusplus
75 extern "C" {
76 #endif
77 
/*
 * The following defines and macros are used for Tavor TNF tracing.
 *
 * TAVOR_TNF_ERROR and TAVOR_TNF_TRACE are the key strings handed to the
 * TNF probe macros.  TAVOR_TNF_ENTER(func) and TAVOR_TNF_EXIT(func) invoke
 * TNF_PROBE_0_DEBUG() with the token-pasted names "<func>_start" and
 * "<func>_end" respectively, using the TAVOR_TNF_TRACE keys.
 *
 * Note: TAVOR_TNF_FAIL is used in routines which have many failure cases.
 * It will initialize the "errormsg" and "status" variables (both of which
 * must be declared locally in the routines where this macro is used) for use
 * in a later TNF probe and return from routine.
 *
 * NOTE(review): TAVOR_TNF_FAIL expands to a bare brace block, so
 * "if (x) TAVOR_TNF_FAIL(s, e); else ..." will not compile.  Always use it
 * inside a braced body.
 */
#define	TAVOR_TNF_ERROR			"tavor tavor_error "
#define	TAVOR_TNF_TRACE			"tavor tavor_trace "
#define	TAVOR_TNF_ENTER(func)						\
	TNF_PROBE_0_DEBUG(func##_start, TAVOR_TNF_TRACE, "")
#define	TAVOR_TNF_EXIT(func)						\
	TNF_PROBE_0_DEBUG(func##_end, TAVOR_TNF_TRACE, "")
#define	TAVOR_TNF_FAIL(s, e)						\
{									\
	errormsg = (e);							\
	status = (s);							\
}
96 
/*
 * Size of the VPD header, expressed both in dwords and in bytes.
 * (Presumably "Vital Product Data" -- confirm against the users of these.)
 */
#define	TAVOR_VPD_HDR_DWSIZE		0x10 /* 16 Dwords */
#define	TAVOR_VPD_HDR_BSIZE		0x40 /* 64 Bytes */

/*
 * Number of initial states to setup. Used in call to ddi_soft_state_init()
 */
#define	TAVOR_INITIAL_STATES		3

/*
 * Macro and defines used to calculate device instance number from minor
 * number (and vice versa).  TAVOR_DEV_INSTANCE() keeps only the low
 * TAVOR_MINORNUM_SHIFT bits of the minor number returned by getminor().
 */
#define	TAVOR_MINORNUM_SHIFT		3
#define	TAVOR_DEV_INSTANCE(dev)	(getminor((dev)) &			\
	((1 << TAVOR_MINORNUM_SHIFT) - 1))
112 
/*
 * Locations for the various Tavor hardware PCI BARs (CMD, UAR, DDR)
 */
#define	TAVOR_CMD_BAR			1
#define	TAVOR_UAR_BAR			2
#define	TAVOR_DDR_BAR			3

/*
 * Some defines for the software reset.  These define the value that should
 * be written to begin the reset (TAVOR_SW_RESET_START), the delay before
 * beginning to poll for completion (TAVOR_SW_RESET_DELAY), the in-between
 * polling delay (TAVOR_SW_RESET_POLL_DELAY), and the value that indicates
 * that the reset has not completed (TAVOR_SW_RESET_NOTDONE).
 * Going by the trailing comments below, both delays are in microseconds.
 */
#define	TAVOR_SW_RESET_START		0x00000001
#define	TAVOR_SW_RESET_DELAY		100000		/* 100 ms */
#define	TAVOR_SW_RESET_POLL_DELAY	100		/* 100 us */
#define	TAVOR_SW_RESET_NOTDONE		0xFFFFFFFF

/*
 * These defines are used in the Tavor software reset operation.  They define
 * the total number of PCI registers to read/restore during the reset.  And
 * they also specify two config registers which should not be read or
 * restored.
 */
#define	TAVOR_SW_RESET_NUMREGS		0x40
#define	TAVOR_SW_RESET_REG22_RSVD	0x16
#define	TAVOR_SW_RESET_REG23_RSVD	0x17
140 
/*
 * Macro used to output Tavor warning messages.  Note: Tavor warning messages
 * are only generated when an unexpected condition has been detected.  This
 * can be the result of a software bug or some other problem, but it is more
 * often an indication that the Tavor firmware (and/or hardware) has done
 * something unexpected.  This warning message means that the driver state
 * is unpredictable and that shutdown/restart is suggested.
 *
 * "string" must be a string literal: it is pasted onto the "tavor%d: "
 * prefix by adjacent-literal concatenation and becomes part of the
 * cmn_err() format.  "state" must point at a softstate with "ts_instance".
 */
#define	TAVOR_WARNING(state, string)					\
	cmn_err(CE_WARN, "tavor%d: "string, (state)->ts_instance)
151 
/*
 * Macros used to set attach failure messages.  Also, the attach message buf
 * size is set here.
 *
 * TAVOR_ATTACH_MSG() copies "attach_msg" into "attach_buf", truncating to
 * TAVOR_ATTACH_MSGSIZE bytes (including the terminating NUL).  The message
 * is passed through a "%s" format so that it is always treated as plain
 * text; any '%' characters in the message are copied verbatim rather than
 * being interpreted as conversion specifications.
 *
 * TAVOR_ATTACH_MSG_INIT() resets "attach_buf" to the empty string.
 *
 * Note: both macros carry their own trailing semicolon (kept for
 * compatibility with existing callers), so they must not be used as the
 * unbraced body of an "if" that has an "else".
 */
#define	TAVOR_ATTACH_MSGSIZE	80
#define	TAVOR_ATTACH_MSG(attach_buf, attach_msg)			\
	(void) snprintf((attach_buf), TAVOR_ATTACH_MSGSIZE, "%s",	\
	    (attach_msg));
#define	TAVOR_ATTACH_MSG_INIT(attach_buf)				\
	(attach_buf)[0] = '\0';
161 
162 /*
163  * Macros used for controlling whether or not event callbacks will be forwarded
164  * to the IBTF.  This is necessary because there are certain race conditions
165  * that can occur (e.g. calling IBTF with an asynch event before the IBTF
166  * registration has successfully completed or handling an event after we've
167  * detached from the IBTF.)
168  *
169  * TAVOR_ENABLE_IBTF_CALLB() initializes the "ts_ibtfpriv" field in the Tavor
170  *    softstate.  When "ts_ibtfpriv" is non-NULL, it is OK to forward asynch
171  *    and CQ events to the IBTF.
172  *
173  * TAVOR_DO_IBTF_ASYNC_CALLB() and TAVOR_DO_IBTF_CQ_CALLB() both set and clear
174  *    the "ts_in_evcallb" flag, as necessary, to indicate that an IBTF
175  *    callback is currently in progress.  This is necessary so that we can
176  *    block on this condition in tavor_detach().
177  *
178  * TAVOR_QUIESCE_IBTF_CALLB() is used in tavor_detach() to set the
179  *    "ts_ibtfpriv" to NULL (thereby disabling any further IBTF callbacks)
180  *    and to poll on the "ts_in_evcallb" flag.  When this flag is zero, all
181  *    IBTF callbacks have quiesced and it is safe to continue with detach
182  *    (i.e. continue detaching from IBTF).
183  */
/*
 * TAVOR_ENABLE_IBTF_CALLB() stores the IBTF client handle in the softstate
 * "ts_ibtfpriv" field.  (The macro carries its own trailing semicolon.)
 */
#define	TAVOR_ENABLE_IBTF_CALLB(state, tmp_ibtfpriv)			\
	(state)->ts_ibtfpriv = (tmp_ibtfpriv);

/*
 * TAVOR_DO_IBTF_ASYNC_CALLB() sets "ts_in_evcallb", calls
 * ibc_async_handler() with the softstate's "ts_ibtfpriv" handle, and then
 * clears "ts_in_evcallb" again.  The three statements are wrapped in a
 * brace block so that they stay together as a single statement when the
 * macro is used as the body of an unbraced "if" or loop.
 */
#define	TAVOR_DO_IBTF_ASYNC_CALLB(state, type, event)			\
{									\
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS((state)->ts_in_evcallb))	\
	(state)->ts_in_evcallb = 1;					\
	ibc_async_handler((state)->ts_ibtfpriv, (type), (event));	\
	(state)->ts_in_evcallb = 0;					\
}

/*
 * TAVOR_DO_IBTF_CQ_CALLB() is the completion queue counterpart: it sets
 * "ts_in_evcallb", calls ibc_cq_handler() with the softstate's
 * "ts_ibtfpriv" handle and the CQ's "cq_hdlrarg", and then clears
 * "ts_in_evcallb".  Brace-wrapped for the same reason as above.
 */
#define	TAVOR_DO_IBTF_CQ_CALLB(state, cq)				\
{									\
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS((state)->ts_in_evcallb))	\
	(state)->ts_in_evcallb = 1;					\
	ibc_cq_handler((state)->ts_ibtfpriv, (cq)->cq_hdlrarg);		\
	(state)->ts_in_evcallb = 0;					\
}
198 
/*
 * TAVOR_QUIESCE_IBTF_CALLB() sets "ts_ibtfpriv" to NULL and then polls the
 * "ts_in_evcallb" flag, waiting TAVOR_QUIESCE_IBTF_CALLB_POLL_DELAY (via
 * drv_usecwait()) between checks, until the flag reads zero or
 * TAVOR_QUIESCE_IBTF_CALLB_POLL_MAX polls have been made.  Note that the
 * macro gives no indication of whether it stopped because the flag cleared
 * or because the poll limit was reached.
 *
 * Every use of "state" is parenthesized so that any pointer expression
 * (e.g. "&softstate") may be passed.  The macro declares a local named
 * "count", so the "state" argument must not itself refer to a variable of
 * that name.
 */
#define	TAVOR_QUIESCE_IBTF_CALLB(state)					\
{									\
	uint_t		count = 0;					\
									\
	(state)->ts_ibtfpriv = NULL;					\
	while (((state)->ts_in_evcallb != 0) &&				\
	    (count++ < TAVOR_QUIESCE_IBTF_CALLB_POLL_MAX)) {		\
		drv_usecwait(TAVOR_QUIESCE_IBTF_CALLB_POLL_DELAY);	\
	}								\
}

/*
 * Defines used by the TAVOR_QUIESCE_IBTF_CALLB() macro to determine the
 * duration and number of times (at maximum) to poll while waiting for IBTF
 * callbacks to quiesce.
 */
#define	TAVOR_QUIESCE_IBTF_CALLB_POLL_DELAY	1
#define	TAVOR_QUIESCE_IBTF_CALLB_POLL_MAX	1000000
217 
/*
 * Defines used to determine the device mode to which Tavor driver has been
 * attached.  Each macro reads the "device-id" and "vendor-id" properties of
 * "dip" with ddi_prop_get_int() (using -1 as the default, which matches
 * none of the IDs below) and requires the vendor ID to be 0x15b3.
 *
 * TAVOR_IS_MAINTENANCE_MODE() returns true when the device has come up in
 * the "maintenance mode" (device ID 0x5a45 or 0x6279).  In this mode, no
 * InfiniBand interfaces are enabled, but the device's firmware can be
 * updated/flashed (and test/debug interfaces should be useable).
 * TAVOR_IS_COMPAT_MODE() returns true for device ID 0x6278.
 * TAVOR_IS_HCA_MODE() returns true when the device has come up in the normal
 * HCA mode (device ID 0x5a44).  In this mode, all necessary InfiniBand
 * interfaces are enabled (and, if necessary, Tavor firmware can be
 * updated/flashed).
 *
 * Note: "dip" is evaluated more than once by each macro.
 */
#define	TAVOR_IS_MAINTENANCE_MODE(dip)					\
	(((ddi_prop_get_int(DDI_DEV_T_ANY, (dip), DDI_PROP_DONTPASS,	\
	"device-id", -1) == 0x5a45) ||					\
	(ddi_prop_get_int(DDI_DEV_T_ANY, (dip), DDI_PROP_DONTPASS,	\
	"device-id", -1) == 0x6279)) &&					\
	(ddi_prop_get_int(DDI_DEV_T_ANY, (dip), DDI_PROP_DONTPASS,	\
	"vendor-id", -1) == 0x15b3))
#define	TAVOR_IS_COMPAT_MODE(dip)					\
	((ddi_prop_get_int(DDI_DEV_T_ANY, (dip), DDI_PROP_DONTPASS,	\
	"device-id", -1) == 0x6278) &&					\
	(ddi_prop_get_int(DDI_DEV_T_ANY, (dip), DDI_PROP_DONTPASS,	\
	"vendor-id", -1) == 0x15b3))
#define	TAVOR_IS_HCA_MODE(dip)						\
	((ddi_prop_get_int(DDI_DEV_T_ANY, (dip), DDI_PROP_DONTPASS,	\
	"device-id", -1) == 0x5a44) &&					\
	(ddi_prop_get_int(DDI_DEV_T_ANY, (dip), DDI_PROP_DONTPASS,	\
	"vendor-id", -1) == 0x15b3))
245 
/*
 * Values identifying the mode in which the driver attached to the device.
 */
#define	TAVOR_MAINTENANCE_MODE		1
#define	TAVOR_COMPAT_MODE		2
#define	TAVOR_HCA_MODE			3

/*
 * Used to determine if the device is operational, or not in maintenance mode.
 * This means either the driver has attached successfully against an arbel
 * device in tavor compatibility mode, or against a tavor device in full HCA
 * mode.
 *
 * "mode" is parenthesized at each use so that an arbitrary expression (for
 * example "flags & mask") is compared as a whole.  Note that "mode" is
 * evaluated twice.
 */
#define	TAVOR_IS_OPERATIONAL(mode)					\
	((mode) == TAVOR_COMPAT_MODE || (mode) == TAVOR_HCA_MODE)
258 
/*
 * Used to determine if parent bridge is a PCI bridge; used in software reset.
 * True when the "device-id" property of "dip" is 0x5a46.  Unlike the mode
 * macros, the "vendor-id" property is not checked here.
 * NOTE(review): presumably "dip" is the parent's dev_info node -- confirm
 * against the caller.
 */
#define	TAVOR_PARENT_IS_BRIDGE(dip)					\
	((ddi_prop_get_int(DDI_DEV_T_ANY, (dip), DDI_PROP_DONTPASS,	\
	"device-id", -1) == 0x5a46))
265 
/*
 * The following define is used (in tavor_umap_db_set_onclose_cb()) to
 * indicate that a cleanup callback is needed to undo initialization done
 * by the firmware flash burn code.  It is a single-bit flag (bit 0).
 */
#define	TAVOR_ONCLOSE_FLASH_INPROGRESS		(1 << 0)
272 
/*
 * The following enumerated type and structures are used during driver
 * initialization.  Note: The TAVOR_DRV_CLEANUP_ALL type is used as a marker
 * for end of the cleanup steps.  No cleanup steps should be added after
 * TAVOR_DRV_CLEANUP_ALL.  Any additional steps should be added before it.
 * The enumerators take consecutive values starting at zero, so
 * TAVOR_DRV_CLEANUP_ALL always compares greater than every LEVELn value.
 */
typedef enum {
	TAVOR_DRV_CLEANUP_LEVEL0,
	TAVOR_DRV_CLEANUP_LEVEL1,
	TAVOR_DRV_CLEANUP_LEVEL2,
	TAVOR_DRV_CLEANUP_LEVEL3,
	TAVOR_DRV_CLEANUP_LEVEL4,
	TAVOR_DRV_CLEANUP_LEVEL5,
	TAVOR_DRV_CLEANUP_LEVEL6,
	TAVOR_DRV_CLEANUP_LEVEL7,
	TAVOR_DRV_CLEANUP_LEVEL8,
	TAVOR_DRV_CLEANUP_LEVEL9,
	TAVOR_DRV_CLEANUP_LEVEL10,
	TAVOR_DRV_CLEANUP_LEVEL11,
	TAVOR_DRV_CLEANUP_LEVEL12,
	TAVOR_DRV_CLEANUP_LEVEL13,
	TAVOR_DRV_CLEANUP_LEVEL14,
	/* No more driver cleanup steps below this point! */
	TAVOR_DRV_CLEANUP_ALL
} tavor_drv_cleanup_level_t;
298 
/*
 * The tavor_mem_alloc_hdl_t structure stores the DMA handle and access
 * handle for the new ibc_alloc_io_mem calls.  Note that
 * tavor_mem_alloc_hdl_t is a pointer type (a pointer to
 * struct tavor_mem_alloc_hdl_s), not the structure itself.
 */
typedef struct tavor_mem_alloc_hdl_s {
	ddi_dma_handle_t tavor_dma_hdl;
	ddi_acc_handle_t tavor_acc_hdl;
} *tavor_mem_alloc_hdl_t;
307 
308 
/*
 * The tavor_cmd_reg_t structure is used to hold the address of each of
 * the most frequently accessed hardware registers.  Specifically, it holds
 * the HCA Command Registers (HCR, used to pass command and mailbox
 * information back and forth to Tavor firmware) and the lock used to guarantee
 * mutually exclusive access to the registers.  It also holds the Event Cause
 * Register (ECR) and its related clear register.  These are used to indicate
 * during interrupt processing which EQs have fired and require servicing.
 * Related to this, is the "clr_int" register which is used to clear the
 * interrupt once all EQs have been serviced.
 * Finally, there is the software reset register which is used to reinitialize
 * the Tavor device and to put it into a known state at driver startup time.
 */
typedef struct tavor_cmd_reg_s {
	tavor_hw_hcr_t	*hcr;
	kmutex_t	hcr_lock;
	uint64_t	*ecr;
	uint64_t	*clr_ecr;
	uint64_t	*clr_int;
	uint32_t	*sw_reset;
} tavor_cmd_reg_t;
_NOTE(MUTEX_PROTECTS_DATA(tavor_cmd_reg_t::hcr_lock,
    tavor_cmd_reg_t::hcr))
334 
335 
/*
 * The tavor_state_t structure is the Tavor software state structure.  It
 * contains all the pointers and placeholders for everything that the Tavor
 * driver needs to properly operate.  One of these structures exists for
 * every instance of the Tavor driver.
 */
struct tavor_state_s {
	dev_info_t		*ts_dip;
	int			ts_instance;

	/* Tavor interrupt/MSI information */
	int			ts_intr_types_avail;
	uint_t			ts_intr_type_chosen;
	int			ts_intrmsi_count;
	int			ts_intrmsi_avail;
	int			ts_intrmsi_allocd;
	ddi_intr_handle_t	ts_intrmsi_hdl;
	uint_t			ts_intrmsi_pri;
	int			ts_intrmsi_cap;

	/* Tavor device operational mode */
	int			ts_operational_mode;

	/* Attach buffer saved per state to store detailed attach errors */
	char			ts_attach_buf[TAVOR_ATTACH_MSGSIZE];

	/*
	 * Tavor NodeGUID, SystemImageGUID, NodeDescription, HCA name,
	 * and HCA part number.
	 */
	uint64_t		ts_nodeguid;
	uint64_t		ts_sysimgguid;
	char			ts_nodedesc[64];
	char			ts_hca_name[64];
	char			ts_hca_pn[64];
	int			ts_hca_pn_len;

	/* Info passed to IBTF during registration */
	ibc_hca_info_t		ts_ibtfinfo;
	ibc_clnt_hdl_t		ts_ibtfpriv;

	/*
	 * Tavor register mapping.  Holds the device access attributes,
	 * kernel mapped addresses, and DDI access handles for each of
	 * Tavor's three types of address register (CMD, UAR, and DDR).
	 */
	ddi_device_acc_attr_t	ts_reg_accattr;
	caddr_t			ts_reg_cmd_baseaddr;	/* Tavor CMD BAR */
	ddi_acc_handle_t	ts_reg_cmdhdl;
	caddr_t			ts_reg_uar_baseaddr;	/* Tavor UAR BAR */
	ddi_acc_handle_t	ts_reg_uarhdl;
	caddr_t			ts_reg_ddr_baseaddr;	/* Tavor DDR BAR */
	ddi_acc_handle_t	ts_reg_ddrhdl;

	/*
	 * Tavor PCI config space registers.  These two arrays are used to
	 * save and restore the PCI config registers before and after a
	 * software reset.  Note: We must save away both our own registers
	 * and our parent's (the "virtual" PCI bridge in the device) because
	 * the software reset will reset both sets.
	 */
	uint32_t		ts_cfg_data[TAVOR_SW_RESET_NUMREGS];
	uint32_t		ts_cfg_pdata[TAVOR_SW_RESET_NUMREGS];

	/*
	 * Tavor UAR page resources.  Holds the resource pointers for
	 * UAR page #0 (reserved) and for UAR page #1 (used for kernel
	 * driver doorbells).  In addition, we save a pointer to the
	 * UAR page #1 doorbells which will be used throughout the driver
	 * whenever it is necessary to ring one of them.  And, in case we
	 * are unable to do 64-bit writes to the page (because of system
	 * architecture), we include a lock (to ensure atomic 64-bit access).
	 */
	tavor_rsrc_t		*ts_uarpg0_rsrc_rsrvd;
	tavor_rsrc_t		*ts_uarpg1_rsrc;
	tavor_hw_uar_t		*ts_uar;
	kmutex_t		ts_uar_lock;

	/*
	 * Used during a call to open() if we are in maintenance mode, this
	 * field serves as a semi-unique rolling count index value, used only
	 * in the setup of umap_db entries.  This is primarily needed so that
	 * firmware device access ioctl operations can still be guaranteed to
	 * close in the event of an unplanned process exit, even in maintenance
	 * mode.
	 */
	uint_t			ts_open_tr_indx;

	/*
	 * Tavor command registers.  This structure contains the addresses
	 * for each of the most frequently accessed CMD registers.  Since
	 * almost all accesses to the Tavor hardware are through the Tavor
	 * command interface (i.e. the HCR), we save away the pointer to
	 * the HCR, as well as pointers to the ECR and INT registers (as
	 * well as their corresponding "clear" registers) for interrupt
	 * processing.  And we also save away a pointer to the software
	 * reset register (see above).
	 */
	tavor_cmd_reg_t		ts_cmd_regs;

	/*
	 * Tavor resource pointers.  The following are pointers to the vmem
	 * arena (created to manage the DDR memory), the kmem cache (from
	 * which the Tavor resource handles are allocated), and the array
	 * of "resource pools" (which store all the pertinent information
	 * necessary to manage each of the various types of resources that
	 * are used by the Tavor driver).  See tavor_rsrc.h for more detail.
	 */
	vmem_t			*ts_ddrvmem;
	kmem_cache_t		*ts_rsrc_cache;
	tavor_rsrc_pool_info_t	*ts_rsrc_hdl;

	/*
	 * Tavor mailbox lists.  These hold the information necessary to
	 * manage the pools of pre-allocated Tavor mailboxes (both "In" and
	 * "Out" type).  See tavor_cmd.h for more detail.
	 */
	tavor_mboxlist_t	ts_in_mblist;
	tavor_mboxlist_t	ts_out_mblist;

	/*
	 * Tavor interrupt mailbox lists.  We allocate both an "In" mailbox
	 * and an "Out" type mailbox for the interrupt context.  This is in
	 * order to guarantee that a mailbox entry will always be available in
	 * the interrupt context, and we can NOSLEEP without having to worry
	 * about possible failure allocating the mbox.  We create this as an
	 * mboxlist so that we have the potential for having multiple mboxes
	 * available based on the number of interrupts we can receive at once.
	 */
	tavor_mboxlist_t		ts_in_intr_mblist;
	tavor_mboxlist_t		ts_out_intr_mblist;

	/*
	 * Tavor outstanding command list.  Used to hold all the information
	 * necessary to manage the Tavor "outstanding command list".  See
	 * tavor_cmd.h for more detail.
	 */
	tavor_cmdlist_t		ts_cmd_list;

	/*
	 * This structure contains the Tavor driver's "configuration profile".
	 * This is the collected set of configuration information, such as
	 * number of QPs, CQs, mailboxes and other resources, sizes of
	 * individual resources, other system level configuration information,
	 * etc.  See tavor_cfg.h for more detail.
	 */
	tavor_cfg_profile_t	*ts_cfg_profile;

	/*
	 * This flag contains the profile setting, selecting which profile the
	 * driver would use.  This is needed in the case where we have to
	 * fallback to a smaller profile based on some DDR conditions.  If we
	 * don't fallback, then it is set to the size of DDR in the system.
	 */
	uint32_t		ts_cfg_profile_setting;

	/*
	 * The following are a collection of resource handles used by the
	 * Tavor driver (internally).  First is the protection domain (PD)
	 * handle that is used when mapping all kernel memory (work queues,
	 * completion queues, etc).  Next is an array of EQ handles.  This
	 * array is indexed by EQ number and allows the Tavor driver to quickly
	 * convert an EQ number into the software structure associated with the
	 * given EQ.  Likewise, we have three arrays for CQ, QP and SRQ
	 * handles.  These arrays are also indexed by CQ, QP or SRQ number and
	 * allow the driver to quickly find the corresponding CQ, QP or SRQ
	 * software structure.  Note: while the EQ table is of fixed size
	 * (because there are a maximum of 64 EQs), each of the CQ, QP and SRQ
	 * handle lists must be allocated at driver startup.
	 */
	tavor_pdhdl_t		ts_pdhdl_internal;
	tavor_eqhdl_t		ts_eqhdl[TAVOR_NUM_EQ];
	tavor_cqhdl_t		*ts_cqhdl;
	tavor_qphdl_t		*ts_qphdl;
	tavor_srqhdl_t		*ts_srqhdl;

	/*
	 * The AVL tree is used to store information regarding QP number
	 * allocations.  The lock protects access to the AVL tree.
	 */
	avl_tree_t		ts_qpn_avl;
	kmutex_t		ts_qpn_avl_lock;

	/*
	 * This field is used to indicate whether or not the Tavor driver is
	 * currently in an IBTF event callback elsewhere in the system.  Note:
	 * It is "volatile" because we intend to poll on this value - in
	 * tavor_detach() - until we are assured that no further IBTF callbacks
	 * are currently being processed.
	 */
	volatile uint32_t	ts_in_evcallb;

	/*
	 * The following structures are used to store the results of several
	 * device query commands passed to the Tavor hardware at startup.
	 * Specifically, we have hung onto the results of QUERY_DDR (which
	 * gives information about how much DDR memory is present and where
	 * it is located), QUERY_FW (which gives information about firmware
	 * version numbers and the location and extent of firmware's footprint
	 * in DDR), QUERY_DEVLIM (which gives the device limitations/resource
	 * maximums), QUERY_ADAPTER (which gives additional miscellaneous
	 * information), and INIT/QUERY_HCA (which serves the purpose of
	 * recording what configuration information was passed to the firmware
	 * when the HCA was initialized).
	 */
	struct tavor_hw_queryddr_s	ts_ddr;
	struct tavor_hw_queryfw_s	ts_fw;
	struct tavor_hw_querydevlim_s	ts_devlim;
	struct tavor_hw_queryadapter_s	ts_adapter;
	struct tavor_hw_initqueryhca_s	ts_hcaparams;

	/*
	 * The following are used for managing special QP resources.
	 * Specifically, we have a lock, a set of flags (in "ts_spec_qpflags")
	 * used to track the special QP resources, and two Tavor resource
	 * handle pointers.  Each resource handle actually corresponds to two
	 * consecutive QP contexts (one per port) for each special QP type.
	 */
	kmutex_t		ts_spec_qplock;
	uint_t			ts_spec_qpflags;
	tavor_rsrc_t		*ts_spec_qp0;
	tavor_rsrc_t		*ts_spec_qp1;

	/*
	 * Related in some ways to the special QP handling above are these
	 * resources which are used specifically for implementing the Tavor
	 * agents (SMA, PMA, and BMA).  Although each of these agents does
	 * little more than intercept the appropriate incoming MAD and forward
	 * it along to the firmware (see tavor_agents.c for more details), we
	 * do still use a task queue to queue them up.  We can also configure
	 * the driver to force firmware handling for certain classes of MAD,
	 * and, therefore, we require the agent list and number of agents
	 * in order to know what needs to be torn down at detach() time.
	 */
	tavor_agent_list_t	*ts_agents;
	ddi_taskq_t		*ts_taskq_agents;
	uint_t			ts_num_agents;

	/*
	 * Multicast group lists.  These are used to track the "shadow" MCG
	 * lists that speed up the processing of attach and detach multicast
	 * group operations.  See tavor_misc.h for more details.  Note: we
	 * need the pointer to the "temporary" MCG entry here primarily
	 * because the size of a given MCG entry is configurable.  Therefore,
	 * it is impossible to put this variable on the stack.  And rather
	 * than allocate and deallocate the entry multiple times, we choose
	 * instead to preallocate it once and reuse it over and over again.
	 */
	kmutex_t		ts_mcglock;
	tavor_mcghdl_t		ts_mcghdl;
	tavor_hw_mcg_t		*ts_mcgtmp;

	/*
	 * Used for tracking Tavor kstat information
	 */
	tavor_ks_info_t		*ts_ks_info;

	/*
	 * Used for Tavor info ioctl used by VTS
	 */
	kmutex_t		ts_info_lock;

	/*
	 * Used for Tavor FW flash burning.  They are used exclusively
	 * within the ioctl calls for use when accessing the tavor
	 * flash device.
	 */
	kmutex_t		ts_fw_flashlock;
	int			ts_fw_flashstarted;
	dev_t			ts_fw_flashdev;
	uint32_t		ts_fw_log_sector_sz;
	uint32_t		ts_fw_device_sz;
	uint32_t		ts_fw_flashbank;
	uint32_t		*ts_fw_sector;
	uint32_t		ts_fw_gpio[4];
	ddi_acc_handle_t	ts_pci_cfghdl;		/* PCI cfg handle */
	int			ts_fw_cmdset;

	/* Tavor fastreboot support */
	boolean_t		ts_quiescing;		/* in fastreboot */
};
_NOTE(MUTEX_PROTECTS_DATA(tavor_state_s::ts_fw_flashlock,
    tavor_state_s::ts_fw_flashstarted
    tavor_state_s::ts_fw_flashdev
    tavor_state_s::ts_fw_log_sector_sz
    tavor_state_s::ts_fw_device_sz))
_NOTE(MUTEX_PROTECTS_DATA(tavor_state_s::ts_spec_qplock,
    tavor_state_s::ts_spec_qpflags
    tavor_state_s::ts_spec_qp0
    tavor_state_s::ts_spec_qp1))
_NOTE(MUTEX_PROTECTS_DATA(tavor_state_s::ts_mcglock,
    tavor_state_s::ts_mcghdl
    tavor_state_s::ts_mcgtmp))
_NOTE(DATA_READABLE_WITHOUT_LOCK(tavor_state_s::ts_in_evcallb
    tavor_state_s::ts_fw_log_sector_sz
    tavor_state_s::ts_fw_device_sz
    tavor_state_s::ts_fw_sector
    tavor_state_s::ts_spec_qpflags
    tavor_state_s::ts_spec_qp0
    tavor_state_s::ts_spec_qp1))
_NOTE(MUTEX_PROTECTS_DATA(tavor_state_s::ts_qpn_avl_lock,
    tavor_state_s::ts_qpn_avl))
638 
/*
 * TAVOR_IN_FASTREBOOT() shows if the Tavor driver is at fastreboot, i.e.
 * whether the "ts_quiescing" field of the softstate is B_TRUE.
 * This macro should be used to check if the mutex lock can be used
 * since the lock cannot be used if the driver is in the quiesce mode.
 * "state" is parenthesized so that any pointer expression may be passed.
 */
#define	TAVOR_IN_FASTREBOOT(state)	((state)->ts_quiescing == B_TRUE)
645 
/*
 * Bit positions in the "ts_spec_qpflags" field above.  The flags are (from
 * least significant to most): (QP0,Port1), (QP0,Port2), (QP1,Port1), and
 * (QP1,Port2).  The *_RSRC values are the bit position of the first flag of
 * each pair; the *_RSRC_MASK values cover both ports' bits for that QP type
 * and are there to help with some specific allocation and freeing
 * operations.
 */
#define	TAVOR_SPECIAL_QP0_RSRC		0
#define	TAVOR_SPECIAL_QP0_RSRC_MASK	0x3
#define	TAVOR_SPECIAL_QP1_RSRC		2
#define	TAVOR_SPECIAL_QP1_RSRC_MASK	0xC
656 
657 
/*
 * These flags specify additional behaviors on database access.
 * TAVOR_UMAP_DB_REMOVE, for example, specifies that (if found) the database
 * entry should be removed from the database.  TAVOR_UMAP_DB_IGNORE_INSTANCE
 * specifies that a particular database query should ignore the value in the
 * "tdb_instance" field as a criterion for the search.
 */
#define	TAVOR_UMAP_DB_REMOVE		(1 << 0)
#define	TAVOR_UMAP_DB_IGNORE_INSTANCE	(1 << 1)
667 
668 
/*
 * The tavor_umap_db_t structure contains what is referred to throughout the
 * driver code as the "userland resources database".  This structure contains
 * all the necessary information to track resources that have been prepared
 * for direct-from-userland access.  There is an AVL tree ("tdl_umapdb_avl")
 * which consists of the "tavor_umap_db_entry_t" (below) and a lock
 * ("tdl_umapdb_lock") to ensure atomic access when adding or removing
 * entries from the database.
 */
typedef struct tavor_umap_db_s {
	kmutex_t		tdl_umapdb_lock;
	avl_tree_t		tdl_umapdb_avl;
} tavor_umap_db_t;
681 
/*
 * The tavor_umap_db_priv_t structure currently contains information necessary
 * to provide the "on close" callback to the firmware flash interfaces.  It
 * is intended that this structure could be extended to enable other "on
 * close" callbacks as well.
 *
 * "tdp_cb" is the callback function and "tdp_arg" is the opaque pointer
 * of the type that callback accepts (presumably passed to it when it is
 * invoked -- confirm in tavor_umap.c).
 */
typedef struct tavor_umap_db_priv_s {
	void		(*tdp_cb)(void *);
	void		*tdp_arg;
} tavor_umap_db_priv_t;
692 
/*
 * The tavor_umap_db_common_t structure contains fields which are common
 * between the database entries ("tavor_umap_db_entry_t") and the structure
 * used to contain the search criteria ("tavor_umap_db_query_t").  This
 * structure contains a key, a resource type, an instance (corresponding to
 * the driver instance which inserted the database entry), and a "value"
 * field.  Typically, "tdb_value" is a pointer to a Tavor resource object.
 * Although for memory regions, the value field corresponds to the
 * ddi_umem_cookie_t for the pinned userland memory.
 * The structure also includes a placeholder for private data ("tdb_priv").
 * Currently this data is being used for holding "on close" callback
 * information to allow certain kinds of cleanup even if a userland process
 * prematurely exits.
 */
typedef struct tavor_umap_db_common_s {
	uint64_t		tdb_key;
	uint64_t		tdb_value;
	uint_t			tdb_type;
	uint_t			tdb_instance;
	void			*tdb_priv;
} tavor_umap_db_common_t;
714 
/*
 * The tavor_umap_db_entry_t structure is the entry in the "userland
 * resources database".  As required by the AVL framework, each entry
 * contains an "avl_node_t".  Then, as required to implement the database,
 * each entry contains a "tavor_umap_db_common_t" structure used to contain
 * all of the relevant fields (key, value, type, instance, private data).
 */
typedef struct tavor_umap_db_entry_s {
	avl_node_t		tdbe_avlnode;
	tavor_umap_db_common_t	tdbe_common;
} tavor_umap_db_entry_t;
726 
/*
 * The tavor_umap_db_query_t structure is used in queries to the "userland
 * resources database".  In addition to the "tavor_umap_db_common_t" structure
 * used to contain the various search criteria, this structure also contains
 * a flags field "tqdb_flags" which can be used to specify additional behaviors
 * (the TAVOR_UMAP_DB_* flags).  Specifically, the flags field can be used to
 * specify that an entry should be removed from the database, if found, and
 * to specify whether the database lookup should consider "tdb_instance" in
 * the search.
 */
typedef struct tavor_umap_db_query_s {
	uint_t			tqdb_flags;
	tavor_umap_db_common_t	tqdb_common;
} tavor_umap_db_query_t;
_NOTE(MUTEX_PROTECTS_DATA(tavor_umap_db_s::tdl_umapdb_lock,
    tavor_umap_db_entry_s::tdbe_avlnode
    tavor_umap_db_entry_s::tdbe_common.tdb_key
    tavor_umap_db_entry_s::tdbe_common.tdb_value
    tavor_umap_db_entry_s::tdbe_common.tdb_type
    tavor_umap_db_entry_s::tdbe_common.tdb_instance))
747 
/*
 * The tavor_devmap_track_t structure contains all the necessary information
 * to track resources that have been mapped through devmap.  There is a
 * back-pointer to the Tavor softstate ("tdt_state"), the logical offset
 * corresponding with the mapped resource ("tdt_offset"), the size of the
 * mapped resource ("tdt_size"; zero indicates an "invalid mapping"), and a
 * reference count and lock ("tdt_refcnt", "tdt_lock") used to determine when
 * to free the structure (specifically, this is necessary to handle partial
 * unmappings).
 */
typedef struct tavor_devmap_track_s {
	tavor_state_t	*tdt_state;
	uint64_t	tdt_offset;
	uint_t		tdt_size;
	int		tdt_refcnt;
	kmutex_t	tdt_lock;
} tavor_devmap_track_t;
764 
765 
/*
 * Defined in tavor_umap.c
 *
 * These are the devmap entry point, the CI data in/out routines, and the
 * "userland resources database" interfaces (init/fini, entry alloc/free,
 * add, find, and the "on close" callback management).
 * NOTE(review): the "_nolock" variants presumably expect the caller to
 * already hold the database lock -- confirm in tavor_umap.c.
 */
int tavor_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model);
ibt_status_t tavor_umap_ci_data_in(tavor_state_t *state,
    ibt_ci_data_flags_t flags, ibt_object_type_t object, void *hdl,
    void *data_p, size_t data_sz);
ibt_status_t tavor_umap_ci_data_out(tavor_state_t *state,
    ibt_ci_data_flags_t flags, ibt_object_type_t object, void *hdl,
    void *data_p, size_t data_sz);
void tavor_umap_db_init(void);
void tavor_umap_db_fini(void);
tavor_umap_db_entry_t *tavor_umap_db_alloc(uint_t instance, uint64_t key,
    uint_t type, uint64_t value);
void tavor_umap_db_free(tavor_umap_db_entry_t *umapdb);
void tavor_umap_db_add(tavor_umap_db_entry_t *umapdb);
void tavor_umap_db_add_nolock(tavor_umap_db_entry_t *umapdb);
int tavor_umap_db_find(uint_t instance, uint64_t key, uint_t type,
    uint64_t *value, uint_t flags, tavor_umap_db_entry_t **umapdb);
int tavor_umap_db_find_nolock(uint_t instance, uint64_t key, uint_t type,
    uint64_t *value, uint_t flags, tavor_umap_db_entry_t **umapdb);
void tavor_umap_umemlock_cb(ddi_umem_cookie_t *umem_cookie);
int tavor_umap_db_set_onclose_cb(dev_t dev, uint64_t flag,
    void (*callback)(void *), void *arg);
int tavor_umap_db_clear_onclose_cb(dev_t dev, uint64_t flag);
void tavor_umap_db_handle_onclose_cb(tavor_umap_db_priv_t *priv);
791 
792 #ifdef __cplusplus
793 }
794 #endif
795 
796 #endif	/* _SYS_IB_ADAPTERS_TAVOR_H */
797