xref: /illumos-gate/usr/src/uts/common/sys/ib/adapters/tavor/tavor_mr.h (revision 6a634c9dca3093f3922e4b7ab826d7bdf17bf78e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #ifndef	_SYS_IB_ADAPTERS_TAVOR_MR_H
27 #define	_SYS_IB_ADAPTERS_TAVOR_MR_H
28 
29 /*
30  * tavor_mr.h
31  *    Contains all of the prototypes, #defines, and structures necessary
32  *    for the Tavor Memory Region/Window routines.
33  *    Specifically it contains #defines, macros, and prototypes for each of
34  *    the required memory region/window verbs that can be accessed through
35  *    the IBTF's CI interfaces.  In particular each of the prototypes defined
36  *    below is called from a corresponding CI interface routine (as specified
37  *    in the tavor_ci.c file).
38  */
39 
40 #include <sys/types.h>
41 #include <sys/conf.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 
45 #ifdef __cplusplus
46 extern "C" {
47 #endif
48 
/*
 * Default sizing of the MPT table.
 *
 * TAVOR_NUM_MPT_SHIFT_128 and TAVOR_NUM_MPT_SHIFT_256 give the default
 * number of MPT entries, expressed as log 2, for the two supported DDR DIMM
 * sizes (128MB and 256MB respectively).  A DIMM larger than 256MB uses the
 * 256MB profile.  See tavor_cfg.c for more discussion of config profiles.
 *
 * When configuring manually instead of through a config profile, the same
 * quantity is controlled by the "tavor_log_num_mpt" configuration variable.
 * The 'tavor_alt_config_enable' configuration variable must be set first
 * for such an override to take effect.
 *
 * TAVOR_MPT_SIZE_SHIFT is the log 2 of the size of one MPT entry and
 * TAVOR_MPT_SIZE is that size itself (1 << 6 == 64).
 *
 * TAVOR_NUM_MPT_SHIFT_MIN is the minimal configuration value.
 */
#define	TAVOR_NUM_MPT_SHIFT_128		0x14	/* 2^20 MPT entries */
#define	TAVOR_NUM_MPT_SHIFT_256		0x15	/* 2^21 MPT entries */
#define	TAVOR_NUM_MPT_SHIFT_MIN		0xD	/* 2^13 MPT entries */

#define	TAVOR_MPT_SIZE_SHIFT		0x6
#define	TAVOR_MPT_SIZE			(1 << TAVOR_MPT_SIZE_SHIFT)
69 
/*
 * Size of a single MTT entry (TAVOR_MTT_SIZE, with its log 2 in
 * TAVOR_MTT_SIZE_SHIFT) and the number of MTT entries that make up one MTT
 * segment (TAVOR_MTTSEG_SIZE).  The segment size is 8 entries scaled by
 * TAVOR_MTTSEG_SIZE_SHIFT, which is currently zero.
 */
#define	TAVOR_MTT_SIZE_SHIFT		0x3
#define	TAVOR_MTT_SIZE			(1 << TAVOR_MTT_SIZE_SHIFT)

#define	TAVOR_MTTSEG_SIZE_SHIFT		0x0
#define	TAVOR_MTTSEG_SIZE		(8 << TAVOR_MTTSEG_SIZE_SHIFT)

/*
 * Total number of MTT segments, i.e. the MTT table size.  The default is
 * 2M segments (log 2 of 0x15), and it is this default that initializes the
 * "tavor_log_num_mttseg" config variable.
 *
 * Note: with each segment currently holding 8 MTT entries
 * (TAVOR_MTTSEG_SIZE), 2M segments allow for up to 16M MTT entries
 * (i.e. "pages").
 *
 * TAVOR_NUM_MTTSEG_SHIFT_MIN is the minimal configuration value.
 */
#define	TAVOR_NUM_MTTSEG_SHIFT		0x15
#define	TAVOR_NUM_MTTSEG		(1 << TAVOR_NUM_MTTSEG_SHIFT)
#define	TAVOR_NUM_MTTSEG_SHIFT_MIN	0x11

/*
 * Macro to round a number of MTT entries to the number of MTT segments.
 *
 * The argument is biased by (TAVOR_MTTSEG_SIZE - 1) and then shifted right
 * by (TAVOR_MTTSEG_SIZE_SHIFT + TAVOR_MTT_SIZE_SHIFT).  With the current
 * values that is (num + 7) >> 3, i.e. a round-up division by 8.
 *
 * NOTE(review): the shift amount matches log 2 of TAVOR_MTTSEG_SIZE only
 * because TAVOR_MTT_SIZE_SHIFT happens to be 3.  Check what callers pass
 * as "num" before changing either constant.
 */
#define	TAVOR_NUMMTT_TO_MTTSEG(num)				\
	(((num) + TAVOR_MTTSEG_SIZE - 1) >>			\
	    (TAVOR_MTTSEG_SIZE_SHIFT + TAVOR_MTT_SIZE_SHIFT))
101 
/*
 * Value supplied as the "MTT page walk version" in the Tavor INIT_HCA
 * command.
 */
#define	TAVOR_MTT_PG_WALK_VER		0
107 
/*
 * Maximum size (log 2) of a memory region or window.  The value depends on
 * whether the DDR is 128MB or 256MB, and it is used to initialize the
 * "tavor_log_max_mrw_sz" configuration variable.  The two profile values
 * are proportional to the corresponding TAVOR_NUM_MPT_SHIFT_* settings.
 *
 * TAVOR_MAX_MEM_MPT_SHIFT_MIN is the minimal configuration value.
 */
#define	TAVOR_MAX_MEM_MPT_SHIFT_128		0x23	/* log 2: 35 */
#define	TAVOR_MAX_MEM_MPT_SHIFT_256		0x24	/* log 2: 36 */
#define	TAVOR_MAX_MEM_MPT_SHIFT_MIN		0x1E	/* log 2: 30 */
121 
/*
 * "level" values for tavor_mr_deregister().  They specify to what extent a
 * given memory region's resources should be freed up:
 *
 *    TAVOR_MR_DEREG_ALL
 *	Free up all the resources associated with the region.
 *    TAVOR_MR_DEREG_NO_HW2SW_MPT
 *	It is unnecessary to attempt the ownership transfer (from hardware
 *	to software) for the given MPT entry.
 *    TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND
 *	It is unnecessary to attempt the MPT ownership transfer, and it is
 *	also unnecessary to attempt to unbind the memory.
 *
 * In general, the last two are specified when tavor_mr_deregister() is
 * called from tavor_mr_reregister(), where the MPT ownership transfer or
 * the memory unbinding may already have been performed successfully.
 */
#define	TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND	1
#define	TAVOR_MR_DEREG_NO_HW2SW_MPT		2
#define	TAVOR_MR_DEREG_ALL			3
138 
/*
 * TAVOR_MR_REUSE_DMAHDL() is used by tavor_mr_rereg_xlat_helper() to
 * determine whether the DMA handle already held by a region can be reused.
 * A DMA handle that was previously initialized for IOMMU bypass mapping
 * can not be reused to reregister a region for DDI_DMA_STREAMING access.
 *
 * The macro evaluates to zero only when both of the following hold:
 *    - the region's "bi_bypass" is TAVOR_BINDMEM_BYPASS, and
 *    - "flags" has IBT_MR_NONCOHERENT set.
 * In every other case it evaluates to nonzero (the handle may be reused).
 *
 * "flags" is only evaluated when the bypass comparison matches.
 */
#define	TAVOR_MR_REUSE_DMAHDL(mr, flags)				\
	(!(((mr)->mr_bindinfo.bi_bypass == TAVOR_BINDMEM_BYPASS) &&	\
	    ((flags) & IBT_MR_NONCOHERENT)))
148 
149 /*
150  * The tavor_sw_refcnt_t structure is used internally by the Tavor driver to
151  * track all the information necessary to manage shared memory regions.  Since
152  * a shared memory region _will_ have its own distinct MPT entry, but will
153  * _share_ its MTT entries with another region, it is necessary to track the
154  * number of times a given MTT structure is shared.  This ensures that it will
155  * not be prematurely freed up and that can be destroyed only when it is
156  * appropriate to do so.
157  *
158  * Each tavor_sw_refcnt_t structure contains a lock and a reference count
159  * variable which are used to track the necessary information.
160  *
161  * The following macros (below) are used to manipulate and query the MTT
162  * reference count parameters.  TAVOR_MTT_REFCNT_INIT() is used to initialize
163  * a newly allocated tavor_sw_refcnt_t struct (setting the "swrc_refcnt" to 1).
164  * And the TAVOR_MTT_IS_NOT_SHARED() and TAVOR_MTT_IS_SHARED() macros are
165  * used to query the current status of tavor_sw_refcnt_t struct to determine
166  * if its "swrc_refcnt" is one or not.
167  */
168 typedef struct tavor_sw_refcnt_s {
169 	kmutex_t		swrc_lock;
170 	uint_t			swrc_refcnt;
171 } tavor_sw_refcnt_t;
172 _NOTE(DATA_READABLE_WITHOUT_LOCK(tavor_sw_refcnt_t::swrc_refcnt))
173 _NOTE(MUTEX_PROTECTS_DATA(tavor_sw_refcnt_t::swrc_lock,
174     tavor_sw_refcnt_t::swrc_refcnt))
/*
 * MTT reference count helpers.  Each takes a pointer to a structure with a
 * "swrc_refcnt" member (a tavor_sw_refcnt_t) and evaluates it exactly once.
 *    TAVOR_MTT_REFCNT_INIT()   - set the count to 1
 *    TAVOR_MTT_IS_NOT_SHARED() - nonzero when the count is exactly 1
 *    TAVOR_MTT_IS_SHARED()     - nonzero when the count is anything else
 */
#define	TAVOR_MTT_REFCNT_INIT(rc)	((rc)->swrc_refcnt = 1)
#define	TAVOR_MTT_IS_NOT_SHARED(rc)	((rc)->swrc_refcnt == 1)
#define	TAVOR_MTT_IS_SHARED(rc)		(!TAVOR_MTT_IS_NOT_SHARED(rc))
178 
179 
180 /*
181  * The tavor_bind_info_t structure is used internally by the Tavor driver to
182  * track all the information necessary to perform the DMA mappings necessary
183  * for memory registration.  It is specifically passed into both the
184  * tavor_mr_mem_bind() and tavor_mr_mtt_write() functions which perform most
185  * of the necessary operations for Tavor memory registration.
186  *
187  * This structure is used to pass all the information necessary for a call
188  * to either ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle().  Note:
189  * the fields which need to be valid for each type of binding are slightly
190  * different and that it indicated by the value in the "bi_type" field.  The
191  * "bi_type" field may be set to either of the following defined values:
192  * TAVOR_BINDHDL_VADDR (to indicate an "addr" bind) or TAVOR_BINDHDL_BUF (to
193  * indicate a "buf" bind).
194  *
195  * Upon return from tavor_mr_mem_bind(), the tavor_bind_info_t struct will
196  * have its "bi_dmahdl", "bi_dmacookie", and "bi_cookiecnt" fields filled in.
197  * It is these values which are of particular interest to the
198  * tavor_mr_mtt_write() routine (they hold the PCI mapped addresses).
199  *
200  * Once initialized and used in this way, the tavor_bind_info_t will not to be
201  * modified in anyway until it is subsequently passed to tavor_mr_mem_unbind()
202  * where the memory and resources will be unbound and reclaimed.  Note:  the
203  * "bi_free_dmahdl" flag indicated whether the ddi_dma_handle_t should be
204  * freed as part of the tavor_mr_mem_unbind() operation or whether it will
205  * be freed later elsewhere.
206  */
207 typedef struct tavor_bind_info_s {
208 	uint64_t		bi_addr;
209 	uint64_t		bi_len;
210 	struct as		*bi_as;
211 	struct buf		*bi_buf;
212 	ddi_dma_handle_t	bi_dmahdl;
213 	ddi_dma_cookie_t	bi_dmacookie;
214 	uint_t			bi_cookiecnt;
215 	uint_t			bi_type;
216 	uint_t			bi_flags;
217 	uint_t			bi_bypass;
218 	uint_t			bi_free_dmahdl;
219 } tavor_bind_info_t;
/*
 * Values for the "bi_type" field of tavor_bind_info_t.
 * TAVOR_BINDHDL_VADDR indicates an "addr" bind and TAVOR_BINDHDL_BUF a
 * "buf" bind.
 * NOTE(review): TAVOR_BINDHDL_NONE and TAVOR_BINDHDL_UBUF are not described
 * in this header; presumably "no binding" and a bind of user memory
 * respectively.  Confirm against tavor_mr.c.
 */
#define	TAVOR_BINDHDL_NONE		0
#define	TAVOR_BINDHDL_VADDR		1	/* "addr" bind */
#define	TAVOR_BINDHDL_BUF		2	/* "buf" bind */
#define	TAVOR_BINDHDL_UBUF		3
224 
225 /*
226  * The tavor_sw_mr_s structure is also referred to using the "tavor_mrhdl_t"
227  * typedef (see tavor_typedef.h).  It encodes all the information necessary
228  * to track the various resources needed to register, reregister, deregister,
229  * and perform all the myriad other operations on both memory regions _and_
230  * memory windows.
231  *
232  * A pointer to this structure is returned from many of the IBTF's CI verbs
233  * interfaces for memory registration.
234  *
235  * It contains pointers to the various resources allocated for a memory
236  * region, i.e. MPT resource, MTT resource, and MTT reference count resource.
237  * In addition it contains the tavor_bind_info_t struct used for the memory
238  * bind operation on a given memory region.
239  *
240  * It also has a pointers to the associated PD handle, placeholders for access
241  * flags, memory keys, and suggested page size for the region.  It also has
242  * the necessary backpointer to the resource that corresponds to the structure
243  * itself.  And lastly, it contains a placeholder for a callback which should
244  * be called on memory region unpinning.
245  */
246 struct tavor_sw_mr_s {
247 	kmutex_t		mr_lock;
248 	tavor_rsrc_t		*mr_mptrsrcp;
249 	tavor_rsrc_t		*mr_mttrsrcp;
250 	tavor_rsrc_t		*mr_mttrefcntp;
251 	tavor_pdhdl_t		mr_pdhdl;
252 	tavor_bind_info_t	mr_bindinfo;
253 	ibt_mr_attr_flags_t	mr_accflag;
254 	uint32_t		mr_lkey;
255 	uint32_t		mr_rkey;
256 	uint32_t		mr_logmttpgsz;
257 	tavor_rsrc_t		*mr_rsrcp;
258 	uint_t			mr_is_umem;
259 	ddi_umem_cookie_t	mr_umemcookie;
260 	void 			(*mr_umem_cbfunc)(void *, void *);
261 	void			*mr_umem_cbarg1;
262 	void			*mr_umem_cbarg2;
263 };
264 _NOTE(DATA_READABLE_WITHOUT_LOCK(tavor_sw_mr_s::mr_bindinfo
265     tavor_sw_mr_s::mr_lkey
266     tavor_sw_mr_s::mr_is_umem))
267 _NOTE(MUTEX_PROTECTS_DATA(tavor_sw_mr_s::mr_lock,
268     tavor_sw_mr_s::mr_mptrsrcp
269     tavor_sw_mr_s::mr_mttrsrcp
270     tavor_sw_mr_s::mr_mttrefcntp
271     tavor_sw_mr_s::mr_bindinfo
272     tavor_sw_mr_s::mr_lkey
273     tavor_sw_mr_s::mr_rkey
274     tavor_sw_mr_s::mr_logmttpgsz
275     tavor_sw_mr_s::mr_rsrcp
276     tavor_sw_mr_s::mr_is_umem
277     tavor_sw_mr_s::mr_umemcookie
278     tavor_sw_mr_s::mr_umem_cbfunc
279     tavor_sw_mr_s::mr_umem_cbarg1
280     tavor_sw_mr_s::mr_umem_cbarg2))
281 
282 /*
283  * The tavor_mr_options_t structure is used in several of the Tavor memory
284  * registration routines to provide additional option functionality.  When
285  * a NULL pointer is passed in place of a pointer to this struct, it is a
286  * way of specifying the "default" behavior.  Using this structure, however,
287  * is a way of controlling any extended behavior.
288  *
289  * Currently, the only defined "extended" behaviors are for specifying whether
290  * a given memory region should bypass the PCI IOMMU (TAVOR_BINDMEM_BYPASS)
291  * or be mapped into the IOMMU (TAVOR_BINDMEM_NORMAL), for specifying whether
292  * a given ddi_dma_handle_t should be used in the bind operation, and for
293  * specifying whether a memory registration should attempt to return an IB
294  * vaddr which is "zero-based" (aids in alignment contraints for QPs).
295  *
296  * This defaults today to always bypassing the IOMMU (can be changed by using
297  * the "tavor_iommu_bypass" configuration variable), to always allocating
298  * a new dma handle, and to using the virtual address passed in (i.e. not
299  * "zero-based").
300  */
301 typedef struct tavor_mr_options_s {
302 	ddi_dma_handle_t	mro_bind_dmahdl;
303 	uint_t			mro_bind_type;
304 	uint_t			mro_bind_override_addr;
305 } tavor_mr_options_t;
/*
 * IOMMU binding modes.  These are the values stored in the "mro_bind_type"
 * field of tavor_mr_options_t, and TAVOR_MR_REUSE_DMAHDL() compares the
 * "bi_bypass" field of tavor_bind_info_t against TAVOR_BINDMEM_BYPASS.
 *    TAVOR_BINDMEM_NORMAL - map the region into the PCI IOMMU
 *    TAVOR_BINDMEM_BYPASS - bypass the PCI IOMMU
 *
 * Both values are nonzero on purpose: a zero-filled options or bind-info
 * structure must not read as an explicit request for either mode, and in
 * particular must not be mistaken for TAVOR_BINDMEM_BYPASS.
 */
#define	TAVOR_BINDMEM_NORMAL		1
#define	TAVOR_BINDMEM_BYPASS		2
308 
309 int tavor_dma_mr_register(tavor_state_t *state, tavor_pdhdl_t pdhdl,
310     ibt_dmr_attr_t *attr_p, tavor_mrhdl_t *mrhdl);
311 int tavor_mr_register(tavor_state_t *state, tavor_pdhdl_t pdhdl,
312     ibt_mr_attr_t *attr_p, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op);
313 int tavor_mr_register_buf(tavor_state_t *state, tavor_pdhdl_t pdhdl,
314     ibt_smr_attr_t *attrp, struct buf *buf, tavor_mrhdl_t *mrhdl,
315     tavor_mr_options_t *op);
316 int tavor_mr_mtt_bind(tavor_state_t *state, tavor_bind_info_t *bind,
317     ddi_dma_handle_t bind_dmahdl, tavor_rsrc_t **mtt, uint_t *mtt_pgsz_bits);
318 int tavor_mr_mtt_unbind(tavor_state_t *state, tavor_bind_info_t *bind,
319     tavor_rsrc_t *mtt);
320 int tavor_mr_register_shared(tavor_state_t *state, tavor_mrhdl_t mrhdl,
321     tavor_pdhdl_t pdhdl, ibt_smr_attr_t *attr_p, tavor_mrhdl_t *mrhdl_new);
322 int tavor_mr_deregister(tavor_state_t *state, tavor_mrhdl_t *mrhdl,
323     uint_t level, uint_t sleep);
324 int tavor_mr_query(tavor_state_t *state, tavor_mrhdl_t mrhdl,
325     ibt_mr_query_attr_t *attr);
326 int tavor_mr_reregister(tavor_state_t *state, tavor_mrhdl_t mrhdl,
327     tavor_pdhdl_t pdhdl, ibt_mr_attr_t *attr_p, tavor_mrhdl_t *mrhdl_new,
328     tavor_mr_options_t *op);
329 int tavor_mr_reregister_buf(tavor_state_t *state, tavor_mrhdl_t mr,
330     tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
331     tavor_mrhdl_t *mrhdl_new, tavor_mr_options_t *op);
332 int tavor_mr_sync(tavor_state_t *state, ibt_mr_sync_t *mr_segs,
333     size_t num_segs);
334 int tavor_mw_alloc(tavor_state_t *state, tavor_pdhdl_t pdhdl,
335     ibt_mw_flags_t flags, tavor_mwhdl_t *mwhdl);
336 int tavor_mw_free(tavor_state_t *state, tavor_mwhdl_t *mwhdl, uint_t sleep);
337 void tavor_mr_keycalc(tavor_state_t *state, uint32_t indx, uint32_t *key);
338 
339 #ifdef __cplusplus
340 }
341 #endif
342 
343 #endif	/* _SYS_IB_ADAPTERS_TAVOR_MR_H */
344