xref: /illumos-gate/usr/src/uts/common/sys/ib/adapters/tavor/tavor_mr.h (revision cd3e933325e68e23516a196a8fea7f49b1e497c3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_SYS_IB_ADAPTERS_TAVOR_MR_H
28 #define	_SYS_IB_ADAPTERS_TAVOR_MR_H
29 
30 /*
31  * tavor_mr.h
32  *    Contains all of the prototypes, #defines, and structures necessary
33  *    for the Tavor Memory Region/Window routines.
34  *    Specifically it contains #defines, macros, and prototypes for each of
35  *    the required memory region/window verbs that can be accessed through
36  *    the IBTF's CI interfaces.  In particular each of the prototypes defined
37  *    below is called from a corresponding CI interface routine (as specified
38  *    in the tavor_ci.c file).
39  */
40 
41 #include <sys/types.h>
42 #include <sys/conf.h>
43 #include <sys/ddi.h>
44 #include <sys/sunddi.h>
45 
46 #ifdef __cplusplus
47 extern "C" {
48 #endif
49 
50 /*
51  * The following defines specify the default number of MPT entries and their
52  * individual entry size.  Settings exist for the supported DDR DIMM sizes of
53  * 128MB and 256MB.  If a DIMM greater than 256MB is found, then the 256MB
54  * profile is used.  See tavor_cfg.c for more discussion on config profiles.
55  *
56  * For manual configuration (not using config profiles), the number of MPT
57  * entries is controllable through the "tavor_log_num_mpt" configuration
58  * variable.  To override config profile settings, the
59  * "tavor_alt_config_enable" configuration variable must first be set.
60  */
61 #define	TAVOR_NUM_MPT_SHIFT_128		0x14	/* log 2: 1M entries */
62 #define	TAVOR_NUM_MPT_SHIFT_256		0x15	/* log 2: 2M entries */
63 #define	TAVOR_MPT_SIZE_SHIFT		0x6
64 #define	TAVOR_MPT_SIZE			(1 << TAVOR_MPT_SIZE_SHIFT)
65 
66 /*
67  * Minimal configuration value (log 2) for the number of MPT entries.
68  */
69 #define	TAVOR_NUM_MPT_SHIFT_MIN		0xD
70 
71 /*
72  * The following defines specify the size of each individual MTT entry and
73  * the number of MTT entries that make up an MTT segment (TAVOR_MTTSEG_SIZE).
74  */
75 #define	TAVOR_MTT_SIZE_SHIFT		0x3
76 #define	TAVOR_MTT_SIZE			(1 << TAVOR_MTT_SIZE_SHIFT)
77 #define	TAVOR_MTTSEG_SIZE_SHIFT		0x0
78 #define	TAVOR_MTTSEG_SIZE		(8 << TAVOR_MTTSEG_SIZE_SHIFT)
79 
80 /*
81  * These define the total number of MTT segments.  By default the number of
82  * MTT segments (the MTT table size) is set to 2M segments.  This default
83  * value is used to initialize the "tavor_log_num_mttseg" config variable.
84  *
85  * Note: Each segment is currently set to 8 MTT entries (TAVOR_MTTSEG_SIZE).
86  * This means that we can support up to 16M MTT entries (i.e. "pages").
87  */
88 #define	TAVOR_NUM_MTTSEG_SHIFT		0x15
89 #define	TAVOR_NUM_MTTSEG		(1 << TAVOR_NUM_MTTSEG_SHIFT)
90 
91 /*
92  * Minimal configuration value (log 2) for the number of MTT segments.
93  */
94 #define	TAVOR_NUM_MTTSEG_SHIFT_MIN	0x11
95 
96 /*
97  * Macro to round a number of MTT entries up to the number of MTT segments.
98  */
99 #define	TAVOR_NUMMTT_TO_MTTSEG(num)		\
100 	(((num) + TAVOR_MTTSEG_SIZE - 1) >>	\
101 	(TAVOR_MTTSEG_SIZE_SHIFT + TAVOR_MTT_SIZE_SHIFT))
102 
103 /*
104  * This define specifies the "MTT page walk version" that is passed in the
105  * Tavor INIT_HCA command.
106  */
107 #define	TAVOR_MTT_PG_WALK_VER		0
108 
109 /*
110  * These defines are the maximum size of a memory region or window (log 2).
111  * The value is selected depending on the size of the DDR being either 128MB
112  * or 256MB.  They are used to initialize the "tavor_log_max_mrw_sz"
113  * configuration variable, and are proportional to the max MPT size set above.
114  */
115 #define	TAVOR_MAX_MEM_MPT_SHIFT_128		0x23
116 #define	TAVOR_MAX_MEM_MPT_SHIFT_256		0x24
117 
118 /*
119  * Minimal configuration value (log 2) for the maximum region/window size.
120  */
121 #define	TAVOR_MAX_MEM_MPT_SHIFT_MIN		0x1E
122 
123 /*
124  * Defines used by tavor_mr_deregister() to specify how much/to what extent
125  * a given memory region's resources should be freed up.  TAVOR_MR_DEREG_ALL
126  * says what it means: free up all the resources associated with the region.
127  * TAVOR_MR_DEREG_NO_HW2SW_MPT indicates that it is unnecessary to attempt
128  * the ownership transfer (from hardware to software) for the given MPT entry.
129  * And TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND indicates that it is not only
130  * unnecessary to attempt the ownership transfer for MPT, but it is also
131  * unnecessary to attempt to unbind the memory.
132  * In general, these last two are specified when tavor_mr_deregister() is
133  * called from tavor_mr_reregister(), where the MPT ownership transfer or
134  * memory unbinding may have already been successfully performed.
135  */
136 #define	TAVOR_MR_DEREG_ALL			3
137 #define	TAVOR_MR_DEREG_NO_HW2SW_MPT		2
138 #define	TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND	1
139 
140 /*
141  * The following define is used by tavor_mr_rereg_xlat_helper() to determine
142  * whether or not a given DMA handle can be reused.  It is non-zero (reusable)
143  * unless the handle was previously initialized for IOMMU bypass mapping and
144  * the reregistration requests IBT_MR_NONCOHERENT (DDI_DMA_STREAMING) access.
145  */
146 #define	TAVOR_MR_REUSE_DMAHDL(mr, flags)				\
147 	(((mr)->mr_bindinfo.bi_bypass != TAVOR_BINDMEM_BYPASS) ||	\
148 	    !((flags) & IBT_MR_NONCOHERENT))
149 
150 /*
151  * The tavor_sw_refcnt_t structure is used internally by the Tavor driver to
152  * track all the information necessary to manage shared memory regions.  Since
153  * a shared memory region _will_ have its own distinct MPT entry, but will
154  * _share_ its MTT entries with another region, it is necessary to track the
155  * number of times a given MTT structure is shared.  This ensures that it will
156  * not be prematurely freed up and that it can be destroyed only when it is
157  * appropriate to do so.
158  *
159  * Each tavor_sw_refcnt_t structure contains a lock and a reference count
160  * variable which are used to track the necessary information.
161  *
162  * The following macros (below) are used to manipulate and query the MTT
163  * reference count parameters.  TAVOR_MTT_REFCNT_INIT() is used to initialize
164  * a newly allocated tavor_sw_refcnt_t struct (setting the "swrc_refcnt" to 1).
165  * And the TAVOR_MTT_IS_NOT_SHARED() and TAVOR_MTT_IS_SHARED() macros are
166  * used to query the current status of a tavor_sw_refcnt_t struct to determine
167  * if its "swrc_refcnt" is one or not.
168  */
169 typedef struct tavor_sw_refcnt_s {
170 	kmutex_t		swrc_lock;	/* protects swrc_refcnt */
171 	uint_t			swrc_refcnt;	/* 1 == not shared */
172 } tavor_sw_refcnt_t;
173 _NOTE(DATA_READABLE_WITHOUT_LOCK(tavor_sw_refcnt_t::swrc_refcnt))
174 _NOTE(MUTEX_PROTECTS_DATA(tavor_sw_refcnt_t::swrc_lock,
175     tavor_sw_refcnt_t::swrc_refcnt))
176 #define	TAVOR_MTT_REFCNT_INIT(swrc_tmp)		((swrc_tmp)->swrc_refcnt = 1)
177 #define	TAVOR_MTT_IS_NOT_SHARED(swrc_tmp)	((swrc_tmp)->swrc_refcnt == 1)
178 #define	TAVOR_MTT_IS_SHARED(swrc_tmp)		((swrc_tmp)->swrc_refcnt != 1)
179 
180 
181 /*
182  * The tavor_bind_info_t structure is used internally by the Tavor driver to
183  * track all the information necessary to perform the DMA mappings necessary
184  * for memory registration.  It is specifically passed into both the
185  * tavor_mr_mem_bind() and tavor_mr_mtt_write() functions which perform most
186  * of the necessary operations for Tavor memory registration.
187  *
188  * This structure is used to pass all the information necessary for a call
189  * to either ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle().  Note:
190  * the fields which need to be valid for each type of binding are slightly
191  * different and that is indicated by the value in the "bi_type" field.  The
192  * "bi_type" field may be set to either of the following defined values:
193  * TAVOR_BINDHDL_VADDR (to indicate an "addr" bind) or TAVOR_BINDHDL_BUF (to
194  * indicate a "buf" bind).
195  *
196  * Upon return from tavor_mr_mem_bind(), the tavor_bind_info_t struct will
197  * have its "bi_dmahdl", "bi_dmacookie", and "bi_cookiecnt" fields filled in.
198  * It is these values which are of particular interest to the
199  * tavor_mr_mtt_write() routine (they hold the PCI mapped addresses).
200  *
201  * Once initialized and used in this way, the tavor_bind_info_t will not be
202  * modified in any way until it is subsequently passed to tavor_mr_mem_unbind()
203  * where the memory and resources will be unbound and reclaimed.  Note:  the
204  * "bi_free_dmahdl" flag indicates whether the ddi_dma_handle_t should be
205  * freed as part of the tavor_mr_mem_unbind() operation or whether it will
206  * be freed later elsewhere.
207  */
208 typedef struct tavor_bind_info_s {
209 	uint64_t		bi_addr;
210 	uint64_t		bi_len;
211 	struct as		*bi_as;
212 	struct buf		*bi_buf;
213 	ddi_dma_handle_t	bi_dmahdl;	/* set by tavor_mr_mem_bind() */
214 	ddi_dma_cookie_t	bi_dmacookie;	/* set by tavor_mr_mem_bind() */
215 	uint_t			bi_cookiecnt;	/* set by tavor_mr_mem_bind() */
216 	uint_t			bi_type;	/* TAVOR_BINDHDL_* value */
217 	uint_t			bi_flags;
218 	uint_t			bi_bypass;	/* TAVOR_BINDMEM_* value */
219 	uint_t			bi_free_dmahdl;	/* free dmahdl on unbind? */
220 } tavor_bind_info_t;
221 #define	TAVOR_BINDHDL_NONE		0
222 #define	TAVOR_BINDHDL_VADDR		1	/* "addr" bind */
223 #define	TAVOR_BINDHDL_BUF		2	/* "buf" bind */
224 #define	TAVOR_BINDHDL_UBUF		3
225 
226 /*
227  * The tavor_sw_mr_s structure is also referred to using the "tavor_mrhdl_t"
228  * typedef (see tavor_typedef.h).  It encodes all the information necessary
229  * to track the various resources needed to register, reregister, deregister,
230  * and perform all the myriad other operations on both memory regions _and_
231  * memory windows.
232  *
233  * A pointer to this structure is returned from many of the IBTF's CI verbs
234  * interfaces for memory registration.
235  *
236  * It contains pointers to the various resources allocated for a memory
237  * region, i.e. MPT resource, MTT resource, and MTT reference count resource.
238  * In addition it contains the tavor_bind_info_t struct used for the memory
239  * bind operation on a given memory region.
240  *
241  * It also has a pointer to the associated PD handle, placeholders for access
242  * flags, memory keys, and suggested page size for the region.  It also has
243  * the necessary backpointer to the resource that corresponds to the structure
244  * itself.  And lastly, it contains a placeholder for a callback which should
245  * be called on memory region unpinning.
246  */
247 struct tavor_sw_mr_s {
248 	kmutex_t		mr_lock;
249 	tavor_rsrc_t		*mr_mptrsrcp;	/* MPT resource */
250 	tavor_rsrc_t		*mr_mttrsrcp;	/* MTT resource */
251 	tavor_rsrc_t		*mr_mttrefcntp;	/* MTT refcnt resource */
252 	tavor_pdhdl_t		mr_pdhdl;
253 	tavor_bind_info_t	mr_bindinfo;	/* memory bind information */
254 	ibt_mr_attr_flags_t	mr_accflag;
255 	uint32_t		mr_lkey;
256 	uint32_t		mr_rkey;
257 	uint32_t		mr_logmttpgsz;
258 	tavor_rsrc_t		*mr_rsrcp;	/* resource for this struct */
259 	uint_t			mr_is_umem;
260 	ddi_umem_cookie_t	mr_umemcookie;
261 	void 			(*mr_umem_cbfunc)(void *, void *);
262 	void			*mr_umem_cbarg1;
263 	void			*mr_umem_cbarg2;
264 };
265 _NOTE(DATA_READABLE_WITHOUT_LOCK(tavor_sw_mr_s::mr_bindinfo
266     tavor_sw_mr_s::mr_lkey
267     tavor_sw_mr_s::mr_is_umem))
268 _NOTE(MUTEX_PROTECTS_DATA(tavor_sw_mr_s::mr_lock,
269     tavor_sw_mr_s::mr_mptrsrcp
270     tavor_sw_mr_s::mr_mttrsrcp
271     tavor_sw_mr_s::mr_mttrefcntp
272     tavor_sw_mr_s::mr_bindinfo
273     tavor_sw_mr_s::mr_lkey
274     tavor_sw_mr_s::mr_rkey
275     tavor_sw_mr_s::mr_logmttpgsz
276     tavor_sw_mr_s::mr_rsrcp
277     tavor_sw_mr_s::mr_is_umem
278     tavor_sw_mr_s::mr_umemcookie
279     tavor_sw_mr_s::mr_umem_cbfunc
280     tavor_sw_mr_s::mr_umem_cbarg1
281     tavor_sw_mr_s::mr_umem_cbarg2))
282 
283 /*
284  * The tavor_mr_options_t structure is used in several of the Tavor memory
285  * registration routines to provide additional option functionality.  When
286  * a NULL pointer is passed in place of a pointer to this struct, it is a
287  * way of specifying the "default" behavior.  Using this structure, however,
288  * is a way of controlling any extended behavior.
289  *
290  * Currently, the only defined "extended" behaviors are for specifying whether
291  * a given memory region should bypass the PCI IOMMU (TAVOR_BINDMEM_BYPASS)
292  * or be mapped into the IOMMU (TAVOR_BINDMEM_NORMAL), for specifying whether
293  * a given ddi_dma_handle_t should be used in the bind operation, and for
294  * specifying whether a memory registration should attempt to return an IB
295  * vaddr which is "zero-based" (aids in alignment constraints for QPs).
296  *
297  * This defaults today to always bypassing the IOMMU (can be changed by using
298  * the "tavor_iommu_bypass" configuration variable), to always allocating
299  * a new dma handle, and to using the virtual address passed in (i.e. not
300  * "zero-based").
301  */
302 typedef struct tavor_mr_options_s {
303 	ddi_dma_handle_t	mro_bind_dmahdl;
304 	uint_t			mro_bind_type;
305 	uint_t			mro_bind_override_addr;
306 } tavor_mr_options_t;
307 #define	TAVOR_BINDMEM_NORMAL		1	/* map into the IOMMU */
308 #define	TAVOR_BINDMEM_BYPASS		0	/* bypass the PCI IOMMU */
309 
310 int tavor_mr_register(tavor_state_t *state, tavor_pdhdl_t pdhdl,
311     ibt_mr_attr_t *attr_p, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op);
312 int tavor_mr_register_buf(tavor_state_t *state, tavor_pdhdl_t pdhdl,
313     ibt_smr_attr_t *attrp, struct buf *buf, tavor_mrhdl_t *mrhdl,
314     tavor_mr_options_t *op);	/* NULL op: default behavior */
315 int tavor_mr_mtt_bind(tavor_state_t *state, tavor_bind_info_t *bind,
316     ddi_dma_handle_t bind_dmahdl, tavor_rsrc_t **mtt, uint_t *mtt_pgsz_bits);
317 int tavor_mr_mtt_unbind(tavor_state_t *state, tavor_bind_info_t *bind,
318     tavor_rsrc_t *mtt);
319 int tavor_mr_register_shared(tavor_state_t *state, tavor_mrhdl_t mrhdl,
320     tavor_pdhdl_t pdhdl, ibt_smr_attr_t *attr_p, tavor_mrhdl_t *mrhdl_new);
321 int tavor_mr_deregister(tavor_state_t *state, tavor_mrhdl_t *mrhdl,
322     uint_t level, uint_t sleep);	/* level: TAVOR_MR_DEREG_* */
323 int tavor_mr_query(tavor_state_t *state, tavor_mrhdl_t mrhdl,
324     ibt_mr_query_attr_t *attr);
325 int tavor_mr_reregister(tavor_state_t *state, tavor_mrhdl_t mrhdl,
326     tavor_pdhdl_t pdhdl, ibt_mr_attr_t *attr_p, tavor_mrhdl_t *mrhdl_new,
327     tavor_mr_options_t *op);	/* NULL op: default behavior */
328 int tavor_mr_reregister_buf(tavor_state_t *state, tavor_mrhdl_t mr,
329     tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
330     tavor_mrhdl_t *mrhdl_new, tavor_mr_options_t *op);	/* NULL op: defaults */
331 int tavor_mr_sync(tavor_state_t *state, ibt_mr_sync_t *mr_segs,
332     size_t num_segs);
333 int tavor_mw_alloc(tavor_state_t *state, tavor_pdhdl_t pdhdl,
334     ibt_mw_flags_t flags, tavor_mwhdl_t *mwhdl);
335 int tavor_mw_free(tavor_state_t *state, tavor_mwhdl_t *mwhdl, uint_t sleep);
336 void tavor_mr_keycalc(tavor_state_t *state, uint32_t indx, uint32_t *key);
337 
338 #ifdef __cplusplus
339 }
340 #endif
341 
342 #endif	/* _SYS_IB_ADAPTERS_TAVOR_MR_H */
343