xref: /illumos-gate/usr/src/uts/common/io/usb/hcd/xhci/xhci_dma.c (revision bf5d9f18edeb77c14df996d367853599bdd43fd1)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2016 Joyent, Inc.
14  */
15 
16 /*
17  * xHCI DMA Management Routines
18  *
19  * Please see the big theory statement in xhci.c for more information.
20  */
21 
22 #include <sys/usb/hcd/xhci/xhci.h>
23 
24 int
25 xhci_check_dma_handle(xhci_t *xhcip, xhci_dma_buffer_t *xdb)
26 {
27 	ddi_fm_error_t de;
28 
29 	if (!DDI_FM_DMA_ERR_CAP(xhcip->xhci_fm_caps))
30 		return (0);
31 
32 	ddi_fm_dma_err_get(xdb->xdb_dma_handle, &de, DDI_FME_VERSION);
33 	return (de.fme_status);
34 }
35 
36 void
37 xhci_dma_acc_attr(xhci_t *xhcip, ddi_device_acc_attr_t *accp)
38 {
39 	accp->devacc_attr_version = DDI_DEVICE_ATTR_V0;
40 	accp->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
41 	accp->devacc_attr_dataorder = DDI_STRICTORDER_ACC;
42 
43 	if (DDI_FM_DMA_ERR_CAP(xhcip->xhci_fm_caps)) {
44 		accp->devacc_attr_access = DDI_FLAGERR_ACC;
45 	} else {
46 		accp->devacc_attr_access = DDI_DEFAULT_ACC;
47 	}
48 }
49 
50 /*
51  * These are DMA attributes that we assign when making a transfer. The SGL is
52  * variable and based on the caller, which varies based on the type of transfer
53  * we're doing.
54  */
55 void
56 xhci_dma_transfer_attr(xhci_t *xhcip, ddi_dma_attr_t *attrp, uint_t sgl)
57 {
58 	VERIFY3U(sgl, >, 0);
59 	VERIFY3U(sgl, <=, XHCI_TRANSFER_DMA_SGL);
60 	attrp->dma_attr_version = DMA_ATTR_V0;
61 
62 	/*
63 	 * The range of data that we can use is based on what hardware supports.
64 	 */
65 	attrp->dma_attr_addr_lo = 0x0;
66 	if (xhcip->xhci_caps.xcap_flags & XCAP_AC64) {
67 		attrp->dma_attr_addr_hi = UINT64_MAX;
68 	} else {
69 		attrp->dma_attr_addr_hi = UINT32_MAX;
70 	}
71 
72 	/*
73 	 * The count max indicates the total amount that will fit into one
74 	 * cookie, which is one TRB in our world. In other words 64k.
75 	 */
76 	attrp->dma_attr_count_max = XHCI_TRB_MAX_TRANSFER;
77 
78 	/*
79 	 * The alignment and segment are related. The alignment describes the
80 	 * alignment of the PA. The segment describes a boundary that the DMA
81 	 * allocation cannot cross. In other words, for a given chunk of memory
82 	 * it cannot cross a 64-byte boundary. However, the physical address
83 	 * only needs to be aligned to align bytes.
84 	 */
85 	attrp->dma_attr_align = XHCI_DMA_ALIGN;
86 	attrp->dma_attr_seg = XHCI_TRB_MAX_TRANSFER - 1;
87 
88 
89 	attrp->dma_attr_burstsizes = 0xfff;
90 
91 	/*
92 	 * This is the maximum we can send. Technically this is limited by the
93 	 * descriptors and not by hardware, hence why we use a large value for
94 	 * the max that'll be less than any memory allocation we ever throw at
95 	 * it.
96 	 */
97 	attrp->dma_attr_minxfer = 0x1;
98 	attrp->dma_attr_maxxfer = UINT32_MAX;
99 
100 	/*
101 	 * This is determined by the caller.
102 	 */
103 	attrp->dma_attr_sgllen = sgl;
104 
105 	/*
106 	 * The granularity describes the addressing granularity. e.g. can things
107 	 * ask for chunks in units of this number of bytes. For PCI this should
108 	 * always be one.
109 	 */
110 	attrp->dma_attr_granular = 1;
111 
112 	if (DDI_FM_DMA_ERR_CAP(xhcip->xhci_fm_caps)) {
113 		attrp->dma_attr_flags = DDI_DMA_FLAGERR;
114 	} else {
115 		attrp->dma_attr_flags = 0;
116 	}
117 }
118 
119 /*
120  * This routine tries to create DMA attributes for normal allocations for data
121  * structures and the like. By default we use the same values as the transfer
122  * attributes, but have explicit comments about how they're different.
123  */
124 void
125 xhci_dma_dma_attr(xhci_t *xhcip, ddi_dma_attr_t *attrp)
126 {
127 	/*
128 	 * Note, we always use a single SGL for these DMA allocations as these
129 	 * are used for small data structures.
130 	 */
131 	xhci_dma_transfer_attr(xhcip, attrp, XHCI_DEF_DMA_SGL);
132 
133 	/*
134 	 * The maximum size of any of these structures is 4k as opposed to the
135 	 * 64K max described above. Similarly the boundary requirement is
136 	 * reduced to 4k.
137 	 */
138 	attrp->dma_attr_count_max = xhcip->xhci_caps.xcap_pagesize;
139 	attrp->dma_attr_maxxfer = xhcip->xhci_caps.xcap_pagesize;
140 	attrp->dma_attr_seg = xhcip->xhci_caps.xcap_pagesize - 1;
141 }
142 
143 /*
144  * Fill in attributes for a scratchpad entry. The scratchpad entries are
145  * somewhat different in so far as they are closest to a normal DMA attribute,
146  * except they have stricter alignments, needing to be page sized.
147  *
148  * In addition, because we never access this memory ourselves, we can just mark
149  * it all as relaxed ordering.
150  */
151 void
152 xhci_dma_scratchpad_attr(xhci_t *xhcip, ddi_dma_attr_t *attrp)
153 {
154 	xhci_dma_dma_attr(xhcip, attrp);
155 	attrp->dma_attr_align = xhcip->xhci_caps.xcap_pagesize;
156 	attrp->dma_attr_flags |= DDI_DMA_RELAXED_ORDERING;
157 }
158 
159 /*
160  * This should be used for the simple case of a single SGL entry, which is the
161  * vast majority of the non-transfer allocations.
162  */
163 uint64_t
164 xhci_dma_pa(xhci_dma_buffer_t *xdb)
165 {
166 	ASSERT(xdb->xdb_ncookies == 1);
167 	return (xdb->xdb_cookies[0].dmac_laddress);
168 }
169 
/*
 * Tear down a DMA buffer in the reverse order of allocation: first undo the
 * handle binding, then free the backing memory, and finally release the DMA
 * handle itself. This is safe to call on a partially-constructed buffer
 * (e.g. from an xhci_dma_alloc() failure path) because each stage is guarded
 * by its own presence check.
 */
void
xhci_dma_free(xhci_dma_buffer_t *xdb)
{
	if (xdb->xdb_ncookies != 0) {
		VERIFY(xdb->xdb_dma_handle != NULL);
		(void) ddi_dma_unbind_handle(xdb->xdb_dma_handle);
		xdb->xdb_ncookies = 0;
		/* Clear the full cookie array, not just the entries in use. */
		bzero(xdb->xdb_cookies, sizeof (ddi_dma_cookie_t) *
		    XHCI_TRANSFER_DMA_SGL);
		xdb->xdb_len = 0;
	}

	if (xdb->xdb_acc_handle != NULL) {
		ddi_dma_mem_free(&xdb->xdb_acc_handle);
		xdb->xdb_acc_handle = NULL;
		xdb->xdb_va = NULL;
	}

	if (xdb->xdb_dma_handle != NULL) {
		ddi_dma_free_handle(&xdb->xdb_dma_handle);
		xdb->xdb_dma_handle = NULL;
	}

	/* The buffer should now be back to its initial, zeroed state. */
	ASSERT(xdb->xdb_va == NULL);
	ASSERT(xdb->xdb_ncookies == 0);
	ASSERT(xdb->xdb_cookies[0].dmac_laddress == 0);
	ASSERT(xdb->xdb_len == 0);
}
198 
/*
 * Allocate, and optionally zero, a 'size'-byte consistent DMA buffer
 * described by the given DMA and access attributes, recording the handle,
 * mapping, and cookies in 'xdb'. 'wait' selects whether the allocation may
 * sleep (DDI_DMA_SLEEP) or must fail rather than block (DDI_DMA_DONTWAIT).
 * Returns B_TRUE on success; on failure anything partially allocated is
 * released via xhci_dma_free() and B_FALSE is returned.
 */
boolean_t
xhci_dma_alloc(xhci_t *xhcip, xhci_dma_buffer_t *xdb,
    ddi_dma_attr_t *attrp, ddi_device_acc_attr_t *accp, boolean_t zero,
    size_t size, boolean_t wait)
{
	int ret, i;
	uint_t flags = DDI_DMA_CONSISTENT;
	size_t len;
	ddi_dma_cookie_t cookie;
	uint_t ncookies;
	int (*memcb)(caddr_t);

	if (wait == B_TRUE) {
		memcb = DDI_DMA_SLEEP;
	} else {
		memcb = DDI_DMA_DONTWAIT;
	}

	ret = ddi_dma_alloc_handle(xhcip->xhci_dip, attrp, memcb, NULL,
	    &xdb->xdb_dma_handle);
	if (ret != 0) {
		xhci_log(xhcip, "!failed to allocate DMA handle: %d", ret);
		xdb->xdb_dma_handle = NULL;
		return (B_FALSE);
	}

	ret = ddi_dma_mem_alloc(xdb->xdb_dma_handle, size, accp, flags, memcb,
	    NULL, &xdb->xdb_va, &len, &xdb->xdb_acc_handle);
	if (ret != DDI_SUCCESS) {
		xhci_log(xhcip, "!failed to allocate DMA memory: %d", ret);
		/*
		 * Clear these fields before the common cleanup path so that
		 * xhci_dma_free() only tears down the DMA handle.
		 */
		xdb->xdb_va = NULL;
		xdb->xdb_acc_handle = NULL;
		xhci_dma_free(xdb);
		return (B_FALSE);
	}

	if (zero == B_TRUE)
		bzero(xdb->xdb_va, len);

	ret = ddi_dma_addr_bind_handle(xdb->xdb_dma_handle, NULL,
	    xdb->xdb_va, len, DDI_DMA_RDWR | flags, memcb, NULL, &cookie,
	    &ncookies);
	if (ret != 0) {
		xhci_log(xhcip, "!failed to bind DMA memory: %d", ret);
		xhci_dma_free(xdb);
		return (B_FALSE);
	}

	/*
	 * Note we explicitly store the logical length of this allocation. The
	 * physical length is available via the cookies.
	 */
	xdb->xdb_len = size;
	xdb->xdb_ncookies = ncookies;
	/*
	 * The first cookie comes from the bind call itself; the remainder
	 * are retrieved one at a time via ddi_dma_nextcookie().
	 */
	xdb->xdb_cookies[0] = cookie;
	for (i = 1; i < ncookies; i++) {
		ddi_dma_nextcookie(xdb->xdb_dma_handle, &xdb->xdb_cookies[i]);
	}


	return (B_TRUE);
}
261 
262 void
263 xhci_transfer_free(xhci_t *xhcip, xhci_transfer_t *xt)
264 {
265 	if (xt == NULL)
266 		return;
267 
268 	VERIFY(xhcip != NULL);
269 	xhci_dma_free(&xt->xt_buffer);
270 	if (xt->xt_isoc != NULL) {
271 		ASSERT(xt->xt_ntrbs > 0);
272 		kmem_free(xt->xt_isoc, sizeof (usb_isoc_pkt_descr_t) *
273 		    xt->xt_ntrbs);
274 		xt->xt_isoc = NULL;
275 	}
276 	if (xt->xt_trbs != NULL) {
277 		ASSERT(xt->xt_ntrbs > 0);
278 		kmem_free(xt->xt_trbs, sizeof (xhci_trb_t) * xt->xt_ntrbs);
279 		xt->xt_trbs = NULL;
280 	}
281 	kmem_free(xt, sizeof (xhci_transfer_t));
282 }
283 
/*
 * Allocate a transfer structure, an optional 'size'-byte DMA buffer, and room
 * for 'trbs' TRBs. For non-isochronous transfers with data, the TRB count is
 * grown by one per DMA cookie, as each cookie requires its own TRB. For
 * isochronous endpoints a parallel array of packet descriptors is also
 * allocated. usb_flags controls whether we may sleep for memory. Returns the
 * new transfer, or NULL on allocation failure (everything partially
 * allocated is unwound first).
 */
xhci_transfer_t *
xhci_transfer_alloc(xhci_t *xhcip, xhci_endpoint_t *xep, size_t size, int trbs,
    int usb_flags)
{
	int kmflags;
	boolean_t dmawait;
	xhci_transfer_t *xt;
	ddi_device_acc_attr_t acc;
	ddi_dma_attr_t attr;

	if (usb_flags & USB_FLAGS_SLEEP) {
		kmflags = KM_SLEEP;
		dmawait = B_TRUE;
	} else {
		kmflags = KM_NOSLEEP;
		dmawait = B_FALSE;
	}

	xt = kmem_zalloc(sizeof (xhci_transfer_t), kmflags);
	if (xt == NULL)
		return (NULL);

	if (size != 0) {
		int sgl = XHCI_DEF_DMA_SGL;

		/*
		 * For BULK transfers, we always increase the number of SGL
		 * entries that we support to make things easier for the kernel.
		 * However, for control transfers, we currently opt to keep
		 * things a bit simpler and use our default of one SGL.  There's
		 * no good technical reason for this, rather it just keeps
		 * things a bit easier.
		 *
		 * To simplify things, we don't use additional SGL entries for
		 * ISOC transfers. While this isn't the best, it isn't too far
		 * off from what ehci and co. have done before. If this becomes
		 * a technical issue, it's certainly possible to increase the
		 * SGL entry count.
		 */
		if (xep->xep_type == USB_EP_ATTR_BULK)
			sgl = XHCI_TRANSFER_DMA_SGL;

		xhci_dma_acc_attr(xhcip, &acc);
		xhci_dma_transfer_attr(xhcip, &attr, sgl);
		if (xhci_dma_alloc(xhcip, &xt->xt_buffer, &attr, &acc, B_FALSE,
		    size, dmawait) == B_FALSE) {
			kmem_free(xt, sizeof (xhci_transfer_t));
			return (NULL);
		}

		/*
		 * ISOC transfers are a bit special and don't need additional
		 * TRBs for data.
		 */
		if (xep->xep_type != USB_EP_ATTR_ISOCH)
			trbs += xt->xt_buffer.xdb_ncookies;
	}

	xt->xt_trbs = kmem_zalloc(sizeof (xhci_trb_t) * trbs, kmflags);
	if (xt->xt_trbs == NULL) {
		xhci_dma_free(&xt->xt_buffer);
		kmem_free(xt, sizeof (xhci_transfer_t));
		return (NULL);
	}

	/*
	 * For ISOCH transfers, we need to also allocate the results data.
	 */
	if (xep->xep_type == USB_EP_ATTR_ISOCH) {
		xt->xt_isoc = kmem_zalloc(sizeof (usb_isoc_pkt_descr_t) * trbs,
		    kmflags);
		if (xt->xt_isoc == NULL) {
			kmem_free(xt->xt_trbs, sizeof (xhci_trb_t) * trbs);
			xhci_dma_free(&xt->xt_buffer);
			kmem_free(xt, sizeof (xhci_transfer_t));
			return (NULL);
		}
	}

	xt->xt_ntrbs = trbs;
	xt->xt_cr = USB_CR_OK;

	return (xt);
}
368 
369 /*
370  * Abstract the notion of copying out to handle the case of multiple DMA
371  * cookies. If tobuf is true, we are copying to the kernel provided buffer,
372  * otherwise we're copying into the DMA memory.
373  */
374 void
375 xhci_transfer_copy(xhci_transfer_t *xt, void *buf, size_t len,
376     boolean_t tobuf)
377 {
378 	void *dmabuf = xt->xt_buffer.xdb_va;
379 	if (tobuf == B_TRUE)
380 		bcopy(dmabuf, buf, len);
381 	else
382 		bcopy(buf, dmabuf, len);
383 }
384 
385 int
386 xhci_transfer_sync(xhci_t *xhcip, xhci_transfer_t *xt, uint_t type)
387 {
388 	XHCI_DMA_SYNC(xt->xt_buffer, type);
389 	return (xhci_check_dma_handle(xhcip, &xt->xt_buffer));
390 }
391 
392 /*
393  * We're required to try and inform the xHCI controller about the number of data
394  * packets that are required. The algorithm to use is described in xHCI 1.1 /
395  * 4.11.2.4. While it might be tempting to just try and calculate the number of
396  * packets based on simple rounding of the remaining number of bytes, that
397  * misses a critical problem -- DMA boundaries may cause us to need additional
398  * packets that are missed initially. Consider a transfer made up of four
399  * different DMA buffers sized in bytes: 4096, 4096, 256, 256, with a 512 byte
400  * packet size.
401  *
402  * Remain	4608	512	256	0
403  * Bytes	4096	4096	256	256
404  * Naive TD	9	1	1	0
405  * Act TD 	10	2	1	0
406  *
407  * This means that the only safe way forward here is to work backwards and see
408  * how many we need to work up to this point.
409  */
410 static int
411 xhci_transfer_get_tdsize(xhci_transfer_t *xt, uint_t off, uint_t mps)
412 {
413 	int i;
414 	uint_t npkt = 0;
415 
416 	/*
417 	 * There are always zero packets for the last TRB.
418 	 */
419 	ASSERT(xt->xt_buffer.xdb_ncookies > 0);
420 	for (i = xt->xt_buffer.xdb_ncookies - 1; i > off; i--) {
421 		size_t len;
422 
423 		/*
424 		 * The maximum value we can return is 31 packets. So, in that
425 		 * case we short-circuit and return.
426 		 */
427 		if (npkt >= 31)
428 			return (31);
429 
430 		len = roundup(xt->xt_buffer.xdb_cookies[i].dmac_size, mps);
431 		npkt += len / mps;
432 	}
433 
434 	return (npkt);
435 }
436 
/*
 * Fill in the data TRBs for a transfer, one TRB per DMA cookie, starting at
 * index 'off' within xt_trbs. 'in' indicates a device-to-host (read)
 * transfer. For control endpoints the first TRB is a DATA-stage TRB;
 * everything else is a NORMAL TRB.
 */
void
xhci_transfer_trb_fill_data(xhci_endpoint_t *xep, xhci_transfer_t *xt, int off,
    boolean_t in)
{
	uint_t mps, tdsize, flags;
	int i;

	VERIFY(xt->xt_buffer.xdb_ncookies > 0);
	VERIFY(xep->xep_pipe != NULL);
	VERIFY(off + xt->xt_buffer.xdb_ncookies <= xt->xt_ntrbs);
	mps = xep->xep_pipe->p_ep.wMaxPacketSize;

	for (i = 0; i < xt->xt_buffer.xdb_ncookies; i++) {
		uint64_t pa, dmasz;

		pa = xt->xt_buffer.xdb_cookies[i].dmac_laddress;
		dmasz = xt->xt_buffer.xdb_cookies[i].dmac_size;

		/* Packets still remaining after this TRB (TD Size field). */
		tdsize = xhci_transfer_get_tdsize(xt, i, mps);

		flags = XHCI_TRB_TYPE_NORMAL;
		if (i == 0 && xep->xep_type == USB_EP_ATTR_CONTROL) {
			flags = XHCI_TRB_TYPE_DATA;
			if (in == B_TRUE)
				flags |= XHCI_TRB_DIR_IN;
		}

		/*
		 * When reading data in (from the device), we may get shorter
		 * transfers than the buffer allowed for. To make sure we get
		 * notified about that and handle that, we need to set the ISP
		 * flag.
		 */
		if (in == B_TRUE) {
			flags |= XHCI_TRB_ISP;
			xt->xt_data_tohost = B_TRUE;
		}

		/*
		 * When we have more than one cookie, we are technically
		 * chaining together things according to the controllers view,
		 * hence why we need to set the chain flag on every TRB but
		 * the last.
		 */
		if (xt->xt_buffer.xdb_ncookies > 1 &&
		    i != (xt->xt_buffer.xdb_ncookies - 1)) {
			flags |= XHCI_TRB_CHAIN;
		}

		/*
		 * If we have a non-control transfer, then we need to make sure
		 * that we set ourselves up to be interrupted, which we set for
		 * the last entry.
		 */
		if (i + 1 == xt->xt_buffer.xdb_ncookies &&
		    xep->xep_type != USB_EP_ATTR_CONTROL) {
			flags |= XHCI_TRB_IOC;
		}

		/* TRBs are little-endian; convert each field explicitly. */
		xt->xt_trbs[off + i].trb_addr = LE_64(pa);
		xt->xt_trbs[off + i].trb_status = LE_32(XHCI_TRB_LEN(dmasz) |
		    XHCI_TRB_TDREM(tdsize) | XHCI_TRB_INTR(0));
		xt->xt_trbs[off + i].trb_flags = LE_32(flags);
	}
}
501 
502 /*
503  * These are utility functions for isochronus transfers to help calculate the
504  * transfer burst count (TBC) and transfer last burst packet count (TLPBC)
505  * entries for an isochronus entry. See xHCI 1.1 / 4.11.2.3 for how to calculate
506  * them.
507  */
508 void
509 xhci_transfer_calculate_isoc(xhci_device_t *xd, xhci_endpoint_t *xep,
510     uint_t trb_len, uint_t *tbc, uint_t *tlbpc)
511 {
512 	uint_t mps, tdpc, burst;
513 
514 	/*
515 	 * Even if we're asked to send no data, that actually requires the
516 	 * equivalent of sending one byte of data.
517 	 */
518 	if (trb_len == 0)
519 		trb_len = 1;
520 
521 	mps = XHCI_EPCTX_GET_MPS(xd->xd_endout[xep->xep_num]->xec_info2);
522 	burst = XHCI_EPCTX_GET_MAXB(xd->xd_endout[xep->xep_num]->xec_info2);
523 
524 	/*
525 	 * This is supposed to correspond to the Transfer Descriptor Packet
526 	 * Count from xHCI 1.1 / 4.14.1.
527 	 */
528 	tdpc = howmany(trb_len, mps);
529 	*tbc = howmany(tdpc, burst + 1) - 1;
530 
531 	if ((tdpc % (burst + 1)) == 0)
532 		*tlbpc = burst;
533 	else
534 		*tlbpc = (tdpc % (burst + 1)) - 1;
535 }
536