xref: /illumos-gate/usr/src/uts/common/fs/fdbuffer.c (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 1998,2001 by Sun Microsystems, Inc.
24  * All rights reserved.
25  *
26  */
27 
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 
30 #include <sys/types.h>
31 #include <sys/cmn_err.h>
32 #include <sys/kmem.h>
33 #include <sys/systm.h>
34 #include <sys/debug.h>
35 #include <sys/ddi.h>
36 
37 #include <sys/fdbuffer.h>
38 
39 #ifdef DEBUG
40 static int fdb_debug;
41 #define	FDB_D_CREATE	001
42 #define	FDB_D_ALLOC	002
43 #define	FDB_D_IO	004
44 #define	FDB_D_ASYNC	010
45 #define	DEBUGF(lvl, args)	{ if ((lvl) & fdb_debug) cmn_err args; }
46 #else
47 #define	DEBUGF(level, args)
48 #endif
49 static struct kmem_cache *fdb_cache;
50 static void fdb_zero_holes(fdbuffer_t *fdb);
51 
52 /* ARGSUSED */
53 static int
54 fdb_cache_constructor(void *buf, void *cdrarg, int kmflags)
55 {
56 	fdbuffer_t *fdb = buf;
57 
58 	mutex_init(&fdb->fd_mutex, NULL, MUTEX_DEFAULT, NULL);
59 
60 	return (0);
61 }
62 
63 /* ARGSUSED */
64 static void
65 fdb_cache_destructor(void *buf, void *cdrarg)
66 {
67 	fdbuffer_t *fdb = buf;
68 
69 	mutex_destroy(&fdb->fd_mutex);
70 }
71 
72 void
73 fdb_init()
74 {
75 	fdb_cache = kmem_cache_create("fdb_cache", sizeof (fdbuffer_t),
76 	    0, fdb_cache_constructor, fdb_cache_destructor,
77 	    NULL, NULL, NULL, 0);
78 }
79 
80 static void
81 fdb_prepare(fdbuffer_t *fdb)
82 {
83 	fdb->fd_holes = NULL;
84 	fdb->fd_iofunc = NULL;
85 	fdb->fd_iargp = NULL;
86 	fdb->fd_parentbp = NULL;
87 	fdb->fd_resid = 0;
88 	fdb->fd_iocount = 0;
89 	fdb->fd_iodispatch = 0;
90 	fdb->fd_err = 0;
91 }
92 
93 fdbuffer_t *
94 fdb_page_create(page_t *pp, size_t len, int flags)
95 {
96 	fdbuffer_t *fdb;
97 
98 	DEBUGF(FDB_D_CREATE, (CE_NOTE,
99 	    "?fdb_page_create: pp: %p len: %lux flags: %x",
100 	    (void *)pp, len, flags));
101 
102 	ASSERT(flags & (FDB_READ|FDB_WRITE));
103 
104 	fdb = kmem_cache_alloc(fdb_cache, KM_SLEEP);
105 
106 	fdb_prepare(fdb);
107 
108 	fdb->fd_type = FDB_PAGEIO;
109 	fdb->fd_len = len;
110 	fdb->fd_state = flags;
111 	fdb->fd_pages = pp;
112 
113 	return (fdb);
114 }
115 
116 fdbuffer_t *
117 fdb_addr_create(
118 	caddr_t addr,
119 	size_t len,
120 	int flags,
121 	page_t **pplist,
122 	struct proc *procp)
123 {
124 	fdbuffer_t *fdb;
125 
126 	DEBUGF(FDB_D_CREATE, (CE_NOTE,
127 	    "?fdb_addr_create: addr: %p len: %lux flags: %x",
128 	    (void *)addr, len, flags));
129 
130 	ASSERT(flags & (FDB_READ|FDB_WRITE));
131 
132 	fdb = kmem_cache_alloc(fdb_cache, KM_SLEEP);
133 
134 	fdb_prepare(fdb);
135 
136 	fdb->fd_type = FDB_VADDR;
137 	fdb->fd_len = len;
138 	fdb->fd_state = flags;
139 	fdb->fd_addr = addr;
140 	fdb->fd_shadow = pplist;
141 	fdb->fd_procp = procp;
142 
143 	return (fdb);
144 }
145 
146 void
147 fdb_set_iofunc(fdbuffer_t *fdb, fdb_iodone_t iofunc, void *ioargp, int flag)
148 {
149 	ASSERT(fdb);
150 	ASSERT(iofunc);
151 	ASSERT((flag & ~FDB_ICALLBACK) == 0);
152 
153 	fdb->fd_iofunc = iofunc;
154 	fdb->fd_iargp = ioargp;
155 
156 	mutex_enter(&fdb->fd_mutex);
157 
158 	if (flag & FDB_ICALLBACK)
159 		fdb->fd_state |= FDB_ICALLBACK;
160 
161 	fdb->fd_state |= FDB_ASYNC;
162 
163 	mutex_exit(&fdb->fd_mutex);
164 }
165 
166 int
167 fdb_get_error(fdbuffer_t *fdb)
168 {
169 	return (fdb->fd_err);
170 }
171 
172 void
173 fdb_free(fdbuffer_t *fdb)
174 {
175 	fdb_holes_t *fdh, *fdhp;
176 
177 	DEBUGF(FDB_D_CREATE, (CE_NOTE, "?fdb_free: addr: %p flags: %x",
178 	    (void *)fdb, fdb->fd_state));
179 
180 	ASSERT(fdb);
181 	ASSERT(fdb->fd_iodispatch == 0);
182 
183 	if (fdb->fd_state & FDB_ZEROHOLE) {
184 		fdb_zero_holes(fdb);
185 	}
186 
187 	for (fdh = fdb->fd_holes; fdh; ) {
188 		fdhp = fdh;
189 		fdh = fdh->next_hole;
190 		kmem_free(fdhp, sizeof (fdb_holes_t));
191 	}
192 
193 	if (fdb->fd_parentbp != NULL) {
194 		switch (fdb->fd_type) {
195 		case FDB_PAGEIO:
196 			pageio_done(fdb->fd_parentbp);
197 			break;
198 		case FDB_VADDR:
199 			kmem_free(fdb->fd_parentbp, sizeof (struct buf));
200 			break;
201 		default:
202 			cmn_err(CE_CONT, "?fdb_free: Unknown fdb type.");
203 			break;
204 		}
205 	}
206 
207 	kmem_cache_free(fdb_cache, fdb);
208 
209 }
210 
211 /*
212  * The offset should be from the begining of the buffer
213  * it has nothing to do with file offset. This fact should be
214  * reflected in the caller of this routine.
215  */
216 
217 void
218 fdb_add_hole(fdbuffer_t *fdb, u_offset_t off, size_t len)
219 {
220 	fdb_holes_t *this_hole;
221 
222 	ASSERT(fdb);
223 	ASSERT(off < fdb->fd_len);
224 
225 	DEBUGF(FDB_D_IO, (CE_NOTE, "?fdb_add_hole: off %llx len %lx",
226 	    off, len));
227 
228 	this_hole = kmem_alloc(sizeof (fdb_holes_t), KM_SLEEP);
229 	this_hole->off = off;
230 	this_hole->len = len;
231 
232 	if (fdb->fd_holes == NULL || off < fdb->fd_holes->off) {
233 		this_hole->next_hole = fdb->fd_holes;
234 		fdb->fd_holes = this_hole;
235 	} else {
236 		fdb_holes_t *fdhp = fdb->fd_holes;
237 
238 		while (fdhp->next_hole && off > fdhp->next_hole->off)
239 			fdhp = fdhp->next_hole;
240 
241 		this_hole->next_hole = fdhp->next_hole;
242 		fdhp->next_hole = this_hole;
243 	}
244 
245 	mutex_enter(&fdb->fd_mutex);
246 
247 	fdb->fd_iocount += len;
248 
249 	mutex_exit(&fdb->fd_mutex);
250 }
251 
252 fdb_holes_t *
253 fdb_get_holes(fdbuffer_t *fdb)
254 {
255 	ASSERT(fdb);
256 
257 	if (fdb->fd_state & FDB_ZEROHOLE) {
258 		fdb_zero_holes(fdb);
259 	}
260 
261 	return (fdb->fd_holes);
262 }
263 
264 /*
265  * Note that offsets refer to offsets from the begining of the buffer
266  * and as such the memory should be cleared accordingly.
267  */
268 
269 static void
270 fdb_zero_holes(fdbuffer_t *fdb)
271 {
272 	fdb_holes_t *fdh = fdb->fd_holes;
273 	page_t *pp;
274 
275 	ASSERT(fdb);
276 
277 	if (!fdh)
278 		return;
279 
280 	switch (fdb->fd_type) {
281 	case FDB_PAGEIO:
282 		pp = fdb->fd_pages;
283 		while (fdh) {
284 			fdb_holes_t *pfdh = fdh;
285 			size_t l = fdh->len;
286 			u_offset_t o = fdh->off;
287 			ASSERT(pp);
288 
289 			do {
290 				int  zerolen;
291 				ASSERT(o >= pp->p_offset);
292 
293 				/*
294 				 * This offset is wrong since
295 				 * the offset passed from the pages
296 				 * perspective starts at some virtual
297 				 * address but the hole is relative
298 				 * to the beginning of the fdbuffer.
299 				 */
300 				if (o >= pp->p_offset + PAGESIZE)
301 					continue;
302 
303 				zerolen = min(PAGESIZE, l);
304 
305 				ASSERT(zerolen > 0);
306 				ASSERT(zerolen <= PAGESIZE);
307 
308 				pagezero(pp, ((uintptr_t)o & PAGEOFFSET),
309 				    zerolen);
310 
311 				l -= zerolen;
312 				o += zerolen;
313 
314 				if (l == 0)
315 					break;
316 
317 			} while (pp = page_list_next(pp));
318 
319 			if (!pp)
320 				break;
321 
322 			fdh = fdh->next_hole;
323 			kmem_free(pfdh, sizeof (fdb_holes_t));
324 		}
325 		break;
326 	case FDB_VADDR:
327 		while (fdh) {
328 			fdb_holes_t *pfdh = fdh;
329 
330 			bzero(fdb->fd_addr + fdh->off, fdh->len);
331 
332 			fdh = fdh->next_hole;
333 			kmem_free(pfdh, sizeof (fdb_holes_t));
334 		}
335 	default:
336 		panic("fdb_zero_holes: Unknown fdb type.");
337 		break;
338 	}
339 }
340 
341 
342 buf_t *
343 fdb_iosetup(fdbuffer_t *fdb, u_offset_t off, size_t len, struct vnode *vp,
344     int b_flags)
345 {
346 	buf_t *bp;
347 
348 	DEBUGF(FDB_D_IO, (CE_NOTE,
349 	    "?fdb_iosetup: off: %llx len: %lux fdb: len: %lux flags: %x",
350 	    off, len, fdb->fd_len, fdb->fd_state));
351 
352 	ASSERT(fdb);
353 
354 	mutex_enter(&fdb->fd_mutex);
355 
356 	ASSERT(((b_flags & B_READ) && (fdb->fd_state & FDB_READ)) ||
357 	    ((b_flags & B_WRITE) && (fdb->fd_state & FDB_WRITE)));
358 	/*
359 	 * The fdb can be used either in sync or async mode, if the
360 	 * buffer has not been used it may be used in either mode, but
361 	 * once you have started to use the buf in either mode all
362 	 * subsequent i/o requests must take place the same way.
363 	 */
364 
365 	ASSERT(((b_flags & B_ASYNC) &&
366 	    ((fdb->fd_state & FDB_ASYNC) || !(fdb->fd_state & FDB_SYNC))) ||
367 	    (!(b_flags & B_ASYNC) &&
368 	    ((fdb->fd_state & FDB_SYNC) || !(fdb->fd_state & FDB_ASYNC))));
369 
370 
371 	fdb->fd_state |= b_flags & B_ASYNC ? FDB_ASYNC : FDB_SYNC;
372 
373 	fdb->fd_iodispatch++;
374 
375 	ASSERT((fdb->fd_state & FDB_ASYNC && fdb->fd_iofunc != NULL) ||
376 	    fdb->fd_state & FDB_SYNC);
377 
378 	mutex_exit(&fdb->fd_mutex);
379 
380 	ASSERT((len & (DEV_BSIZE - 1)) == 0);
381 	ASSERT(off+len <= fdb->fd_len);
382 
383 	switch (fdb->fd_type) {
384 	case FDB_PAGEIO:
385 		if (fdb->fd_parentbp == NULL) {
386 			bp = pageio_setup(fdb->fd_pages, len, vp, b_flags);
387 			fdb->fd_parentbp = bp;
388 		}
389 		break;
390 	case FDB_VADDR:
391 		if (fdb->fd_parentbp == NULL) {
392 
393 			bp = kmem_alloc(sizeof (buf_t), KM_SLEEP);
394 			bioinit(bp);
395 			bp->b_error = 0;
396 			bp->b_proc = fdb->fd_procp;
397 			bp->b_flags = b_flags | B_BUSY | B_PHYS;
398 			bp->b_bcount = len;
399 			bp->b_un.b_addr = fdb->fd_addr;
400 			bp->b_shadow = fdb->fd_shadow;
401 			if (fdb->fd_shadow != NULL)
402 				bp->b_flags |= B_SHADOW;
403 			fdb->fd_parentbp = bp;
404 		}
405 		break;
406 	default:
407 		panic("fdb_iosetup: Unsupported fdb type.");
408 		break;
409 	};
410 
411 	bp = bioclone(fdb->fd_parentbp, off, len, 0, 0,
412 	    (b_flags & B_ASYNC) ? (int (*)())fdb_iodone : NULL,
413 	    NULL, KM_SLEEP);
414 
415 	bp->b_forw = (struct buf *)fdb;
416 
417 	if (b_flags & B_ASYNC)
418 		bp->b_flags |= B_ASYNC;
419 
420 	return (bp);
421 }
422 
423 size_t
424 fdb_get_iolen(fdbuffer_t *fdb)
425 {
426 	ASSERT(fdb);
427 	ASSERT(fdb->fd_iodispatch == 0);
428 
429 	return (fdb->fd_iocount - fdb->fd_resid);
430 }
431 
432 void
433 fdb_ioerrdone(fdbuffer_t *fdb, int error)
434 {
435 	ASSERT(fdb);
436 	ASSERT(fdb->fd_state & FDB_ASYNC);
437 
438 	DEBUGF(FDB_D_IO, (CE_NOTE,
439 	    "?fdb_ioerrdone: fdb: len: %lux flags: %x error: %d",
440 	    fdb->fd_len, fdb->fd_state, error));
441 
442 	mutex_enter(&fdb->fd_mutex);
443 
444 	fdb->fd_err = error;
445 
446 	if (error)
447 		fdb->fd_state |= FDB_ERROR;
448 	else
449 		fdb->fd_state |= FDB_DONE;
450 
451 	/*
452 	 * If there is outstanding i/o return wainting for i/o's to complete.
453 	 */
454 	if (fdb->fd_iodispatch > 0) {
455 		mutex_exit(&fdb->fd_mutex);
456 		return;
457 	}
458 
459 	mutex_exit(&fdb->fd_mutex);
460 	fdb->fd_iofunc(fdb, fdb->fd_iargp, NULL);
461 }
462 
463 void
464 fdb_iodone(buf_t *bp)
465 {
466 	fdbuffer_t *fdb = (fdbuffer_t *)bp->b_forw;
467 	int	error, isasync;
468 	int	icallback;
469 
470 	ASSERT(fdb);
471 
472 	DEBUGF(FDB_D_IO, (CE_NOTE,
473 	    "?fdb_iodone: fdb: len: %lux flags: %x error: %d",
474 	    fdb->fd_len, fdb->fd_state, geterror(bp)));
475 
476 	if (bp->b_flags & B_REMAPPED)
477 		bp_mapout(bp);
478 
479 	mutex_enter(&fdb->fd_mutex);
480 
481 	icallback = fdb->fd_state & FDB_ICALLBACK;
482 	isasync = fdb->fd_state & FDB_ASYNC;
483 
484 	ASSERT(fdb->fd_iodispatch > 0);
485 	fdb->fd_iodispatch--;
486 
487 	if (error = geterror(bp)) {
488 		fdb->fd_err = error;
489 		if (bp->b_resid)
490 			fdb->fd_resid += bp->b_resid;
491 		else
492 			fdb->fd_resid += bp->b_bcount;
493 	}
494 
495 	fdb->fd_iocount += bp->b_bcount;
496 
497 	/*
498 	 * ioack collects the total amount of i/o accounted for
499 	 * this includes:
500 	 *
501 	 *	- i/o completed
502 	 *	- i/o attempted but not completed,
503 	 *	- i/o not done due to holes.
504 	 *
505 	 * Once the entire i/o ranges has been accounted for we'll
506 	 * call the async function associated with the fdb.
507 	 *
508 	 */
509 
510 	if ((fdb->fd_iodispatch == 0) &&
511 	    (fdb->fd_state & (FDB_ERROR|FDB_DONE))) {
512 
513 		mutex_exit(&fdb->fd_mutex);
514 
515 		if (isasync || icallback) {
516 			fdb->fd_iofunc(fdb, fdb->fd_iargp, bp);
517 		}
518 
519 	} else {
520 
521 		mutex_exit(&fdb->fd_mutex);
522 
523 		if (icallback) {
524 			fdb->fd_iofunc(fdb, fdb->fd_iargp, bp);
525 		}
526 	}
527 
528 	freerbuf(bp);
529 }
530