xref: /illumos-gate/usr/src/uts/common/fs/proc/prcontrol.c (revision 56f33205c9ed776c3c909e07d52e94610a675740)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/uio.h>
29 #include <sys/param.h>
30 #include <sys/cmn_err.h>
31 #include <sys/cred.h>
32 #include <sys/policy.h>
33 #include <sys/debug.h>
34 #include <sys/errno.h>
35 #include <sys/file.h>
36 #include <sys/inline.h>
37 #include <sys/kmem.h>
38 #include <sys/proc.h>
39 #include <sys/regset.h>
40 #include <sys/sysmacros.h>
41 #include <sys/systm.h>
42 #include <sys/vfs.h>
43 #include <sys/vnode.h>
44 #include <sys/signal.h>
45 #include <sys/auxv.h>
46 #include <sys/user.h>
47 #include <sys/class.h>
48 #include <sys/fault.h>
49 #include <sys/syscall.h>
50 #include <sys/procfs.h>
51 #include <sys/zone.h>
52 #include <sys/copyops.h>
53 #include <sys/schedctl.h>
54 #include <vm/as.h>
55 #include <vm/seg.h>
56 #include <fs/proc/prdata.h>
57 #include <sys/contract/process_impl.h>
58 
59 static	void	pr_settrace(proc_t *, sigset_t *);
60 static	int	pr_setfpregs(prnode_t *, prfpregset_t *);
61 #if defined(__sparc)
62 static	int	pr_setxregs(prnode_t *, prxregset_t *);
63 static	int	pr_setasrs(prnode_t *, asrset_t);
64 #endif
65 static	int	pr_setvaddr(prnode_t *, caddr_t);
66 static	int	pr_clearsig(prnode_t *);
67 static	int	pr_clearflt(prnode_t *);
68 static	int	pr_watch(prnode_t *, prwatch_t *, int *);
69 static	int	pr_agent(prnode_t *, prgregset_t, int *);
70 static	int	pr_rdwr(proc_t *, enum uio_rw, priovec_t *);
71 static	int	pr_scred(proc_t *, prcred_t *, cred_t *, boolean_t);
72 static	int	pr_spriv(proc_t *, prpriv_t *, cred_t *);
73 static	int	pr_szoneid(proc_t *, zoneid_t, cred_t *);
74 static	void	pauselwps(proc_t *);
75 static	void	unpauselwps(proc_t *);
76 
77 typedef union {
78 	long		sig;		/* PCKILL, PCUNKILL */
79 	long		nice;		/* PCNICE */
80 	long		timeo;		/* PCTWSTOP */
81 	ulong_t		flags;		/* PCRUN, PCSET, PCUNSET */
82 	caddr_t		vaddr;		/* PCSVADDR */
83 	siginfo_t	siginfo;	/* PCSSIG */
84 	sigset_t	sigset;		/* PCSTRACE, PCSHOLD */
85 	fltset_t	fltset;		/* PCSFAULT */
86 	sysset_t	sysset;		/* PCSENTRY, PCSEXIT */
87 	prgregset_t	prgregset;	/* PCSREG, PCAGENT */
88 	prfpregset_t	prfpregset;	/* PCSFPREG */
89 #if defined(__sparc)
90 	prxregset_t	prxregset;	/* PCSXREG */
91 	asrset_t	asrset;		/* PCSASRS */
92 #endif
93 	prwatch_t	prwatch;	/* PCWATCH */
94 	priovec_t	priovec;	/* PCREAD, PCWRITE */
95 	prcred_t	prcred;		/* PCSCRED */
96 	prpriv_t	prpriv;		/* PCSPRIV */
97 	long		przoneid;	/* PCSZONE */
98 } arg_t;
99 
100 static	int	pr_control(long, arg_t *, prnode_t *, cred_t *);
101 
102 static size_t
103 ctlsize(long cmd, size_t resid, arg_t *argp)
104 {
105 	size_t size = sizeof (long);
106 	size_t rnd;
107 	int ngrp;
108 
109 	switch (cmd) {
110 	case PCNULL:
111 	case PCSTOP:
112 	case PCDSTOP:
113 	case PCWSTOP:
114 	case PCCSIG:
115 	case PCCFAULT:
116 		break;
117 	case PCSSIG:
118 		size += sizeof (siginfo_t);
119 		break;
120 	case PCTWSTOP:
121 		size += sizeof (long);
122 		break;
123 	case PCKILL:
124 	case PCUNKILL:
125 	case PCNICE:
126 		size += sizeof (long);
127 		break;
128 	case PCRUN:
129 	case PCSET:
130 	case PCUNSET:
131 		size += sizeof (ulong_t);
132 		break;
133 	case PCSVADDR:
134 		size += sizeof (caddr_t);
135 		break;
136 	case PCSTRACE:
137 	case PCSHOLD:
138 		size += sizeof (sigset_t);
139 		break;
140 	case PCSFAULT:
141 		size += sizeof (fltset_t);
142 		break;
143 	case PCSENTRY:
144 	case PCSEXIT:
145 		size += sizeof (sysset_t);
146 		break;
147 	case PCSREG:
148 	case PCAGENT:
149 		size += sizeof (prgregset_t);
150 		break;
151 	case PCSFPREG:
152 		size += sizeof (prfpregset_t);
153 		break;
154 #if defined(__sparc)
155 	case PCSXREG:
156 		size += sizeof (prxregset_t);
157 		break;
158 	case PCSASRS:
159 		size += sizeof (asrset_t);
160 		break;
161 #endif
162 	case PCWATCH:
163 		size += sizeof (prwatch_t);
164 		break;
165 	case PCREAD:
166 	case PCWRITE:
167 		size += sizeof (priovec_t);
168 		break;
169 	case PCSCRED:
170 		size += sizeof (prcred_t);
171 		break;
172 	case PCSCREDX:
173 		/*
174 		 * We cannot derefence the pr_ngroups fields if it
175 		 * we don't have enough data.
176 		 */
177 		if (resid < size + sizeof (prcred_t) - sizeof (gid_t))
178 			return (0);
179 		ngrp = argp->prcred.pr_ngroups;
180 		if (ngrp < 0 || ngrp > ngroups_max)
181 			return (0);
182 
183 		/* The result can be smaller than sizeof (prcred_t) */
184 		size += sizeof (prcred_t) - sizeof (gid_t);
185 		size += ngrp * sizeof (gid_t);
186 		break;
187 	case PCSPRIV:
188 		if (resid >= size + sizeof (prpriv_t))
189 			size += priv_prgetprivsize(&argp->prpriv);
190 		else
191 			return (0);
192 		break;
193 	case PCSZONE:
194 		size += sizeof (long);
195 		break;
196 	default:
197 		return (0);
198 	}
199 
200 	/* Round up to a multiple of long, unless exact amount written */
201 	if (size < resid) {
202 		rnd = size & (sizeof (long) - 1);
203 
204 		if (rnd != 0)
205 			size += sizeof (long) - rnd;
206 	}
207 
208 	if (size > resid)
209 		return (0);
210 	return (size);
211 }
212 
213 /*
214  * Control operations (lots).
215  */
216 int
217 prwritectl(vnode_t *vp, uio_t *uiop, cred_t *cr)
218 {
219 #define	MY_BUFFER_SIZE \
220 		100 > 1 + sizeof (arg_t) / sizeof (long) ? \
221 		100 : 1 + sizeof (arg_t) / sizeof (long)
222 	long buf[MY_BUFFER_SIZE];
223 	long *bufp;
224 	size_t resid = 0;
225 	size_t size;
226 	prnode_t *pnp = VTOP(vp);
227 	int error;
228 	int locked = 0;
229 
230 	while (uiop->uio_resid) {
231 		/*
232 		 * Read several commands in one gulp.
233 		 */
234 		bufp = buf;
235 		if (resid) {	/* move incomplete command to front of buffer */
236 			long *tail;
237 
238 			if (resid >= sizeof (buf))
239 				break;
240 			tail = (long *)((char *)buf + sizeof (buf) - resid);
241 			do {
242 				*bufp++ = *tail++;
243 			} while ((resid -= sizeof (long)) != 0);
244 		}
245 		resid = sizeof (buf) - ((char *)bufp - (char *)buf);
246 		if (resid > uiop->uio_resid)
247 			resid = uiop->uio_resid;
248 		if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
249 			return (error);
250 		resid += (char *)bufp - (char *)buf;
251 		bufp = buf;
252 
253 		do {		/* loop over commands in buffer */
254 			long cmd = bufp[0];
255 			arg_t *argp = (arg_t *)&bufp[1];
256 
257 			size = ctlsize(cmd, resid, argp);
258 			if (size == 0)	/* incomplete or invalid command */
259 				break;
260 			/*
261 			 * Perform the specified control operation.
262 			 */
263 			if (!locked) {
264 				if ((error = prlock(pnp, ZNO)) != 0)
265 					return (error);
266 				locked = 1;
267 			}
268 			if (error = pr_control(cmd, argp, pnp, cr)) {
269 				if (error == -1)	/* -1 is timeout */
270 					locked = 0;
271 				else
272 					return (error);
273 			}
274 			bufp = (long *)((char *)bufp + size);
275 		} while ((resid -= size) != 0);
276 
277 		if (locked) {
278 			prunlock(pnp);
279 			locked = 0;
280 		}
281 	}
282 	return (resid? EINVAL : 0);
283 }
284 
285 static int
286 pr_control(long cmd, arg_t *argp, prnode_t *pnp, cred_t *cr)
287 {
288 	prcommon_t *pcp;
289 	proc_t *p;
290 	int unlocked;
291 	int error = 0;
292 
293 	if (cmd == PCNULL)
294 		return (0);
295 
296 	pcp = pnp->pr_common;
297 	p = pcp->prc_proc;
298 	ASSERT(p != NULL);
299 
300 	/* System processes defy control. */
301 	if (p->p_flag & SSYS) {
302 		prunlock(pnp);
303 		return (EBUSY);
304 	}
305 
306 	switch (cmd) {
307 
308 	default:
309 		error = EINVAL;
310 		break;
311 
312 	case PCSTOP:	/* direct process or lwp to stop and wait for stop */
313 	case PCDSTOP:	/* direct process or lwp to stop, don't wait */
314 	case PCWSTOP:	/* wait for process or lwp to stop */
315 	case PCTWSTOP:	/* wait for process or lwp to stop, with timeout */
316 		{
317 			time_t timeo;
318 
319 			/*
320 			 * Can't apply to a system process.
321 			 */
322 			if (p->p_as == &kas) {
323 				error = EBUSY;
324 				break;
325 			}
326 
327 			if (cmd == PCSTOP || cmd == PCDSTOP)
328 				pr_stop(pnp);
329 
330 			if (cmd == PCDSTOP)
331 				break;
332 
333 			/*
334 			 * If an lwp is waiting for itself or its process,
335 			 * don't wait. The stopped lwp would never see the
336 			 * fact that it is stopped.
337 			 */
338 			if ((pcp->prc_flags & PRC_LWP)?
339 			    (pcp->prc_thread == curthread) : (p == curproc)) {
340 				if (cmd == PCWSTOP || cmd == PCTWSTOP)
341 					error = EBUSY;
342 				break;
343 			}
344 
345 			timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
346 			if ((error = pr_wait_stop(pnp, timeo)) != 0)
347 				return (error);
348 
349 			break;
350 		}
351 
352 	case PCRUN:	/* make lwp or process runnable */
353 		error = pr_setrun(pnp, argp->flags);
354 		break;
355 
356 	case PCSTRACE:	/* set signal trace mask */
357 		pr_settrace(p,  &argp->sigset);
358 		break;
359 
360 	case PCSSIG:	/* set current signal */
361 		error = pr_setsig(pnp, &argp->siginfo);
362 		if (argp->siginfo.si_signo == SIGKILL && error == 0) {
363 			prunlock(pnp);
364 			pr_wait_die(pnp);
365 			return (-1);
366 		}
367 		break;
368 
369 	case PCKILL:	/* send signal */
370 		error = pr_kill(pnp, (int)argp->sig, cr);
371 		if (error == 0 && argp->sig == SIGKILL) {
372 			prunlock(pnp);
373 			pr_wait_die(pnp);
374 			return (-1);
375 		}
376 		break;
377 
378 	case PCUNKILL:	/* delete a pending signal */
379 		error = pr_unkill(pnp, (int)argp->sig);
380 		break;
381 
382 	case PCNICE:	/* set nice priority */
383 		error = pr_nice(p, (int)argp->nice, cr);
384 		break;
385 
386 	case PCSENTRY:	/* set syscall entry bit mask */
387 	case PCSEXIT:	/* set syscall exit bit mask */
388 		pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
389 		break;
390 
391 	case PCSET:	/* set process flags */
392 		error = pr_set(p, argp->flags);
393 		break;
394 
395 	case PCUNSET:	/* unset process flags */
396 		error = pr_unset(p, argp->flags);
397 		break;
398 
399 	case PCSREG:	/* set general registers */
400 		{
401 			kthread_t *t = pr_thread(pnp);
402 
403 			if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
404 				thread_unlock(t);
405 				error = EBUSY;
406 			} else {
407 				thread_unlock(t);
408 				mutex_exit(&p->p_lock);
409 				prsetprregs(ttolwp(t), argp->prgregset, 0);
410 				mutex_enter(&p->p_lock);
411 			}
412 			break;
413 		}
414 
415 	case PCSFPREG:	/* set floating-point registers */
416 		error = pr_setfpregs(pnp, &argp->prfpregset);
417 		break;
418 
419 	case PCSXREG:	/* set extra registers */
420 #if defined(__sparc)
421 		error = pr_setxregs(pnp, &argp->prxregset);
422 #else
423 		error = EINVAL;
424 #endif
425 		break;
426 
427 #if defined(__sparc)
428 	case PCSASRS:	/* set ancillary state registers */
429 		error = pr_setasrs(pnp, argp->asrset);
430 		break;
431 #endif
432 
433 	case PCSVADDR:	/* set virtual address at which to resume */
434 		error = pr_setvaddr(pnp, argp->vaddr);
435 		break;
436 
437 	case PCSHOLD:	/* set signal-hold mask */
438 		pr_sethold(pnp, &argp->sigset);
439 		break;
440 
441 	case PCSFAULT:	/* set mask of traced faults */
442 		pr_setfault(p, &argp->fltset);
443 		break;
444 
445 	case PCCSIG:	/* clear current signal */
446 		error = pr_clearsig(pnp);
447 		break;
448 
449 	case PCCFAULT:	/* clear current fault */
450 		error = pr_clearflt(pnp);
451 		break;
452 
453 	case PCWATCH:	/* set or clear watched areas */
454 		error = pr_watch(pnp, &argp->prwatch, &unlocked);
455 		if (error && unlocked)
456 			return (error);
457 		break;
458 
459 	case PCAGENT:	/* create the /proc agent lwp in the target process */
460 		error = pr_agent(pnp, argp->prgregset, &unlocked);
461 		if (error && unlocked)
462 			return (error);
463 		break;
464 
465 	case PCREAD:	/* read from the address space */
466 		error = pr_rdwr(p, UIO_READ, &argp->priovec);
467 		break;
468 
469 	case PCWRITE:	/* write to the address space */
470 		error = pr_rdwr(p, UIO_WRITE, &argp->priovec);
471 		break;
472 
473 	case PCSCRED:	/* set the process credentials */
474 	case PCSCREDX:
475 		error = pr_scred(p, &argp->prcred, cr, cmd == PCSCREDX);
476 		break;
477 
478 	case PCSPRIV:	/* set the process privileges */
479 		error = pr_spriv(p, &argp->prpriv, cr);
480 		break;
481 	case PCSZONE:	/* set the process's zoneid credentials */
482 		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
483 		break;
484 	}
485 
486 	if (error)
487 		prunlock(pnp);
488 	return (error);
489 }
490 
491 #ifdef _SYSCALL32_IMPL
492 
493 typedef union {
494 	int32_t		sig;		/* PCKILL, PCUNKILL */
495 	int32_t		nice;		/* PCNICE */
496 	int32_t		timeo;		/* PCTWSTOP */
497 	uint32_t	flags;		/* PCRUN, PCSET, PCUNSET */
498 	caddr32_t	vaddr;		/* PCSVADDR */
499 	siginfo32_t	siginfo;	/* PCSSIG */
500 	sigset_t	sigset;		/* PCSTRACE, PCSHOLD */
501 	fltset_t	fltset;		/* PCSFAULT */
502 	sysset_t	sysset;		/* PCSENTRY, PCSEXIT */
503 	prgregset32_t	prgregset;	/* PCSREG, PCAGENT */
504 	prfpregset32_t	prfpregset;	/* PCSFPREG */
505 #if defined(__sparc)
506 	prxregset_t	prxregset;	/* PCSXREG */
507 #endif
508 	prwatch32_t	prwatch;	/* PCWATCH */
509 	priovec32_t	priovec;	/* PCREAD, PCWRITE */
510 	prcred32_t	prcred;		/* PCSCRED */
511 	prpriv_t	prpriv;		/* PCSPRIV */
512 	int32_t		przoneid;	/* PCSZONE */
513 } arg32_t;
514 
515 static	int	pr_control32(int32_t, arg32_t *, prnode_t *, cred_t *);
516 static	int	pr_setfpregs32(prnode_t *, prfpregset32_t *);
517 
518 /*
519  * Note that while ctlsize32() can use argp, it must do so only in a way
520  * that assumes 32-bit rather than 64-bit alignment as argp is a pointer
521  * to an array of 32-bit values and only 32-bit alignment is ensured.
522  */
523 static size_t
524 ctlsize32(int32_t cmd, size_t resid, arg32_t *argp)
525 {
526 	size_t size = sizeof (int32_t);
527 	size_t rnd;
528 	int ngrp;
529 
530 	switch (cmd) {
531 	case PCNULL:
532 	case PCSTOP:
533 	case PCDSTOP:
534 	case PCWSTOP:
535 	case PCCSIG:
536 	case PCCFAULT:
537 		break;
538 	case PCSSIG:
539 		size += sizeof (siginfo32_t);
540 		break;
541 	case PCTWSTOP:
542 		size += sizeof (int32_t);
543 		break;
544 	case PCKILL:
545 	case PCUNKILL:
546 	case PCNICE:
547 		size += sizeof (int32_t);
548 		break;
549 	case PCRUN:
550 	case PCSET:
551 	case PCUNSET:
552 		size += sizeof (uint32_t);
553 		break;
554 	case PCSVADDR:
555 		size += sizeof (caddr32_t);
556 		break;
557 	case PCSTRACE:
558 	case PCSHOLD:
559 		size += sizeof (sigset_t);
560 		break;
561 	case PCSFAULT:
562 		size += sizeof (fltset_t);
563 		break;
564 	case PCSENTRY:
565 	case PCSEXIT:
566 		size += sizeof (sysset_t);
567 		break;
568 	case PCSREG:
569 	case PCAGENT:
570 		size += sizeof (prgregset32_t);
571 		break;
572 	case PCSFPREG:
573 		size += sizeof (prfpregset32_t);
574 		break;
575 #if defined(__sparc)
576 	case PCSXREG:
577 		size += sizeof (prxregset_t);
578 		break;
579 #endif
580 	case PCWATCH:
581 		size += sizeof (prwatch32_t);
582 		break;
583 	case PCREAD:
584 	case PCWRITE:
585 		size += sizeof (priovec32_t);
586 		break;
587 	case PCSCRED:
588 		size += sizeof (prcred32_t);
589 		break;
590 	case PCSCREDX:
591 		/*
592 		 * We cannot derefence the pr_ngroups fields if it
593 		 * we don't have enough data.
594 		 */
595 		if (resid < size + sizeof (prcred32_t) - sizeof (gid32_t))
596 			return (0);
597 		ngrp = argp->prcred.pr_ngroups;
598 		if (ngrp < 0 || ngrp > ngroups_max)
599 			return (0);
600 
601 		/* The result can be smaller than sizeof (prcred32_t) */
602 		size += sizeof (prcred32_t) - sizeof (gid32_t);
603 		size += ngrp * sizeof (gid32_t);
604 		break;
605 	case PCSPRIV:
606 		if (resid >= size + sizeof (prpriv_t))
607 			size += priv_prgetprivsize(&argp->prpriv);
608 		else
609 			return (0);
610 		break;
611 	case PCSZONE:
612 		size += sizeof (int32_t);
613 		break;
614 	default:
615 		return (0);
616 	}
617 
618 	/* Round up to a multiple of int32_t */
619 	rnd = size & (sizeof (int32_t) - 1);
620 
621 	if (rnd != 0)
622 		size += sizeof (int32_t) - rnd;
623 
624 	if (size > resid)
625 		return (0);
626 	return (size);
627 }
628 
629 /*
630  * Control operations (lots).
631  */
632 int
633 prwritectl32(struct vnode *vp, struct uio *uiop, cred_t *cr)
634 {
635 #define	MY_BUFFER_SIZE32 \
636 		100 > 1 + sizeof (arg32_t) / sizeof (int32_t) ? \
637 		100 : 1 + sizeof (arg32_t) / sizeof (int32_t)
638 	int32_t buf[MY_BUFFER_SIZE32];
639 	int32_t *bufp;
640 	arg32_t arg;
641 	size_t resid = 0;
642 	size_t size;
643 	prnode_t *pnp = VTOP(vp);
644 	int error;
645 	int locked = 0;
646 
647 	while (uiop->uio_resid) {
648 		/*
649 		 * Read several commands in one gulp.
650 		 */
651 		bufp = buf;
652 		if (resid) {	/* move incomplete command to front of buffer */
653 			int32_t *tail;
654 
655 			if (resid >= sizeof (buf))
656 				break;
657 			tail = (int32_t *)((char *)buf + sizeof (buf) - resid);
658 			do {
659 				*bufp++ = *tail++;
660 			} while ((resid -= sizeof (int32_t)) != 0);
661 		}
662 		resid = sizeof (buf) - ((char *)bufp - (char *)buf);
663 		if (resid > uiop->uio_resid)
664 			resid = uiop->uio_resid;
665 		if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
666 			return (error);
667 		resid += (char *)bufp - (char *)buf;
668 		bufp = buf;
669 
670 		do {		/* loop over commands in buffer */
671 			int32_t cmd = bufp[0];
672 			arg32_t *argp = (arg32_t *)&bufp[1];
673 
674 			size = ctlsize32(cmd, resid, argp);
675 			if (size == 0)	/* incomplete or invalid command */
676 				break;
677 			/*
678 			 * Perform the specified control operation.
679 			 */
680 			if (!locked) {
681 				if ((error = prlock(pnp, ZNO)) != 0)
682 					return (error);
683 				locked = 1;
684 			}
685 
686 			/*
687 			 * Since some members of the arg32_t union contain
688 			 * 64-bit values (which must be 64-bit aligned), we
689 			 * can't simply pass a pointer to the structure as
690 			 * it may be unaligned. Note that we do pass the
691 			 * potentially unaligned structure to ctlsize32()
692 			 * above, but that uses it a way that makes no
693 			 * assumptions about alignment.
694 			 */
695 			ASSERT(size - sizeof (cmd) <= sizeof (arg));
696 			bcopy(argp, &arg, size - sizeof (cmd));
697 
698 			if (error = pr_control32(cmd, &arg, pnp, cr)) {
699 				if (error == -1)	/* -1 is timeout */
700 					locked = 0;
701 				else
702 					return (error);
703 			}
704 			bufp = (int32_t *)((char *)bufp + size);
705 		} while ((resid -= size) != 0);
706 
707 		if (locked) {
708 			prunlock(pnp);
709 			locked = 0;
710 		}
711 	}
712 	return (resid? EINVAL : 0);
713 }
714 
715 static int
716 pr_control32(int32_t cmd, arg32_t *argp, prnode_t *pnp, cred_t *cr)
717 {
718 	prcommon_t *pcp;
719 	proc_t *p;
720 	int unlocked;
721 	int error = 0;
722 
723 	if (cmd == PCNULL)
724 		return (0);
725 
726 	pcp = pnp->pr_common;
727 	p = pcp->prc_proc;
728 	ASSERT(p != NULL);
729 
730 	if (p->p_flag & SSYS) {
731 		prunlock(pnp);
732 		return (EBUSY);
733 	}
734 
735 	switch (cmd) {
736 
737 	default:
738 		error = EINVAL;
739 		break;
740 
741 	case PCSTOP:	/* direct process or lwp to stop and wait for stop */
742 	case PCDSTOP:	/* direct process or lwp to stop, don't wait */
743 	case PCWSTOP:	/* wait for process or lwp to stop */
744 	case PCTWSTOP:	/* wait for process or lwp to stop, with timeout */
745 		{
746 			time_t timeo;
747 
748 			/*
749 			 * Can't apply to a system process.
750 			 */
751 			if (p->p_as == &kas) {
752 				error = EBUSY;
753 				break;
754 			}
755 
756 			if (cmd == PCSTOP || cmd == PCDSTOP)
757 				pr_stop(pnp);
758 
759 			if (cmd == PCDSTOP)
760 				break;
761 
762 			/*
763 			 * If an lwp is waiting for itself or its process,
764 			 * don't wait. The lwp will never see the fact that
765 			 * itself is stopped.
766 			 */
767 			if ((pcp->prc_flags & PRC_LWP)?
768 			    (pcp->prc_thread == curthread) : (p == curproc)) {
769 				if (cmd == PCWSTOP || cmd == PCTWSTOP)
770 					error = EBUSY;
771 				break;
772 			}
773 
774 			timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
775 			if ((error = pr_wait_stop(pnp, timeo)) != 0)
776 				return (error);
777 
778 			break;
779 		}
780 
781 	case PCRUN:	/* make lwp or process runnable */
782 		error = pr_setrun(pnp, (ulong_t)argp->flags);
783 		break;
784 
785 	case PCSTRACE:	/* set signal trace mask */
786 		pr_settrace(p,  &argp->sigset);
787 		break;
788 
789 	case PCSSIG:	/* set current signal */
790 		if (PROCESS_NOT_32BIT(p))
791 			error = EOVERFLOW;
792 		else {
793 			int sig = (int)argp->siginfo.si_signo;
794 			siginfo_t siginfo;
795 
796 			bzero(&siginfo, sizeof (siginfo));
797 			siginfo_32tok(&argp->siginfo, (k_siginfo_t *)&siginfo);
798 			error = pr_setsig(pnp, &siginfo);
799 			if (sig == SIGKILL && error == 0) {
800 				prunlock(pnp);
801 				pr_wait_die(pnp);
802 				return (-1);
803 			}
804 		}
805 		break;
806 
807 	case PCKILL:	/* send signal */
808 		error = pr_kill(pnp, (int)argp->sig, cr);
809 		if (error == 0 && argp->sig == SIGKILL) {
810 			prunlock(pnp);
811 			pr_wait_die(pnp);
812 			return (-1);
813 		}
814 		break;
815 
816 	case PCUNKILL:	/* delete a pending signal */
817 		error = pr_unkill(pnp, (int)argp->sig);
818 		break;
819 
820 	case PCNICE:	/* set nice priority */
821 		error = pr_nice(p, (int)argp->nice, cr);
822 		break;
823 
824 	case PCSENTRY:	/* set syscall entry bit mask */
825 	case PCSEXIT:	/* set syscall exit bit mask */
826 		pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
827 		break;
828 
829 	case PCSET:	/* set process flags */
830 		error = pr_set(p, (long)argp->flags);
831 		break;
832 
833 	case PCUNSET:	/* unset process flags */
834 		error = pr_unset(p, (long)argp->flags);
835 		break;
836 
837 	case PCSREG:	/* set general registers */
838 		if (PROCESS_NOT_32BIT(p))
839 			error = EOVERFLOW;
840 		else {
841 			kthread_t *t = pr_thread(pnp);
842 
843 			if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
844 				thread_unlock(t);
845 				error = EBUSY;
846 			} else {
847 				prgregset_t prgregset;
848 				klwp_t *lwp = ttolwp(t);
849 
850 				thread_unlock(t);
851 				mutex_exit(&p->p_lock);
852 				prgregset_32ton(lwp, argp->prgregset,
853 				    prgregset);
854 				prsetprregs(lwp, prgregset, 0);
855 				mutex_enter(&p->p_lock);
856 			}
857 		}
858 		break;
859 
860 	case PCSFPREG:	/* set floating-point registers */
861 		if (PROCESS_NOT_32BIT(p))
862 			error = EOVERFLOW;
863 		else
864 			error = pr_setfpregs32(pnp, &argp->prfpregset);
865 		break;
866 
867 	case PCSXREG:	/* set extra registers */
868 #if defined(__sparc)
869 		if (PROCESS_NOT_32BIT(p))
870 			error = EOVERFLOW;
871 		else
872 			error = pr_setxregs(pnp, &argp->prxregset);
873 #else
874 		error = EINVAL;
875 #endif
876 		break;
877 
878 	case PCSVADDR:	/* set virtual address at which to resume */
879 		if (PROCESS_NOT_32BIT(p))
880 			error = EOVERFLOW;
881 		else
882 			error = pr_setvaddr(pnp,
883 			    (caddr_t)(uintptr_t)argp->vaddr);
884 		break;
885 
886 	case PCSHOLD:	/* set signal-hold mask */
887 		pr_sethold(pnp, &argp->sigset);
888 		break;
889 
890 	case PCSFAULT:	/* set mask of traced faults */
891 		pr_setfault(p, &argp->fltset);
892 		break;
893 
894 	case PCCSIG:	/* clear current signal */
895 		error = pr_clearsig(pnp);
896 		break;
897 
898 	case PCCFAULT:	/* clear current fault */
899 		error = pr_clearflt(pnp);
900 		break;
901 
902 	case PCWATCH:	/* set or clear watched areas */
903 		if (PROCESS_NOT_32BIT(p))
904 			error = EOVERFLOW;
905 		else {
906 			prwatch_t prwatch;
907 
908 			prwatch.pr_vaddr = argp->prwatch.pr_vaddr;
909 			prwatch.pr_size = argp->prwatch.pr_size;
910 			prwatch.pr_wflags = argp->prwatch.pr_wflags;
911 			prwatch.pr_pad = argp->prwatch.pr_pad;
912 			error = pr_watch(pnp, &prwatch, &unlocked);
913 			if (error && unlocked)
914 				return (error);
915 		}
916 		break;
917 
918 	case PCAGENT:	/* create the /proc agent lwp in the target process */
919 		if (PROCESS_NOT_32BIT(p))
920 			error = EOVERFLOW;
921 		else {
922 			prgregset_t prgregset;
923 			kthread_t *t = pr_thread(pnp);
924 			klwp_t *lwp = ttolwp(t);
925 			thread_unlock(t);
926 			mutex_exit(&p->p_lock);
927 			prgregset_32ton(lwp, argp->prgregset, prgregset);
928 			mutex_enter(&p->p_lock);
929 			error = pr_agent(pnp, prgregset, &unlocked);
930 			if (error && unlocked)
931 				return (error);
932 		}
933 		break;
934 
935 	case PCREAD:	/* read from the address space */
936 	case PCWRITE:	/* write to the address space */
937 		if (PROCESS_NOT_32BIT(p))
938 			error = EOVERFLOW;
939 		else {
940 			enum uio_rw rw = (cmd == PCREAD)? UIO_READ : UIO_WRITE;
941 			priovec_t priovec;
942 
943 			priovec.pio_base =
944 			    (void *)(uintptr_t)argp->priovec.pio_base;
945 			priovec.pio_len = (size_t)argp->priovec.pio_len;
946 			priovec.pio_offset = (off_t)
947 			    (uint32_t)argp->priovec.pio_offset;
948 			error = pr_rdwr(p, rw, &priovec);
949 		}
950 		break;
951 
952 	case PCSCRED:	/* set the process credentials */
953 	case PCSCREDX:
954 		{
955 			/*
956 			 * All the fields in these structures are exactly the
957 			 * same and so the structures are compatible.  In case
958 			 * this ever changes, we catch this with the ASSERT
959 			 * below.
960 			 */
961 			prcred_t *prcred = (prcred_t *)&argp->prcred;
962 
963 #ifndef __lint
964 			ASSERT(sizeof (prcred_t) == sizeof (prcred32_t));
965 #endif
966 
967 			error = pr_scred(p, prcred, cr, cmd == PCSCREDX);
968 			break;
969 		}
970 
971 	case PCSPRIV:	/* set the process privileges */
972 		error = pr_spriv(p, &argp->prpriv, cr);
973 		break;
974 
975 	case PCSZONE:	/* set the process's zoneid */
976 		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
977 		break;
978 	}
979 
980 	if (error)
981 		prunlock(pnp);
982 	return (error);
983 }
984 
985 #endif	/* _SYSCALL32_IMPL */
986 
987 /*
988  * Return the specific or chosen thread/lwp for a control operation.
989  * Returns with the thread locked via thread_lock(t).
990  */
991 kthread_t *
992 pr_thread(prnode_t *pnp)
993 {
994 	prcommon_t *pcp = pnp->pr_common;
995 	kthread_t *t;
996 
997 	if (pcp->prc_flags & PRC_LWP) {
998 		t = pcp->prc_thread;
999 		ASSERT(t != NULL);
1000 		thread_lock(t);
1001 	} else {
1002 		proc_t *p = pcp->prc_proc;
1003 		t = prchoose(p);	/* returns locked thread */
1004 		ASSERT(t != NULL);
1005 	}
1006 
1007 	return (t);
1008 }
1009 
1010 /*
1011  * Direct the process or lwp to stop.
1012  */
1013 void
1014 pr_stop(prnode_t *pnp)
1015 {
1016 	prcommon_t *pcp = pnp->pr_common;
1017 	proc_t *p = pcp->prc_proc;
1018 	kthread_t *t;
1019 	vnode_t *vp;
1020 
1021 	/*
1022 	 * If already stopped, do nothing; otherwise flag
1023 	 * it to be stopped the next time it tries to run.
1024 	 * If sleeping at interruptible priority, set it
1025 	 * running so it will stop within cv_wait_sig().
1026 	 *
1027 	 * Take care to cooperate with jobcontrol: if an lwp
1028 	 * is stopped due to the default action of a jobcontrol
1029 	 * stop signal, flag it to be stopped the next time it
1030 	 * starts due to a SIGCONT signal.
1031 	 */
1032 	if (pcp->prc_flags & PRC_LWP)
1033 		t = pcp->prc_thread;
1034 	else
1035 		t = p->p_tlist;
1036 	ASSERT(t != NULL);
1037 
1038 	do {
1039 		int notify;
1040 
1041 		notify = 0;
1042 		thread_lock(t);
1043 		if (!ISTOPPED(t)) {
1044 			t->t_proc_flag |= TP_PRSTOP;
1045 			t->t_sig_check = 1;	/* do ISSIG */
1046 		}
1047 
1048 		/* Move the thread from wait queue to run queue */
1049 		if (ISWAITING(t))
1050 			setrun_locked(t);
1051 
1052 		if (ISWAKEABLE(t)) {
1053 			if (t->t_wchan0 == NULL)
1054 				setrun_locked(t);
1055 			else if (!VSTOPPED(t)) {
1056 				/*
1057 				 * Mark it virtually stopped.
1058 				 */
1059 				t->t_proc_flag |= TP_PRVSTOP;
1060 				notify = 1;
1061 			}
1062 		}
1063 		/*
1064 		 * force the thread into the kernel
1065 		 * if it is not already there.
1066 		 */
1067 		prpokethread(t);
1068 		thread_unlock(t);
1069 		if (notify &&
1070 		    (vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace) != NULL)
1071 			prnotify(vp);
1072 		if (pcp->prc_flags & PRC_LWP)
1073 			break;
1074 	} while ((t = t->t_forw) != p->p_tlist);
1075 
1076 	/*
1077 	 * We do this just in case the thread we asked
1078 	 * to stop is in holdlwps() (called from cfork()).
1079 	 */
1080 	cv_broadcast(&p->p_holdlwps);
1081 }
1082 
1083 /*
1084  * Sleep until the lwp stops, but cooperate with
1085  * jobcontrol:  Don't wake up if the lwp is stopped
1086  * due to the default action of a jobcontrol stop signal.
1087  * If this is the process file descriptor, sleep
1088  * until all of the process's lwps stop.
1089  */
1090 int
1091 pr_wait_stop(prnode_t *pnp, time_t timeo)
1092 {
1093 	prcommon_t *pcp = pnp->pr_common;
1094 	proc_t *p = pcp->prc_proc;
1095 	timestruc_t rqtime;
1096 	timestruc_t *rqtp = NULL;
1097 	int timecheck = 0;
1098 	kthread_t *t;
1099 	int error;
1100 
1101 	if (timeo > 0) {	/* millisecond timeout */
1102 		/*
1103 		 * Determine the precise future time of the requested timeout.
1104 		 */
1105 		timestruc_t now;
1106 
1107 		timecheck = timechanged;
1108 		gethrestime(&now);
1109 		rqtp = &rqtime;
1110 		rqtp->tv_sec = timeo / MILLISEC;
1111 		rqtp->tv_nsec = (timeo % MILLISEC) * MICROSEC;
1112 		timespecadd(rqtp, &now);
1113 	}
1114 
1115 	if (pcp->prc_flags & PRC_LWP) {	/* lwp file descriptor */
1116 		t = pcp->prc_thread;
1117 		ASSERT(t != NULL);
1118 		thread_lock(t);
1119 		while (!ISTOPPED(t) && !VSTOPPED(t)) {
1120 			thread_unlock(t);
1121 			mutex_enter(&pcp->prc_mutex);
1122 			prunlock(pnp);
1123 			error = pr_wait(pcp, rqtp, timecheck);
1124 			if (error)	/* -1 is timeout */
1125 				return (error);
1126 			if ((error = prlock(pnp, ZNO)) != 0)
1127 				return (error);
1128 			ASSERT(p == pcp->prc_proc);
1129 			ASSERT(t == pcp->prc_thread);
1130 			thread_lock(t);
1131 		}
1132 		thread_unlock(t);
1133 	} else {			/* process file descriptor */
1134 		t = prchoose(p);	/* returns locked thread */
1135 		ASSERT(t != NULL);
1136 		ASSERT(MUTEX_HELD(&p->p_lock));
1137 		while ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t)) ||
1138 		    (p->p_flag & SEXITLWPS)) {
1139 			thread_unlock(t);
1140 			mutex_enter(&pcp->prc_mutex);
1141 			prunlock(pnp);
1142 			error = pr_wait(pcp, rqtp, timecheck);
1143 			if (error)	/* -1 is timeout */
1144 				return (error);
1145 			if ((error = prlock(pnp, ZNO)) != 0)
1146 				return (error);
1147 			ASSERT(p == pcp->prc_proc);
1148 			t = prchoose(p);	/* returns locked t */
1149 			ASSERT(t != NULL);
1150 		}
1151 		thread_unlock(t);
1152 	}
1153 
1154 	ASSERT(!(pcp->prc_flags & PRC_DESTROY) && p->p_stat != SZOMB &&
1155 	    t != NULL && t->t_state != TS_ZOMB);
1156 
1157 	return (0);
1158 }
1159 
1160 int
1161 pr_setrun(prnode_t *pnp, ulong_t flags)
1162 {
1163 	prcommon_t *pcp = pnp->pr_common;
1164 	proc_t *p = pcp->prc_proc;
1165 	kthread_t *t;
1166 	klwp_t *lwp;
1167 
1168 	/*
1169 	 * Cannot set an lwp running if it is not stopped.
1170 	 * Also, no lwp other than the /proc agent lwp can
1171 	 * be set running so long as the /proc agent lwp exists.
1172 	 */
1173 	t = pr_thread(pnp);	/* returns locked thread */
1174 	if ((!ISTOPPED(t) && !VSTOPPED(t) &&
1175 	    !(t->t_proc_flag & TP_PRSTOP)) ||
1176 	    (p->p_agenttp != NULL &&
1177 	    (t != p->p_agenttp || !(pcp->prc_flags & PRC_LWP)))) {
1178 		thread_unlock(t);
1179 		return (EBUSY);
1180 	}
1181 	thread_unlock(t);
1182 	if (flags & ~(PRCSIG|PRCFAULT|PRSTEP|PRSTOP|PRSABORT))
1183 		return (EINVAL);
1184 	lwp = ttolwp(t);
1185 	if ((flags & PRCSIG) && lwp->lwp_cursig != SIGKILL) {
1186 		/*
1187 		 * Discard current siginfo_t, if any.
1188 		 */
1189 		lwp->lwp_cursig = 0;
1190 		lwp->lwp_extsig = 0;
1191 		if (lwp->lwp_curinfo) {
1192 			siginfofree(lwp->lwp_curinfo);
1193 			lwp->lwp_curinfo = NULL;
1194 		}
1195 	}
1196 	if (flags & PRCFAULT)
1197 		lwp->lwp_curflt = 0;
1198 	/*
1199 	 * We can't hold p->p_lock when we touch the lwp's registers.
1200 	 * It may be swapped out and we will get a page fault.
1201 	 */
1202 	if (flags & PRSTEP) {
1203 		mutex_exit(&p->p_lock);
1204 		prstep(lwp, 0);
1205 		mutex_enter(&p->p_lock);
1206 	}
1207 	if (flags & PRSTOP) {
1208 		t->t_proc_flag |= TP_PRSTOP;
1209 		t->t_sig_check = 1;	/* do ISSIG */
1210 	}
1211 	if (flags & PRSABORT)
1212 		lwp->lwp_sysabort = 1;
1213 	thread_lock(t);
1214 	if ((pcp->prc_flags & PRC_LWP) || (flags & (PRSTEP|PRSTOP))) {
1215 		/*
1216 		 * Here, we are dealing with a single lwp.
1217 		 */
1218 		if (ISTOPPED(t)) {
1219 			t->t_schedflag |= TS_PSTART;
1220 			t->t_dtrace_stop = 0;
1221 			setrun_locked(t);
1222 		} else if (flags & PRSABORT) {
1223 			t->t_proc_flag &=
1224 			    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1225 			setrun_locked(t);
1226 		} else if (!(flags & PRSTOP)) {
1227 			t->t_proc_flag &=
1228 			    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1229 		}
1230 		thread_unlock(t);
1231 	} else {
1232 		/*
1233 		 * Here, we are dealing with the whole process.
1234 		 */
1235 		if (ISTOPPED(t)) {
1236 			/*
1237 			 * The representative lwp is stopped on an event
1238 			 * of interest.  We demote it to PR_REQUESTED and
1239 			 * choose another representative lwp.  If the new
1240 			 * representative lwp is not stopped on an event of
1241 			 * interest (other than PR_REQUESTED), we set the
1242 			 * whole process running, else we leave the process
1243 			 * stopped showing the next event of interest.
1244 			 */
1245 			kthread_t *tx = NULL;
1246 
1247 			if (!(flags & PRSABORT) &&
1248 			    t->t_whystop == PR_SYSENTRY &&
1249 			    t->t_whatstop == SYS_lwp_exit)
1250 				tx = t;		/* remember the exiting lwp */
1251 			t->t_whystop = PR_REQUESTED;
1252 			t->t_whatstop = 0;
1253 			thread_unlock(t);
1254 			t = prchoose(p);	/* returns locked t */
1255 			ASSERT(ISTOPPED(t) || VSTOPPED(t));
1256 			if (VSTOPPED(t) ||
1257 			    t->t_whystop == PR_REQUESTED) {
1258 				thread_unlock(t);
1259 				allsetrun(p);
1260 			} else {
1261 				thread_unlock(t);
1262 				/*
1263 				 * As a special case, if the old representative
1264 				 * lwp was stopped on entry to _lwp_exit()
1265 				 * (and we are not aborting the system call),
1266 				 * we set the old representative lwp running.
1267 				 * We do this so that the next process stop
1268 				 * will find the exiting lwp gone.
1269 				 */
1270 				if (tx != NULL) {
1271 					thread_lock(tx);
1272 					tx->t_schedflag |= TS_PSTART;
1273 					t->t_dtrace_stop = 0;
1274 					setrun_locked(tx);
1275 					thread_unlock(tx);
1276 				}
1277 			}
1278 		} else {
1279 			/*
1280 			 * No event of interest; set all of the lwps running.
1281 			 */
1282 			if (flags & PRSABORT) {
1283 				t->t_proc_flag &=
1284 				    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1285 				setrun_locked(t);
1286 			}
1287 			thread_unlock(t);
1288 			allsetrun(p);
1289 		}
1290 	}
1291 	return (0);
1292 }
1293 
1294 /*
1295  * Wait until process/lwp stops or until timer expires.
1296  * Return EINTR for an interruption, -1 for timeout, else 0.
1297  */
1298 int
1299 pr_wait(prcommon_t *pcp,	/* prcommon referring to process/lwp */
1300 	timestruc_t *ts,	/* absolute time of timeout, if any */
1301 	int timecheck)
1302 {
1303 	int rval;
1304 
1305 	ASSERT(MUTEX_HELD(&pcp->prc_mutex));
1306 	rval = cv_waituntil_sig(&pcp->prc_wait, &pcp->prc_mutex, ts, timecheck);
1307 	mutex_exit(&pcp->prc_mutex);
1308 	switch (rval) {
1309 	case 0:
1310 		return (EINTR);
1311 	case -1:
1312 		return (-1);
1313 	default:
1314 		return (0);
1315 	}
1316 }
1317 
1318 /*
1319  * Make all threads in the process runnable.
1320  */
1321 void
1322 allsetrun(proc_t *p)
1323 {
1324 	kthread_t *t;
1325 
1326 	ASSERT(MUTEX_HELD(&p->p_lock));
1327 
1328 	if ((t = p->p_tlist) != NULL) {
1329 		do {
1330 			thread_lock(t);
1331 			ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1332 			t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1333 			if (ISTOPPED(t)) {
1334 				t->t_schedflag |= TS_PSTART;
1335 				t->t_dtrace_stop = 0;
1336 				setrun_locked(t);
1337 			}
1338 			thread_unlock(t);
1339 		} while ((t = t->t_forw) != p->p_tlist);
1340 	}
1341 }
1342 
1343 /*
1344  * Wait for the process to die.
1345  * We do this after sending SIGKILL because we know it will
1346  * die soon and we want subsequent operations to return ENOENT.
1347  */
1348 void
1349 pr_wait_die(prnode_t *pnp)
1350 {
1351 	proc_t *p;
1352 
1353 	mutex_enter(&pidlock);
1354 	while ((p = pnp->pr_common->prc_proc) != NULL && p->p_stat != SZOMB) {
1355 		if (!cv_wait_sig(&p->p_srwchan_cv, &pidlock))
1356 			break;
1357 	}
1358 	mutex_exit(&pidlock);
1359 }
1360 
1361 static void
1362 pr_settrace(proc_t *p, sigset_t *sp)
1363 {
1364 	prdelset(sp, SIGKILL);
1365 	prassignset(&p->p_sigmask, sp);
1366 	if (!sigisempty(&p->p_sigmask))
1367 		p->p_proc_flag |= P_PR_TRACE;
1368 	else if (prisempty(&p->p_fltmask)) {
1369 		user_t *up = PTOU(p);
1370 		if (up->u_systrap == 0)
1371 			p->p_proc_flag &= ~P_PR_TRACE;
1372 	}
1373 }
1374 
1375 int
1376 pr_setsig(prnode_t *pnp, siginfo_t *sip)
1377 {
1378 	int sig = sip->si_signo;
1379 	prcommon_t *pcp = pnp->pr_common;
1380 	proc_t *p = pcp->prc_proc;
1381 	kthread_t *t;
1382 	klwp_t *lwp;
1383 	int error = 0;
1384 
1385 	t = pr_thread(pnp);	/* returns locked thread */
1386 	thread_unlock(t);
1387 	lwp = ttolwp(t);
1388 	if (sig < 0 || sig >= NSIG)
1389 		/* Zero allowed here */
1390 		error = EINVAL;
1391 	else if (lwp->lwp_cursig == SIGKILL)
1392 		/* "can't happen", but just in case */
1393 		error = EBUSY;
1394 	else if ((lwp->lwp_cursig = (uchar_t)sig) == 0) {
1395 		lwp->lwp_extsig = 0;
1396 		/*
1397 		 * Discard current siginfo_t, if any.
1398 		 */
1399 		if (lwp->lwp_curinfo) {
1400 			siginfofree(lwp->lwp_curinfo);
1401 			lwp->lwp_curinfo = NULL;
1402 		}
1403 	} else {
1404 		kthread_t *tx;
1405 		sigqueue_t *sqp;
1406 
1407 		/* drop p_lock to do kmem_alloc(KM_SLEEP) */
1408 		mutex_exit(&p->p_lock);
1409 		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
1410 		mutex_enter(&p->p_lock);
1411 
1412 		if (lwp->lwp_curinfo == NULL)
1413 			lwp->lwp_curinfo = sqp;
1414 		else
1415 			kmem_free(sqp, sizeof (sigqueue_t));
1416 		/*
1417 		 * Copy contents of info to current siginfo_t.
1418 		 */
1419 		bcopy(sip, &lwp->lwp_curinfo->sq_info,
1420 		    sizeof (lwp->lwp_curinfo->sq_info));
1421 		/*
1422 		 * Prevent contents published by si_zoneid-unaware /proc
1423 		 * consumers from being incorrectly filtered.  Because
1424 		 * an uninitialized si_zoneid is the same as
1425 		 * GLOBAL_ZONEID, this means that you can't pr_setsig a
1426 		 * process in a non-global zone with a siginfo which
1427 		 * appears to come from the global zone.
1428 		 */
1429 		if (SI_FROMUSER(sip) && sip->si_zoneid == 0)
1430 			lwp->lwp_curinfo->sq_info.si_zoneid =
1431 			    p->p_zone->zone_id;
1432 		/*
1433 		 * Side-effects for SIGKILL and jobcontrol signals.
1434 		 */
1435 		if (sig == SIGKILL) {
1436 			p->p_flag |= SKILLED;
1437 			p->p_flag &= ~SEXTKILLED;
1438 		} else if (sig == SIGCONT) {
1439 			p->p_flag |= SSCONT;
1440 			sigdelq(p, NULL, SIGSTOP);
1441 			sigdelq(p, NULL, SIGTSTP);
1442 			sigdelq(p, NULL, SIGTTOU);
1443 			sigdelq(p, NULL, SIGTTIN);
1444 			sigdiffset(&p->p_sig, &stopdefault);
1445 			sigdiffset(&p->p_extsig, &stopdefault);
1446 			if ((tx = p->p_tlist) != NULL) {
1447 				do {
1448 					sigdelq(p, tx, SIGSTOP);
1449 					sigdelq(p, tx, SIGTSTP);
1450 					sigdelq(p, tx, SIGTTOU);
1451 					sigdelq(p, tx, SIGTTIN);
1452 					sigdiffset(&tx->t_sig, &stopdefault);
1453 					sigdiffset(&tx->t_extsig, &stopdefault);
1454 				} while ((tx = tx->t_forw) != p->p_tlist);
1455 			}
1456 		} else if (sigismember(&stopdefault, sig)) {
1457 			if (PTOU(p)->u_signal[sig-1] == SIG_DFL &&
1458 			    (sig == SIGSTOP || !p->p_pgidp->pid_pgorphaned))
1459 				p->p_flag &= ~SSCONT;
1460 			sigdelq(p, NULL, SIGCONT);
1461 			sigdelset(&p->p_sig, SIGCONT);
1462 			sigdelset(&p->p_extsig, SIGCONT);
1463 			if ((tx = p->p_tlist) != NULL) {
1464 				do {
1465 					sigdelq(p, tx, SIGCONT);
1466 					sigdelset(&tx->t_sig, SIGCONT);
1467 					sigdelset(&tx->t_extsig, SIGCONT);
1468 				} while ((tx = tx->t_forw) != p->p_tlist);
1469 			}
1470 		}
1471 		thread_lock(t);
1472 		if (ISWAKEABLE(t) || ISWAITING(t)) {
1473 			/* Set signaled sleeping/waiting lwp running */
1474 			setrun_locked(t);
1475 		} else if (t->t_state == TS_STOPPED && sig == SIGKILL) {
1476 			/* If SIGKILL, set stopped lwp running */
1477 			p->p_stopsig = 0;
1478 			t->t_schedflag |= TS_XSTART | TS_PSTART;
1479 			t->t_dtrace_stop = 0;
1480 			setrun_locked(t);
1481 		}
1482 		t->t_sig_check = 1;	/* so ISSIG will be done */
1483 		thread_unlock(t);
1484 		/*
1485 		 * More jobcontrol side-effects.
1486 		 */
1487 		if (sig == SIGCONT && (tx = p->p_tlist) != NULL) {
1488 			p->p_stopsig = 0;
1489 			do {
1490 				thread_lock(tx);
1491 				if (tx->t_state == TS_STOPPED &&
1492 				    tx->t_whystop == PR_JOBCONTROL) {
1493 					tx->t_schedflag |= TS_XSTART;
1494 					setrun_locked(tx);
1495 				}
1496 				thread_unlock(tx);
1497 			} while ((tx = tx->t_forw) != p->p_tlist);
1498 		}
1499 	}
1500 	return (error);
1501 }
1502 
1503 int
1504 pr_kill(prnode_t *pnp, int sig, cred_t *cr)
1505 {
1506 	prcommon_t *pcp = pnp->pr_common;
1507 	proc_t *p = pcp->prc_proc;
1508 	k_siginfo_t info;
1509 
1510 	if (sig <= 0 || sig >= NSIG)
1511 		return (EINVAL);
1512 
1513 	bzero(&info, sizeof (info));
1514 	info.si_signo = sig;
1515 	info.si_code = SI_USER;
1516 	info.si_pid = curproc->p_pid;
1517 	info.si_ctid = PRCTID(curproc);
1518 	info.si_zoneid = getzoneid();
1519 	info.si_uid = crgetruid(cr);
1520 	sigaddq(p, (pcp->prc_flags & PRC_LWP)?
1521 	    pcp->prc_thread : NULL, &info, KM_NOSLEEP);
1522 
1523 	return (0);
1524 }
1525 
1526 int
1527 pr_unkill(prnode_t *pnp, int sig)
1528 {
1529 	prcommon_t *pcp = pnp->pr_common;
1530 	proc_t *p = pcp->prc_proc;
1531 	sigqueue_t *infop = NULL;
1532 
1533 	if (sig <= 0 || sig >= NSIG || sig == SIGKILL)
1534 		return (EINVAL);
1535 
1536 	if (pcp->prc_flags & PRC_LWP)
1537 		sigdeq(p, pcp->prc_thread, sig, &infop);
1538 	else
1539 		sigdeq(p, NULL, sig, &infop);
1540 
1541 	if (infop)
1542 		siginfofree(infop);
1543 
1544 	return (0);
1545 }
1546 
1547 int
1548 pr_nice(proc_t *p, int nice, cred_t *cr)
1549 {
1550 	kthread_t *t;
1551 	int err;
1552 	int error = 0;
1553 
1554 	t = p->p_tlist;
1555 	do {
1556 		ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1557 		err = CL_DONICE(t, cr, nice, (int *)NULL);
1558 		schedctl_set_cidpri(t);
1559 		if (error == 0)
1560 			error = err;
1561 	} while ((t = t->t_forw) != p->p_tlist);
1562 
1563 	return (error);
1564 }
1565 
1566 void
1567 pr_setentryexit(proc_t *p, sysset_t *sysset, int entry)
1568 {
1569 	user_t *up = PTOU(p);
1570 
1571 	if (entry) {
1572 		prassignset(&up->u_entrymask, sysset);
1573 	} else {
1574 		prassignset(&up->u_exitmask, sysset);
1575 	}
1576 	if (!prisempty(&up->u_entrymask) ||
1577 	    !prisempty(&up->u_exitmask)) {
1578 		up->u_systrap = 1;
1579 		p->p_proc_flag |= P_PR_TRACE;
1580 		set_proc_sys(p);	/* set pre and post-sys flags */
1581 	} else {
1582 		up->u_systrap = 0;
1583 		if (sigisempty(&p->p_sigmask) &&
1584 		    prisempty(&p->p_fltmask))
1585 			p->p_proc_flag &= ~P_PR_TRACE;
1586 	}
1587 }
1588 
/* Every mode flag accepted by pr_set() and pr_unset(). */
#define	ALLFLAGS	\
	(PR_FORK | PR_RLC | PR_KLC | PR_ASYNC | \
	PR_BPTADJ | PR_MSACCT | PR_MSFORK | PR_PTRACE)
1591 
1592 int
1593 pr_set(proc_t *p, long flags)
1594 {
1595 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1596 		return (EBUSY);
1597 
1598 	if (flags & ~ALLFLAGS)
1599 		return (EINVAL);
1600 
1601 	if (flags & PR_FORK)
1602 		p->p_proc_flag |= P_PR_FORK;
1603 	if (flags & PR_RLC)
1604 		p->p_proc_flag |= P_PR_RUNLCL;
1605 	if (flags & PR_KLC)
1606 		p->p_proc_flag |= P_PR_KILLCL;
1607 	if (flags & PR_ASYNC)
1608 		p->p_proc_flag |= P_PR_ASYNC;
1609 	if (flags & PR_BPTADJ)
1610 		p->p_proc_flag |= P_PR_BPTADJ;
1611 	if (flags & PR_MSACCT)
1612 		if ((p->p_flag & SMSACCT) == 0)
1613 			estimate_msacct(p->p_tlist, gethrtime());
1614 	if (flags & PR_MSFORK)
1615 		p->p_flag |= SMSFORK;
1616 	if (flags & PR_PTRACE) {
1617 		p->p_proc_flag |= P_PR_PTRACE;
1618 		/* ptraced process must die if parent dead */
1619 		if (p->p_ppid == 1)
1620 			sigtoproc(p, NULL, SIGKILL);
1621 	}
1622 
1623 	return (0);
1624 }
1625 
1626 int
1627 pr_unset(proc_t *p, long flags)
1628 {
1629 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1630 		return (EBUSY);
1631 
1632 	if (flags & ~ALLFLAGS)
1633 		return (EINVAL);
1634 
1635 	if (flags & PR_FORK)
1636 		p->p_proc_flag &= ~P_PR_FORK;
1637 	if (flags & PR_RLC)
1638 		p->p_proc_flag &= ~P_PR_RUNLCL;
1639 	if (flags & PR_KLC)
1640 		p->p_proc_flag &= ~P_PR_KILLCL;
1641 	if (flags & PR_ASYNC)
1642 		p->p_proc_flag &= ~P_PR_ASYNC;
1643 	if (flags & PR_BPTADJ)
1644 		p->p_proc_flag &= ~P_PR_BPTADJ;
1645 	if (flags & PR_MSACCT)
1646 		disable_msacct(p);
1647 	if (flags & PR_MSFORK)
1648 		p->p_flag &= ~SMSFORK;
1649 	if (flags & PR_PTRACE)
1650 		p->p_proc_flag &= ~P_PR_PTRACE;
1651 
1652 	return (0);
1653 }
1654 
1655 static int
1656 pr_setfpregs(prnode_t *pnp, prfpregset_t *prfpregset)
1657 {
1658 	proc_t *p = pnp->pr_common->prc_proc;
1659 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1660 
1661 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1662 		thread_unlock(t);
1663 		return (EBUSY);
1664 	}
1665 	if (!prhasfp()) {
1666 		thread_unlock(t);
1667 		return (EINVAL);	/* No FP support */
1668 	}
1669 
1670 	/* drop p_lock while touching the lwp's stack */
1671 	thread_unlock(t);
1672 	mutex_exit(&p->p_lock);
1673 	prsetprfpregs(ttolwp(t), prfpregset);
1674 	mutex_enter(&p->p_lock);
1675 
1676 	return (0);
1677 }
1678 
1679 #ifdef	_SYSCALL32_IMPL
1680 static int
1681 pr_setfpregs32(prnode_t *pnp, prfpregset32_t *prfpregset)
1682 {
1683 	proc_t *p = pnp->pr_common->prc_proc;
1684 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1685 
1686 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1687 		thread_unlock(t);
1688 		return (EBUSY);
1689 	}
1690 	if (!prhasfp()) {
1691 		thread_unlock(t);
1692 		return (EINVAL);	/* No FP support */
1693 	}
1694 
1695 	/* drop p_lock while touching the lwp's stack */
1696 	thread_unlock(t);
1697 	mutex_exit(&p->p_lock);
1698 	prsetprfpregs32(ttolwp(t), prfpregset);
1699 	mutex_enter(&p->p_lock);
1700 
1701 	return (0);
1702 }
1703 #endif	/* _SYSCALL32_IMPL */
1704 
1705 #if defined(__sparc)
1706 /* ARGSUSED */
1707 static int
1708 pr_setxregs(prnode_t *pnp, prxregset_t *prxregset)
1709 {
1710 	proc_t *p = pnp->pr_common->prc_proc;
1711 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1712 
1713 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1714 		thread_unlock(t);
1715 		return (EBUSY);
1716 	}
1717 	thread_unlock(t);
1718 
1719 	if (!prhasx(p))
1720 		return (EINVAL);	/* No extra register support */
1721 
1722 	/* drop p_lock while touching the lwp's stack */
1723 	mutex_exit(&p->p_lock);
1724 	prsetprxregs(ttolwp(t), (caddr_t)prxregset);
1725 	mutex_enter(&p->p_lock);
1726 
1727 	return (0);
1728 }
1729 
1730 static int
1731 pr_setasrs(prnode_t *pnp, asrset_t asrset)
1732 {
1733 	proc_t *p = pnp->pr_common->prc_proc;
1734 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1735 
1736 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1737 		thread_unlock(t);
1738 		return (EBUSY);
1739 	}
1740 	thread_unlock(t);
1741 
1742 	/* drop p_lock while touching the lwp's stack */
1743 	mutex_exit(&p->p_lock);
1744 	prsetasregs(ttolwp(t), asrset);
1745 	mutex_enter(&p->p_lock);
1746 
1747 	return (0);
1748 }
1749 #endif
1750 
1751 static int
1752 pr_setvaddr(prnode_t *pnp, caddr_t vaddr)
1753 {
1754 	proc_t *p = pnp->pr_common->prc_proc;
1755 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1756 
1757 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1758 		thread_unlock(t);
1759 		return (EBUSY);
1760 	}
1761 
1762 	/* drop p_lock while touching the lwp's stack */
1763 	thread_unlock(t);
1764 	mutex_exit(&p->p_lock);
1765 	prsvaddr(ttolwp(t), vaddr);
1766 	mutex_enter(&p->p_lock);
1767 
1768 	return (0);
1769 }
1770 
1771 void
1772 pr_sethold(prnode_t *pnp, sigset_t *sp)
1773 {
1774 	proc_t *p = pnp->pr_common->prc_proc;
1775 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1776 
1777 	schedctl_finish_sigblock(t);
1778 	sigutok(sp, &t->t_hold);
1779 	if (ISWAKEABLE(t) &&
1780 	    (fsig(&p->p_sig, t) || fsig(&t->t_sig, t)))
1781 		setrun_locked(t);
1782 	t->t_sig_check = 1;	/* so thread will see new holdmask */
1783 	thread_unlock(t);
1784 }
1785 
1786 void
1787 pr_setfault(proc_t *p, fltset_t *fltp)
1788 {
1789 	prassignset(&p->p_fltmask, fltp);
1790 	if (!prisempty(&p->p_fltmask))
1791 		p->p_proc_flag |= P_PR_TRACE;
1792 	else if (sigisempty(&p->p_sigmask)) {
1793 		user_t *up = PTOU(p);
1794 		if (up->u_systrap == 0)
1795 			p->p_proc_flag &= ~P_PR_TRACE;
1796 	}
1797 }
1798 
1799 static int
1800 pr_clearsig(prnode_t *pnp)
1801 {
1802 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1803 	klwp_t *lwp = ttolwp(t);
1804 
1805 	thread_unlock(t);
1806 	if (lwp->lwp_cursig == SIGKILL)
1807 		return (EBUSY);
1808 
1809 	/*
1810 	 * Discard current siginfo_t, if any.
1811 	 */
1812 	lwp->lwp_cursig = 0;
1813 	lwp->lwp_extsig = 0;
1814 	if (lwp->lwp_curinfo) {
1815 		siginfofree(lwp->lwp_curinfo);
1816 		lwp->lwp_curinfo = NULL;
1817 	}
1818 
1819 	return (0);
1820 }
1821 
1822 static int
1823 pr_clearflt(prnode_t *pnp)
1824 {
1825 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1826 
1827 	thread_unlock(t);
1828 	ttolwp(t)->lwp_curflt = 0;
1829 
1830 	return (0);
1831 }
1832 
1833 static int
1834 pr_watch(prnode_t *pnp, prwatch_t *pwp, int *unlocked)
1835 {
1836 	proc_t *p = pnp->pr_common->prc_proc;
1837 	struct as *as = p->p_as;
1838 	uintptr_t vaddr = pwp->pr_vaddr;
1839 	size_t size = pwp->pr_size;
1840 	int wflags = pwp->pr_wflags;
1841 	ulong_t newpage = 0;
1842 	struct watched_area *pwa;
1843 	int error;
1844 
1845 	*unlocked = 0;
1846 
1847 	/*
1848 	 * Can't apply to a system process.
1849 	 */
1850 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1851 		return (EBUSY);
1852 
1853 	/*
1854 	 * Verify that the address range does not wrap
1855 	 * and that only the proper flags were specified.
1856 	 */
1857 	if ((wflags & ~WA_TRAPAFTER) == 0)
1858 		size = 0;
1859 	if (vaddr + size < vaddr ||
1860 	    (wflags & ~(WA_READ|WA_WRITE|WA_EXEC|WA_TRAPAFTER)) != 0 ||
1861 	    ((wflags & ~WA_TRAPAFTER) != 0 && size == 0))
1862 		return (EINVAL);
1863 
1864 	/*
1865 	 * Don't let the address range go above as->a_userlimit.
1866 	 * There is no error here, just a limitation.
1867 	 */
1868 	if (vaddr >= (uintptr_t)as->a_userlimit)
1869 		return (0);
1870 	if (vaddr + size > (uintptr_t)as->a_userlimit)
1871 		size = (uintptr_t)as->a_userlimit - vaddr;
1872 
1873 	/*
1874 	 * Compute maximum number of pages this will add.
1875 	 */
1876 	if ((wflags & ~WA_TRAPAFTER) != 0) {
1877 		ulong_t pagespan = (vaddr + size) - (vaddr & PAGEMASK);
1878 		newpage = btopr(pagespan);
1879 		if (newpage > 2 * prnwatch)
1880 			return (E2BIG);
1881 	}
1882 
1883 	/*
1884 	 * Force the process to be fully stopped.
1885 	 */
1886 	if (p == curproc) {
1887 		prunlock(pnp);
1888 		while (holdwatch() != 0)
1889 			continue;
1890 		if ((error = prlock(pnp, ZNO)) != 0) {
1891 			continuelwps(p);
1892 			*unlocked = 1;
1893 			return (error);
1894 		}
1895 	} else {
1896 		pauselwps(p);
1897 		while (pr_allstopped(p, 0) > 0) {
1898 			/*
1899 			 * This cv/mutex pair is persistent even
1900 			 * if the process disappears after we
1901 			 * unmark it and drop p->p_lock.
1902 			 */
1903 			kcondvar_t *cv = &pr_pid_cv[p->p_slot];
1904 			kmutex_t *mp = &p->p_lock;
1905 
1906 			prunmark(p);
1907 			(void) cv_wait(cv, mp);
1908 			mutex_exit(mp);
1909 			if ((error = prlock(pnp, ZNO)) != 0) {
1910 				/*
1911 				 * Unpause the process if it exists.
1912 				 */
1913 				p = pr_p_lock(pnp);
1914 				mutex_exit(&pr_pidlock);
1915 				if (p != NULL) {
1916 					unpauselwps(p);
1917 					prunlock(pnp);
1918 				}
1919 				*unlocked = 1;
1920 				return (error);
1921 			}
1922 		}
1923 	}
1924 
1925 	/*
1926 	 * Drop p->p_lock in order to perform the rest of this.
1927 	 * The process is still locked with the P_PR_LOCK flag.
1928 	 */
1929 	mutex_exit(&p->p_lock);
1930 
1931 	pwa = kmem_alloc(sizeof (struct watched_area), KM_SLEEP);
1932 	pwa->wa_vaddr = (caddr_t)vaddr;
1933 	pwa->wa_eaddr = (caddr_t)vaddr + size;
1934 	pwa->wa_flags = (ulong_t)wflags;
1935 
1936 	error = ((pwa->wa_flags & ~WA_TRAPAFTER) == 0)?
1937 	    clear_watched_area(p, pwa) : set_watched_area(p, pwa);
1938 
1939 	if (p == curproc) {
1940 		setallwatch();
1941 		mutex_enter(&p->p_lock);
1942 		continuelwps(p);
1943 	} else {
1944 		mutex_enter(&p->p_lock);
1945 		unpauselwps(p);
1946 	}
1947 
1948 	return (error);
1949 }
1950 
/*
 * True when the thread is stopped by job control (TS_STOPPED with
 * PR_JOBCONTROL) while a /proc directed stop (TP_PRSTOP) is also in
 * effect.
 */
#define	JDSTOPPED(t)					\
	((t)->t_state == TS_STOPPED &&			\
	(t)->t_whystop == PR_JOBCONTROL &&		\
	((t)->t_proc_flag & TP_PRSTOP))
1956 
1957 /*
1958  * pr_agent() creates the agent lwp. If the process is exiting while
1959  * we are creating an agent lwp, then exitlwps() waits until the
1960  * agent has been created using prbarrier().
1961  */
1962 static int
1963 pr_agent(prnode_t *pnp, prgregset_t prgregset, int *unlocked)
1964 {
1965 	proc_t *p = pnp->pr_common->prc_proc;
1966 	prcommon_t *pcp;
1967 	kthread_t *t;
1968 	kthread_t *ct;
1969 	klwp_t *clwp;
1970 	k_sigset_t smask;
1971 	int cid;
1972 	void *bufp = NULL;
1973 	int error;
1974 
1975 	*unlocked = 0;
1976 
1977 	/*
1978 	 * Cannot create the /proc agent lwp if :-
1979 	 * - the process is not fully stopped or directed to stop.
1980 	 * - there is an agent lwp already.
1981 	 * - the process has been killed.
1982 	 * - the process is exiting.
1983 	 * - it's a vfork(2) parent.
1984 	 */
1985 	t = prchoose(p);	/* returns locked thread */
1986 	ASSERT(t != NULL);
1987 
1988 	if ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t) && !JDSTOPPED(t)) ||
1989 	    p->p_agenttp != NULL ||
1990 	    (p->p_flag & (SKILLED | SEXITING | SVFWAIT))) {
1991 		thread_unlock(t);
1992 		return (EBUSY);
1993 	}
1994 
1995 	thread_unlock(t);
1996 	mutex_exit(&p->p_lock);
1997 
1998 	sigfillset(&smask);
1999 	sigdiffset(&smask, &cantmask);
2000 	clwp = lwp_create(lwp_rtt, NULL, 0, p, TS_STOPPED,
2001 	    t->t_pri, &smask, NOCLASS, 0);
2002 	if (clwp == NULL) {
2003 		mutex_enter(&p->p_lock);
2004 		return (ENOMEM);
2005 	}
2006 	prsetprregs(clwp, prgregset, 1);
2007 retry:
2008 	cid = t->t_cid;
2009 	(void) CL_ALLOC(&bufp, cid, KM_SLEEP);
2010 	mutex_enter(&p->p_lock);
2011 	if (cid != t->t_cid) {
2012 		/*
2013 		 * Someone just changed this thread's scheduling class,
2014 		 * so try pre-allocating the buffer again.  Hopefully we
2015 		 * don't hit this often.
2016 		 */
2017 		mutex_exit(&p->p_lock);
2018 		CL_FREE(cid, bufp);
2019 		goto retry;
2020 	}
2021 
2022 	clwp->lwp_ap = clwp->lwp_arg;
2023 	clwp->lwp_eosys = NORMALRETURN;
2024 	ct = lwptot(clwp);
2025 	ct->t_clfuncs = t->t_clfuncs;
2026 	CL_FORK(t, ct, bufp);
2027 	ct->t_cid = t->t_cid;
2028 	ct->t_proc_flag |= TP_PRSTOP;
2029 	/*
2030 	 * Setting t_sysnum to zero causes post_syscall()
2031 	 * to bypass all syscall checks and go directly to
2032 	 *	if (issig()) psig();
2033 	 * so that the agent lwp will stop in issig_forreal()
2034 	 * showing PR_REQUESTED.
2035 	 */
2036 	ct->t_sysnum = 0;
2037 	ct->t_post_sys = 1;
2038 	ct->t_sig_check = 1;
2039 	p->p_agenttp = ct;
2040 	ct->t_proc_flag &= ~TP_HOLDLWP;
2041 
2042 	pcp = pnp->pr_pcommon;
2043 	mutex_enter(&pcp->prc_mutex);
2044 
2045 	lwp_create_done(ct);
2046 
2047 	/*
2048 	 * Don't return until the agent is stopped on PR_REQUESTED.
2049 	 */
2050 
2051 	for (;;) {
2052 		prunlock(pnp);
2053 		*unlocked = 1;
2054 
2055 		/*
2056 		 * Wait for the agent to stop and notify us.
2057 		 * If we've been interrupted, return that information.
2058 		 */
2059 		error = pr_wait(pcp, NULL, 0);
2060 		if (error == EINTR) {
2061 			error = 0;
2062 			break;
2063 		}
2064 
2065 		/*
2066 		 * Confirm that the agent LWP has stopped.
2067 		 */
2068 
2069 		if ((error = prlock(pnp, ZNO)) != 0)
2070 			break;
2071 		*unlocked = 0;
2072 
2073 		/*
2074 		 * Since we dropped the lock on the process, the agent
2075 		 * may have disappeared or changed. Grab the current
2076 		 * agent and check fail if it has disappeared.
2077 		 */
2078 		if ((ct = p->p_agenttp) == NULL) {
2079 			error = ENOENT;
2080 			break;
2081 		}
2082 
2083 		mutex_enter(&pcp->prc_mutex);
2084 		thread_lock(ct);
2085 
2086 		if (ISTOPPED(ct)) {
2087 			thread_unlock(ct);
2088 			mutex_exit(&pcp->prc_mutex);
2089 			break;
2090 		}
2091 
2092 		thread_unlock(ct);
2093 	}
2094 
2095 	return (error ? error : -1);
2096 }
2097 
2098 static int
2099 pr_rdwr(proc_t *p, enum uio_rw rw, priovec_t *pio)
2100 {
2101 	caddr_t base = (caddr_t)pio->pio_base;
2102 	size_t cnt = pio->pio_len;
2103 	uintptr_t offset = (uintptr_t)pio->pio_offset;
2104 	struct uio auio;
2105 	struct iovec aiov;
2106 	int error = 0;
2107 
2108 	if ((p->p_flag & SSYS) || p->p_as == &kas)
2109 		error = EIO;
2110 	else if ((base + cnt) < base || (offset + cnt) < offset)
2111 		error = EINVAL;
2112 	else if (cnt != 0) {
2113 		aiov.iov_base = base;
2114 		aiov.iov_len = cnt;
2115 
2116 		auio.uio_loffset = offset;
2117 		auio.uio_iov = &aiov;
2118 		auio.uio_iovcnt = 1;
2119 		auio.uio_resid = cnt;
2120 		auio.uio_segflg = UIO_USERSPACE;
2121 		auio.uio_llimit = (longlong_t)MAXOFFSET_T;
2122 		auio.uio_fmode = FREAD|FWRITE;
2123 		auio.uio_extflg = UIO_COPY_DEFAULT;
2124 
2125 		mutex_exit(&p->p_lock);
2126 		error = prusrio(p, rw, &auio, 0);
2127 		mutex_enter(&p->p_lock);
2128 
2129 		/*
2130 		 * We have no way to return the i/o count,
2131 		 * like read() or write() would do, so we
2132 		 * return an error if the i/o was truncated.
2133 		 */
2134 		if (auio.uio_resid != 0 && error == 0)
2135 			error = EIO;
2136 	}
2137 
2138 	return (error);
2139 }
2140 
2141 static int
2142 pr_scred(proc_t *p, prcred_t *prcred, cred_t *cr, boolean_t dogrps)
2143 {
2144 	kthread_t *t;
2145 	cred_t *oldcred;
2146 	cred_t *newcred;
2147 	uid_t oldruid;
2148 	int error;
2149 	zone_t *zone = crgetzone(cr);
2150 
2151 	if (!VALID_UID(prcred->pr_euid, zone) ||
2152 	    !VALID_UID(prcred->pr_ruid, zone) ||
2153 	    !VALID_UID(prcred->pr_suid, zone) ||
2154 	    !VALID_GID(prcred->pr_egid, zone) ||
2155 	    !VALID_GID(prcred->pr_rgid, zone) ||
2156 	    !VALID_GID(prcred->pr_sgid, zone))
2157 		return (EINVAL);
2158 
2159 	if (dogrps) {
2160 		int ngrp = prcred->pr_ngroups;
2161 		int i;
2162 
2163 		if (ngrp < 0 || ngrp > ngroups_max)
2164 			return (EINVAL);
2165 
2166 		for (i = 0; i < ngrp; i++) {
2167 			if (!VALID_GID(prcred->pr_groups[i], zone))
2168 				return (EINVAL);
2169 		}
2170 	}
2171 
2172 	error = secpolicy_allow_setid(cr, prcred->pr_euid, B_FALSE);
2173 
2174 	if (error == 0 && prcred->pr_ruid != prcred->pr_euid)
2175 		error = secpolicy_allow_setid(cr, prcred->pr_ruid, B_FALSE);
2176 
2177 	if (error == 0 && prcred->pr_suid != prcred->pr_euid &&
2178 	    prcred->pr_suid != prcred->pr_ruid)
2179 		error = secpolicy_allow_setid(cr, prcred->pr_suid, B_FALSE);
2180 
2181 	if (error)
2182 		return (error);
2183 
2184 	mutex_exit(&p->p_lock);
2185 
2186 	/* hold old cred so it doesn't disappear while we dup it */
2187 	mutex_enter(&p->p_crlock);
2188 	crhold(oldcred = p->p_cred);
2189 	mutex_exit(&p->p_crlock);
2190 	newcred = crdup(oldcred);
2191 	oldruid = crgetruid(oldcred);
2192 	crfree(oldcred);
2193 
2194 	/* Error checking done above */
2195 	(void) crsetresuid(newcred, prcred->pr_ruid, prcred->pr_euid,
2196 	    prcred->pr_suid);
2197 	(void) crsetresgid(newcred, prcred->pr_rgid, prcred->pr_egid,
2198 	    prcred->pr_sgid);
2199 
2200 	if (dogrps) {
2201 		(void) crsetgroups(newcred, prcred->pr_ngroups,
2202 		    prcred->pr_groups);
2203 
2204 	}
2205 
2206 	mutex_enter(&p->p_crlock);
2207 	oldcred = p->p_cred;
2208 	p->p_cred = newcred;
2209 	mutex_exit(&p->p_crlock);
2210 	crfree(oldcred);
2211 
2212 	/*
2213 	 * Keep count of processes per uid consistent.
2214 	 */
2215 	if (oldruid != prcred->pr_ruid) {
2216 		zoneid_t zoneid = crgetzoneid(newcred);
2217 
2218 		mutex_enter(&pidlock);
2219 		upcount_dec(oldruid, zoneid);
2220 		upcount_inc(prcred->pr_ruid, zoneid);
2221 		mutex_exit(&pidlock);
2222 	}
2223 
2224 	/*
2225 	 * Broadcast the cred change to the threads.
2226 	 */
2227 	mutex_enter(&p->p_lock);
2228 	t = p->p_tlist;
2229 	do {
2230 		t->t_pre_sys = 1; /* so syscall will get new cred */
2231 	} while ((t = t->t_forw) != p->p_tlist);
2232 
2233 	return (0);
2234 }
2235 
2236 /*
2237  * Change process credentials to specified zone.  Used to temporarily
2238  * set a process to run in the global zone; only transitions between
2239  * the process's actual zone and the global zone are allowed.
2240  */
2241 static int
2242 pr_szoneid(proc_t *p, zoneid_t zoneid, cred_t *cr)
2243 {
2244 	kthread_t *t;
2245 	cred_t *oldcred;
2246 	cred_t *newcred;
2247 	zone_t *zptr;
2248 	zoneid_t oldzoneid;
2249 
2250 	if (secpolicy_zone_config(cr) != 0)
2251 		return (EPERM);
2252 	if (zoneid != GLOBAL_ZONEID && zoneid != p->p_zone->zone_id)
2253 		return (EINVAL);
2254 	if ((zptr = zone_find_by_id(zoneid)) == NULL)
2255 		return (EINVAL);
2256 	mutex_exit(&p->p_lock);
2257 	mutex_enter(&p->p_crlock);
2258 	oldcred = p->p_cred;
2259 	crhold(oldcred);
2260 	mutex_exit(&p->p_crlock);
2261 	newcred = crdup(oldcred);
2262 	oldzoneid = crgetzoneid(oldcred);
2263 	crfree(oldcred);
2264 
2265 	crsetzone(newcred, zptr);
2266 	zone_rele(zptr);
2267 
2268 	mutex_enter(&p->p_crlock);
2269 	oldcred = p->p_cred;
2270 	p->p_cred = newcred;
2271 	mutex_exit(&p->p_crlock);
2272 	crfree(oldcred);
2273 
2274 	/*
2275 	 * The target process is changing zones (according to its cred), so
2276 	 * update the per-zone upcounts, which are based on process creds.
2277 	 */
2278 	if (oldzoneid != zoneid) {
2279 		uid_t ruid = crgetruid(newcred);
2280 
2281 		mutex_enter(&pidlock);
2282 		upcount_dec(ruid, oldzoneid);
2283 		upcount_inc(ruid, zoneid);
2284 		mutex_exit(&pidlock);
2285 	}
2286 	/*
2287 	 * Broadcast the cred change to the threads.
2288 	 */
2289 	mutex_enter(&p->p_lock);
2290 	t = p->p_tlist;
2291 	do {
2292 		t->t_pre_sys = 1;	/* so syscall will get new cred */
2293 	} while ((t = t->t_forw) != p->p_tlist);
2294 
2295 	return (0);
2296 }
2297 
2298 static int
2299 pr_spriv(proc_t *p, prpriv_t *prpriv, cred_t *cr)
2300 {
2301 	kthread_t *t;
2302 	int err;
2303 
2304 	ASSERT(MUTEX_HELD(&p->p_lock));
2305 
2306 	if ((err = priv_pr_spriv(p, prpriv, cr)) == 0) {
2307 		/*
2308 		 * Broadcast the cred change to the threads.
2309 		 */
2310 		t = p->p_tlist;
2311 		do {
2312 			t->t_pre_sys = 1; /* so syscall will get new cred */
2313 		} while ((t = t->t_forw) != p->p_tlist);
2314 	}
2315 
2316 	return (err);
2317 }
2318 
2319 /*
2320  * Return -1 if the process is the parent of a vfork(1) whose child has yet to
2321  * terminate or perform an exec(2).
2322  *
2323  * Returns 0 if the process is fully stopped except for the current thread (if
2324  * we are operating on our own process), 1 otherwise.
2325  *
2326  * If the watchstop flag is set, then we ignore threads with TP_WATCHSTOP set.
2327  * See holdwatch() for details.
2328  */
2329 int
2330 pr_allstopped(proc_t *p, int watchstop)
2331 {
2332 	kthread_t *t;
2333 	int rv = 0;
2334 
2335 	ASSERT(MUTEX_HELD(&p->p_lock));
2336 
2337 	if (p->p_flag & SVFWAIT)	/* waiting for vfork'd child to exec */
2338 		return (-1);
2339 
2340 	if ((t = p->p_tlist) != NULL) {
2341 		do {
2342 			if (t == curthread || VSTOPPED(t) ||
2343 			    (watchstop && (t->t_proc_flag & TP_WATCHSTOP)))
2344 				continue;
2345 			thread_lock(t);
2346 			switch (t->t_state) {
2347 			case TS_ZOMB:
2348 			case TS_STOPPED:
2349 				break;
2350 			case TS_SLEEP:
2351 				if (!(t->t_flag & T_WAKEABLE) ||
2352 				    t->t_wchan0 == NULL)
2353 					rv = 1;
2354 				break;
2355 			default:
2356 				rv = 1;
2357 				break;
2358 			}
2359 			thread_unlock(t);
2360 		} while (rv == 0 && (t = t->t_forw) != p->p_tlist);
2361 	}
2362 
2363 	return (rv);
2364 }
2365 
2366 /*
2367  * Cause all lwps in the process to pause (for watchpoint operations).
2368  */
2369 static void
2370 pauselwps(proc_t *p)
2371 {
2372 	kthread_t *t;
2373 
2374 	ASSERT(MUTEX_HELD(&p->p_lock));
2375 	ASSERT(p != curproc);
2376 
2377 	if ((t = p->p_tlist) != NULL) {
2378 		do {
2379 			thread_lock(t);
2380 			t->t_proc_flag |= TP_PAUSE;
2381 			aston(t);
2382 			if ((ISWAKEABLE(t) && (t->t_wchan0 == NULL)) ||
2383 			    ISWAITING(t)) {
2384 				setrun_locked(t);
2385 			}
2386 			prpokethread(t);
2387 			thread_unlock(t);
2388 		} while ((t = t->t_forw) != p->p_tlist);
2389 	}
2390 }
2391 
2392 /*
2393  * undo the effects of pauselwps()
2394  */
2395 static void
2396 unpauselwps(proc_t *p)
2397 {
2398 	kthread_t *t;
2399 
2400 	ASSERT(MUTEX_HELD(&p->p_lock));
2401 	ASSERT(p != curproc);
2402 
2403 	if ((t = p->p_tlist) != NULL) {
2404 		do {
2405 			thread_lock(t);
2406 			t->t_proc_flag &= ~TP_PAUSE;
2407 			if (t->t_state == TS_STOPPED) {
2408 				t->t_schedflag |= TS_UNPAUSE;
2409 				t->t_dtrace_stop = 0;
2410 				setrun_locked(t);
2411 			}
2412 			thread_unlock(t);
2413 		} while ((t = t->t_forw) != p->p_tlist);
2414 	}
2415 }
2416 
2417 /*
2418  * Cancel all watched areas.  Called from prclose().
2419  */
2420 proc_t *
2421 pr_cancel_watch(prnode_t *pnp)
2422 {
2423 	proc_t *p = pnp->pr_pcommon->prc_proc;
2424 	struct as *as;
2425 	kthread_t *t;
2426 
2427 	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2428 
2429 	if (!pr_watch_active(p))
2430 		return (p);
2431 
2432 	/*
2433 	 * Pause the process before dealing with the watchpoints.
2434 	 */
2435 	if (p == curproc) {
2436 		prunlock(pnp);
2437 		while (holdwatch() != 0)
2438 			continue;
2439 		p = pr_p_lock(pnp);
2440 		mutex_exit(&pr_pidlock);
2441 		ASSERT(p == curproc);
2442 	} else {
2443 		pauselwps(p);
2444 		while (p != NULL && pr_allstopped(p, 0) > 0) {
2445 			/*
2446 			 * This cv/mutex pair is persistent even
2447 			 * if the process disappears after we
2448 			 * unmark it and drop p->p_lock.
2449 			 */
2450 			kcondvar_t *cv = &pr_pid_cv[p->p_slot];
2451 			kmutex_t *mp = &p->p_lock;
2452 
2453 			prunmark(p);
2454 			(void) cv_wait(cv, mp);
2455 			mutex_exit(mp);
2456 			p = pr_p_lock(pnp);  /* NULL if process disappeared */
2457 			mutex_exit(&pr_pidlock);
2458 		}
2459 	}
2460 
2461 	if (p == NULL)		/* the process disappeared */
2462 		return (NULL);
2463 
2464 	ASSERT(p == pnp->pr_pcommon->prc_proc);
2465 	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2466 
2467 	if (pr_watch_active(p)) {
2468 		pr_free_watchpoints(p);
2469 		if ((t = p->p_tlist) != NULL) {
2470 			do {
2471 				watch_disable(t);
2472 
2473 			} while ((t = t->t_forw) != p->p_tlist);
2474 		}
2475 	}
2476 
2477 	if ((as = p->p_as) != NULL) {
2478 		avl_tree_t *tree;
2479 		struct watched_page *pwp;
2480 
2481 		/*
2482 		 * If this is the parent of a vfork, the watched page
2483 		 * list has been moved temporarily to p->p_wpage.
2484 		 */
2485 		if (avl_numnodes(&p->p_wpage) != 0)
2486 			tree = &p->p_wpage;
2487 		else
2488 			tree = &as->a_wpage;
2489 
2490 		mutex_exit(&p->p_lock);
2491 		AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2492 
2493 		for (pwp = avl_first(tree); pwp != NULL;
2494 		    pwp = AVL_NEXT(tree, pwp)) {
2495 			pwp->wp_read = 0;
2496 			pwp->wp_write = 0;
2497 			pwp->wp_exec = 0;
2498 			if ((pwp->wp_flags & WP_SETPROT) == 0) {
2499 				pwp->wp_flags |= WP_SETPROT;
2500 				pwp->wp_prot = pwp->wp_oprot;
2501 				pwp->wp_list = p->p_wprot;
2502 				p->p_wprot = pwp;
2503 			}
2504 		}
2505 
2506 		AS_LOCK_EXIT(as, &as->a_lock);
2507 		mutex_enter(&p->p_lock);
2508 	}
2509 
2510 	/*
2511 	 * Unpause the process now.
2512 	 */
2513 	if (p == curproc)
2514 		continuelwps(p);
2515 	else
2516 		unpauselwps(p);
2517 
2518 	return (p);
2519 }
2520