xref: /illumos-gate/usr/src/uts/common/fs/proc/prcontrol.c (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/uio.h>
31 #include <sys/param.h>
32 #include <sys/cmn_err.h>
33 #include <sys/cred.h>
34 #include <sys/policy.h>
35 #include <sys/debug.h>
36 #include <sys/errno.h>
37 #include <sys/file.h>
38 #include <sys/inline.h>
39 #include <sys/kmem.h>
40 #include <sys/proc.h>
41 #include <sys/regset.h>
42 #include <sys/sysmacros.h>
43 #include <sys/systm.h>
44 #include <sys/vfs.h>
45 #include <sys/vnode.h>
46 #include <sys/signal.h>
47 #include <sys/auxv.h>
48 #include <sys/user.h>
49 #include <sys/class.h>
50 #include <sys/fault.h>
51 #include <sys/syscall.h>
52 #include <sys/procfs.h>
53 #include <sys/zone.h>
54 #include <sys/copyops.h>
55 #include <sys/schedctl.h>
56 #include <vm/as.h>
57 #include <vm/seg.h>
58 #include <fs/proc/prdata.h>
59 #include <sys/contract/process_impl.h>
60 
61 static	void	pr_settrace(proc_t *, sigset_t *);
62 static	int	pr_setfpregs(prnode_t *, prfpregset_t *);
63 #if defined(__sparc)
64 static	int	pr_setxregs(prnode_t *, prxregset_t *);
65 static	int	pr_setasrs(prnode_t *, asrset_t);
66 #endif
67 static	int	pr_setvaddr(prnode_t *, caddr_t);
68 static	int	pr_clearsig(prnode_t *);
69 static	int	pr_clearflt(prnode_t *);
70 static	int	pr_watch(prnode_t *, prwatch_t *, int *);
71 static	int	pr_agent(prnode_t *, prgregset_t, int *);
72 static	int	pr_rdwr(proc_t *, enum uio_rw, priovec_t *);
73 static	int	pr_scred(proc_t *, prcred_t *, cred_t *, boolean_t);
74 static	int	pr_spriv(proc_t *, prpriv_t *, cred_t *);
75 static	int	pr_szoneid(proc_t *, zoneid_t, cred_t *);
76 static	void	pauselwps(proc_t *);
77 static	void	unpauselwps(proc_t *);
78 
79 typedef union {
80 	long		sig;		/* PCKILL, PCUNKILL */
81 	long		nice;		/* PCNICE */
82 	long		timeo;		/* PCTWSTOP */
83 	ulong_t		flags;		/* PCRUN, PCSET, PCUNSET */
84 	caddr_t		vaddr;		/* PCSVADDR */
85 	siginfo_t	siginfo;	/* PCSSIG */
86 	sigset_t	sigset;		/* PCSTRACE, PCSHOLD */
87 	fltset_t	fltset;		/* PCSFAULT */
88 	sysset_t	sysset;		/* PCSENTRY, PCSEXIT */
89 	prgregset_t	prgregset;	/* PCSREG, PCAGENT */
90 	prfpregset_t	prfpregset;	/* PCSFPREG */
91 #if defined(__sparc)
92 	prxregset_t	prxregset;	/* PCSXREG */
93 	asrset_t	asrset;		/* PCSASRS */
94 #endif
95 	prwatch_t	prwatch;	/* PCWATCH */
96 	priovec_t	priovec;	/* PCREAD, PCWRITE */
97 	prcred_t	prcred;		/* PCSCRED */
98 	prpriv_t	prpriv;		/* PCSPRIV */
99 	long		przoneid;	/* PCSZONE */
100 } arg_t;
101 
102 static	int	pr_control(long, arg_t *, prnode_t *, cred_t *);
103 
104 static size_t
105 ctlsize(long cmd, size_t resid, arg_t *argp)
106 {
107 	size_t size = sizeof (long);
108 	size_t rnd;
109 	int ngrp;
110 
111 	switch (cmd) {
112 	case PCNULL:
113 	case PCSTOP:
114 	case PCDSTOP:
115 	case PCWSTOP:
116 	case PCCSIG:
117 	case PCCFAULT:
118 		break;
119 	case PCSSIG:
120 		size += sizeof (siginfo_t);
121 		break;
122 	case PCTWSTOP:
123 		size += sizeof (long);
124 		break;
125 	case PCKILL:
126 	case PCUNKILL:
127 	case PCNICE:
128 		size += sizeof (long);
129 		break;
130 	case PCRUN:
131 	case PCSET:
132 	case PCUNSET:
133 		size += sizeof (ulong_t);
134 		break;
135 	case PCSVADDR:
136 		size += sizeof (caddr_t);
137 		break;
138 	case PCSTRACE:
139 	case PCSHOLD:
140 		size += sizeof (sigset_t);
141 		break;
142 	case PCSFAULT:
143 		size += sizeof (fltset_t);
144 		break;
145 	case PCSENTRY:
146 	case PCSEXIT:
147 		size += sizeof (sysset_t);
148 		break;
149 	case PCSREG:
150 	case PCAGENT:
151 		size += sizeof (prgregset_t);
152 		break;
153 	case PCSFPREG:
154 		size += sizeof (prfpregset_t);
155 		break;
156 #if defined(__sparc)
157 	case PCSXREG:
158 		size += sizeof (prxregset_t);
159 		break;
160 	case PCSASRS:
161 		size += sizeof (asrset_t);
162 		break;
163 #endif
164 	case PCWATCH:
165 		size += sizeof (prwatch_t);
166 		break;
167 	case PCREAD:
168 	case PCWRITE:
169 		size += sizeof (priovec_t);
170 		break;
171 	case PCSCRED:
172 		size += sizeof (prcred_t);
173 		break;
174 	case PCSCREDX:
175 		/*
176 		 * We cannot derefence the pr_ngroups fields if it
177 		 * we don't have enough data.
178 		 */
179 		if (resid < size + sizeof (prcred_t) - sizeof (gid_t))
180 			return (0);
181 		ngrp = argp->prcred.pr_ngroups;
182 		if (ngrp < 0 || ngrp > ngroups_max)
183 			return (0);
184 
185 		/* The result can be smaller than sizeof (prcred_t) */
186 		size += sizeof (prcred_t) - sizeof (gid_t);
187 		size += ngrp * sizeof (gid_t);
188 		break;
189 	case PCSPRIV:
190 		if (resid >= size + sizeof (prpriv_t))
191 			size += priv_prgetprivsize(&argp->prpriv);
192 		else
193 			return (0);
194 		break;
195 	case PCSZONE:
196 		size += sizeof (long);
197 		break;
198 	default:
199 		return (0);
200 	}
201 
202 	/* Round up to a multiple of long, unless exact amount written */
203 	if (size < resid) {
204 		rnd = size & (sizeof (long) - 1);
205 
206 		if (rnd != 0)
207 			size += sizeof (long) - rnd;
208 	}
209 
210 	if (size > resid)
211 		return (0);
212 	return (size);
213 }
214 
215 /*
216  * Control operations (lots).
217  */
218 int
219 prwritectl(vnode_t *vp, uio_t *uiop, cred_t *cr)
220 {
221 #define	MY_BUFFER_SIZE \
222 		100 > 1 + sizeof (arg_t) / sizeof (long) ? \
223 		100 : 1 + sizeof (arg_t) / sizeof (long)
224 	long buf[MY_BUFFER_SIZE];
225 	long *bufp;
226 	size_t resid = 0;
227 	size_t size;
228 	prnode_t *pnp = VTOP(vp);
229 	int error;
230 	int locked = 0;
231 
232 	while (uiop->uio_resid) {
233 		/*
234 		 * Read several commands in one gulp.
235 		 */
236 		bufp = buf;
237 		if (resid) {	/* move incomplete command to front of buffer */
238 			long *tail;
239 
240 			if (resid >= sizeof (buf))
241 				break;
242 			tail = (long *)((char *)buf + sizeof (buf) - resid);
243 			do {
244 				*bufp++ = *tail++;
245 			} while ((resid -= sizeof (long)) != 0);
246 		}
247 		resid = sizeof (buf) - ((char *)bufp - (char *)buf);
248 		if (resid > uiop->uio_resid)
249 			resid = uiop->uio_resid;
250 		if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
251 			return (error);
252 		resid += (char *)bufp - (char *)buf;
253 		bufp = buf;
254 
255 		do {		/* loop over commands in buffer */
256 			long cmd = bufp[0];
257 			arg_t *argp = (arg_t *)&bufp[1];
258 
259 			size = ctlsize(cmd, resid, argp);
260 			if (size == 0)	/* incomplete or invalid command */
261 				break;
262 			/*
263 			 * Perform the specified control operation.
264 			 */
265 			if (!locked) {
266 				if ((error = prlock(pnp, ZNO)) != 0)
267 					return (error);
268 				locked = 1;
269 			}
270 			if (error = pr_control(cmd, argp, pnp, cr)) {
271 				if (error == -1)	/* -1 is timeout */
272 					locked = 0;
273 				else
274 					return (error);
275 			}
276 			bufp = (long *)((char *)bufp + size);
277 		} while ((resid -= size) != 0);
278 
279 		if (locked) {
280 			prunlock(pnp);
281 			locked = 0;
282 		}
283 	}
284 	return (resid? EINVAL : 0);
285 }
286 
287 static int
288 pr_control(long cmd, arg_t *argp, prnode_t *pnp, cred_t *cr)
289 {
290 	prcommon_t *pcp;
291 	proc_t *p;
292 	int unlocked;
293 	int error = 0;
294 
295 	if (cmd == PCNULL)
296 		return (0);
297 
298 	pcp = pnp->pr_common;
299 	p = pcp->prc_proc;
300 	ASSERT(p != NULL);
301 
302 	switch (cmd) {
303 
304 	default:
305 		error = EINVAL;
306 		break;
307 
308 	case PCSTOP:	/* direct process or lwp to stop and wait for stop */
309 	case PCDSTOP:	/* direct process or lwp to stop, don't wait */
310 	case PCWSTOP:	/* wait for process or lwp to stop */
311 	case PCTWSTOP:	/* wait for process or lwp to stop, with timeout */
312 		{
313 			time_t timeo;
314 
315 			/*
316 			 * Can't apply to a system process.
317 			 */
318 			if ((p->p_flag & SSYS) || p->p_as == &kas) {
319 				error = EBUSY;
320 				break;
321 			}
322 
323 			if (cmd == PCSTOP || cmd == PCDSTOP)
324 				pr_stop(pnp);
325 
326 			if (cmd == PCDSTOP)
327 				break;
328 
329 			/*
330 			 * If an lwp is waiting for itself or its process,
331 			 * don't wait. The stopped lwp would never see the
332 			 * fact that it is stopped.
333 			 */
334 			if ((pcp->prc_flags & PRC_LWP)?
335 			    (pcp->prc_thread == curthread) : (p == curproc)) {
336 				if (cmd == PCWSTOP || cmd == PCTWSTOP)
337 					error = EBUSY;
338 				break;
339 			}
340 
341 			timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
342 			if ((error = pr_wait_stop(pnp, timeo)) != 0)
343 				return (error);
344 
345 			break;
346 		}
347 
348 	case PCRUN:	/* make lwp or process runnable */
349 		error = pr_setrun(pnp, argp->flags);
350 		break;
351 
352 	case PCSTRACE:	/* set signal trace mask */
353 		pr_settrace(p,  &argp->sigset);
354 		break;
355 
356 	case PCSSIG:	/* set current signal */
357 		error = pr_setsig(pnp, &argp->siginfo);
358 		if (argp->siginfo.si_signo == SIGKILL && error == 0) {
359 			prunlock(pnp);
360 			pr_wait_die(pnp);
361 			return (-1);
362 		}
363 		break;
364 
365 	case PCKILL:	/* send signal */
366 		error = pr_kill(pnp, (int)argp->sig, cr);
367 		if (error == 0 && argp->sig == SIGKILL) {
368 			prunlock(pnp);
369 			pr_wait_die(pnp);
370 			return (-1);
371 		}
372 		break;
373 
374 	case PCUNKILL:	/* delete a pending signal */
375 		error = pr_unkill(pnp, (int)argp->sig);
376 		break;
377 
378 	case PCNICE:	/* set nice priority */
379 		error = pr_nice(p, (int)argp->nice, cr);
380 		break;
381 
382 	case PCSENTRY:	/* set syscall entry bit mask */
383 	case PCSEXIT:	/* set syscall exit bit mask */
384 		pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
385 		break;
386 
387 	case PCSET:	/* set process flags */
388 		error = pr_set(p, argp->flags);
389 		break;
390 
391 	case PCUNSET:	/* unset process flags */
392 		error = pr_unset(p, argp->flags);
393 		break;
394 
395 	case PCSREG:	/* set general registers */
396 		{
397 			kthread_t *t = pr_thread(pnp);
398 
399 			if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
400 				thread_unlock(t);
401 				error = EBUSY;
402 			} else {
403 				thread_unlock(t);
404 				mutex_exit(&p->p_lock);
405 				prsetprregs(ttolwp(t), argp->prgregset, 0);
406 				mutex_enter(&p->p_lock);
407 			}
408 			break;
409 		}
410 
411 	case PCSFPREG:	/* set floating-point registers */
412 		error = pr_setfpregs(pnp, &argp->prfpregset);
413 		break;
414 
415 	case PCSXREG:	/* set extra registers */
416 #if defined(__sparc)
417 		error = pr_setxregs(pnp, &argp->prxregset);
418 #else
419 		error = EINVAL;
420 #endif
421 		break;
422 
423 #if defined(__sparc)
424 	case PCSASRS:	/* set ancillary state registers */
425 		error = pr_setasrs(pnp, argp->asrset);
426 		break;
427 #endif
428 
429 	case PCSVADDR:	/* set virtual address at which to resume */
430 		error = pr_setvaddr(pnp, argp->vaddr);
431 		break;
432 
433 	case PCSHOLD:	/* set signal-hold mask */
434 		pr_sethold(pnp, &argp->sigset);
435 		break;
436 
437 	case PCSFAULT:	/* set mask of traced faults */
438 		pr_setfault(p, &argp->fltset);
439 		break;
440 
441 	case PCCSIG:	/* clear current signal */
442 		error = pr_clearsig(pnp);
443 		break;
444 
445 	case PCCFAULT:	/* clear current fault */
446 		error = pr_clearflt(pnp);
447 		break;
448 
449 	case PCWATCH:	/* set or clear watched areas */
450 		error = pr_watch(pnp, &argp->prwatch, &unlocked);
451 		if (error && unlocked)
452 			return (error);
453 		break;
454 
455 	case PCAGENT:	/* create the /proc agent lwp in the target process */
456 		error = pr_agent(pnp, argp->prgregset, &unlocked);
457 		if (error && unlocked)
458 			return (error);
459 		break;
460 
461 	case PCREAD:	/* read from the address space */
462 		error = pr_rdwr(p, UIO_READ, &argp->priovec);
463 		break;
464 
465 	case PCWRITE:	/* write to the address space */
466 		error = pr_rdwr(p, UIO_WRITE, &argp->priovec);
467 		break;
468 
469 	case PCSCRED:	/* set the process credentials */
470 	case PCSCREDX:
471 		error = pr_scred(p, &argp->prcred, cr, cmd == PCSCREDX);
472 		break;
473 
474 	case PCSPRIV:	/* set the process privileges */
475 		error = pr_spriv(p, &argp->prpriv, cr);
476 		break;
477 	case PCSZONE:	/* set the process's zoneid credentials */
478 		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
479 		break;
480 	}
481 
482 	if (error)
483 		prunlock(pnp);
484 	return (error);
485 }
486 
487 #ifdef _SYSCALL32_IMPL
488 
489 typedef union {
490 	int32_t		sig;		/* PCKILL, PCUNKILL */
491 	int32_t		nice;		/* PCNICE */
492 	int32_t		timeo;		/* PCTWSTOP */
493 	uint32_t	flags;		/* PCRUN, PCSET, PCUNSET */
494 	caddr32_t	vaddr;		/* PCSVADDR */
495 	siginfo32_t	siginfo;	/* PCSSIG */
496 	sigset_t	sigset;		/* PCSTRACE, PCSHOLD */
497 	fltset_t	fltset;		/* PCSFAULT */
498 	sysset_t	sysset;		/* PCSENTRY, PCSEXIT */
499 	prgregset32_t	prgregset;	/* PCSREG, PCAGENT */
500 	prfpregset32_t	prfpregset;	/* PCSFPREG */
501 #if defined(__sparc)
502 	prxregset_t	prxregset;	/* PCSXREG */
503 #endif
504 	prwatch32_t	prwatch;	/* PCWATCH */
505 	priovec32_t	priovec;	/* PCREAD, PCWRITE */
506 	prcred32_t	prcred;		/* PCSCRED */
507 	prpriv_t	prpriv;		/* PCSPRIV */
508 	int32_t		przoneid;	/* PCSZONE */
509 } arg32_t;
510 
511 static	int	pr_control32(int32_t, arg32_t *, prnode_t *, cred_t *);
512 static	int	pr_setfpregs32(prnode_t *, prfpregset32_t *);
513 
514 /*
515  * Note that while ctlsize32() can use argp, it must do so only in a way
516  * that assumes 32-bit rather than 64-bit alignment as argp is a pointer
517  * to an array of 32-bit values and only 32-bit alignment is ensured.
518  */
519 static size_t
520 ctlsize32(int32_t cmd, size_t resid, arg32_t *argp)
521 {
522 	size_t size = sizeof (int32_t);
523 	size_t rnd;
524 	int ngrp;
525 
526 	switch (cmd) {
527 	case PCNULL:
528 	case PCSTOP:
529 	case PCDSTOP:
530 	case PCWSTOP:
531 	case PCCSIG:
532 	case PCCFAULT:
533 		break;
534 	case PCSSIG:
535 		size += sizeof (siginfo32_t);
536 		break;
537 	case PCTWSTOP:
538 		size += sizeof (int32_t);
539 		break;
540 	case PCKILL:
541 	case PCUNKILL:
542 	case PCNICE:
543 		size += sizeof (int32_t);
544 		break;
545 	case PCRUN:
546 	case PCSET:
547 	case PCUNSET:
548 		size += sizeof (uint32_t);
549 		break;
550 	case PCSVADDR:
551 		size += sizeof (caddr32_t);
552 		break;
553 	case PCSTRACE:
554 	case PCSHOLD:
555 		size += sizeof (sigset_t);
556 		break;
557 	case PCSFAULT:
558 		size += sizeof (fltset_t);
559 		break;
560 	case PCSENTRY:
561 	case PCSEXIT:
562 		size += sizeof (sysset_t);
563 		break;
564 	case PCSREG:
565 	case PCAGENT:
566 		size += sizeof (prgregset32_t);
567 		break;
568 	case PCSFPREG:
569 		size += sizeof (prfpregset32_t);
570 		break;
571 #if defined(__sparc)
572 	case PCSXREG:
573 		size += sizeof (prxregset_t);
574 		break;
575 #endif
576 	case PCWATCH:
577 		size += sizeof (prwatch32_t);
578 		break;
579 	case PCREAD:
580 	case PCWRITE:
581 		size += sizeof (priovec32_t);
582 		break;
583 	case PCSCRED:
584 		size += sizeof (prcred32_t);
585 		break;
586 	case PCSCREDX:
587 		/*
588 		 * We cannot derefence the pr_ngroups fields if it
589 		 * we don't have enough data.
590 		 */
591 		if (resid < size + sizeof (prcred32_t) - sizeof (gid32_t))
592 			return (0);
593 		ngrp = argp->prcred.pr_ngroups;
594 		if (ngrp < 0 || ngrp > ngroups_max)
595 			return (0);
596 
597 		/* The result can be smaller than sizeof (prcred32_t) */
598 		size += sizeof (prcred32_t) - sizeof (gid32_t);
599 		size += ngrp * sizeof (gid32_t);
600 		break;
601 	case PCSPRIV:
602 		if (resid >= size + sizeof (prpriv_t))
603 			size += priv_prgetprivsize(&argp->prpriv);
604 		else
605 			return (0);
606 		break;
607 	case PCSZONE:
608 		size += sizeof (int32_t);
609 		break;
610 	default:
611 		return (0);
612 	}
613 
614 	/* Round up to a multiple of int32_t */
615 	rnd = size & (sizeof (int32_t) - 1);
616 
617 	if (rnd != 0)
618 		size += sizeof (int32_t) - rnd;
619 
620 	if (size > resid)
621 		return (0);
622 	return (size);
623 }
624 
625 /*
626  * Control operations (lots).
627  */
628 int
629 prwritectl32(struct vnode *vp, struct uio *uiop, cred_t *cr)
630 {
631 #define	MY_BUFFER_SIZE32 \
632 		100 > 1 + sizeof (arg32_t) / sizeof (int32_t) ? \
633 		100 : 1 + sizeof (arg32_t) / sizeof (int32_t)
634 	int32_t buf[MY_BUFFER_SIZE32];
635 	int32_t *bufp;
636 	arg32_t arg;
637 	size_t resid = 0;
638 	size_t size;
639 	prnode_t *pnp = VTOP(vp);
640 	int error;
641 	int locked = 0;
642 
643 	while (uiop->uio_resid) {
644 		/*
645 		 * Read several commands in one gulp.
646 		 */
647 		bufp = buf;
648 		if (resid) {	/* move incomplete command to front of buffer */
649 			int32_t *tail;
650 
651 			if (resid >= sizeof (buf))
652 				break;
653 			tail = (int32_t *)((char *)buf + sizeof (buf) - resid);
654 			do {
655 				*bufp++ = *tail++;
656 			} while ((resid -= sizeof (int32_t)) != 0);
657 		}
658 		resid = sizeof (buf) - ((char *)bufp - (char *)buf);
659 		if (resid > uiop->uio_resid)
660 			resid = uiop->uio_resid;
661 		if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
662 			return (error);
663 		resid += (char *)bufp - (char *)buf;
664 		bufp = buf;
665 
666 		do {		/* loop over commands in buffer */
667 			int32_t cmd = bufp[0];
668 			arg32_t *argp = (arg32_t *)&bufp[1];
669 
670 			size = ctlsize32(cmd, resid, argp);
671 			if (size == 0)	/* incomplete or invalid command */
672 				break;
673 			/*
674 			 * Perform the specified control operation.
675 			 */
676 			if (!locked) {
677 				if ((error = prlock(pnp, ZNO)) != 0)
678 					return (error);
679 				locked = 1;
680 			}
681 
682 			/*
683 			 * Since some members of the arg32_t union contain
684 			 * 64-bit values (which must be 64-bit aligned), we
685 			 * can't simply pass a pointer to the structure as
686 			 * it may be unaligned. Note that we do pass the
687 			 * potentially unaligned structure to ctlsize32()
688 			 * above, but that uses it a way that makes no
689 			 * assumptions about alignment.
690 			 */
691 			ASSERT(size - sizeof (cmd) <= sizeof (arg));
692 			bcopy(argp, &arg, size - sizeof (cmd));
693 
694 			if (error = pr_control32(cmd, &arg, pnp, cr)) {
695 				if (error == -1)	/* -1 is timeout */
696 					locked = 0;
697 				else
698 					return (error);
699 			}
700 			bufp = (int32_t *)((char *)bufp + size);
701 		} while ((resid -= size) != 0);
702 
703 		if (locked) {
704 			prunlock(pnp);
705 			locked = 0;
706 		}
707 	}
708 	return (resid? EINVAL : 0);
709 }
710 
711 static int
712 pr_control32(int32_t cmd, arg32_t *argp, prnode_t *pnp, cred_t *cr)
713 {
714 	prcommon_t *pcp;
715 	proc_t *p;
716 	int unlocked;
717 	int error = 0;
718 
719 	if (cmd == PCNULL)
720 		return (0);
721 
722 	pcp = pnp->pr_common;
723 	p = pcp->prc_proc;
724 	ASSERT(p != NULL);
725 
726 	switch (cmd) {
727 
728 	default:
729 		error = EINVAL;
730 		break;
731 
732 	case PCSTOP:	/* direct process or lwp to stop and wait for stop */
733 	case PCDSTOP:	/* direct process or lwp to stop, don't wait */
734 	case PCWSTOP:	/* wait for process or lwp to stop */
735 	case PCTWSTOP:	/* wait for process or lwp to stop, with timeout */
736 		{
737 			time_t timeo;
738 
739 			/*
740 			 * Can't apply to a system process.
741 			 */
742 			if ((p->p_flag & SSYS) || p->p_as == &kas) {
743 				error = EBUSY;
744 				break;
745 			}
746 
747 			if (cmd == PCSTOP || cmd == PCDSTOP)
748 				pr_stop(pnp);
749 
750 			if (cmd == PCDSTOP)
751 				break;
752 
753 			/*
754 			 * If an lwp is waiting for itself or its process,
755 			 * don't wait. The lwp will never see the fact that
756 			 * itself is stopped.
757 			 */
758 			if ((pcp->prc_flags & PRC_LWP)?
759 			    (pcp->prc_thread == curthread) : (p == curproc)) {
760 				if (cmd == PCWSTOP || cmd == PCTWSTOP)
761 					error = EBUSY;
762 				break;
763 			}
764 
765 			timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
766 			if ((error = pr_wait_stop(pnp, timeo)) != 0)
767 				return (error);
768 
769 			break;
770 		}
771 
772 	case PCRUN:	/* make lwp or process runnable */
773 		error = pr_setrun(pnp, (ulong_t)argp->flags);
774 		break;
775 
776 	case PCSTRACE:	/* set signal trace mask */
777 		pr_settrace(p,  &argp->sigset);
778 		break;
779 
780 	case PCSSIG:	/* set current signal */
781 		if (PROCESS_NOT_32BIT(p))
782 			error = EOVERFLOW;
783 		else {
784 			int sig = (int)argp->siginfo.si_signo;
785 			siginfo_t siginfo;
786 
787 			bzero(&siginfo, sizeof (siginfo));
788 			siginfo_32tok(&argp->siginfo, (k_siginfo_t *)&siginfo);
789 			error = pr_setsig(pnp, &siginfo);
790 			if (sig == SIGKILL && error == 0) {
791 				prunlock(pnp);
792 				pr_wait_die(pnp);
793 				return (-1);
794 			}
795 		}
796 		break;
797 
798 	case PCKILL:	/* send signal */
799 		error = pr_kill(pnp, (int)argp->sig, cr);
800 		if (error == 0 && argp->sig == SIGKILL) {
801 			prunlock(pnp);
802 			pr_wait_die(pnp);
803 			return (-1);
804 		}
805 		break;
806 
807 	case PCUNKILL:	/* delete a pending signal */
808 		error = pr_unkill(pnp, (int)argp->sig);
809 		break;
810 
811 	case PCNICE:	/* set nice priority */
812 		error = pr_nice(p, (int)argp->nice, cr);
813 		break;
814 
815 	case PCSENTRY:	/* set syscall entry bit mask */
816 	case PCSEXIT:	/* set syscall exit bit mask */
817 		pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
818 		break;
819 
820 	case PCSET:	/* set process flags */
821 		error = pr_set(p, (long)argp->flags);
822 		break;
823 
824 	case PCUNSET:	/* unset process flags */
825 		error = pr_unset(p, (long)argp->flags);
826 		break;
827 
828 	case PCSREG:	/* set general registers */
829 		if (PROCESS_NOT_32BIT(p))
830 			error = EOVERFLOW;
831 		else {
832 			kthread_t *t = pr_thread(pnp);
833 
834 			if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
835 				thread_unlock(t);
836 				error = EBUSY;
837 			} else {
838 				prgregset_t prgregset;
839 				klwp_t *lwp = ttolwp(t);
840 
841 				thread_unlock(t);
842 				mutex_exit(&p->p_lock);
843 				prgregset_32ton(lwp, argp->prgregset,
844 				    prgregset);
845 				prsetprregs(lwp, prgregset, 0);
846 				mutex_enter(&p->p_lock);
847 			}
848 		}
849 		break;
850 
851 	case PCSFPREG:	/* set floating-point registers */
852 		if (PROCESS_NOT_32BIT(p))
853 			error = EOVERFLOW;
854 		else
855 			error = pr_setfpregs32(pnp, &argp->prfpregset);
856 		break;
857 
858 	case PCSXREG:	/* set extra registers */
859 #if defined(__sparc)
860 		if (PROCESS_NOT_32BIT(p))
861 			error = EOVERFLOW;
862 		else
863 			error = pr_setxregs(pnp, &argp->prxregset);
864 #else
865 		error = EINVAL;
866 #endif
867 		break;
868 
869 	case PCSVADDR:	/* set virtual address at which to resume */
870 		if (PROCESS_NOT_32BIT(p))
871 			error = EOVERFLOW;
872 		else
873 			error = pr_setvaddr(pnp,
874 			    (caddr_t)(uintptr_t)argp->vaddr);
875 		break;
876 
877 	case PCSHOLD:	/* set signal-hold mask */
878 		pr_sethold(pnp, &argp->sigset);
879 		break;
880 
881 	case PCSFAULT:	/* set mask of traced faults */
882 		pr_setfault(p, &argp->fltset);
883 		break;
884 
885 	case PCCSIG:	/* clear current signal */
886 		error = pr_clearsig(pnp);
887 		break;
888 
889 	case PCCFAULT:	/* clear current fault */
890 		error = pr_clearflt(pnp);
891 		break;
892 
893 	case PCWATCH:	/* set or clear watched areas */
894 		if (PROCESS_NOT_32BIT(p))
895 			error = EOVERFLOW;
896 		else {
897 			prwatch_t prwatch;
898 
899 			prwatch.pr_vaddr = argp->prwatch.pr_vaddr;
900 			prwatch.pr_size = argp->prwatch.pr_size;
901 			prwatch.pr_wflags = argp->prwatch.pr_wflags;
902 			prwatch.pr_pad = argp->prwatch.pr_pad;
903 			error = pr_watch(pnp, &prwatch, &unlocked);
904 			if (error && unlocked)
905 				return (error);
906 		}
907 		break;
908 
909 	case PCAGENT:	/* create the /proc agent lwp in the target process */
910 		if (PROCESS_NOT_32BIT(p))
911 			error = EOVERFLOW;
912 		else {
913 			prgregset_t prgregset;
914 			kthread_t *t = pr_thread(pnp);
915 			klwp_t *lwp = ttolwp(t);
916 			thread_unlock(t);
917 			mutex_exit(&p->p_lock);
918 			prgregset_32ton(lwp, argp->prgregset, prgregset);
919 			mutex_enter(&p->p_lock);
920 			error = pr_agent(pnp, prgregset, &unlocked);
921 			if (error && unlocked)
922 				return (error);
923 		}
924 		break;
925 
926 	case PCREAD:	/* read from the address space */
927 	case PCWRITE:	/* write to the address space */
928 		if (PROCESS_NOT_32BIT(p))
929 			error = EOVERFLOW;
930 		else {
931 			enum uio_rw rw = (cmd == PCREAD)? UIO_READ : UIO_WRITE;
932 			priovec_t priovec;
933 
934 			priovec.pio_base =
935 			    (void *)(uintptr_t)argp->priovec.pio_base;
936 			priovec.pio_len = (size_t)argp->priovec.pio_len;
937 			priovec.pio_offset = (off_t)
938 			    (uint32_t)argp->priovec.pio_offset;
939 			error = pr_rdwr(p, rw, &priovec);
940 		}
941 		break;
942 
943 	case PCSCRED:	/* set the process credentials */
944 	case PCSCREDX:
945 		{
946 			/*
947 			 * All the fields in these structures are exactly the
948 			 * same and so the structures are compatible.  In case
949 			 * this ever changes, we catch this with the ASSERT
950 			 * below.
951 			 */
952 			prcred_t *prcred = (prcred_t *)&argp->prcred;
953 
954 #ifndef __lint
955 			ASSERT(sizeof (prcred_t) == sizeof (prcred32_t));
956 #endif
957 
958 			error = pr_scred(p, prcred, cr, cmd == PCSCREDX);
959 			break;
960 		}
961 
962 	case PCSPRIV:	/* set the process privileges */
963 		error = pr_spriv(p, &argp->prpriv, cr);
964 		break;
965 
966 	case PCSZONE:	/* set the process's zoneid */
967 		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
968 		break;
969 	}
970 
971 	if (error)
972 		prunlock(pnp);
973 	return (error);
974 }
975 
976 #endif	/* _SYSCALL32_IMPL */
977 
978 /*
979  * Return the specific or chosen thread/lwp for a control operation.
980  * Returns with the thread locked via thread_lock(t).
981  */
982 kthread_t *
983 pr_thread(prnode_t *pnp)
984 {
985 	prcommon_t *pcp = pnp->pr_common;
986 	kthread_t *t;
987 
988 	if (pcp->prc_flags & PRC_LWP) {
989 		t = pcp->prc_thread;
990 		ASSERT(t != NULL);
991 		thread_lock(t);
992 	} else {
993 		proc_t *p = pcp->prc_proc;
994 		t = prchoose(p);	/* returns locked thread */
995 		ASSERT(t != NULL);
996 	}
997 
998 	return (t);
999 }
1000 
1001 /*
1002  * Direct the process or lwp to stop.
1003  */
1004 void
1005 pr_stop(prnode_t *pnp)
1006 {
1007 	prcommon_t *pcp = pnp->pr_common;
1008 	proc_t *p = pcp->prc_proc;
1009 	kthread_t *t;
1010 	vnode_t *vp;
1011 
1012 	/*
1013 	 * If already stopped, do nothing; otherwise flag
1014 	 * it to be stopped the next time it tries to run.
1015 	 * If sleeping at interruptible priority, set it
1016 	 * running so it will stop within cv_wait_sig().
1017 	 *
1018 	 * Take care to cooperate with jobcontrol: if an lwp
1019 	 * is stopped due to the default action of a jobcontrol
1020 	 * stop signal, flag it to be stopped the next time it
1021 	 * starts due to a SIGCONT signal.
1022 	 */
1023 	if (pcp->prc_flags & PRC_LWP)
1024 		t = pcp->prc_thread;
1025 	else
1026 		t = p->p_tlist;
1027 	ASSERT(t != NULL);
1028 
1029 	do {
1030 		int notify;
1031 
1032 		notify = 0;
1033 		thread_lock(t);
1034 		if (!ISTOPPED(t)) {
1035 			t->t_proc_flag |= TP_PRSTOP;
1036 			t->t_sig_check = 1;	/* do ISSIG */
1037 		}
1038 
1039 		/* Move the thread from wait queue to run queue */
1040 		if (ISWAITING(t))
1041 			setrun_locked(t);
1042 
1043 		if (ISWAKEABLE(t)) {
1044 			if (t->t_wchan0 == NULL)
1045 				setrun_locked(t);
1046 			else if (!VSTOPPED(t)) {
1047 				/*
1048 				 * Mark it virtually stopped.
1049 				 */
1050 				t->t_proc_flag |= TP_PRVSTOP;
1051 				notify = 1;
1052 			}
1053 		}
1054 		/*
1055 		 * force the thread into the kernel
1056 		 * if it is not already there.
1057 		 */
1058 		prpokethread(t);
1059 		thread_unlock(t);
1060 		if (notify &&
1061 		    (vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace) != NULL)
1062 			prnotify(vp);
1063 		if (pcp->prc_flags & PRC_LWP)
1064 			break;
1065 	} while ((t = t->t_forw) != p->p_tlist);
1066 
1067 	/*
1068 	 * We do this just in case the thread we asked
1069 	 * to stop is in holdlwps() (called from cfork()).
1070 	 */
1071 	cv_broadcast(&p->p_holdlwps);
1072 }
1073 
1074 /*
1075  * Sleep until the lwp stops, but cooperate with
1076  * jobcontrol:  Don't wake up if the lwp is stopped
1077  * due to the default action of a jobcontrol stop signal.
1078  * If this is the process file descriptor, sleep
1079  * until all of the process's lwps stop.
1080  */
1081 int
1082 pr_wait_stop(prnode_t *pnp, time_t timeo)
1083 {
1084 	prcommon_t *pcp = pnp->pr_common;
1085 	proc_t *p = pcp->prc_proc;
1086 	timestruc_t rqtime;
1087 	timestruc_t *rqtp = NULL;
1088 	int timecheck = 0;
1089 	kthread_t *t;
1090 	int error;
1091 
1092 	if (timeo > 0) {	/* millisecond timeout */
1093 		/*
1094 		 * Determine the precise future time of the requested timeout.
1095 		 */
1096 		timestruc_t now;
1097 
1098 		timecheck = timechanged;
1099 		gethrestime(&now);
1100 		rqtp = &rqtime;
1101 		rqtp->tv_sec = timeo / MILLISEC;
1102 		rqtp->tv_nsec = (timeo % MILLISEC) * MICROSEC;
1103 		timespecadd(rqtp, &now);
1104 	}
1105 
1106 	if (pcp->prc_flags & PRC_LWP) {	/* lwp file descriptor */
1107 		t = pcp->prc_thread;
1108 		ASSERT(t != NULL);
1109 		thread_lock(t);
1110 		while (!ISTOPPED(t) && !VSTOPPED(t)) {
1111 			thread_unlock(t);
1112 			mutex_enter(&pcp->prc_mutex);
1113 			prunlock(pnp);
1114 			error = pr_wait(pcp, rqtp, timecheck);
1115 			if (error)	/* -1 is timeout */
1116 				return (error);
1117 			if ((error = prlock(pnp, ZNO)) != 0)
1118 				return (error);
1119 			ASSERT(p == pcp->prc_proc);
1120 			ASSERT(t == pcp->prc_thread);
1121 			thread_lock(t);
1122 		}
1123 		thread_unlock(t);
1124 	} else {			/* process file descriptor */
1125 		t = prchoose(p);	/* returns locked thread */
1126 		ASSERT(t != NULL);
1127 		ASSERT(MUTEX_HELD(&p->p_lock));
1128 		while ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t)) ||
1129 		    (p->p_flag & SEXITLWPS)) {
1130 			thread_unlock(t);
1131 			mutex_enter(&pcp->prc_mutex);
1132 			prunlock(pnp);
1133 			error = pr_wait(pcp, rqtp, timecheck);
1134 			if (error)	/* -1 is timeout */
1135 				return (error);
1136 			if ((error = prlock(pnp, ZNO)) != 0)
1137 				return (error);
1138 			ASSERT(p == pcp->prc_proc);
1139 			t = prchoose(p);	/* returns locked t */
1140 			ASSERT(t != NULL);
1141 		}
1142 		thread_unlock(t);
1143 	}
1144 
1145 	ASSERT(!(pcp->prc_flags & PRC_DESTROY) && p->p_stat != SZOMB &&
1146 	    t != NULL && t->t_state != TS_ZOMB);
1147 
1148 	return (0);
1149 }
1150 
1151 int
1152 pr_setrun(prnode_t *pnp, ulong_t flags)
1153 {
1154 	prcommon_t *pcp = pnp->pr_common;
1155 	proc_t *p = pcp->prc_proc;
1156 	kthread_t *t;
1157 	klwp_t *lwp;
1158 
1159 	/*
1160 	 * Cannot set an lwp running if it is not stopped.
1161 	 * Also, no lwp other than the /proc agent lwp can
1162 	 * be set running so long as the /proc agent lwp exists.
1163 	 */
1164 	t = pr_thread(pnp);	/* returns locked thread */
1165 	if ((!ISTOPPED(t) && !VSTOPPED(t) &&
1166 	    !(t->t_proc_flag & TP_PRSTOP)) ||
1167 	    (p->p_agenttp != NULL &&
1168 	    (t != p->p_agenttp || !(pcp->prc_flags & PRC_LWP)))) {
1169 		thread_unlock(t);
1170 		return (EBUSY);
1171 	}
1172 	thread_unlock(t);
1173 	if (flags & ~(PRCSIG|PRCFAULT|PRSTEP|PRSTOP|PRSABORT))
1174 		return (EINVAL);
1175 	lwp = ttolwp(t);
1176 	if ((flags & PRCSIG) && lwp->lwp_cursig != SIGKILL) {
1177 		/*
1178 		 * Discard current siginfo_t, if any.
1179 		 */
1180 		lwp->lwp_cursig = 0;
1181 		lwp->lwp_extsig = 0;
1182 		if (lwp->lwp_curinfo) {
1183 			siginfofree(lwp->lwp_curinfo);
1184 			lwp->lwp_curinfo = NULL;
1185 		}
1186 	}
1187 	if (flags & PRCFAULT)
1188 		lwp->lwp_curflt = 0;
1189 	/*
1190 	 * We can't hold p->p_lock when we touch the lwp's registers.
1191 	 * It may be swapped out and we will get a page fault.
1192 	 */
1193 	if (flags & PRSTEP) {
1194 		mutex_exit(&p->p_lock);
1195 		prstep(lwp, 0);
1196 		mutex_enter(&p->p_lock);
1197 	}
1198 	if (flags & PRSTOP) {
1199 		t->t_proc_flag |= TP_PRSTOP;
1200 		t->t_sig_check = 1;	/* do ISSIG */
1201 	}
1202 	if (flags & PRSABORT)
1203 		lwp->lwp_sysabort = 1;
1204 	thread_lock(t);
1205 	if ((pcp->prc_flags & PRC_LWP) || (flags & (PRSTEP|PRSTOP))) {
1206 		/*
1207 		 * Here, we are dealing with a single lwp.
1208 		 */
1209 		if (ISTOPPED(t)) {
1210 			t->t_schedflag |= TS_PSTART;
1211 			t->t_dtrace_stop = 0;
1212 			setrun_locked(t);
1213 		} else if (flags & PRSABORT) {
1214 			t->t_proc_flag &=
1215 			    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1216 			setrun_locked(t);
1217 		} else if (!(flags & PRSTOP)) {
1218 			t->t_proc_flag &=
1219 			    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1220 		}
1221 		thread_unlock(t);
1222 	} else {
1223 		/*
1224 		 * Here, we are dealing with the whole process.
1225 		 */
1226 		if (ISTOPPED(t)) {
1227 			/*
1228 			 * The representative lwp is stopped on an event
1229 			 * of interest.  We demote it to PR_REQUESTED and
1230 			 * choose another representative lwp.  If the new
1231 			 * representative lwp is not stopped on an event of
1232 			 * interest (other than PR_REQUESTED), we set the
1233 			 * whole process running, else we leave the process
1234 			 * stopped showing the next event of interest.
1235 			 */
1236 			kthread_t *tx = NULL;
1237 
1238 			if (!(flags & PRSABORT) &&
1239 			    t->t_whystop == PR_SYSENTRY &&
1240 			    t->t_whatstop == SYS_lwp_exit)
1241 				tx = t;		/* remember the exiting lwp */
1242 			t->t_whystop = PR_REQUESTED;
1243 			t->t_whatstop = 0;
1244 			thread_unlock(t);
1245 			t = prchoose(p);	/* returns locked t */
1246 			ASSERT(ISTOPPED(t) || VSTOPPED(t));
1247 			if (VSTOPPED(t) ||
1248 			    t->t_whystop == PR_REQUESTED) {
1249 				thread_unlock(t);
1250 				allsetrun(p);
1251 			} else {
1252 				thread_unlock(t);
1253 				/*
1254 				 * As a special case, if the old representative
1255 				 * lwp was stopped on entry to _lwp_exit()
1256 				 * (and we are not aborting the system call),
1257 				 * we set the old representative lwp running.
1258 				 * We do this so that the next process stop
1259 				 * will find the exiting lwp gone.
1260 				 */
1261 				if (tx != NULL) {
1262 					thread_lock(tx);
1263 					tx->t_schedflag |= TS_PSTART;
1264 					t->t_dtrace_stop = 0;
1265 					setrun_locked(tx);
1266 					thread_unlock(tx);
1267 				}
1268 			}
1269 		} else {
1270 			/*
1271 			 * No event of interest; set all of the lwps running.
1272 			 */
1273 			if (flags & PRSABORT) {
1274 				t->t_proc_flag &=
1275 				    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1276 				setrun_locked(t);
1277 			}
1278 			thread_unlock(t);
1279 			allsetrun(p);
1280 		}
1281 	}
1282 	return (0);
1283 }
1284 
1285 /*
1286  * Wait until process/lwp stops or until timer expires.
1287  * Return EINTR for an interruption, -1 for timeout, else 0.
1288  */
1289 int
1290 pr_wait(prcommon_t *pcp,	/* prcommon referring to process/lwp */
1291 	timestruc_t *ts,	/* absolute time of timeout, if any */
1292 	int timecheck)
1293 {
1294 	int rval;
1295 
1296 	ASSERT(MUTEX_HELD(&pcp->prc_mutex));
1297 	rval = cv_waituntil_sig(&pcp->prc_wait, &pcp->prc_mutex, ts, timecheck);
1298 	mutex_exit(&pcp->prc_mutex);
1299 	switch (rval) {
1300 	case 0:
1301 		return (EINTR);
1302 	case -1:
1303 		return (-1);
1304 	default:
1305 		return (0);
1306 	}
1307 }
1308 
1309 /*
1310  * Make all threads in the process runnable.
1311  */
1312 void
1313 allsetrun(proc_t *p)
1314 {
1315 	kthread_t *t;
1316 
1317 	ASSERT(MUTEX_HELD(&p->p_lock));
1318 
1319 	if ((t = p->p_tlist) != NULL) {
1320 		do {
1321 			thread_lock(t);
1322 			ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1323 			t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1324 			if (ISTOPPED(t)) {
1325 				t->t_schedflag |= TS_PSTART;
1326 				t->t_dtrace_stop = 0;
1327 				setrun_locked(t);
1328 			}
1329 			thread_unlock(t);
1330 		} while ((t = t->t_forw) != p->p_tlist);
1331 	}
1332 }
1333 
1334 /*
1335  * Wait for the process to die.
1336  * We do this after sending SIGKILL because we know it will
1337  * die soon and we want subsequent operations to return ENOENT.
1338  */
1339 void
1340 pr_wait_die(prnode_t *pnp)
1341 {
1342 	proc_t *p;
1343 
1344 	mutex_enter(&pidlock);
1345 	while ((p = pnp->pr_common->prc_proc) != NULL && p->p_stat != SZOMB) {
1346 		if (!cv_wait_sig(&p->p_srwchan_cv, &pidlock))
1347 			break;
1348 	}
1349 	mutex_exit(&pidlock);
1350 }
1351 
1352 static void
1353 pr_settrace(proc_t *p, sigset_t *sp)
1354 {
1355 	prdelset(sp, SIGKILL);
1356 	prassignset(&p->p_sigmask, sp);
1357 	if (!sigisempty(&p->p_sigmask))
1358 		p->p_proc_flag |= P_PR_TRACE;
1359 	else if (prisempty(&p->p_fltmask)) {
1360 		user_t *up = PTOU(p);
1361 		if (up->u_systrap == 0)
1362 			p->p_proc_flag &= ~P_PR_TRACE;
1363 	}
1364 }
1365 
/*
 * Set the current signal (and its siginfo) of the lwp referred to by pnp
 * from *sip.
 *
 * A signal number of zero clears the current signal and discards any
 * current siginfo.  A non-zero signal replaces the current siginfo and
 * applies the SIGKILL / job-control side effects below.
 *
 * The caller holds p->p_lock; it is dropped and reacquired around the
 * sleeping allocation of the sigqueue_t.
 *
 * Returns EINVAL if the signal number is out of range, EBUSY if the
 * lwp's current signal is already SIGKILL, else 0.
 */
int
pr_setsig(prnode_t *pnp, siginfo_t *sip)
{
	int sig = sip->si_signo;
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;
	kthread_t *t;
	klwp_t *lwp;
	int error = 0;

	t = pr_thread(pnp);	/* returns locked thread */
	thread_unlock(t);
	lwp = ttolwp(t);
	if (sig < 0 || sig >= NSIG)
		/* Zero allowed here */
		error = EINVAL;
	else if (lwp->lwp_cursig == SIGKILL)
		/* "can't happen", but just in case */
		error = EBUSY;
	else if ((lwp->lwp_cursig = (uchar_t)sig) == 0) {
		/* lwp_cursig has just been cleared by the assignment above */
		lwp->lwp_extsig = 0;
		/*
		 * Discard current siginfo_t, if any.
		 */
		if (lwp->lwp_curinfo) {
			siginfofree(lwp->lwp_curinfo);
			lwp->lwp_curinfo = NULL;
		}
	} else {
		kthread_t *tx;
		sigqueue_t *sqp;

		/* drop p_lock to do kmem_alloc(KM_SLEEP) */
		mutex_exit(&p->p_lock);
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
		mutex_enter(&p->p_lock);

		/*
		 * Reuse an existing siginfo if one is present;
		 * otherwise install the one just allocated.
		 */
		if (lwp->lwp_curinfo == NULL)
			lwp->lwp_curinfo = sqp;
		else
			kmem_free(sqp, sizeof (sigqueue_t));
		/*
		 * Copy contents of info to current siginfo_t.
		 */
		bcopy(sip, &lwp->lwp_curinfo->sq_info,
		    sizeof (lwp->lwp_curinfo->sq_info));
		/*
		 * Prevent contents published by si_zoneid-unaware /proc
		 * consumers from being incorrectly filtered.  Because
		 * an uninitialized si_zoneid is the same as
		 * GLOBAL_ZONEID, this means that you can't pr_setsig a
		 * process in a non-global zone with a siginfo which
		 * appears to come from the global zone.
		 */
		if (SI_FROMUSER(sip) && sip->si_zoneid == 0)
			lwp->lwp_curinfo->sq_info.si_zoneid =
			    p->p_zone->zone_id;
		/*
		 * Side-effects for SIGKILL and jobcontrol signals.
		 */
		if (sig == SIGKILL) {
			p->p_flag |= SKILLED;
			p->p_flag &= ~SEXTKILLED;
		} else if (sig == SIGCONT) {
			/*
			 * SIGCONT cancels every pending default-stop signal,
			 * both process-directed and per-thread.
			 */
			p->p_flag |= SSCONT;
			sigdelq(p, NULL, SIGSTOP);
			sigdelq(p, NULL, SIGTSTP);
			sigdelq(p, NULL, SIGTTOU);
			sigdelq(p, NULL, SIGTTIN);
			sigdiffset(&p->p_sig, &stopdefault);
			sigdiffset(&p->p_extsig, &stopdefault);
			if ((tx = p->p_tlist) != NULL) {
				do {
					sigdelq(p, tx, SIGSTOP);
					sigdelq(p, tx, SIGTSTP);
					sigdelq(p, tx, SIGTTOU);
					sigdelq(p, tx, SIGTTIN);
					sigdiffset(&tx->t_sig, &stopdefault);
					sigdiffset(&tx->t_extsig, &stopdefault);
				} while ((tx = tx->t_forw) != p->p_tlist);
			}
		} else if (sigismember(&stopdefault, sig)) {
			/*
			 * A default-stop signal cancels any pending SIGCONT,
			 * both process-directed and per-thread.
			 */
			if (PTOU(p)->u_signal[sig-1] == SIG_DFL &&
			    (sig == SIGSTOP || !p->p_pgidp->pid_pgorphaned))
				p->p_flag &= ~SSCONT;
			sigdelq(p, NULL, SIGCONT);
			sigdelset(&p->p_sig, SIGCONT);
			sigdelset(&p->p_extsig, SIGCONT);
			if ((tx = p->p_tlist) != NULL) {
				do {
					sigdelq(p, tx, SIGCONT);
					sigdelset(&tx->t_sig, SIGCONT);
					sigdelset(&tx->t_extsig, SIGCONT);
				} while ((tx = tx->t_forw) != p->p_tlist);
			}
		}
		thread_lock(t);
		if (ISWAKEABLE(t) || ISWAITING(t)) {
			/* Set signaled sleeping/waiting lwp running */
			setrun_locked(t);
		} else if (t->t_state == TS_STOPPED && sig == SIGKILL) {
			/* If SIGKILL, set stopped lwp running */
			p->p_stopsig = 0;
			t->t_schedflag |= TS_XSTART | TS_PSTART;
			t->t_dtrace_stop = 0;
			setrun_locked(t);
		}
		t->t_sig_check = 1;	/* so ISSIG will be done */
		thread_unlock(t);
		/*
		 * More jobcontrol side-effects.
		 */
		if (sig == SIGCONT && (tx = p->p_tlist) != NULL) {
			/* restart every lwp that is stopped for job control */
			p->p_stopsig = 0;
			do {
				thread_lock(tx);
				if (tx->t_state == TS_STOPPED &&
				    tx->t_whystop == PR_JOBCONTROL) {
					tx->t_schedflag |= TS_XSTART;
					setrun_locked(tx);
				}
				thread_unlock(tx);
			} while ((tx = tx->t_forw) != p->p_tlist);
		}
	}
	return (error);
}
1493 
1494 int
1495 pr_kill(prnode_t *pnp, int sig, cred_t *cr)
1496 {
1497 	prcommon_t *pcp = pnp->pr_common;
1498 	proc_t *p = pcp->prc_proc;
1499 	k_siginfo_t info;
1500 
1501 	if (sig <= 0 || sig >= NSIG)
1502 		return (EINVAL);
1503 
1504 	bzero(&info, sizeof (info));
1505 	info.si_signo = sig;
1506 	info.si_code = SI_USER;
1507 	info.si_pid = curproc->p_pid;
1508 	info.si_ctid = PRCTID(curproc);
1509 	info.si_zoneid = getzoneid();
1510 	info.si_uid = crgetruid(cr);
1511 	sigaddq(p, (pcp->prc_flags & PRC_LWP)?
1512 	    pcp->prc_thread : NULL, &info, KM_NOSLEEP);
1513 
1514 	return (0);
1515 }
1516 
1517 int
1518 pr_unkill(prnode_t *pnp, int sig)
1519 {
1520 	prcommon_t *pcp = pnp->pr_common;
1521 	proc_t *p = pcp->prc_proc;
1522 	sigqueue_t *infop = NULL;
1523 
1524 	if (sig <= 0 || sig >= NSIG || sig == SIGKILL)
1525 		return (EINVAL);
1526 
1527 	if (pcp->prc_flags & PRC_LWP)
1528 		sigdeq(p, pcp->prc_thread, sig, &infop);
1529 	else
1530 		sigdeq(p, NULL, sig, &infop);
1531 
1532 	if (infop)
1533 		siginfofree(infop);
1534 
1535 	return (0);
1536 }
1537 
1538 int
1539 pr_nice(proc_t *p, int nice, cred_t *cr)
1540 {
1541 	kthread_t *t;
1542 	int err;
1543 	int error = 0;
1544 
1545 	t = p->p_tlist;
1546 	do {
1547 		ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1548 		err = CL_DONICE(t, cr, nice, (int *)NULL);
1549 		schedctl_set_cidpri(t);
1550 		if (error == 0)
1551 			error = err;
1552 	} while ((t = t->t_forw) != p->p_tlist);
1553 
1554 	return (error);
1555 }
1556 
1557 void
1558 pr_setentryexit(proc_t *p, sysset_t *sysset, int entry)
1559 {
1560 	user_t *up = PTOU(p);
1561 
1562 	if (entry) {
1563 		prassignset(&up->u_entrymask, sysset);
1564 	} else {
1565 		prassignset(&up->u_exitmask, sysset);
1566 	}
1567 	if (!prisempty(&up->u_entrymask) ||
1568 	    !prisempty(&up->u_exitmask)) {
1569 		up->u_systrap = 1;
1570 		p->p_proc_flag |= P_PR_TRACE;
1571 		set_proc_sys(p);	/* set pre and post-sys flags */
1572 	} else {
1573 		up->u_systrap = 0;
1574 		if (sigisempty(&p->p_sigmask) &&
1575 		    prisempty(&p->p_fltmask))
1576 			p->p_proc_flag &= ~P_PR_TRACE;
1577 	}
1578 }
1579 
1580 #define	ALLFLAGS	\
1581 	(PR_FORK|PR_RLC|PR_KLC|PR_ASYNC|PR_BPTADJ|PR_MSACCT|PR_MSFORK|PR_PTRACE)
1582 
1583 int
1584 pr_set(proc_t *p, long flags)
1585 {
1586 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1587 		return (EBUSY);
1588 
1589 	if (flags & ~ALLFLAGS)
1590 		return (EINVAL);
1591 
1592 	if (flags & PR_FORK)
1593 		p->p_proc_flag |= P_PR_FORK;
1594 	if (flags & PR_RLC)
1595 		p->p_proc_flag |= P_PR_RUNLCL;
1596 	if (flags & PR_KLC)
1597 		p->p_proc_flag |= P_PR_KILLCL;
1598 	if (flags & PR_ASYNC)
1599 		p->p_proc_flag |= P_PR_ASYNC;
1600 	if (flags & PR_BPTADJ)
1601 		p->p_proc_flag |= P_PR_BPTADJ;
1602 	if (flags & PR_MSACCT)
1603 		if ((p->p_flag & SMSACCT) == 0)
1604 			estimate_msacct(p->p_tlist, gethrtime());
1605 	if (flags & PR_MSFORK)
1606 		p->p_flag |= SMSFORK;
1607 	if (flags & PR_PTRACE) {
1608 		p->p_proc_flag |= P_PR_PTRACE;
1609 		/* ptraced process must die if parent dead */
1610 		if (p->p_ppid == 1)
1611 			sigtoproc(p, NULL, SIGKILL);
1612 	}
1613 
1614 	return (0);
1615 }
1616 
1617 int
1618 pr_unset(proc_t *p, long flags)
1619 {
1620 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1621 		return (EBUSY);
1622 
1623 	if (flags & ~ALLFLAGS)
1624 		return (EINVAL);
1625 
1626 	if (flags & PR_FORK)
1627 		p->p_proc_flag &= ~P_PR_FORK;
1628 	if (flags & PR_RLC)
1629 		p->p_proc_flag &= ~P_PR_RUNLCL;
1630 	if (flags & PR_KLC)
1631 		p->p_proc_flag &= ~P_PR_KILLCL;
1632 	if (flags & PR_ASYNC)
1633 		p->p_proc_flag &= ~P_PR_ASYNC;
1634 	if (flags & PR_BPTADJ)
1635 		p->p_proc_flag &= ~P_PR_BPTADJ;
1636 	if (flags & PR_MSACCT)
1637 		disable_msacct(p);
1638 	if (flags & PR_MSFORK)
1639 		p->p_flag &= ~SMSFORK;
1640 	if (flags & PR_PTRACE)
1641 		p->p_proc_flag &= ~P_PR_PTRACE;
1642 
1643 	return (0);
1644 }
1645 
1646 static int
1647 pr_setfpregs(prnode_t *pnp, prfpregset_t *prfpregset)
1648 {
1649 	proc_t *p = pnp->pr_common->prc_proc;
1650 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1651 
1652 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1653 		thread_unlock(t);
1654 		return (EBUSY);
1655 	}
1656 	if (!prhasfp()) {
1657 		thread_unlock(t);
1658 		return (EINVAL);	/* No FP support */
1659 	}
1660 
1661 	/* drop p_lock while touching the lwp's stack */
1662 	thread_unlock(t);
1663 	mutex_exit(&p->p_lock);
1664 	prsetprfpregs(ttolwp(t), prfpregset);
1665 	mutex_enter(&p->p_lock);
1666 
1667 	return (0);
1668 }
1669 
#ifdef	_SYSCALL32_IMPL
/*
 * 32-bit variant of pr_setfpregs(): set the floating-point registers
 * of the lwp referred to by pnp from a prfpregset32_t.
 * Returns EBUSY if the lwp is not stopped, EINVAL if prhasfp()
 * reports no floating-point support, else 0.
 */
static int
pr_setfpregs32(prnode_t *pnp, prfpregset32_t *prfpregset)
{
	proc_t *p = pnp->pr_common->prc_proc;
	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
	int error = 0;

	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t))
		error = EBUSY;
	else if (!prhasfp())
		error = EINVAL;		/* No FP support */

	thread_unlock(t);
	if (error != 0)
		return (error);

	/* drop p_lock while touching the lwp's stack */
	mutex_exit(&p->p_lock);
	prsetprfpregs32(ttolwp(t), prfpregset);
	mutex_enter(&p->p_lock);

	return (0);
}
#endif	/* _SYSCALL32_IMPL */
1695 
1696 #if defined(__sparc)
1697 /* ARGSUSED */
1698 static int
1699 pr_setxregs(prnode_t *pnp, prxregset_t *prxregset)
1700 {
1701 	proc_t *p = pnp->pr_common->prc_proc;
1702 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1703 
1704 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1705 		thread_unlock(t);
1706 		return (EBUSY);
1707 	}
1708 	thread_unlock(t);
1709 
1710 	if (!prhasx(p))
1711 		return (EINVAL);	/* No extra register support */
1712 
1713 	/* drop p_lock while touching the lwp's stack */
1714 	mutex_exit(&p->p_lock);
1715 	prsetprxregs(ttolwp(t), (caddr_t)prxregset);
1716 	mutex_enter(&p->p_lock);
1717 
1718 	return (0);
1719 }
1720 
1721 static int
1722 pr_setasrs(prnode_t *pnp, asrset_t asrset)
1723 {
1724 	proc_t *p = pnp->pr_common->prc_proc;
1725 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1726 
1727 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1728 		thread_unlock(t);
1729 		return (EBUSY);
1730 	}
1731 	thread_unlock(t);
1732 
1733 	/* drop p_lock while touching the lwp's stack */
1734 	mutex_exit(&p->p_lock);
1735 	prsetasregs(ttolwp(t), asrset);
1736 	mutex_enter(&p->p_lock);
1737 
1738 	return (0);
1739 }
1740 #endif
1741 
1742 static int
1743 pr_setvaddr(prnode_t *pnp, caddr_t vaddr)
1744 {
1745 	proc_t *p = pnp->pr_common->prc_proc;
1746 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1747 
1748 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1749 		thread_unlock(t);
1750 		return (EBUSY);
1751 	}
1752 
1753 	/* drop p_lock while touching the lwp's stack */
1754 	thread_unlock(t);
1755 	mutex_exit(&p->p_lock);
1756 	prsvaddr(ttolwp(t), vaddr);
1757 	mutex_enter(&p->p_lock);
1758 
1759 	return (0);
1760 }
1761 
1762 void
1763 pr_sethold(prnode_t *pnp, sigset_t *sp)
1764 {
1765 	proc_t *p = pnp->pr_common->prc_proc;
1766 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1767 
1768 	schedctl_finish_sigblock(t);
1769 	sigutok(sp, &t->t_hold);
1770 	if (ISWAKEABLE(t) &&
1771 	    (fsig(&p->p_sig, t) || fsig(&t->t_sig, t)))
1772 		setrun_locked(t);
1773 	t->t_sig_check = 1;	/* so thread will see new holdmask */
1774 	thread_unlock(t);
1775 }
1776 
1777 void
1778 pr_setfault(proc_t *p, fltset_t *fltp)
1779 {
1780 	prassignset(&p->p_fltmask, fltp);
1781 	if (!prisempty(&p->p_fltmask))
1782 		p->p_proc_flag |= P_PR_TRACE;
1783 	else if (sigisempty(&p->p_sigmask)) {
1784 		user_t *up = PTOU(p);
1785 		if (up->u_systrap == 0)
1786 			p->p_proc_flag &= ~P_PR_TRACE;
1787 	}
1788 }
1789 
1790 static int
1791 pr_clearsig(prnode_t *pnp)
1792 {
1793 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1794 	klwp_t *lwp = ttolwp(t);
1795 
1796 	thread_unlock(t);
1797 	if (lwp->lwp_cursig == SIGKILL)
1798 		return (EBUSY);
1799 
1800 	/*
1801 	 * Discard current siginfo_t, if any.
1802 	 */
1803 	lwp->lwp_cursig = 0;
1804 	lwp->lwp_extsig = 0;
1805 	if (lwp->lwp_curinfo) {
1806 		siginfofree(lwp->lwp_curinfo);
1807 		lwp->lwp_curinfo = NULL;
1808 	}
1809 
1810 	return (0);
1811 }
1812 
1813 static int
1814 pr_clearflt(prnode_t *pnp)
1815 {
1816 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1817 
1818 	thread_unlock(t);
1819 	ttolwp(t)->lwp_curflt = 0;
1820 
1821 	return (0);
1822 }
1823 
/*
 * Set or clear a watched area in the process referred to by pnp.
 *
 * If pwp->pr_wflags has no access bits (anything other than
 * WA_TRAPAFTER), the request clears the watched area starting at
 * pr_vaddr; otherwise it establishes one of pr_size bytes.
 *
 * *unlocked is set to 1 on the error paths where re-locking the process
 * with prlock() failed, and is 0 otherwise.
 *
 * Returns EBUSY for a system process, EINVAL for a wrapping range, bad
 * flags or a zero-length watched area, E2BIG if the request spans too
 * many pages, an error from prlock(), or the result of
 * set_watched_area()/clear_watched_area().  A request entirely at or
 * above as->a_userlimit silently succeeds with 0.
 */
static int
pr_watch(prnode_t *pnp, prwatch_t *pwp, int *unlocked)
{
	proc_t *p = pnp->pr_common->prc_proc;
	struct as *as = p->p_as;
	uintptr_t vaddr = pwp->pr_vaddr;
	size_t size = pwp->pr_size;
	int wflags = pwp->pr_wflags;
	ulong_t newpage = 0;
	struct watched_area *pwa;
	int error;

	*unlocked = 0;

	/*
	 * Can't apply to a system process.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		return (EBUSY);

	/*
	 * Verify that the address range does not wrap
	 * and that only the proper flags were specified.
	 */
	if ((wflags & ~WA_TRAPAFTER) == 0)
		size = 0;	/* a clear request ignores the size */
	if (vaddr + size < vaddr ||
	    (wflags & ~(WA_READ|WA_WRITE|WA_EXEC|WA_TRAPAFTER)) != 0 ||
	    ((wflags & ~WA_TRAPAFTER) != 0 && size == 0))
		return (EINVAL);

	/*
	 * Don't let the address range go above as->a_userlimit.
	 * There is no error here, just a limitation.
	 */
	if (vaddr >= (uintptr_t)as->a_userlimit)
		return (0);
	if (vaddr + size > (uintptr_t)as->a_userlimit)
		size = (uintptr_t)as->a_userlimit - vaddr;

	/*
	 * Compute maximum number of pages this will add.
	 */
	if ((wflags & ~WA_TRAPAFTER) != 0) {
		ulong_t pagespan = (vaddr + size) - (vaddr & PAGEMASK);
		newpage = btopr(pagespan);
		if (newpage > 2 * prnwatch)
			return (E2BIG);
	}

	/*
	 * Force the process to be fully stopped.
	 */
	if (p == curproc) {
		/* operating on ourself: retry holdwatch() until it returns 0 */
		prunlock(pnp);
		while (holdwatch() != 0)
			continue;
		if ((error = prlock(pnp, ZNO)) != 0) {
			continuelwps(p);
			*unlocked = 1;
			return (error);
		}
	} else {
		/* pause the target's lwps and wait until all have stopped */
		pauselwps(p);
		while (pr_allstopped(p, 0) > 0) {
			/*
			 * This cv/mutex pair is persistent even
			 * if the process disappears after we
			 * unmark it and drop p->p_lock.
			 */
			kcondvar_t *cv = &pr_pid_cv[p->p_slot];
			kmutex_t *mp = &p->p_lock;

			prunmark(p);
			(void) cv_wait(cv, mp);
			mutex_exit(mp);
			if ((error = prlock(pnp, ZNO)) != 0) {
				/*
				 * Unpause the process if it exists.
				 */
				p = pr_p_lock(pnp);
				mutex_exit(&pr_pidlock);
				if (p != NULL) {
					unpauselwps(p);
					prunlock(pnp);
				}
				*unlocked = 1;
				return (error);
			}
		}
	}

	/*
	 * Drop p->p_lock in order to perform the rest of this.
	 * The process is still locked with the P_PR_LOCK flag.
	 */
	mutex_exit(&p->p_lock);

	/*
	 * pwa is not freed in this function; presumably the
	 * set/clear routines take ownership of it -- confirm.
	 */
	pwa = kmem_alloc(sizeof (struct watched_area), KM_SLEEP);
	pwa->wa_vaddr = (caddr_t)vaddr;
	pwa->wa_eaddr = (caddr_t)vaddr + size;
	pwa->wa_flags = (ulong_t)wflags;

	error = ((pwa->wa_flags & ~WA_TRAPAFTER) == 0)?
	    clear_watched_area(p, pwa) : set_watched_area(p, pwa);

	/* reacquire p->p_lock and let the lwps run again */
	if (p == curproc) {
		setallwatch();
		mutex_enter(&p->p_lock);
		continuelwps(p);
	} else {
		mutex_enter(&p->p_lock);
		unpauselwps(p);
	}

	return (error);
}
1941 
/* jobcontrol stopped, but with a /proc directed stop in effect */
#define	JDSTOPPED(t)	\
	((t)->t_state == TS_STOPPED && \
	(t)->t_whystop == PR_JOBCONTROL && \
	((t)->t_proc_flag & TP_PRSTOP))

/*
 * pr_agent() creates the agent lwp. If the process is exiting while
 * we are creating an agent lwp, then exitlwps() waits until the
 * agent has been created using prbarrier().
 *
 * The new lwp is created stopped, loaded with the registers in
 * prgregset, placed in the scheduling class of the representative lwp
 * and recorded in p->p_agenttp.  The function then waits for the agent
 * to stop before returning.
 *
 * *unlocked is 1 on return if the process was left unlocked (the wait
 * was interrupted or prlock() failed), else 0.
 *
 * Returns EBUSY if the agent cannot be created (see below), ENOMEM if
 * lwp_create() fails, ENOENT if the agent disappeared while waiting,
 * or an error from prlock().  Note that -1, not 0, is returned when no
 * error occurred; presumably the caller treats -1 specially -- confirm
 * against the caller.
 */
static int
pr_agent(prnode_t *pnp, prgregset_t prgregset, int *unlocked)
{
	proc_t *p = pnp->pr_common->prc_proc;
	prcommon_t *pcp;
	kthread_t *t;
	kthread_t *ct;
	klwp_t *clwp;
	k_sigset_t smask;
	int cid;
	void *bufp = NULL;
	int error;

	*unlocked = 0;

	/*
	 * Cannot create the /proc agent lwp if :-
	 * - the process is not fully stopped or directed to stop.
	 * - there is an agent lwp already.
	 * - the process has been killed.
	 * - the process is exiting.
	 * - it's a vfork(2) parent.
	 */
	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);

	if ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t) && !JDSTOPPED(t)) ||
	    p->p_agenttp != NULL ||
	    (p->p_flag & (SKILLED | SEXITING | SVFWAIT))) {
		thread_unlock(t);
		return (EBUSY);
	}

	thread_unlock(t);
	mutex_exit(&p->p_lock);

	/* the agent starts with every maskable signal held */
	sigfillset(&smask);
	sigdiffset(&smask, &cantmask);
	clwp = lwp_create(lwp_rtt, NULL, 0, p, TS_STOPPED,
	    t->t_pri, &smask, NOCLASS, 0);
	if (clwp == NULL) {
		mutex_enter(&p->p_lock);
		return (ENOMEM);
	}
	prsetprregs(clwp, prgregset, 1);
retry:
	/*
	 * Pre-allocate the scheduling class buffer without p_lock held,
	 * then verify under p_lock that the class did not change.
	 */
	cid = t->t_cid;
	(void) CL_ALLOC(&bufp, cid, KM_SLEEP);
	mutex_enter(&p->p_lock);
	if (cid != t->t_cid) {
		/*
		 * Someone just changed this thread's scheduling class,
		 * so try pre-allocating the buffer again.  Hopefully we
		 * don't hit this often.
		 */
		mutex_exit(&p->p_lock);
		CL_FREE(cid, bufp);
		goto retry;
	}

	clwp->lwp_ap = clwp->lwp_arg;
	clwp->lwp_eosys = NORMALRETURN;
	ct = lwptot(clwp);
	ct->t_clfuncs = t->t_clfuncs;
	CL_FORK(t, ct, bufp);
	ct->t_cid = t->t_cid;
	ct->t_proc_flag |= TP_PRSTOP;
	/*
	 * Setting t_sysnum to zero causes post_syscall()
	 * to bypass all syscall checks and go directly to
	 *	if (issig()) psig();
	 * so that the agent lwp will stop in issig_forreal()
	 * showing PR_REQUESTED.
	 */
	ct->t_sysnum = 0;
	ct->t_post_sys = 1;
	ct->t_sig_check = 1;
	p->p_agenttp = ct;
	ct->t_proc_flag &= ~TP_HOLDLWP;

	/*
	 * Take prc_mutex before the agent is released so that it is
	 * already held when pr_wait() is first called below.
	 */
	pcp = pnp->pr_pcommon;
	mutex_enter(&pcp->prc_mutex);

	lwp_create_done(ct);

	/*
	 * Don't return until the agent is stopped on PR_REQUESTED.
	 */

	for (;;) {
		prunlock(pnp);
		*unlocked = 1;

		/*
		 * Wait for the agent to stop and notify us.
		 * If we've been interrupted, return that information.
		 * (pr_wait() releases prc_mutex before returning.)
		 */
		error = pr_wait(pcp, NULL, 0);
		if (error == EINTR) {
			error = 0;
			break;
		}

		/*
		 * Confirm that the agent LWP has stopped.
		 */

		if ((error = prlock(pnp, ZNO)) != 0)
			break;
		*unlocked = 0;

		/*
		 * Since we dropped the lock on the process, the agent
		 * may have disappeared or changed. Grab the current
		 * agent and fail if it has disappeared.
		 */
		if ((ct = p->p_agenttp) == NULL) {
			error = ENOENT;
			break;
		}

		mutex_enter(&pcp->prc_mutex);
		thread_lock(ct);

		if (ISTOPPED(ct)) {
			thread_unlock(ct);
			mutex_exit(&pcp->prc_mutex);
			break;
		}

		/* not stopped yet: keep prc_mutex held for the next pr_wait() */
		thread_unlock(ct);
	}

	return (error ? error : -1);
}
2088 
2089 static int
2090 pr_rdwr(proc_t *p, enum uio_rw rw, priovec_t *pio)
2091 {
2092 	caddr_t base = (caddr_t)pio->pio_base;
2093 	size_t cnt = pio->pio_len;
2094 	uintptr_t offset = (uintptr_t)pio->pio_offset;
2095 	struct uio auio;
2096 	struct iovec aiov;
2097 	int error = 0;
2098 
2099 	if ((p->p_flag & SSYS) || p->p_as == &kas)
2100 		error = EIO;
2101 	else if ((base + cnt) < base || (offset + cnt) < offset)
2102 		error = EINVAL;
2103 	else if (cnt != 0) {
2104 		aiov.iov_base = base;
2105 		aiov.iov_len = cnt;
2106 
2107 		auio.uio_loffset = offset;
2108 		auio.uio_iov = &aiov;
2109 		auio.uio_iovcnt = 1;
2110 		auio.uio_resid = cnt;
2111 		auio.uio_segflg = UIO_USERSPACE;
2112 		auio.uio_llimit = (longlong_t)MAXOFFSET_T;
2113 		auio.uio_fmode = FREAD|FWRITE;
2114 		auio.uio_extflg = UIO_COPY_DEFAULT;
2115 
2116 		mutex_exit(&p->p_lock);
2117 		error = prusrio(p, rw, &auio, 0);
2118 		mutex_enter(&p->p_lock);
2119 
2120 		/*
2121 		 * We have no way to return the i/o count,
2122 		 * like read() or write() would do, so we
2123 		 * return an error if the i/o was truncated.
2124 		 */
2125 		if (auio.uio_resid != 0 && error == 0)
2126 			error = EIO;
2127 	}
2128 
2129 	return (error);
2130 }
2131 
/*
 * Replace the credentials of process p with the ids in *prcred and,
 * when 'dogrps' is set, its supplementary group list as well.
 *
 * 'cr' is the caller's credential: it supplies the zone used to
 * validate the ids and is checked with secpolicy_allow_setid().
 *
 * Returns EINVAL for an invalid uid/gid or group count, the error from
 * secpolicy_allow_setid() if the caller is not permitted, else 0.
 * The caller holds p->p_lock; it is dropped while the new cred is built
 * and installed and is held again on successful return.
 */
static int
pr_scred(proc_t *p, prcred_t *prcred, cred_t *cr, boolean_t dogrps)
{
	kthread_t *t;
	cred_t *oldcred;
	cred_t *newcred;
	uid_t oldruid;
	int error;
	zone_t *zone = crgetzone(cr);

	if (!VALID_UID(prcred->pr_euid, zone) ||
	    !VALID_UID(prcred->pr_ruid, zone) ||
	    !VALID_UID(prcred->pr_suid, zone) ||
	    !VALID_GID(prcred->pr_egid, zone) ||
	    !VALID_GID(prcred->pr_rgid, zone) ||
	    !VALID_GID(prcred->pr_sgid, zone))
		return (EINVAL);

	if (dogrps) {
		int ngrp = prcred->pr_ngroups;
		int i;

		if (ngrp < 0 || ngrp > ngroups_max)
			return (EINVAL);

		for (i = 0; i < ngrp; i++) {
			if (!VALID_GID(prcred->pr_groups[i], zone))
				return (EINVAL);
		}
	}

	/*
	 * Check permission once per distinct uid among
	 * the effective, real and saved ids.
	 */
	error = secpolicy_allow_setid(cr, prcred->pr_euid, B_FALSE);

	if (error == 0 && prcred->pr_ruid != prcred->pr_euid)
		error = secpolicy_allow_setid(cr, prcred->pr_ruid, B_FALSE);

	if (error == 0 && prcred->pr_suid != prcred->pr_euid &&
	    prcred->pr_suid != prcred->pr_ruid)
		error = secpolicy_allow_setid(cr, prcred->pr_suid, B_FALSE);

	if (error)
		return (error);

	mutex_exit(&p->p_lock);

	/* hold old cred so it doesn't disappear while we dup it */
	mutex_enter(&p->p_crlock);
	crhold(oldcred = p->p_cred);
	mutex_exit(&p->p_crlock);
	newcred = crdup(oldcred);
	oldruid = crgetruid(oldcred);
	crfree(oldcred);

	/* Error checking done above */
	(void) crsetresuid(newcred, prcred->pr_ruid, prcred->pr_euid,
	    prcred->pr_suid);
	(void) crsetresgid(newcred, prcred->pr_rgid, prcred->pr_egid,
	    prcred->pr_sgid);

	if (dogrps) {
		(void) crsetgroups(newcred, prcred->pr_ngroups,
		    prcred->pr_groups);

	}

	/* swap the new cred in and drop the process's reference on the old */
	mutex_enter(&p->p_crlock);
	oldcred = p->p_cred;
	p->p_cred = newcred;
	mutex_exit(&p->p_crlock);
	crfree(oldcred);

	/*
	 * Keep count of processes per uid consistent.
	 */
	if (oldruid != prcred->pr_ruid) {
		zoneid_t zoneid = crgetzoneid(newcred);

		mutex_enter(&pidlock);
		upcount_dec(oldruid, zoneid);
		upcount_inc(prcred->pr_ruid, zoneid);
		mutex_exit(&pidlock);
	}

	/*
	 * Broadcast the cred change to the threads.
	 */
	mutex_enter(&p->p_lock);
	t = p->p_tlist;
	do {
		t->t_pre_sys = 1; /* so syscall will get new cred */
	} while ((t = t->t_forw) != p->p_tlist);

	return (0);
}
2226 
/*
 * Change process credentials to specified zone.  Used to temporarily
 * set a process to run in the global zone; only transitions between
 * the process's actual zone and the global zone are allowed.
 *
 * Returns EPERM if secpolicy_zone_config() rejects the caller's cred,
 * EINVAL if 'zoneid' is neither the global zone nor the process's own
 * zone or cannot be found, else 0.
 * The caller holds p->p_lock; it is dropped while the new cred is built
 * and installed and is held again on successful return.
 */
static int
pr_szoneid(proc_t *p, zoneid_t zoneid, cred_t *cr)
{
	kthread_t *t;
	cred_t *oldcred;
	cred_t *newcred;
	zone_t *zptr;
	zoneid_t oldzoneid;

	if (secpolicy_zone_config(cr) != 0)
		return (EPERM);
	if (zoneid != GLOBAL_ZONEID && zoneid != p->p_zone->zone_id)
		return (EINVAL);
	/* the zone_t obtained here is released with zone_rele() below */
	if ((zptr = zone_find_by_id(zoneid)) == NULL)
		return (EINVAL);
	mutex_exit(&p->p_lock);
	/* hold the old cred while it is duplicated */
	mutex_enter(&p->p_crlock);
	oldcred = p->p_cred;
	crhold(oldcred);
	mutex_exit(&p->p_crlock);
	newcred = crdup(oldcred);
	oldzoneid = crgetzoneid(oldcred);
	crfree(oldcred);

	crsetzone(newcred, zptr);
	zone_rele(zptr);

	/* swap the new cred in and drop the process's reference on the old */
	mutex_enter(&p->p_crlock);
	oldcred = p->p_cred;
	p->p_cred = newcred;
	mutex_exit(&p->p_crlock);
	crfree(oldcred);

	/*
	 * The target process is changing zones (according to its cred), so
	 * update the per-zone upcounts, which are based on process creds.
	 */
	if (oldzoneid != zoneid) {
		uid_t ruid = crgetruid(newcred);

		mutex_enter(&pidlock);
		upcount_dec(ruid, oldzoneid);
		upcount_inc(ruid, zoneid);
		mutex_exit(&pidlock);
	}
	/*
	 * Broadcast the cred change to the threads.
	 */
	mutex_enter(&p->p_lock);
	t = p->p_tlist;
	do {
		t->t_pre_sys = 1;	/* so syscall will get new cred */
	} while ((t = t->t_forw) != p->p_tlist);

	return (0);
}
2288 
2289 static int
2290 pr_spriv(proc_t *p, prpriv_t *prpriv, cred_t *cr)
2291 {
2292 	kthread_t *t;
2293 	int err;
2294 
2295 	ASSERT(MUTEX_HELD(&p->p_lock));
2296 
2297 	if ((err = priv_pr_spriv(p, prpriv, cr)) == 0) {
2298 		/*
2299 		 * Broadcast the cred change to the threads.
2300 		 */
2301 		t = p->p_tlist;
2302 		do {
2303 			t->t_pre_sys = 1; /* so syscall will get new cred */
2304 		} while ((t = t->t_forw) != p->p_tlist);
2305 	}
2306 
2307 	return (err);
2308 }
2309 
2310 /*
2311  * Return -1 if the process is the parent of a vfork(1) whose child has yet to
2312  * terminate or perform an exec(2).
2313  *
2314  * Returns 0 if the process is fully stopped except for the current thread (if
2315  * we are operating on our own process), 1 otherwise.
2316  *
2317  * If the watchstop flag is set, then we ignore threads with TP_WATCHSTOP set.
2318  * See holdwatch() for details.
2319  */
2320 int
2321 pr_allstopped(proc_t *p, int watchstop)
2322 {
2323 	kthread_t *t;
2324 	int rv = 0;
2325 
2326 	ASSERT(MUTEX_HELD(&p->p_lock));
2327 
2328 	if (p->p_flag & SVFWAIT)	/* waiting for vfork'd child to exec */
2329 		return (-1);
2330 
2331 	if ((t = p->p_tlist) != NULL) {
2332 		do {
2333 			if (t == curthread || VSTOPPED(t) ||
2334 			    (watchstop && (t->t_proc_flag & TP_WATCHSTOP)))
2335 				continue;
2336 			thread_lock(t);
2337 			switch (t->t_state) {
2338 			case TS_ZOMB:
2339 			case TS_STOPPED:
2340 				break;
2341 			case TS_SLEEP:
2342 				if (!(t->t_flag & T_WAKEABLE) ||
2343 				    t->t_wchan0 == NULL)
2344 					rv = 1;
2345 				break;
2346 			default:
2347 				rv = 1;
2348 				break;
2349 			}
2350 			thread_unlock(t);
2351 		} while (rv == 0 && (t = t->t_forw) != p->p_tlist);
2352 	}
2353 
2354 	return (rv);
2355 }
2356 
2357 /*
2358  * Cause all lwps in the process to pause (for watchpoint operations).
2359  */
2360 static void
2361 pauselwps(proc_t *p)
2362 {
2363 	kthread_t *t;
2364 
2365 	ASSERT(MUTEX_HELD(&p->p_lock));
2366 	ASSERT(p != curproc);
2367 
2368 	if ((t = p->p_tlist) != NULL) {
2369 		do {
2370 			thread_lock(t);
2371 			t->t_proc_flag |= TP_PAUSE;
2372 			aston(t);
2373 			if ((ISWAKEABLE(t) && (t->t_wchan0 == NULL)) ||
2374 			    ISWAITING(t)) {
2375 				setrun_locked(t);
2376 			}
2377 			prpokethread(t);
2378 			thread_unlock(t);
2379 		} while ((t = t->t_forw) != p->p_tlist);
2380 	}
2381 }
2382 
2383 /*
2384  * undo the effects of pauselwps()
2385  */
2386 static void
2387 unpauselwps(proc_t *p)
2388 {
2389 	kthread_t *t;
2390 
2391 	ASSERT(MUTEX_HELD(&p->p_lock));
2392 	ASSERT(p != curproc);
2393 
2394 	if ((t = p->p_tlist) != NULL) {
2395 		do {
2396 			thread_lock(t);
2397 			t->t_proc_flag &= ~TP_PAUSE;
2398 			if (t->t_state == TS_STOPPED) {
2399 				t->t_schedflag |= TS_UNPAUSE;
2400 				t->t_dtrace_stop = 0;
2401 				setrun_locked(t);
2402 			}
2403 			thread_unlock(t);
2404 		} while ((t = t->t_forw) != p->p_tlist);
2405 	}
2406 }
2407 
2408 /*
2409  * Cancel all watched areas.  Called from prclose().
2410  */
2411 proc_t *
2412 pr_cancel_watch(prnode_t *pnp)
2413 {
2414 	proc_t *p = pnp->pr_pcommon->prc_proc;
2415 	struct as *as;
2416 	kthread_t *t;
2417 
2418 	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2419 
2420 	if (!pr_watch_active(p))
2421 		return (p);
2422 
2423 	/*
2424 	 * Pause the process before dealing with the watchpoints.
2425 	 */
2426 	if (p == curproc) {
2427 		prunlock(pnp);
2428 		while (holdwatch() != 0)
2429 			continue;
2430 		p = pr_p_lock(pnp);
2431 		mutex_exit(&pr_pidlock);
2432 		ASSERT(p == curproc);
2433 	} else {
2434 		pauselwps(p);
2435 		while (p != NULL && pr_allstopped(p, 0) > 0) {
2436 			/*
2437 			 * This cv/mutex pair is persistent even
2438 			 * if the process disappears after we
2439 			 * unmark it and drop p->p_lock.
2440 			 */
2441 			kcondvar_t *cv = &pr_pid_cv[p->p_slot];
2442 			kmutex_t *mp = &p->p_lock;
2443 
2444 			prunmark(p);
2445 			(void) cv_wait(cv, mp);
2446 			mutex_exit(mp);
2447 			p = pr_p_lock(pnp);  /* NULL if process disappeared */
2448 			mutex_exit(&pr_pidlock);
2449 		}
2450 	}
2451 
2452 	if (p == NULL)		/* the process disappeared */
2453 		return (NULL);
2454 
2455 	ASSERT(p == pnp->pr_pcommon->prc_proc);
2456 	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2457 
2458 	if (pr_watch_active(p)) {
2459 		pr_free_watchpoints(p);
2460 		if ((t = p->p_tlist) != NULL) {
2461 			do {
2462 				watch_disable(t);
2463 
2464 			} while ((t = t->t_forw) != p->p_tlist);
2465 		}
2466 	}
2467 
2468 	if ((as = p->p_as) != NULL) {
2469 		avl_tree_t *tree;
2470 		struct watched_page *pwp;
2471 
2472 		/*
2473 		 * If this is the parent of a vfork, the watched page
2474 		 * list has been moved temporarily to p->p_wpage.
2475 		 */
2476 		if (avl_numnodes(&p->p_wpage) != 0)
2477 			tree = &p->p_wpage;
2478 		else
2479 			tree = &as->a_wpage;
2480 
2481 		mutex_exit(&p->p_lock);
2482 		AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2483 
2484 		for (pwp = avl_first(tree); pwp != NULL;
2485 		    pwp = AVL_NEXT(tree, pwp)) {
2486 			pwp->wp_read = 0;
2487 			pwp->wp_write = 0;
2488 			pwp->wp_exec = 0;
2489 			if ((pwp->wp_flags & WP_SETPROT) == 0) {
2490 				pwp->wp_flags |= WP_SETPROT;
2491 				pwp->wp_prot = pwp->wp_oprot;
2492 				pwp->wp_list = p->p_wprot;
2493 				p->p_wprot = pwp;
2494 			}
2495 		}
2496 
2497 		AS_LOCK_EXIT(as, &as->a_lock);
2498 		mutex_enter(&p->p_lock);
2499 	}
2500 
2501 	/*
2502 	 * Unpause the process now.
2503 	 */
2504 	if (p == curproc)
2505 		continuelwps(p);
2506 	else
2507 		unpauselwps(p);
2508 
2509 	return (p);
2510 }
2511