xref: /illumos-gate/usr/src/uts/common/disp/priocntl.c (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/sysmacros.h>
35 #include <sys/signal.h>
36 #include <sys/pcb.h>
37 #include <sys/user.h>
38 #include <sys/systm.h>
39 #include <sys/sysinfo.h>
40 #include <sys/var.h>
41 #include <sys/errno.h>
42 #include <sys/cred.h>
43 #include <sys/proc.h>
44 #include <sys/procset.h>
45 #include <sys/debug.h>
46 #include <sys/inline.h>
47 #include <sys/priocntl.h>
48 #include <sys/disp.h>
49 #include <sys/class.h>
50 #include <sys/modctl.h>
51 #include <sys/t_lock.h>
52 #include <sys/uadmin.h>
53 #include <sys/cmn_err.h>
54 #include <sys/policy.h>
55 #include <sys/schedctl.h>
56 
57 /*
58  * Structure used to pass arguments to the proccmp() function.
59  * The arguments must be passed in a structure because proccmp()
60  * is called indirectly through the dotoprocs() function which
61  * will only pass through a single one word argument.
62  */
63 struct pcmpargs {
64 	id_t	*pcmp_cidp;
65 	int	*pcmp_cntp;
66 	kthread_t **pcmp_retthreadp;
67 };
68 
/*
 * Argument bundle handed to setparms(), which is invoked either directly
 * or indirectly through dotoprocs().
 */
struct stprmargs {
	/* Parameters that setparms() passes on to parmsset(). */
	struct pcparms	*stp_parmsp;
	/*
	 * Side channel for errors: setparms() stores EPERM here (and returns
	 * 0) so that iteration over the remaining processes can continue.
	 */
	int		stp_error;
};
77 
78 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
79 /*
80  * A vaparm_t is an int followed by a long long -- this packs differently
81  * between the 64-bit kernel ABI and the 32-bit user ABI.
82  */
83 static int
84 copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap, uio_seg_t seg)
85 {
86 	pc_vaparms32_t vaparms32;
87 	pc_vaparm32_t *src;
88 	pc_vaparm_t *dst;
89 	uint_t cnt;
90 
91 	ASSERT(get_udatamodel() == DATAMODEL_ILP32);
92 
93 	if ((seg == UIO_USERSPACE ? copyin : kcopy)(arg, &vaparms32,
94 	    sizeof (vaparms32)))
95 		return (EFAULT);
96 
97 	vap->pc_vaparmscnt = vaparms32.pc_vaparmscnt;
98 	if ((cnt = vaparms32.pc_vaparmscnt) > PC_VAPARMCNT)
99 		cnt = PC_VAPARMCNT;
100 	for (src = vaparms32.pc_parms, dst = vap->pc_parms;
101 	    cnt--; src++, dst++) {
102 		dst->pc_key = src->pc_key;
103 		dst->pc_parm = src->pc_parm;
104 	}
105 	return (0);
106 }
107 
/*
 * Copy a pc_vaparms_t in from the caller.  Native callers use the routine
 * held in `copyinfn'; ILP32 callers go through copyin_vaparms32() so the
 * 32-bit layout is converted.
 *
 * NOTE: this macro expands to a use of a variable named `copyinfn', which
 * must be in scope at the point of use.
 */
#define	COPYIN_VAPARMS(arg, vap, size, seg)		\
	(get_udatamodel() == DATAMODEL_NATIVE ?		\
	(*copyinfn)((arg), (vap), (size)) :		\
	copyin_vaparms32((arg), (vap), (seg)))
111 
112 #else
113 
/*
 * Copy a pc_vaparms_t in from the caller using the routine held in
 * `copyinfn'.  `seg' is accepted for interface symmetry and is unused.
 *
 * NOTE: this macro expands to a use of a variable named `copyinfn', which
 * must be in scope at the point of use.
 */
#define	COPYIN_VAPARMS(arg, vap, size, seg)	\
	((*copyinfn)((arg), (vap), (size)))
115 
116 #endif
117 
118 static int donice(procset_t *, pcnice_t *);
119 static int doprio(procset_t *, pcprio_t *);
120 static int proccmp(proc_t *, struct pcmpargs *);
121 static int setparms(proc_t *, struct stprmargs *);
122 extern int threadcmp(struct pcmpargs *, kthread_t *);
123 
124 /*
125  * The priocntl system call.
126  */
127 long
128 priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg,
129     caddr_t arg2, uio_seg_t seg)
130 {
131 	pcinfo_t		pcinfo;
132 	pcparms_t		pcparms;
133 	pcnice_t		pcnice;
134 	pcprio_t		pcprio;
135 	pcadmin_t		pcadmin;
136 	pcpri_t			pcpri;
137 	procset_t		procset;
138 	struct stprmargs	stprmargs;
139 	struct pcmpargs		pcmpargs;
140 	pc_vaparms_t		vaparms;
141 	char			clname[PC_CLNMSZ];
142 	char			*outstr;
143 	int			count;
144 	kthread_t		*retthreadp;
145 	proc_t			*initpp;
146 	int			clnullflag;
147 	int			error = 0;
148 	int			error1 = 0;
149 	int			rv = 0;
150 	pid_t			saved_pid;
151 	id_t			classid;
152 	int			size;
153 	int (*copyinfn)(const void *, void *, size_t);
154 	int (*copyoutfn)(const void *, void *, size_t);
155 
156 	/*
157 	 * First just check the version number. Right now there is only
158 	 * one version we know about and support.  If we get some other
159 	 * version number from the application it may be that the
160 	 * application was built with some future version and is trying
161 	 * to run on an old release of the system (that's us).  In any
162 	 * case if we don't recognize the version number all we can do is
163 	 * return error.
164 	 */
165 	if (pc_version != PC_VERSION)
166 		return (set_errno(EINVAL));
167 
168 	if (seg == UIO_USERSPACE) {
169 		copyinfn = copyin;
170 		copyoutfn = copyout;
171 	} else {
172 		copyinfn = kcopy;
173 		copyoutfn = kcopy;
174 	}
175 
176 	switch (cmd) {
177 	case PC_GETCID:
178 		/*
179 		 * If the arg pointer is NULL, the user just wants to
180 		 * know the number of classes. If non-NULL, the pointer
181 		 * should point to a valid user pcinfo buffer.  In the
182 		 * dynamic world we need to return the number of loaded
183 		 * classes, not the max number of available classes that
184 		 * can be loaded.
185 		 */
186 		if (arg == NULL) {
187 			rv = loaded_classes;
188 			break;
189 		} else {
190 			if ((*copyinfn)(arg, &pcinfo, sizeof (pcinfo)))
191 				return (set_errno(EFAULT));
192 		}
193 
194 		pcinfo.pc_clname[PC_CLNMSZ-1] = '\0';
195 
196 		/*
197 		 * Get the class ID corresponding to user supplied name.
198 		 */
199 		error = getcid(pcinfo.pc_clname, &pcinfo.pc_cid);
200 		if (error)
201 			return (set_errno(error));
202 
203 		/*
204 		 * Can't get info about the sys class.
205 		 */
206 		if (pcinfo.pc_cid == 0)
207 			return (set_errno(EINVAL));
208 
209 		/*
210 		 * Get the class specific information.
211 		 * we MUST make sure that the class has not already
212 		 * been unloaded before we try the CL_GETCLINFO.
213 		 * If it has then we need to load it.
214 		 */
215 		error =
216 		    scheduler_load(pcinfo.pc_clname, &sclass[pcinfo.pc_cid]);
217 		if (error)
218 			return (set_errno(error));
219 		error = CL_GETCLINFO(&sclass[pcinfo.pc_cid], pcinfo.pc_clinfo);
220 		if (error)
221 			return (set_errno(error));
222 
223 		if ((*copyoutfn)(&pcinfo, arg, sizeof (pcinfo)))
224 			return (set_errno(EFAULT));
225 
226 		rv = loaded_classes;
227 
228 		break;
229 
230 	case PC_GETCLINFO:
231 		/*
232 		 * If the arg pointer is NULL, the user just wants to know
233 		 * the number of classes. If non-NULL, the pointer should
234 		 * point to a valid user pcinfo buffer.
235 		 */
236 		if (arg == NULL) {
237 			rv = loaded_classes;
238 			break;
239 		} else {
240 			if ((*copyinfn)(arg, &pcinfo, sizeof (pcinfo)))
241 				return (set_errno(EFAULT));
242 		}
243 
244 		if (pcinfo.pc_cid >= loaded_classes || pcinfo.pc_cid < 1)
245 			return (set_errno(EINVAL));
246 
247 		(void) strncpy(pcinfo.pc_clname, sclass[pcinfo.pc_cid].cl_name,
248 		    PC_CLNMSZ);
249 
250 		/*
251 		 * Get the class specific information.  we MUST make sure
252 		 * that the class has not already been unloaded before we
253 		 * try the CL_GETCLINFO.  If it has then we need to load
254 		 * it.
255 		 */
256 		error =
257 		    scheduler_load(pcinfo.pc_clname, &sclass[pcinfo.pc_cid]);
258 		if (error)
259 			return (set_errno(error));
260 		error = CL_GETCLINFO(&sclass[pcinfo.pc_cid], pcinfo.pc_clinfo);
261 		if (error)
262 			return (set_errno(error));
263 
264 		if ((*copyoutfn)(&pcinfo, arg, sizeof (pcinfo)))
265 			return (set_errno(EFAULT));
266 
267 		rv = loaded_classes;
268 		break;
269 
270 	case PC_SETPARMS:
271 	case PC_SETXPARMS:
272 		/*
273 		 * First check the validity of the parameters we got from
274 		 * the user.  We don't do any permissions checking here
275 		 * because it's done on a per thread basis by parmsset().
276 		 */
277 		if (cmd == PC_SETPARMS) {
278 			if ((*copyinfn)(arg, &pcparms, sizeof (pcparms)))
279 				return (set_errno(EFAULT));
280 
281 			error = parmsin(&pcparms, NULL);
282 		} else {
283 			if ((*copyinfn)(arg, clname, PC_CLNMSZ) ||
284 			    COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms),
285 			    seg))
286 				return (set_errno(EFAULT));
287 			clname[PC_CLNMSZ-1] = '\0';
288 
289 			if (getcid(clname, &pcparms.pc_cid))
290 				return (set_errno(EINVAL));
291 
292 			error = parmsin(&pcparms, &vaparms);
293 		}
294 
295 		if (error)
296 			return (set_errno(error));
297 
298 		/*
299 		 * Get the procset from the user.
300 		 */
301 		if ((*copyinfn)(psp, &procset, sizeof (procset)))
302 			return (set_errno(EFAULT));
303 
304 		/*
305 		 * For performance we do a quick check here to catch
306 		 * common cases where the current thread is the only one
307 		 * in the set.  In such cases we can call parmsset()
308 		 * directly, avoiding the relatively lengthy path through
309 		 * dotoprocs().  The underlying classes expect pidlock to
310 		 * be held.
311 		 */
312 		if (cur_inset_only(&procset) == B_TRUE) {
313 			/* do a single LWP */
314 			if ((procset.p_lidtype == P_LWPID) ||
315 			    (procset.p_ridtype == P_LWPID)) {
316 				mutex_enter(&pidlock);
317 				mutex_enter(&curproc->p_lock);
318 				error = parmsset(&pcparms, curthread);
319 				mutex_exit(&curproc->p_lock);
320 				mutex_exit(&pidlock);
321 			} else {
322 				/* do the entire process otherwise */
323 				stprmargs.stp_parmsp = &pcparms;
324 				stprmargs.stp_error = 0;
325 				mutex_enter(&pidlock);
326 				error = setparms(curproc, &stprmargs);
327 				mutex_exit(&pidlock);
328 				if (error == 0 && stprmargs.stp_error != 0)
329 					error = stprmargs.stp_error;
330 			}
331 			if (error)
332 				return (set_errno(error));
333 		} else {
334 			stprmargs.stp_parmsp = &pcparms;
335 			stprmargs.stp_error = 0;
336 
337 			error1 = error = ESRCH;
338 
339 			/*
340 			 * The dotoprocs() call below will cause
341 			 * setparms() to be called for each thread in the
342 			 * specified procset. setparms() will in turn
343 			 * call parmsset() (which does the real work).
344 			 */
345 			if ((procset.p_lidtype != P_LWPID) ||
346 			    (procset.p_ridtype != P_LWPID)) {
347 				error1 = dotoprocs(&procset, setparms,
348 				    (char *)&stprmargs);
349 			}
350 
351 			/*
352 			 * take care of the case when any of the
353 			 * operands happen to be LWP's
354 			 */
355 
356 			if ((procset.p_lidtype == P_LWPID) ||
357 			    (procset.p_ridtype == P_LWPID)) {
358 				error = dotolwp(&procset, parmsset,
359 				    (char *)&pcparms);
360 				/*
361 				 * Dotolwp() returns with p_lock held.
362 				 * This is required for the GETPARMS case
363 				 * below. So, here we just release the
364 				 * p_lock.
365 				 */
366 				if (MUTEX_HELD(&curproc->p_lock))
367 					mutex_exit(&curproc->p_lock);
368 			}
369 
370 			/*
371 			 * If setparms() encounters a permissions error
372 			 * for one or more of the threads it returns
373 			 * EPERM in stp_error so dotoprocs() will
374 			 * continue through the thread set.  If
375 			 * dotoprocs() returned an error above, it was
376 			 * more serious than permissions and dotoprocs
377 			 * quit when the error was encountered.  We
378 			 * return the more serious error if there was
379 			 * one, otherwise we return EPERM if we got that
380 			 * back.
381 			 */
382 			if (error1 != ESRCH)
383 				error = error1;
384 			if (error == 0 && stprmargs.stp_error != 0)
385 				error = stprmargs.stp_error;
386 		}
387 		break;
388 
389 	case PC_GETPARMS:
390 	case PC_GETXPARMS:
391 		if (cmd == PC_GETPARMS) {
392 			if ((*copyinfn)(arg, &pcparms, sizeof (pcparms)))
393 				return (set_errno(EFAULT));
394 		} else {
395 			if (arg != NULL) {
396 				if ((*copyinfn)(arg, clname, PC_CLNMSZ))
397 					return (set_errno(EFAULT));
398 
399 				clname[PC_CLNMSZ-1] = '\0';
400 
401 				if (getcid(clname, &pcparms.pc_cid))
402 					return (set_errno(EINVAL));
403 			} else
404 				pcparms.pc_cid = PC_CLNULL;
405 
406 			if (COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms),
407 			    seg))
408 				return (set_errno(EFAULT));
409 		}
410 
411 		if (pcparms.pc_cid >= loaded_classes ||
412 		    (pcparms.pc_cid < 1 && pcparms.pc_cid != PC_CLNULL))
413 			return (set_errno(EINVAL));
414 
415 		if ((*copyinfn)(psp, &procset, sizeof (procset)))
416 			return (set_errno(EFAULT));
417 
418 		/*
419 		 * Check to see if the current thread is the only one
420 		 * in the set. If not we must go through the whole set
421 		 * to select a thread.
422 		 */
423 		if (cur_inset_only(&procset) == B_TRUE) {
424 			/* do a single LWP */
425 			if ((procset.p_lidtype == P_LWPID) ||
426 			    (procset.p_ridtype == P_LWPID)) {
427 				if (pcparms.pc_cid != PC_CLNULL &&
428 				    pcparms.pc_cid != curthread->t_cid) {
429 					/*
430 					 * Specified thread not in
431 					 * specified class.
432 					 */
433 					return (set_errno(ESRCH));
434 				} else {
435 					mutex_enter(&curproc->p_lock);
436 					retthreadp = curthread;
437 				}
438 			} else {
439 				count = 0;
440 				retthreadp = NULL;
441 				pcmpargs.pcmp_cidp = &pcparms.pc_cid;
442 				pcmpargs.pcmp_cntp = &count;
443 				pcmpargs.pcmp_retthreadp = &retthreadp;
444 				/*
445 				 * Specified thread not in specified class.
446 				 */
447 				if (pcparms.pc_cid != PC_CLNULL &&
448 				    pcparms.pc_cid != curthread->t_cid)
449 					return (set_errno(ESRCH));
450 				error = proccmp(curproc, &pcmpargs);
451 				if (error) {
452 					if (retthreadp != NULL)
453 						mutex_exit(&(curproc->p_lock));
454 					return (set_errno(error));
455 				}
456 			}
457 		} else {
458 			/*
459 			 * get initpp early to avoid lock ordering problems
460 			 * (we cannot get pidlock while holding any p_lock).
461 			 */
462 			mutex_enter(&pidlock);
463 			initpp = prfind(P_INITPID);
464 			mutex_exit(&pidlock);
465 
466 			/*
467 			 * Select the thread (from the set) whose
468 			 * parameters we are going to return.  First we
469 			 * set up some locations for return values, then
470 			 * we call proccmp() indirectly through
471 			 * dotoprocs().  proccmp() will call a class
472 			 * specific routine which actually does the
473 			 * selection.  To understand how this works take
474 			 * a careful look at the code below, the
475 			 * dotoprocs() function, the proccmp() function,
476 			 * and the class specific cl_proccmp() functions.
477 			 */
478 			if (pcparms.pc_cid == PC_CLNULL)
479 				clnullflag = 1;
480 			else
481 				clnullflag = 0;
482 			count = 0;
483 			retthreadp = NULL;
484 			pcmpargs.pcmp_cidp = &pcparms.pc_cid;
485 			pcmpargs.pcmp_cntp = &count;
486 			pcmpargs.pcmp_retthreadp = &retthreadp;
487 			error1 = error = ESRCH;
488 
489 			if ((procset.p_lidtype != P_LWPID) ||
490 			    (procset.p_ridtype != P_LWPID)) {
491 				error1 = dotoprocs(&procset, proccmp,
492 				    (char *)&pcmpargs);
493 			}
494 
495 			/*
496 			 * take care of combination of LWP and process
497 			 * set case in a procset
498 			 */
499 			if ((procset.p_lidtype == P_LWPID) ||
500 			    (procset.p_ridtype == P_LWPID)) {
501 				error = dotolwp(&procset, threadcmp,
502 				    (char *)&pcmpargs);
503 			}
504 
505 			/*
506 			 * Both proccmp() and threadcmp() return with the
507 			 * p_lock held for the ttoproc(retthreadp). This
508 			 * is required to make sure that the process we
509 			 * chose as the winner doesn't go away
510 			 * i.e. retthreadp has to be a valid pointer.
511 			 *
512 			 * The case below can only happen if the thread
513 			 * with the highest priority was not in your
514 			 * process.  In that case, dotolwp will return
515 			 * holding p_lock for both your process as well
516 			 * as the process in which retthreadp is a
517 			 * thread.
518 			 */
519 			if ((retthreadp != NULL) &&
520 			    (ttoproc(retthreadp) != curproc) &&
521 			    MUTEX_HELD(&(curproc)->p_lock))
522 				mutex_exit(&(curproc)->p_lock);
523 
524 			ASSERT(retthreadp == NULL ||
525 			    MUTEX_HELD(&(ttoproc(retthreadp)->p_lock)));
526 			if (error1 != ESRCH)
527 				error = error1;
528 			if (error) {
529 				if (retthreadp != NULL)
530 				    /* CSTYLED */
531 				    mutex_exit(&(ttoproc(retthreadp)->p_lock));
532 				ASSERT(MUTEX_NOT_HELD(&(curproc)->p_lock));
533 				return (set_errno(error));
534 			}
535 			/*
536 			 * dotoprocs() ignores the init process if it is
537 			 * in the set, unless it was the only process found.
538 			 * Since we are getting parameters here rather than
539 			 * setting them, we want to make sure init is not
540 			 * excluded if it is in the set.
541 			 */
542 			if (initpp != NULL && retthreadp != NULL &&
543 			    ttoproc(retthreadp) != initpp) {
544 				mutex_enter(&initpp->p_lock);
545 				if (procinset(initpp, &procset)) {
546 					mutex_exit(&initpp->p_lock);
547 					(void) proccmp(initpp, &pcmpargs);
548 				} else {
549 					mutex_exit(&initpp->p_lock);
550 				}
551 			}
552 
553 			/*
554 			 * If dotoprocs returned success it found at least
555 			 * one thread in the set.  If proccmp() failed to
556 			 * select a thread it is because the user specified
557 			 * a class and none of the threads in the set
558 			 * belonged to that class, or because the process
559 			 * specified was in the middle of exiting and had
560 			 * cleared its thread list.
561 			 */
562 			if (retthreadp == NULL) {
563 				/*
564 				 * Might be here and still holding p_lock
565 				 * if we did a dotolwp on an lwp that
566 				 * existed but was in the wrong class.
567 				 */
568 				if (MUTEX_HELD(&(curproc)->p_lock))
569 					mutex_exit(&(curproc)->p_lock);
570 				return (set_errno(ESRCH));
571 			}
572 
573 			/*
574 			 * User can only use PC_CLNULL with one thread in set.
575 			 */
576 			if (clnullflag && count > 1) {
577 				if (retthreadp != NULL)
578 					mutex_exit(
579 					    &(ttoproc(retthreadp)->p_lock));
580 				ASSERT(MUTEX_NOT_HELD(&(curproc)->p_lock));
581 				return (set_errno(EINVAL));
582 			}
583 		}
584 
585 		ASSERT(retthreadp == NULL ||
586 		    MUTEX_HELD(&(ttoproc(retthreadp)->p_lock)));
587 		/*
588 		 * It is possible to have retthreadp == NULL. Proccmp()
589 		 * in the rare case (p_tlist == NULL) could return without
590 		 * setting a value for retthreadp.
591 		 */
592 		if (retthreadp == NULL) {
593 			ASSERT(MUTEX_NOT_HELD(&(curproc)->p_lock));
594 			return (set_errno(ESRCH));
595 		}
596 		/*
597 		 * We've selected a thread so now get the parameters.
598 		 */
599 		parmsget(retthreadp, &pcparms);
600 
601 		/*
602 		 * Prepare to return parameters to the user
603 		 */
604 		error = parmsout(&pcparms,
605 		    (cmd == PC_GETPARMS ? NULL : &vaparms));
606 
607 		/*
608 		 * Save pid of selected thread before dropping p_lock.
609 		 */
610 		saved_pid = ttoproc(retthreadp)->p_pid;
611 		mutex_exit(&(ttoproc(retthreadp)->p_lock));
612 		ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
613 
614 		if (error)
615 			return (set_errno(error));
616 
617 		if (cmd == PC_GETPARMS) {
618 			if ((*copyoutfn)(&pcparms, arg, sizeof (pcparms)))
619 				return (set_errno(EFAULT));
620 		} else if ((error = vaparmsout(arg, &pcparms, &vaparms,
621 		    seg)) != 0)
622 			return (set_errno(error));
623 
624 		/*
625 		 * And finally, return the pid of the selected thread.
626 		 */
627 		rv = saved_pid;
628 		break;
629 
630 	case PC_ADMIN:
631 		if (get_udatamodel() == DATAMODEL_NATIVE) {
632 			if ((*copyinfn)(arg, &pcadmin, sizeof (pcadmin_t)))
633 				return (set_errno(EFAULT));
634 #ifdef _SYSCALL32_IMPL
635 		} else {
636 			/* pcadmin struct from ILP32 callers */
637 			pcadmin32_t pcadmin32;
638 
639 			if ((*copyinfn)(arg, &pcadmin32, sizeof (pcadmin32_t)))
640 				return (set_errno(EFAULT));
641 			pcadmin.pc_cid = pcadmin32.pc_cid;
642 			pcadmin.pc_cladmin = (caddr_t)(uintptr_t)
643 			    pcadmin32.pc_cladmin;
644 #endif /* _SYSCALL32_IMPL */
645 		}
646 
647 		if (pcadmin.pc_cid >= loaded_classes ||
648 		    pcadmin.pc_cid < 1)
649 			return (set_errno(EINVAL));
650 
651 		/*
652 		 * Have the class do whatever the user is requesting.
653 		 */
654 		mutex_enter(&ualock);
655 		error = CL_ADMIN(&sclass[pcadmin.pc_cid], pcadmin.pc_cladmin,
656 		    CRED());
657 		mutex_exit(&ualock);
658 		break;
659 
660 	case PC_GETPRIRANGE:
661 		if ((*copyinfn)(arg, &pcpri, sizeof (pcpri_t)))
662 			return (set_errno(EFAULT));
663 
664 		if (pcpri.pc_cid >= loaded_classes || pcpri.pc_cid < 0)
665 			return (set_errno(EINVAL));
666 
667 		error = CL_GETCLPRI(&sclass[pcpri.pc_cid], &pcpri);
668 		if (!error) {
669 			if ((*copyoutfn)(&pcpri, arg, sizeof (pcpri)))
670 				return (set_errno(EFAULT));
671 		}
672 		break;
673 
674 	case PC_DONICE:
675 		/*
676 		 * Get pcnice and procset structures from the user.
677 		 */
678 		if ((*copyinfn)(arg, &pcnice, sizeof (pcnice)) ||
679 		    (*copyinfn)(psp, &procset, sizeof (procset)))
680 			return (set_errno(EFAULT));
681 
682 		error = donice(&procset, &pcnice);
683 
684 		if (!error && (pcnice.pc_op == PC_GETNICE)) {
685 			if ((*copyoutfn)(&pcnice, arg, sizeof (pcnice)))
686 				return (set_errno(EFAULT));
687 		}
688 		break;
689 
690 	case PC_DOPRIO:
691 		/*
692 		 * Get pcprio and procset structures from the user.
693 		 */
694 		if ((*copyinfn)(arg, &pcprio, sizeof (pcprio)) ||
695 		    (*copyinfn)(psp, &procset, sizeof (procset)))
696 			return (set_errno(EFAULT));
697 
698 		error = doprio(&procset, &pcprio);
699 
700 		if (!error && (pcprio.pc_op == PC_GETPRIO)) {
701 			if ((*copyoutfn)(&pcprio, arg, sizeof (pcprio)))
702 				return (set_errno(EFAULT));
703 		}
704 		break;
705 
706 	case PC_SETDFLCL:
707 		if (secpolicy_dispadm(CRED()) != 0)
708 			return (set_errno(EPERM));
709 
710 		if (copyin(arg, (caddr_t)clname, PC_CLNMSZ) != 0)
711 			return (set_errno(EFAULT));
712 		clname[PC_CLNMSZ-1] = '\0';
713 
714 		if (getcid(clname, &classid) != 0)
715 			return (set_errno(EINVAL));
716 		if (classid == syscid)
717 			return (set_errno(EINVAL));
718 		defaultcid = classid;
719 		ASSERT(defaultcid > 0 && defaultcid < loaded_classes);
720 		break;
721 
722 	case PC_GETDFLCL:
723 		mutex_enter(&class_lock);
724 
725 		if (defaultcid >= loaded_classes)
726 			outstr = "";
727 		else
728 			outstr = sclass[defaultcid].cl_name;
729 		size = strlen(outstr) + 1;
730 		if (arg != NULL)
731 			if ((*copyoutfn)(outstr, arg, size) != 0)
732 				error = EFAULT;
733 
734 		mutex_exit(&class_lock);
735 		break;
736 
737 	default:
738 		error = EINVAL;
739 		break;
740 	}
741 	return (error ? (set_errno(error)) : rv);
742 }
743 
744 long
745 priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
746 {
747 	return (priocntl_common(pc_version, psp, cmd, arg, arg2,
748 	    UIO_USERSPACE));
749 }
750 
751 /*
752  * The proccmp() function is part of the implementation of the
753  * PC_GETPARMS command of the priocntl system call.  This function works
754  * with the system call code and with the class specific cl_globpri()
755  * function to select one thread from a specified procset based on class
756  * specific criteria. proccmp() is called indirectly from the priocntl
757  * code through the dotoprocs function.  Basic strategy is dotoprocs()
758  * calls us once for each thread in the set.  We in turn call the class
759  * specific function to compare the current thread from dotoprocs to the
760  * "best" (according to the class criteria) found so far.  We keep the
761  * "best" thread in *pcmp_retthreadp.
762  */
763 static int
764 proccmp(proc_t *pp, struct pcmpargs *argp)
765 {
766 	kthread_t	*tx;
767 	kthread_t	*ty;
768 	int		last_pri = -1;
769 	int		tx_pri;
770 	int		found = 0;
771 
772 	mutex_enter(&pp->p_lock);
773 
774 	if (pp->p_tlist == NULL) {
775 		mutex_exit(&pp->p_lock);
776 		return (0);
777 	}
778 	(*argp->pcmp_cntp)++;	/* Increment count of procs in the set */
779 
780 	if (*argp->pcmp_cidp == PC_CLNULL) {
781 		/*
782 		 * If no cid is specified, then lets just pick the first one.
783 		 * It doesn't matter because if the number of processes in the
784 		 * set are more than 1, then we return EINVAL in priocntlsys.
785 		 */
786 		*argp->pcmp_cidp = pp->p_tlist->t_cid;
787 	}
788 	ty = tx = pp->p_tlist;
789 	do {
790 		if (tx->t_cid == *argp->pcmp_cidp) {
791 			/*
792 			 * We found one which matches the required cid.
793 			 */
794 			found = 1;
795 			if ((tx_pri = CL_GLOBPRI(tx)) > last_pri) {
796 				last_pri = tx_pri;
797 				ty = tx;
798 			}
799 		}
800 	} while ((tx = tx->t_forw) != pp->p_tlist);
801 	if (found) {
802 		if (*argp->pcmp_retthreadp == NULL) {
803 			/*
804 			 * First time through for this set.
805 			 * keep the mutex held. He might be the one!
806 			 */
807 			*argp->pcmp_retthreadp = ty;
808 		} else {
809 			tx = *argp->pcmp_retthreadp;
810 			if (CL_GLOBPRI(ty) <= CL_GLOBPRI(tx)) {
811 				mutex_exit(&pp->p_lock);
812 			} else {
813 				mutex_exit(&(ttoproc(tx)->p_lock));
814 				*argp->pcmp_retthreadp = ty;
815 			}
816 		}
817 	} else {
818 		/*
819 		 * We actually didn't find anything of the same cid in
820 		 * this process.
821 		 */
822 		mutex_exit(&pp->p_lock);
823 	}
824 	return (0);
825 }
826 
827 
828 int
829 threadcmp(struct pcmpargs *argp, kthread_t *tp)
830 {
831 	kthread_t	*tx;
832 	proc_t		*pp;
833 
834 	ASSERT(MUTEX_HELD(&(ttoproc(tp))->p_lock));
835 
836 	(*argp->pcmp_cntp)++;   /* Increment count of procs in the set */
837 	if (*argp->pcmp_cidp == PC_CLNULL) {
838 		/*
839 		 * If no cid is specified, then lets just pick the first one.
840 		 * It doesn't matter because if the number of threads in the
841 		 * set are more than 1, then we return EINVAL in priocntlsys.
842 		 */
843 		*argp->pcmp_cidp = tp->t_cid;
844 	}
845 	if (tp->t_cid == *argp->pcmp_cidp) {
846 		if (*argp->pcmp_retthreadp == NULL) {
847 			/*
848 			 * First time through for this set.
849 			 */
850 			*argp->pcmp_retthreadp = tp;
851 		} else {
852 			tx = *argp->pcmp_retthreadp;
853 			if (CL_GLOBPRI(tp) > CL_GLOBPRI(tx)) {
854 				/*
855 				 * Unlike proccmp(), we don't release the
856 				 * p_lock of the ttoproc(tp) if tp's global
857 				 * priority is less than tx's. We need to go
858 				 * through the entire list before we can do
859 				 * that. The p_lock is released by the caller
860 				 * of dotolwp().
861 				 */
862 				pp = ttoproc(tx);
863 				ASSERT(MUTEX_HELD(&pp->p_lock));
864 				if (pp != curproc) {
865 					mutex_exit(&pp->p_lock);
866 				}
867 				*argp->pcmp_retthreadp = tp;
868 			}
869 		}
870 	}
871 	return (0);
872 }
873 
874 
875 /*
876  * The setparms() function is called indirectly by priocntlsys()
877  * through the dotoprocs() function.  setparms() acts as an
878  * intermediary between dotoprocs() and the parmsset() function,
879  * calling parmsset() for each thread in the set and handling
880  * the error returns on their way back up to dotoprocs().
881  */
882 static int
883 setparms(proc_t *targpp, struct stprmargs *stprmp)
884 {
885 	int error = 0;
886 	kthread_t *t;
887 	int err;
888 
889 	mutex_enter(&targpp->p_lock);
890 	if ((t = targpp->p_tlist) == NULL) {
891 		mutex_exit(&targpp->p_lock);
892 		return (0);
893 	}
894 	do {
895 		err = parmsset(stprmp->stp_parmsp, t);
896 		if (error == 0)
897 			error = err;
898 	} while ((t = t->t_forw) != targpp->p_tlist);
899 	mutex_exit(&targpp->p_lock);
900 	if (error) {
901 		if (error == EPERM) {
902 			stprmp->stp_error = EPERM;
903 			return (0);
904 		} else {
905 			return (error);
906 		}
907 	} else
908 		return (0);
909 }
910 
911 int
912 setthreadnice(pcnice_t *pcnice, kthread_t *tp)
913 {
914 	int error;
915 	int nice;
916 	int inc;
917 	id_t rtcid;
918 
919 	ASSERT(MUTEX_HELD(&pidlock));
920 	ASSERT(MUTEX_HELD(&(ttoproc(tp)->p_lock)));
921 
922 	/*
923 	 * The XPG5 standard requires that any realtime process or thread
924 	 * must be unaffected by a call to setpriority().
925 	 */
926 	error = getcidbyname("RT", &rtcid);
927 	if (error == 0 && tp->t_cid == rtcid) {
928 		if (pcnice->pc_op == PC_SETNICE)
929 			return (0);
930 	}
931 
932 	if ((error = CL_DONICE(tp, CRED(), 0, &nice)) != 0)
933 		return (error);
934 
935 	if (pcnice->pc_op == PC_GETNICE) {
936 		/*
937 		 * If there is no change to priority, we should return the
938 		 * highest priority (lowest numerical value) pertaining to
939 		 * any of the specified threads.
940 		 */
941 		if (nice < pcnice->pc_val)
942 			pcnice->pc_val = nice;
943 	} else {
944 		ASSERT(pcnice->pc_op == PC_SETNICE);
945 		/*
946 		 * Try to change the nice value of the thread.
947 		 */
948 		inc = pcnice->pc_val - nice;
949 
950 		error = CL_DONICE(tp, CRED(), inc, &inc);
951 		schedctl_set_cidpri(tp);
952 	}
953 
954 	return (error);
955 }
956 
957 int
958 setprocnice(proc_t *pp, pcnice_t *pcnice)
959 {
960 	kthread_t *tp;
961 	int retval = 0;
962 	int error;
963 
964 	ASSERT(MUTEX_HELD(&pidlock));
965 	mutex_enter(&pp->p_lock);
966 
967 	if ((tp = pp->p_tlist) == NULL) {
968 		mutex_exit(&pp->p_lock);
969 		return (ESRCH);
970 	}
971 
972 	/*
973 	 * Check permissions before changing the nice value.
974 	 */
975 	if (pcnice->pc_op == PC_SETNICE) {
976 		if (!prochasprocperm(pp, curproc, CRED())) {
977 			mutex_exit(&pp->p_lock);
978 			return (EPERM);
979 		}
980 	}
981 
982 	do {
983 		error = setthreadnice(pcnice, tp);
984 		if (error)
985 			retval = error;
986 	} while ((tp = tp->t_forw) != pp->p_tlist);
987 
988 	mutex_exit(&pp->p_lock);
989 	return (retval);
990 }
991 
992 /*
993  * Update the nice value of the specified LWP or set of processes.
994  */
995 static int
996 donice(procset_t *procset, pcnice_t *pcnice)
997 {
998 	int err_proc = 0;
999 	int err_thread = 0;
1000 	int err = 0;
1001 
1002 	/*
1003 	 * Sanity check.
1004 	 */
1005 	if (pcnice->pc_op != PC_GETNICE && pcnice->pc_op != PC_SETNICE)
1006 		return (EINVAL);
1007 
1008 	/*
1009 	 * If it is PC_GETNICE operation then set pc_val to the largest
1010 	 * possible nice value to help us find the lowest nice value
1011 	 * pertaining to any of the specified processes.
1012 	 */
1013 	if (pcnice->pc_op == PC_GETNICE)
1014 		pcnice->pc_val = NZERO;
1015 
1016 	if (procset->p_lidtype != P_LWPID ||
1017 	    procset->p_ridtype != P_LWPID)
1018 		err_proc = dotoprocs(procset, setprocnice, (char *)pcnice);
1019 
1020 	if (procset->p_lidtype == P_LWPID || procset->p_ridtype == P_LWPID) {
1021 		err_thread = dotolwp(procset, setthreadnice, (char *)pcnice);
1022 		/*
1023 		 * dotolwp() can return with p_lock held.  This is required
1024 		 * for the priocntl GETPARMS case.  So, here we just release
1025 		 * the p_lock.
1026 		 */
1027 		if (MUTEX_HELD(&curproc->p_lock))
1028 			mutex_exit(&curproc->p_lock);
1029 
1030 		/*
1031 		 * If we were called for a single LWP, then ignore ESRCH
1032 		 * returned by the previous dotoprocs() call.
1033 		 */
1034 		if (err_proc == ESRCH)
1035 			err_proc = 0;
1036 	}
1037 
1038 	/*
1039 	 * dotoprocs() ignores the init process if it is in the set, unless
1040 	 * it was the only process found. We want to make sure init is not
1041 	 * excluded if we're going PC_GETNICE operation.
1042 	 */
1043 	if (pcnice->pc_op == PC_GETNICE) {
1044 		proc_t *initpp;
1045 
1046 		mutex_enter(&pidlock);
1047 		if ((initpp = prfind(P_INITPID)) != NULL) {
1048 			mutex_enter(&initpp->p_lock);
1049 			if (procinset(initpp, procset)) {
1050 				mutex_exit(&initpp->p_lock);
1051 				err = setprocnice(initpp, pcnice);
1052 			} else {
1053 				mutex_exit(&initpp->p_lock);
1054 			}
1055 		}
1056 		mutex_exit(&pidlock);
1057 	}
1058 
1059 	/*
1060 	 * We're returning the latest error here that we've got back from
1061 	 * the setthreadnice() or setprocnice(). That is, err_thread and/or
1062 	 * err_proc can be replaced by err.
1063 	 */
1064 	if (!err)
1065 		err = err_thread ? err_thread : err_proc;
1066 
1067 	return (err);
1068 }
1069 
1070 int
1071 setthreadprio(pcprio_t *pcprio, kthread_t *tp)
1072 {
1073 	int prio = 0;
1074 	int incr;
1075 	int error;
1076 
1077 	ASSERT(MUTEX_HELD(&pidlock));
1078 	ASSERT(MUTEX_HELD(&(ttoproc(tp)->p_lock)));
1079 
1080 	if (pcprio->pc_op == PC_SETPRIO && pcprio->pc_cid != tp->t_cid) {
1081 		/*
1082 		 * Target thread must change to new class.
1083 		 * See comments in parmsset(), from where this code was copied.
1084 		 */
1085 		void *bufp = NULL;
1086 		caddr_t clprocp = (caddr_t)tp->t_cldata;
1087 		id_t oldcid = tp->t_cid;
1088 
1089 		error = CL_CANEXIT(tp, NULL);
1090 		if (error)
1091 			return (error);
1092 		if (CL_ALLOC(&bufp, pcprio->pc_cid, KM_NOSLEEP) != 0)
1093 			return (ENOMEM);
1094 		error = CL_ENTERCLASS(tp, pcprio->pc_cid, NULL, CRED(), bufp);
1095 		if (error) {
1096 			CL_FREE(pcprio->pc_cid, bufp);
1097 			return (error);
1098 		}
1099 		CL_EXITCLASS(oldcid, clprocp);
1100 		schedctl_set_cidpri(tp);
1101 	}
1102 
1103 	if ((error = CL_DOPRIO(tp, CRED(), 0, &prio)) != 0)
1104 		return (error);
1105 
1106 	if (pcprio->pc_op == PC_GETPRIO) {
1107 		/*
1108 		 * If we are not setting the priority, we should return the
1109 		 * highest priority pertaining to any of the specified threads.
1110 		 */
1111 		if (prio > pcprio->pc_val) {
1112 			pcprio->pc_cid = tp->t_cid;
1113 			pcprio->pc_val = prio;
1114 		}
1115 	} else if (prio != pcprio->pc_val) {
1116 		/*
1117 		 * Try to change the priority of the thread.
1118 		 */
1119 		incr = pcprio->pc_val - prio;
1120 		error = CL_DOPRIO(tp, CRED(), incr, &prio);
1121 		schedctl_set_cidpri(tp);
1122 	}
1123 
1124 	return (error);
1125 }
1126 
1127 int
1128 setprocprio(proc_t *pp, pcprio_t *pcprio)
1129 {
1130 	kthread_t *tp;
1131 	int retval = 0;
1132 	int error;
1133 
1134 	ASSERT(MUTEX_HELD(&pidlock));
1135 	mutex_enter(&pp->p_lock);
1136 
1137 	if ((tp = pp->p_tlist) == NULL) {
1138 		mutex_exit(&pp->p_lock);
1139 		return (ESRCH);
1140 	}
1141 
1142 	/*
1143 	 * Check permissions before changing the prio value.
1144 	 */
1145 	if (pcprio->pc_op == PC_SETPRIO) {
1146 		if (!prochasprocperm(pp, curproc, CRED())) {
1147 			mutex_exit(&pp->p_lock);
1148 			return (EPERM);
1149 		}
1150 	}
1151 
1152 	do {
1153 		error = setthreadprio(pcprio, tp);
1154 		if (error)
1155 			retval = error;
1156 	} while ((tp = tp->t_forw) != pp->p_tlist);
1157 
1158 	mutex_exit(&pp->p_lock);
1159 	return (retval);
1160 }
1161 
1162 /*
1163  * Set the class and priority of the specified LWP or set of processes.
1164  */
1165 static int
1166 doprio(procset_t *procset, pcprio_t *pcprio)
1167 {
1168 	int err_proc = 0;
1169 	int err_thread = 0;
1170 	int err = 0;
1171 
1172 	/*
1173 	 * Sanity check.
1174 	 */
1175 	if (pcprio->pc_op != PC_GETPRIO && pcprio->pc_op != PC_SETPRIO)
1176 		return (EINVAL);
1177 	if (pcprio->pc_op == PC_SETPRIO &&
1178 	    (pcprio->pc_cid >= loaded_classes || pcprio->pc_cid < 1))
1179 		return (EINVAL);
1180 
1181 	/*
1182 	 * If it is a PC_GETPRIO operation then set pc_val to the smallest
1183 	 * possible prio value to help us find the highest priority
1184 	 * pertaining to any of the specified processes.
1185 	 */
1186 	if (pcprio->pc_op == PC_GETPRIO)
1187 		pcprio->pc_val = SHRT_MIN;
1188 
1189 	if (procset->p_lidtype != P_LWPID ||
1190 	    procset->p_ridtype != P_LWPID)
1191 		err_proc = dotoprocs(procset, setprocprio, (char *)pcprio);
1192 
1193 	if (procset->p_lidtype == P_LWPID || procset->p_ridtype == P_LWPID) {
1194 		err_thread = dotolwp(procset, setthreadprio, (char *)pcprio);
1195 		/*
1196 		 * dotolwp() can return with p_lock held.  This is required
1197 		 * for the priocntl GETPARMS case.  So, here we just release
1198 		 * the p_lock.
1199 		 */
1200 		if (MUTEX_HELD(&curproc->p_lock))
1201 			mutex_exit(&curproc->p_lock);
1202 
1203 		/*
1204 		 * If we were called for a single LWP, then ignore ESRCH
1205 		 * returned by the previous dotoprocs() call.
1206 		 */
1207 		if (err_proc == ESRCH)
1208 			err_proc = 0;
1209 	}
1210 
1211 	/*
1212 	 * dotoprocs() ignores the init process if it is in the set, unless
1213 	 * it was the only process found. We want to make sure init is not
1214 	 * excluded if we're going PC_GETPRIO operation.
1215 	 */
1216 	if (pcprio->pc_op == PC_GETPRIO) {
1217 		proc_t *initpp;
1218 
1219 		mutex_enter(&pidlock);
1220 		if ((initpp = prfind(P_INITPID)) != NULL) {
1221 			mutex_enter(&initpp->p_lock);
1222 			if (procinset(initpp, procset)) {
1223 				mutex_exit(&initpp->p_lock);
1224 				err = setprocprio(initpp, pcprio);
1225 			} else {
1226 				mutex_exit(&initpp->p_lock);
1227 			}
1228 		}
1229 		mutex_exit(&pidlock);
1230 	}
1231 
1232 	/*
1233 	 * We're returning the latest error here that we've got back from
1234 	 * the setthreadprio() or setprocprio(). That is, err_thread and/or
1235 	 * err_proc can be replaced by err.
1236 	 */
1237 	if (!err)
1238 		err = err_thread ? err_thread : err_proc;
1239 
1240 	return (err);
1241 }
1242