xref: /illumos-gate/usr/src/uts/common/syscall/uadmin.c (revision 56f33205c9ed776c3c909e07d52e94610a675740)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/param.h>
28 #include <sys/types.h>
29 #include <sys/sysmacros.h>
30 #include <sys/systm.h>
31 #include <sys/errno.h>
32 #include <sys/vfs.h>
33 #include <sys/vnode.h>
34 #include <sys/swap.h>
35 #include <sys/file.h>
36 #include <sys/proc.h>
37 #include <sys/var.h>
38 #include <sys/uadmin.h>
39 #include <sys/signal.h>
40 #include <sys/time.h>
41 #include <vm/seg_kmem.h>
42 #include <sys/modctl.h>
43 #include <sys/callb.h>
44 #include <sys/dumphdr.h>
45 #include <sys/debug.h>
46 #include <sys/ftrace.h>
47 #include <sys/cmn_err.h>
48 #include <sys/panic.h>
49 #include <sys/ddi.h>
50 #include <sys/sunddi.h>
51 #include <sys/policy.h>
52 #include <sys/zone.h>
53 #include <sys/condvar.h>
54 #include <sys/thread.h>
55 #include <sys/sdt.h>
56 
57 /*
58  * Administrivia system call.  We provide this in two flavors: one for calling
59  * from the system call path (uadmin), and the other for calling from elsewhere
60  * within the kernel (kadmin).  Callers must beware that certain uadmin cmd
61  * values (specifically A_SWAPCTL) are only supported by uadmin and not kadmin.
62  */
63 
/*
 * Defined outside this file.  kadmin() acquires it with sema_p() on the
 * A_SHUTDOWN path, before unmounting filesystems, so that fsflush cannot
 * run again.
 */
extern ksema_t fsflush_sema;
/*
 * ualock and uacond serialize the A_SHUTDOWN, A_REBOOT, A_REMOUNT and
 * A_CONFIG commands in kadmin().  ua_shutdown_thread is the thread that
 * currently owns that path, or NULL when the path is free; it is read and
 * written under ualock.
 */
kmutex_t ualock;
kcondvar_t uacond;
kthread_t *ua_shutdown_thread = NULL;

/* Set to 1 by kadmin() once an A_SHUTDOWN is in progress. */
int sys_shutdown = 0;
/*
 * Not referenced anywhere in this file; presumably consumed by the
 * fast-reboot code elsewhere in the kernel (TODO confirm).
 */
volatile int fastreboot_dryrun = 0;
71 
72 /*
73  * Kill all user processes in said zone.  A special argument of ALL_ZONES is
74  * passed in when the system as a whole is shutting down.  The lack of per-zone
75  * process lists is likely to make the following a performance bottleneck on a
76  * system with many zones.
77  */
78 void
79 killall(zoneid_t zoneid)
80 {
81 	proc_t *p;
82 
83 	ASSERT(zoneid != GLOBAL_ZONEID);
84 	/*
85 	 * Kill all processes except kernel daemons and ourself.
86 	 * Make a first pass to stop all processes so they won't
87 	 * be trying to restart children as we kill them.
88 	 */
89 	mutex_enter(&pidlock);
90 	for (p = practive; p != NULL; p = p->p_next) {
91 		if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
92 		    p->p_exec != NULLVP &&	/* kernel daemons */
93 		    p->p_as != &kas &&
94 		    p->p_stat != SZOMB) {
95 			mutex_enter(&p->p_lock);
96 			p->p_flag |= SNOWAIT;
97 			sigtoproc(p, NULL, SIGSTOP);
98 			mutex_exit(&p->p_lock);
99 		}
100 	}
101 	p = practive;
102 	while (p != NULL) {
103 		if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
104 		    p->p_exec != NULLVP &&	/* kernel daemons */
105 		    p->p_as != &kas &&
106 		    p->p_stat != SIDL &&
107 		    p->p_stat != SZOMB) {
108 			mutex_enter(&p->p_lock);
109 			if (sigismember(&p->p_sig, SIGKILL)) {
110 				mutex_exit(&p->p_lock);
111 				p = p->p_next;
112 			} else {
113 				sigtoproc(p, NULL, SIGKILL);
114 				mutex_exit(&p->p_lock);
115 				(void) cv_reltimedwait(&p->p_srwchan_cv,
116 				    &pidlock, hz, TR_CLOCK_TICK);
117 				p = practive;
118 			}
119 		} else {
120 			p = p->p_next;
121 		}
122 	}
123 	mutex_exit(&pidlock);
124 }
125 
126 int
127 kadmin(int cmd, int fcn, void *mdep, cred_t *credp)
128 {
129 	int error = 0;
130 	char *buf;
131 	size_t buflen = 0;
132 	boolean_t invoke_cb = B_FALSE;
133 
134 	/*
135 	 * We might be called directly by the kernel's fault-handling code, so
136 	 * we can't assert that the caller is in the global zone.
137 	 */
138 
139 	/*
140 	 * Make sure that cmd is one of the valid <sys/uadmin.h> command codes
141 	 * and that we have appropriate privileges for this action.
142 	 */
143 	switch (cmd) {
144 	case A_FTRACE:
145 	case A_SHUTDOWN:
146 	case A_REBOOT:
147 	case A_REMOUNT:
148 	case A_FREEZE:
149 	case A_DUMP:
150 	case A_SDTTEST:
151 	case A_CONFIG:
152 		if (secpolicy_sys_config(credp, B_FALSE) != 0)
153 			return (EPERM);
154 		break;
155 
156 	default:
157 		return (EINVAL);
158 	}
159 
160 	/*
161 	 * Serialize these operations on ualock.  If it is held, the
162 	 * system should shutdown, reboot, or remount shortly, unless there is
163 	 * an error.  We need a cv rather than just a mutex because proper
164 	 * functioning of A_REBOOT relies on being able to interrupt blocked
165 	 * userland callers.
166 	 *
167 	 * We only clear ua_shutdown_thread after A_REMOUNT or A_CONFIG.
168 	 * Other commands should never return.
169 	 */
170 	if (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_REMOUNT ||
171 	    cmd == A_CONFIG) {
172 		mutex_enter(&ualock);
173 		while (ua_shutdown_thread != NULL) {
174 			if (cv_wait_sig(&uacond, &ualock) == 0) {
175 				/*
176 				 * If we were interrupted, leave, and handle
177 				 * the signal (or exit, depending on what
178 				 * happened)
179 				 */
180 				mutex_exit(&ualock);
181 				return (EINTR);
182 			}
183 		}
184 		ua_shutdown_thread = curthread;
185 		mutex_exit(&ualock);
186 	}
187 
188 	switch (cmd) {
189 	case A_SHUTDOWN:
190 	{
191 		proc_t *p = ttoproc(curthread);
192 
193 		/*
194 		 * Release (almost) all of our own resources if we are called
195 		 * from a user context, however if we are calling kadmin() from
196 		 * a kernel context then we do not release these resources.
197 		 */
198 		if (p != &p0) {
199 			proc_is_exiting(p);
200 			if ((error = exitlwps(0)) != 0) {
201 				/*
202 				 * Another thread in this process also called
203 				 * exitlwps().
204 				 */
205 				mutex_enter(&ualock);
206 				ua_shutdown_thread = NULL;
207 				cv_signal(&uacond);
208 				mutex_exit(&ualock);
209 				return (error);
210 			}
211 			mutex_enter(&p->p_lock);
212 			p->p_flag |= SNOWAIT;
213 			sigfillset(&p->p_ignore);
214 			curthread->t_lwp->lwp_cursig = 0;
215 			curthread->t_lwp->lwp_extsig = 0;
216 			if (p->p_exec) {
217 				vnode_t *exec_vp = p->p_exec;
218 				p->p_exec = NULLVP;
219 				mutex_exit(&p->p_lock);
220 				VN_RELE(exec_vp);
221 			} else {
222 				mutex_exit(&p->p_lock);
223 			}
224 
225 			pollcleanup();
226 			closeall(P_FINFO(curproc));
227 			relvm();
228 
229 		} else {
230 			/*
231 			 * Reset t_cred if not set because much of the
232 			 * filesystem code depends on CRED() being valid.
233 			 */
234 			if (curthread->t_cred == NULL)
235 				curthread->t_cred = kcred;
236 		}
237 
238 		/* indicate shutdown in progress */
239 		sys_shutdown = 1;
240 
241 		/*
242 		 * Communcate that init shouldn't be restarted.
243 		 */
244 		zone_shutdown_global();
245 
246 		killall(ALL_ZONES);
247 		/*
248 		 * If we are calling kadmin() from a kernel context then we
249 		 * do not release these resources.
250 		 */
251 		if (ttoproc(curthread) != &p0) {
252 			VN_RELE(PTOU(curproc)->u_cdir);
253 			if (PTOU(curproc)->u_rdir)
254 				VN_RELE(PTOU(curproc)->u_rdir);
255 			if (PTOU(curproc)->u_cwd)
256 				refstr_rele(PTOU(curproc)->u_cwd);
257 
258 			PTOU(curproc)->u_cdir = rootdir;
259 			PTOU(curproc)->u_rdir = NULL;
260 			PTOU(curproc)->u_cwd = NULL;
261 		}
262 
263 		/*
264 		 * Allow the reboot/halt/poweroff code a chance to do
265 		 * anything it needs to whilst we still have filesystems
266 		 * mounted, like loading any modules necessary for later
267 		 * performing the actual poweroff.
268 		 */
269 		if ((mdep != NULL) && (*(char *)mdep == '/')) {
270 			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
271 			mdpreboot(cmd, fcn, buf);
272 		} else
273 			mdpreboot(cmd, fcn, mdep);
274 
275 		/*
276 		 * Allow fsflush to finish running and then prevent it
277 		 * from ever running again so that vfs_unmountall() and
278 		 * vfs_syncall() can acquire the vfs locks they need.
279 		 */
280 		sema_p(&fsflush_sema);
281 		(void) callb_execute_class(CB_CL_UADMIN_PRE_VFS, NULL);
282 
283 		vfs_unmountall();
284 		(void) VFS_MOUNTROOT(rootvfs, ROOT_UNMOUNT);
285 		vfs_syncall();
286 
287 		dump_ereports();
288 		dump_messages();
289 
290 		invoke_cb = B_TRUE;
291 
292 		/* FALLTHROUGH */
293 	}
294 
295 	case A_REBOOT:
296 		if ((mdep != NULL) && (*(char *)mdep == '/')) {
297 			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
298 			mdboot(cmd, fcn, buf, invoke_cb);
299 		} else
300 			mdboot(cmd, fcn, mdep, invoke_cb);
301 		/* no return expected */
302 		break;
303 
304 	case A_CONFIG:
305 		switch (fcn) {
306 		case AD_UPDATE_BOOT_CONFIG:
307 #ifndef	__sparc
308 		{
309 			extern void fastboot_update_config(const char *);
310 
311 			fastboot_update_config(mdep);
312 		}
313 #endif
314 
315 			break;
316 		}
317 		/* Let other threads enter the shutdown path now */
318 		mutex_enter(&ualock);
319 		ua_shutdown_thread = NULL;
320 		cv_signal(&uacond);
321 		mutex_exit(&ualock);
322 		break;
323 
324 	case A_REMOUNT:
325 		(void) VFS_MOUNTROOT(rootvfs, ROOT_REMOUNT);
326 		/* Let other threads enter the shutdown path now */
327 		mutex_enter(&ualock);
328 		ua_shutdown_thread = NULL;
329 		cv_signal(&uacond);
330 		mutex_exit(&ualock);
331 		break;
332 
333 	case A_FREEZE:
334 	{
335 		/*
336 		 * This is the entrypoint for all suspend/resume actions.
337 		 */
338 		extern int cpr(int, void *);
339 
340 		if (modload("misc", "cpr") == -1)
341 			return (ENOTSUP);
342 		/* Let the CPR module decide what to do with mdep */
343 		error = cpr(fcn, mdep);
344 		break;
345 	}
346 
347 	case A_FTRACE:
348 	{
349 		switch (fcn) {
350 		case AD_FTRACE_START:
351 			(void) FTRACE_START();
352 			break;
353 		case AD_FTRACE_STOP:
354 			(void) FTRACE_STOP();
355 			break;
356 		default:
357 			error = EINVAL;
358 		}
359 		break;
360 	}
361 
362 	case A_DUMP:
363 	{
364 		if (fcn == AD_NOSYNC) {
365 			in_sync = 1;
366 			break;
367 		}
368 
369 		panic_bootfcn = fcn;
370 		panic_forced = 1;
371 
372 		if ((mdep != NULL) && (*(char *)mdep == '/')) {
373 			panic_bootstr = i_convert_boot_device_name(mdep,
374 			    NULL, &buflen);
375 		} else
376 			panic_bootstr = mdep;
377 
378 #ifndef	__sparc
379 		extern void fastboot_update_and_load(int, char *);
380 
381 		fastboot_update_and_load(fcn, mdep);
382 #endif
383 
384 		panic("forced crash dump initiated at user request");
385 		/*NOTREACHED*/
386 	}
387 
388 	case A_SDTTEST:
389 	{
390 		DTRACE_PROBE7(test, int, 1, int, 2, int, 3, int, 4, int, 5,
391 		    int, 6, int, 7);
392 		break;
393 	}
394 
395 	default:
396 		error = EINVAL;
397 	}
398 
399 	return (error);
400 }
401 
402 int
403 uadmin(int cmd, int fcn, uintptr_t mdep)
404 {
405 	int error = 0, rv = 0;
406 	size_t nbytes = 0;
407 	cred_t *credp = CRED();
408 	char *bootargs = NULL;
409 	int reset_status = 0;
410 
411 	if (cmd == A_SHUTDOWN && fcn == AD_FASTREBOOT_DRYRUN) {
412 		ddi_walk_devs(ddi_root_node(), check_driver_quiesce,
413 		    &reset_status);
414 		if (reset_status != 0)
415 			return (EIO);
416 		else
417 			return (0);
418 	}
419 
420 	/*
421 	 * The swapctl system call doesn't have its own entry point: it uses
422 	 * uadmin as a wrapper so we just call it directly from here.
423 	 */
424 	if (cmd == A_SWAPCTL) {
425 		if (get_udatamodel() == DATAMODEL_NATIVE)
426 			error = swapctl(fcn, (void *)mdep, &rv);
427 #if defined(_SYSCALL32_IMPL)
428 		else
429 			error = swapctl32(fcn, (void *)mdep, &rv);
430 #endif /* _SYSCALL32_IMPL */
431 		return (error ? set_errno(error) : rv);
432 	}
433 
434 	/*
435 	 * Certain subcommands intepret a non-NULL mdep value as a pointer to
436 	 * a boot string.  We pull that in as bootargs, if applicable.
437 	 */
438 	if (mdep != NULL &&
439 	    (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_DUMP ||
440 	    cmd == A_FREEZE || cmd == A_CONFIG)) {
441 		bootargs = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP);
442 		if ((error = copyinstr((const char *)mdep, bootargs,
443 		    BOOTARGS_MAX, &nbytes)) != 0) {
444 			kmem_free(bootargs, BOOTARGS_MAX);
445 			return (set_errno(error));
446 		}
447 	}
448 
449 	/*
450 	 * Invoke the appropriate kadmin() routine.
451 	 */
452 	if (getzoneid() != GLOBAL_ZONEID)
453 		error = zone_kadmin(cmd, fcn, bootargs, credp);
454 	else
455 		error = kadmin(cmd, fcn, bootargs, credp);
456 
457 	if (bootargs != NULL)
458 		kmem_free(bootargs, BOOTARGS_MAX);
459 	return (error ? set_errno(error) : 0);
460 }
461