xref: /illumos-gate/usr/src/lib/libpctx/common/libpctx.c (revision ccd81fdda071e031209c777983199d191c35b0a2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * This file contains a set of generic routines for periodically
29  * sampling the state of another process, or tree of processes.
30  *
31  * It is built upon the infrastructure provided by libproc.
32  */
33 
34 #include <sys/wait.h>
35 #include <sys/syscall.h>
36 #include <sys/time.h>
37 #include <libproc.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <errno.h>
41 #include <unistd.h>
42 #include <signal.h>
43 #include <string.h>
44 #include <strings.h>
45 #include <limits.h>
46 #include <ctype.h>
47 #include <libintl.h>
48 #include <libcpc.h>
49 #include <sys/cpc_impl.h>
50 
51 #include "libpctx.h"
52 
/*
 * Per-process tracking context.  One of these is allocated by
 * pctx_create() or pctx_capture() and discarded by pctx_release().
 */
struct __pctx {
	pctx_errfn_t *errfn;		/* error reporter used by pctx_error() */
	struct ps_prochandle *Pr;	/* libproc handle; NULL once freed */
	void *uarg;			/* caller's cookie, passed to handlers */
	/* Event handlers installed (or defaulted) by pctx_set_events(). */
	pctx_sysc_execfn_t *exec;
	pctx_sysc_forkfn_t *fork;
	pctx_sysc_exitfn_t *exit;
	pctx_sysc_lwp_createfn_t *lwp_create;
	pctx_init_lwpfn_t *init_lwp;
	pctx_fini_lwpfn_t *fini_lwp;
	pctx_sysc_lwp_exitfn_t *lwp_exit;
	int verbose;			/* non-zero: emit extra diagnostics */
	int created;			/* 1 if via pctx_create(), 0 if captured */
	int sigblocked;			/* pctx_begin_syscalls() nesting depth */
	int terminate;			/* set by pctx_terminate() */
	sigset_t savedset;		/* mask saved by pctx_begin_syscalls() */
	cpc_t *cpc;			/* last cpc_t passed to __pctx_cpc() */
};
71 
/*
 * Hook installed by __pctx_cpc_register_callback().  It is called with a
 * context's recorded cpc_t from pctx_free(), and from __pctx_cpc() when a
 * different cpc_t replaces the recorded one.  NULL until registered.
 */
static void (*pctx_cpc_callback)(cpc_t *cpc, struct __pctx *pctx);
73 
/*
 * Fallback error reporter, used when the caller supplies no error
 * function.  Writes "libpctx: pctx_<fn>: " followed by the formatted
 * message to stderr.
 */
static void
pctx_default_errfn(const char *fn, const char *fmt, va_list ap)
{
	FILE *out = stderr;

	(void) fprintf(out, "libpctx: pctx_%s: ", fn);
	(void) vfprintf(out, fmt, ap);
}
80 
81 /*PRINTFLIKE3*/
82 static void
83 pctx_error(pctx_t *pctx, const char *fn, const char *fmt, ...)
84 {
85 	va_list ap;
86 
87 	va_start(ap, fmt);
88 	pctx->errfn(fn, fmt, ap);
89 	va_end(ap);
90 }
91 
92 /*
93  * Create a new process and bind the user args for it
94  */
95 pctx_t *
96 pctx_create(
97     const char *filename,
98     char *const *argv,
99     void *arg,
100     int verbose,
101     pctx_errfn_t *errfn)
102 {
103 	static const char fn[] = "create";
104 	int err;
105 	pctx_t *pctx;
106 
107 	pctx = calloc(1, sizeof (*pctx));
108 	pctx->uarg = arg;
109 	pctx->verbose = verbose;
110 	pctx->terminate = 0;
111 	pctx->errfn = errfn ? errfn : pctx_default_errfn;
112 
113 	if ((pctx->Pr = Pcreate(filename, argv, &err, 0, 0)) == NULL) {
114 		switch (err) {
115 		case C_PERM:
116 			pctx_error(pctx, fn, gettext("cannot trace set-id or "
117 			    "unreadable program '%s'\n"), filename);
118 			break;
119 		case C_LP64:
120 			pctx_error(pctx, fn, gettext("cannot control LP64 "
121 			    "program '%s'\n"), filename);
122 			break;
123 		case C_NOEXEC:
124 			pctx_error(pctx, fn, gettext("cannot execute "
125 			    "program '%s'\n"), filename);
126 			break;
127 		case C_NOENT:
128 			pctx_error(pctx, fn, gettext("cannot find"
129 			    "program '%s'\n"), filename);
130 			break;
131 		case C_FORK:
132 			pctx_error(pctx, fn, gettext("cannot fork, "
133 			    "program '%s'\n"), filename);
134 			break;
135 		default:
136 			pctx_error(pctx, fn, gettext("%s, program '%s'\n"),
137 			    Pcreate_error(err), filename);
138 			break;
139 		}
140 		free(pctx);
141 		return (NULL);
142 	}
143 
144 	if (Psysentry(pctx->Pr, SYS_exit, 1) == -1) {
145 		pctx_error(pctx, fn,
146 		    gettext("can't stop-on-exit() program '%s'\n"), filename);
147 		Prelease(pctx->Pr, PRELEASE_KILL);
148 		free(pctx);
149 		return (NULL);
150 	}
151 	/*
152 	 * Set kill-on-last-close so the controlled process
153 	 * dies if we die.
154 	 */
155 	pctx->created = 1;
156 	(void) Psetflags(pctx->Pr, PR_KLC);
157 	(void) pctx_set_events(pctx, PCTX_NULL_EVENT);
158 
159 	return (pctx);
160 }
161 
162 /*
163  * Capture an existing process and bind the user args for it
164  */
165 pctx_t *
166 pctx_capture(pid_t pid, void *arg, int verbose, pctx_errfn_t *errfn)
167 {
168 	static const char fn[] = "capture";
169 	int err;
170 	pctx_t *pctx;
171 
172 	pctx = calloc(1, sizeof (*pctx));
173 	pctx->uarg = arg;
174 	pctx->verbose = verbose;
175 	pctx->errfn = errfn ? errfn : pctx_default_errfn;
176 
177 	if ((pctx->Pr = Pgrab(pid, 0, &err)) == NULL) {
178 		switch (err) {
179 		case G_NOPROC:
180 			pctx_error(pctx, fn,
181 			    gettext("pid %d doesn't exist\n"), (int)pid);
182 			break;
183 		case G_ZOMB:
184 			pctx_error(pctx, fn,
185 			    gettext("pid %d is a zombie\n"), (int)pid);
186 			break;
187 		case G_PERM:
188 			pctx_error(pctx, fn,
189 			    gettext("pid %d: permission denied\n"), (int)pid);
190 			break;
191 		case G_BUSY:
192 			pctx_error(pctx, fn,
193 			    gettext("pid %d is already being traced\n"),
194 			    (int)pid);
195 			break;
196 		case G_SYS:
197 			pctx_error(pctx, fn,
198 			    gettext("pid %d is a system process\n"), (int)pid);
199 			break;
200 		case G_SELF:
201 			pctx_error(pctx, fn,
202 			    gettext("cannot capture self!\n"));
203 			break;
204 		case G_LP64:
205 			pctx_error(pctx, fn, gettext("cannot control LP64 "
206 			    "process, pid %d\n"), (int)pid);
207 			break;
208 		default:
209 			pctx_error(pctx, fn, gettext("%s: pid %d\n"),
210 			    Pgrab_error(err), (int)pid);
211 			break;
212 		}
213 		free(pctx);
214 		return (NULL);
215 	}
216 
217 	if (Psysentry(pctx->Pr, SYS_exit, 1) == -1) {
218 		pctx_error(pctx, fn,
219 		    gettext("can't stop-on-exit() pid %d\n"), (int)pid);
220 		Prelease(pctx->Pr, PRELEASE_CLEAR);
221 		free(pctx);
222 		return (NULL);
223 	}
224 
225 	/*
226 	 * Set run-on-last-close so the controlled process
227 	 * runs even if we die on a signal.  This is because
228 	 * we grabbed an existing process - it would be impolite
229 	 * to cause it to die if we exit prematurely.
230 	 */
231 	pctx->created = 0;
232 	(void) Psetflags(pctx->Pr, PR_RLC);
233 	(void) pctx_set_events(pctx, PCTX_NULL_EVENT);
234 
235 	return (pctx);
236 }
237 
/*
 * Do-nothing placeholder installed by pctx_set_events() for the fork and
 * exit handlers.  Its address also serves as the "no handler registered"
 * sentinel that pctx_set_events() compares against.
 */
/*ARGSUSED*/
static void
default_void(pctx_t *pctx)
{}
242 
/*
 * Placeholder that always reports success (0).  pctx_set_events() installs
 * it for the exec, lwp_create, init_lwp, fini_lwp and lwp_exit handlers.
 * Its address also serves as the "no handler registered" sentinel checked
 * by pctx_set_events(), pctx_lwpiterate() and pctx_run().
 */
/*ARGSUSED*/
static int
default_int(pctx_t *pctx)
{
	return (0);
}
249 
250 int
251 pctx_set_events(pctx_t *pctx, ...)
252 {
253 	static const char fn[] = "set_events";
254 	va_list pvar;
255 	int error = 0;
256 	pctx_event_t event;
257 
258 	va_start(pvar, pctx);
259 	do {
260 		switch (event = (pctx_event_t)va_arg(pvar, pctx_event_t)) {
261 		case PCTX_NULL_EVENT:
262 			break;
263 		case PCTX_SYSC_EXEC_EVENT:
264 			pctx->exec = (pctx_sysc_execfn_t *)
265 			    va_arg(pvar, pctx_sysc_execfn_t *);
266 			break;
267 		case PCTX_SYSC_FORK_EVENT:
268 			pctx->fork = (pctx_sysc_forkfn_t *)
269 			    va_arg(pvar, pctx_sysc_forkfn_t *);
270 			break;
271 		case PCTX_SYSC_EXIT_EVENT:	/* always intercepted */
272 			pctx->exit = (pctx_sysc_exitfn_t *)
273 			    va_arg(pvar, pctx_sysc_exitfn_t *);
274 			break;
275 		case PCTX_SYSC_LWP_CREATE_EVENT:
276 			pctx->lwp_create = (pctx_sysc_lwp_createfn_t *)
277 			    va_arg(pvar, pctx_sysc_lwp_createfn_t *);
278 			break;
279 		case PCTX_INIT_LWP_EVENT:
280 			pctx->init_lwp = (pctx_init_lwpfn_t *)
281 			    va_arg(pvar, pctx_init_lwpfn_t *);
282 			break;
283 		case PCTX_FINI_LWP_EVENT:
284 			pctx->fini_lwp = (pctx_fini_lwpfn_t *)
285 			    va_arg(pvar, pctx_fini_lwpfn_t *);
286 			break;
287 		case PCTX_SYSC_LWP_EXIT_EVENT:
288 			pctx->lwp_exit = (pctx_sysc_lwp_exitfn_t *)
289 			    va_arg(pvar, pctx_sysc_lwp_exitfn_t *);
290 			break;
291 		default:
292 			pctx_error(pctx, fn,
293 			    gettext("unknown event type %x\n"), event);
294 			error = -1;
295 			break;
296 		}
297 	} while (event != PCTX_NULL_EVENT && error == 0);
298 	va_end(pvar);
299 
300 	if (error != 0)
301 		return (error);
302 
303 	if (pctx->exec == NULL)
304 		pctx->exec = (pctx_sysc_execfn_t *)default_int;
305 	if (pctx->fork == NULL)
306 		pctx->fork = (pctx_sysc_forkfn_t *)default_void;
307 	if (pctx->exit == NULL)
308 		pctx->exit = (pctx_sysc_exitfn_t *)default_void;
309 	if (pctx->lwp_create == NULL)
310 		pctx->lwp_create = (pctx_sysc_lwp_createfn_t *)default_int;
311 	if (pctx->init_lwp == NULL)
312 		pctx->init_lwp = (pctx_init_lwpfn_t *)default_int;
313 	if (pctx->fini_lwp == NULL)
314 		pctx->fini_lwp = (pctx_fini_lwpfn_t *)default_int;
315 	if (pctx->lwp_exit == NULL)
316 		pctx->lwp_exit = (pctx_sysc_lwp_exitfn_t *)default_int;
317 
318 	if (pctx->fork != (pctx_sysc_forkfn_t *)default_void) {
319 		(void) Psysexit(pctx->Pr, SYS_vfork, 1);
320 		(void) Psysexit(pctx->Pr, SYS_forksys, 1);
321 		if (Psetflags(pctx->Pr, PR_FORK) == -1)
322 			error = -1;
323 	} else {
324 		(void) Psysexit(pctx->Pr, SYS_vfork, 0);
325 		(void) Psysexit(pctx->Pr, SYS_forksys, 0);
326 		if (Punsetflags(pctx->Pr, PR_FORK) == -1)
327 			error = -1;
328 	}
329 
330 	/*
331 	 * exec causes termination of all but the exec-ing lwp,
332 	 * and resets the lwpid to one in the new address space.
333 	 */
334 	if (pctx->exec != (pctx_sysc_execfn_t *)default_int ||
335 	    pctx->fini_lwp != (pctx_fini_lwpfn_t *)default_int ||
336 	    pctx->init_lwp != (pctx_init_lwpfn_t *)default_int) {
337 		(void) Psysexit(pctx->Pr, SYS_execve, 1);
338 		(void) Psysentry(pctx->Pr, SYS_execve, 1);
339 	} else {
340 		(void) Psysexit(pctx->Pr, SYS_execve, 0);
341 		(void) Psysentry(pctx->Pr, SYS_execve, 0);
342 	}
343 
344 	(void) Psysexit(pctx->Pr, SYS_lwp_create,
345 	    pctx->lwp_create != (pctx_sysc_lwp_createfn_t *)default_int ||
346 	    pctx->init_lwp != (pctx_init_lwpfn_t *)default_int);
347 
348 	(void) Psysentry(pctx->Pr, SYS_lwp_exit,
349 	    pctx->lwp_exit != (pctx_sysc_lwp_exitfn_t *)default_int ||
350 	    pctx->fini_lwp != (pctx_fini_lwpfn_t *)default_int);
351 
352 	return (0);
353 }
354 
/*
 * Signals blocked by pctx_begin_syscalls() while the agent lwp exists.
 * Populated once by __libpctx_init().
 */
static sigset_t termsig;
356 
357 static void
358 __libpctx_init(void)
359 {
360 	/*
361 	 * Initialize the signal set used to shield ourselves from
362 	 * death-by-terminal-signal while the agent lwp is running.
363 	 */
364 	(void) sigemptyset(&termsig);
365 	(void) sigaddset(&termsig, SIGHUP);
366 	(void) sigaddset(&termsig, SIGTERM);
367 	(void) sigaddset(&termsig, SIGINT);
368 	(void) sigaddset(&termsig, SIGQUIT);
369 }
370 
/*
 * Toolchain directive registering __libpctx_init() as an initialization
 * function, so termsig is populated before any libpctx entry point runs.
 */
#pragma init(__libpctx_init)
372 
373 static void
374 pctx_begin_syscalls(pctx_t *pctx)
375 {
376 	if (pctx->Pr == NULL)
377 		return;
378 	if (pctx->sigblocked++ == 0) {
379 		(void) sigprocmask(SIG_BLOCK, &termsig, &pctx->savedset);
380 		(void) Pcreate_agent(pctx->Pr);
381 	}
382 }
383 
384 static void
385 pctx_end_syscalls(pctx_t *pctx)
386 {
387 	if (pctx->Pr == NULL)
388 		return;
389 	if (--pctx->sigblocked == 0) {
390 		(void) Pdestroy_agent(pctx->Pr);
391 		(void) sigprocmask(SIG_SETMASK, &pctx->savedset, NULL);
392 	}
393 }
394 
395 /*
396  * Iterate over the valid lwpids in the process, invoking the
397  * action function on each one.
398  */
399 static int
400 pctx_lwpiterate(pctx_t *pctx, int (*action)(pctx_t *, pid_t, id_t, void *))
401 {
402 	const pstatus_t *pstatus;
403 	char lstatus[64];
404 	struct stat statb;
405 	lwpstatus_t *lwps;
406 	prheader_t *prh;
407 	int fd, nlwp;
408 	int ret = 0;
409 
410 	if (action == (int (*)(pctx_t *, pid_t, id_t, void *))default_int)
411 		return (0);
412 
413 	pstatus = Pstatus(pctx->Pr);
414 	if (pstatus->pr_nlwp <= 1) {
415 		pctx_begin_syscalls(pctx);
416 		ret = action(pctx, pstatus->pr_pid, 1, pctx->uarg);
417 		pctx_end_syscalls(pctx);
418 		return (ret);
419 	}
420 
421 	(void) snprintf(lstatus, sizeof (lstatus),
422 	    "/proc/%d/lstatus", (int)pstatus->pr_pid);
423 
424 	if ((fd = open(lstatus, O_RDONLY)) < 0 ||
425 	    fstat(fd, &statb) != 0) {
426 		if (fd >= 0)
427 			(void) close(fd);
428 		return (-1);
429 	}
430 
431 	prh = malloc(statb.st_size);
432 	if (read(fd, prh, statb.st_size) <
433 	    sizeof (prheader_t) + sizeof (lwpstatus_t)) {
434 		(void) close(fd);
435 		free(prh);
436 		return (-1);
437 	}
438 	(void) close(fd);
439 
440 	/* LINTED pointer cast may result in improper alignment */
441 	lwps = (lwpstatus_t *)(prh + 1);
442 	pctx_begin_syscalls(pctx);
443 	for (nlwp = prh->pr_nent; nlwp > 0; nlwp--) {
444 		if (action(pctx,
445 		    pstatus->pr_pid, lwps->pr_lwpid, pctx->uarg) != 0)
446 			ret = -1;
447 		/* LINTED pointer cast may result in improper alignment */
448 		lwps = (lwpstatus_t *)((char *)lwps + prh->pr_entsize);
449 	}
450 	pctx_end_syscalls(pctx);
451 	free(prh);
452 	return (ret);
453 }
454 
455 /*
456  * Free any associated state, but leave the process stopped if it
457  * is still under our control.  (If it isn't under our control,
458  * it should just run to completion when we do our last close)
459  */
460 static void
461 pctx_free(pctx_t *pctx)
462 {
463 	if (pctx->cpc != NULL && pctx_cpc_callback != NULL)
464 		(*pctx_cpc_callback)(pctx->cpc, pctx);
465 	if (pctx->Pr) {
466 		Pfree(pctx->Pr);
467 		pctx->Pr = NULL;
468 	}
469 	pctx->errfn = pctx_default_errfn;
470 }
471 
472 /*
473  * Completely release the process from our control and discard all our state
474  */
475 void
476 pctx_release(pctx_t *pctx)
477 {
478 	if (pctx->Pr) {
479 		Prelease(pctx->Pr, PRELEASE_CLEAR);
480 		pctx->Pr = NULL;
481 	}
482 
483 	pctx_free(pctx);
484 	bzero(pctx, sizeof (*pctx));
485 	free(pctx);
486 }
487 
/*
 * Advance *tv by msec milliseconds, keeping tv_usec normalised to the
 * range [0, MICROSEC) provided it was in that range on entry.
 */
static void
msincr(struct timeval *tv, uint_t msec)
{
	tv->tv_sec += msec / MILLISEC;
	tv->tv_usec += (msec % MILLISEC) * MILLISEC;
	/*
	 * Carry on >=, not >: a tv_usec of exactly MICROSEC is one whole
	 * second and must not be left in the microseconds field.
	 */
	if (tv->tv_usec >= MICROSEC) {
		tv->tv_sec++;
		tv->tv_usec -= MICROSEC;
	}
}
498 
/*
 * Return the number of whole milliseconds by which *tva is later than
 * *tvb.  The result is 0 when tva is not later than tvb, and is capped
 * at INT_MAX for very large differences.
 */
static uint_t
msdiff(struct timeval *tva, struct timeval *tvb)
{
	time_t secs = tva->tv_sec - tvb->tv_sec;
	suseconds_t usecs = tva->tv_usec - tvb->tv_usec;

	if (secs < 0)
		return (0);

	/* Borrow a second when the microseconds went negative. */
	if (usecs < 0) {
		usecs += MICROSEC;
		if (--secs < 0)
			return (0);
	}

	return ((secs >= (INT_MAX / MILLISEC)) ? (uint_t)INT_MAX :
	    (uint_t)(secs * MILLISEC + usecs / MILLISEC));
}
517 
518 int
519 pctx_run(
520 	pctx_t *pctx,
521 	uint_t msec,
522 	uint_t nsamples,
523 	int (*tick)(pctx_t *, pid_t, id_t, void *))
524 {
525 	static const char fn[] = "run";
526 	struct timeval tvgoal, tvnow;
527 	uint_t mswait = 0;
528 	int running = 1;
529 	const pstatus_t *pstatus;
530 	psinfo_t psinfo;
531 	void (*sigsaved)();
532 	id_t lwpid;
533 	pid_t pid = Pstatus(pctx->Pr)->pr_pid;
534 	int pstate;
535 
536 	if (msec == 0)
537 		nsamples = 0;
538 	if (nsamples == 0)
539 		nsamples = UINT_MAX;
540 
541 	/*
542 	 * Casually discard any knowledge of the children we create
543 	 */
544 	sigsaved = signal(SIGCHLD, SIG_IGN);
545 
546 	/*
547 	 * Since we've just "discovered" this process which might have
548 	 * been running for weeks, deliver some init_lwp events so
549 	 * that our caller gets a handle on the process.
550 	 */
551 	if (pctx_lwpiterate(pctx, pctx->init_lwp) != 0) {
552 		if (pctx->verbose)
553 			pctx_error(pctx, fn,
554 			    gettext("%d: lwp discovery failed\n"), (int)pid);
555 		goto bailout;
556 	}
557 
558 	if (msec != 0) {
559 		/*
560 		 * tvgoal represents the time at which the sample
561 		 * should next be taken.
562 		 */
563 		(void) gettimeofday(&tvgoal, 0);
564 		msincr(&tvgoal, msec);
565 	}
566 
567 	/*
568 	 * The event handling loop continues while running is 1.
569 	 * running becomes 0 when either the controlled process has
570 	 * exited successfully or the number of time samples has expired.
571 	 * Otherwise, if an error has occurred, running becomes -1.
572 	 */
573 	while (running == 1 && !pctx->terminate) {
574 
575 		if (Psetrun(pctx->Pr, 0, 0) != 0) {
576 			if (pctx->verbose)
577 				pctx_error(pctx, fn,
578 				    gettext("%d: Psetrun\n"), (int)pid);
579 			break;
580 		}
581 
582 		if (msec != 0) {
583 			/*
584 			 * This timing loop attempts to estimate the number
585 			 * of milliseconds between our "goal" time (when
586 			 * we should stop the process and run the tick
587 			 * routine) and the current time.
588 			 *
589 			 * If we ever find ourselves running behind i.e. we
590 			 * missed our goal, then we skip ahead to the next
591 			 * goal instead.
592 			 */
593 			do {
594 				(void) gettimeofday(&tvnow, 0);
595 				if ((mswait = msdiff(&tvgoal, &tvnow)) == 0) {
596 					msincr(&tvgoal, msec);
597 					/*
598 					 * Skip ahead to the next goal, unless
599 					 * there is only one more sample left
600 					 * to take.
601 					 */
602 					if (nsamples != 1)
603 						nsamples--;
604 				}
605 			} while (mswait == 0 && !pctx->terminate);
606 		}
607 
608 		if (pctx->terminate)
609 			goto bailout;
610 		else
611 			(void) Pwait(pctx->Pr, mswait);
612 
613 checkstate:
614 		switch (pstate = Pstate(pctx->Pr)) {
615 		case PS_RUN:
616 			/*
617 			 * Try again, but wait for up to 5 seconds.
618 			 */
619 			if (Pstop(pctx->Pr, 5 * MILLISEC) == -1 ||
620 			    (pstate = Pstate(pctx->Pr)) != PS_STOP) {
621 				pctx_error(pctx, fn,
622 				    gettext("%d: won't stop\n"), (int)pid);
623 			}
624 			break;
625 		case PS_STOP:
626 			break;
627 		case PS_LOST:
628 			/*
629 			 * Lost control - probably execed a setuid/setgid
630 			 * executable.  Try and get control back again,
631 			 * else bail ..
632 			 */
633 			(void) Preopen(pctx->Pr);
634 			if ((pstate = Pstate(pctx->Pr)) != PS_LOST)
635 				goto checkstate;
636 			pctx_error(pctx, fn,
637 			    gettext("%d: execed a program that cannot "
638 			    "be tracked\n"), (int)pid);
639 			running = -1;
640 			break;
641 		case PS_UNDEAD:
642 		case PS_DEAD:
643 			if (pctx->verbose)
644 				pctx_error(pctx, fn,
645 				    gettext("%d: process terminated\n"),
646 				    (int)pid);
647 			running = -1;
648 			break;
649 		default:
650 			if (pctx->verbose)
651 				pctx_error(pctx, fn,
652 				    gettext("%d: process state 0x%x?\n"),
653 				    (int)pid, pstate);
654 			break;
655 		}
656 
657 		if (pstate != PS_STOP)
658 			break;
659 
660 		pstatus = Pstatus(pctx->Pr);
661 		lwpid = pstatus->pr_lwp.pr_lwpid;
662 		switch (pstatus->pr_lwp.pr_why) {
663 		case PR_REQUESTED:
664 			msincr(&tvgoal, msec);
665 			if (pstatus->pr_flags & PR_VFORKP) {
666 				/*
667 				 * The process is in a vfork stupor until
668 				 * its child releases it via an exec.
669 				 * Don't sample it while it's in this state
670 				 * - we won't be able to create the agent.
671 				 */
672 				break;
673 			}
674 			if (pctx_lwpiterate(pctx, tick) != 0)
675 				running = -1;
676 			if (running == 1 && --nsamples == 0)
677 				running = 0;
678 			break;
679 		case PR_SYSENTRY:
680 			switch (pstatus->pr_lwp.pr_what) {
681 			case SYS_lwp_exit:
682 				pctx_begin_syscalls(pctx);
683 				(void) pctx->fini_lwp(pctx,
684 				    pid, lwpid, pctx->uarg);
685 				(void) pctx->lwp_exit(pctx,
686 				    pid, lwpid, pctx->uarg);
687 				pctx_end_syscalls(pctx);
688 				break;
689 			case SYS_exit:
690 				if (pctx_lwpiterate(pctx, pctx->fini_lwp)
691 				    != 0)
692 					running = -1;
693 				pctx->exit(pctx, pid, lwpid,
694 				    (int)pstatus->pr_lwp.pr_sysarg[0],
695 				    pctx->uarg);
696 				if (running == 1)
697 					running = 0;
698 				break;
699 			case SYS_execve:
700 				(void) pctx_lwpiterate(pctx, pctx->fini_lwp);
701 				break;
702 			default:
703 				pctx_error(pctx, fn,
704 				    "warning - pid %d sysentry(%d)\n",
705 				    (int)pid, pstatus->pr_lwp.pr_what);
706 				break;
707 			}
708 			break;
709 		case PR_SYSEXIT:
710 			switch (pstatus->pr_lwp.pr_what) {
711 			case SYS_execve:
712 				if (pstatus->pr_lwp.pr_errno) {
713 					/*
714 					 * The exec failed completely.
715 					 * Reinstate the lwps we fini'd
716 					 * at exec entrance
717 					 */
718 					if (pctx_lwpiterate(pctx,
719 					    pctx->init_lwp) == 0)
720 						running = 1;
721 					else
722 						running = -1;
723 					break;
724 				}
725 				if (pctx->exec == (pctx_sysc_execfn_t *)
726 				    default_int) {
727 					running = 0;
728 					break;
729 				}
730 				(void) memcpy(&psinfo,
731 				    Ppsinfo(pctx->Pr), sizeof (psinfo));
732 				proc_unctrl_psinfo(&psinfo);
733 				pctx_begin_syscalls(pctx);
734 				if (pctx->exec(pctx, pid, lwpid,
735 				    psinfo.pr_psargs, pctx->uarg) != 0)
736 					running = -1;
737 				if (running == 1 && pctx->init_lwp(pctx,
738 				    pid, 1, pctx->uarg) != 0)
739 					running = -1;
740 				pctx_end_syscalls(pctx);
741 				break;
742 			case SYS_lwp_create:
743 				if (pstatus->pr_lwp.pr_errno ||
744 				    pstatus->pr_lwp.pr_rval1)
745 					break;
746 				pctx_begin_syscalls(pctx);
747 				if (pctx->init_lwp(pctx, pid, lwpid,
748 				    pctx->uarg) != 0)
749 					running = -1;
750 				if (running == 1 && pctx->lwp_create(pctx,
751 				    pid, lwpid, pctx->uarg) != 0)
752 					running = -1;
753 				pctx_end_syscalls(pctx);
754 				break;
755 			case SYS_vfork:
756 			case SYS_forksys:
757 				if (pstatus->pr_lwp.pr_errno)
758 					break;
759 				(void) fflush(NULL);
760 				switch (fork1()) {
761 					pid_t ppid;
762 					int wascreated;
763 					pctx_sysc_forkfn_t *forkfn;
764 				case 0:
765 					ppid = pid;
766 					pid = pstatus->pr_lwp.pr_rval1;
767 					wascreated = pctx->created;
768 					forkfn = pctx->fork;
769 					pctx_free(pctx);
770 					pctx = pctx_capture(pid, pctx->uarg,
771 					    pctx->verbose, pctx->errfn);
772 					if (pctx != NULL) {
773 						if (wascreated) {
774 							/*
775 							 * Set kill on last
776 							 * close so -all-
777 							 * children die.
778 							 */
779 							pctx->created = 1;
780 							(void) Psetflags(
781 							    pctx->Pr, PR_KLC);
782 						}
783 						(*forkfn)(pctx, ppid, pid,
784 						    lwpid, pctx->uarg);
785 						pctx_release(pctx);
786 						_exit(0);
787 					} else {
788 						_exit(1);
789 					}
790 					/*NOTREACHED*/
791 				case -1:
792 					pctx_error(pctx, fn,
793 					    "cannot follow pid %d: %s\n",
794 					    (int)pstatus->pr_lwp.pr_rval1,
795 					    strerror(errno));
796 					break;
797 				default:
798 					break;
799 				}
800 				break;
801 			default:
802 				pctx_error(pctx, fn, gettext(
803 				    "warning - pid %d sysexit(%d)\n"),
804 				    (int)pid, pstatus->pr_lwp.pr_what);
805 				break;
806 			}
807 			break;
808 		case PR_SIGNALLED:
809 			if (pctx->verbose)
810 				pctx_error(pctx, fn,
811 				    gettext("pid %d - signalled\n"), (int)pid);
812 			break;
813 		case PR_JOBCONTROL:
814 			if (pctx->verbose)
815 				pctx_error(pctx, fn,
816 				    gettext("pid %d - job control stop\n"),
817 				    (int)pid);
818 			running = -1;
819 			break;
820 		case PR_FAULTED:
821 			if (pctx->verbose)
822 				pctx_error(pctx, fn,
823 				    gettext("pid %d - faulted\n"), (int)pid);
824 			break;
825 		case PR_SUSPENDED:
826 			if (pctx->verbose)
827 				pctx_error(pctx, fn,
828 				    gettext("pid %d - suspended\n"), (int)pid);
829 			break;
830 		case PR_CHECKPOINT:
831 			if (pctx->verbose)
832 				pctx_error(pctx, fn,
833 				    gettext("pid %d - checkpoint\n"),
834 				    (int)pid);
835 			break;
836 		default:
837 			if (pctx->verbose)
838 				pctx_error(pctx, fn,
839 				    gettext("pid %d - reason %d\n"),
840 				    (int)pid, pstatus->pr_lwp.pr_why);
841 			running = -1;
842 			break;
843 		}
844 	}
845 
846 bailout:
847 	(void) signal(SIGCHLD, sigsaved);
848 
849 	if (pctx->terminate)
850 		return (0);
851 
852 	switch (running) {
853 	case 0:
854 		return (0);
855 	case -1:
856 		return (-1);
857 	default:
858 		pctx_error(pctx, fn, gettext("lost control of pid %d\n"),
859 		    (int)pid);
860 		pctx_free(pctx);
861 		return (-1);
862 	}
863 }
864 
865 /*
866  * Execute the private 'cpc' system call in the context of the
867  * controlled process.
868  */
869 int
870 __pctx_cpc(pctx_t *pctx, cpc_t *cpc,
871     int cmd, id_t lwpid, void *data1, void *data2, void *data3, int bufsize)
872 {
873 	sysret_t rval;
874 	argdes_t argd[5];
875 	argdes_t *adp = &argd[0];
876 	int error;
877 
878 	/*
879 	 * Keep track of the relationship between cpc_t and pctx_t here.
880 	 * We store the last cpc_t used by libpctx, so that when this pctx is
881 	 * destroyed, libpctx can notify libcpc.
882 	 */
883 
884 	if (pctx->cpc != NULL && pctx->cpc != cpc && pctx_cpc_callback != NULL)
885 		(*pctx_cpc_callback)(pctx->cpc, pctx);
886 	pctx->cpc = cpc;
887 
888 	/*
889 	 * cmd and lwpid are passed in by value no matter what the command is.
890 	 */
891 	adp->arg_value = cmd;
892 	adp->arg_object = NULL;
893 	adp->arg_type = AT_BYVAL;
894 	adp->arg_inout = AI_INPUT;
895 	adp->arg_size = 0;
896 	adp++;
897 
898 	adp->arg_value = lwpid;
899 	adp->arg_object = NULL;
900 	adp->arg_type = AT_BYVAL;
901 	adp->arg_inout = AI_INPUT;
902 	adp->arg_size = 0;
903 	adp++;
904 
905 	switch (cmd) {
906 	case CPC_BIND:
907 		adp->arg_value = 0;
908 		adp->arg_object = data1;
909 		adp->arg_type = AT_BYREF;
910 		adp->arg_inout = AI_INPUT;
911 		adp->arg_size = (size_t)data2;
912 		adp++;
913 
914 		adp->arg_value = (size_t)data2;
915 		adp->arg_object = NULL;
916 		adp->arg_type = AT_BYVAL;
917 		adp->arg_inout = AI_INPUT;
918 		adp->arg_size = 0;
919 		adp++;
920 
921 		adp->arg_value = 0;
922 		adp->arg_object = data3;
923 		adp->arg_type = AT_BYREF;
924 		adp->arg_inout = AI_INOUT;
925 		adp->arg_size = sizeof (int);
926 
927 		break;
928 	case CPC_SAMPLE:
929 		adp->arg_value = 0;
930 		adp->arg_object = data1;
931 		adp->arg_type = AT_BYREF;
932 		adp->arg_inout = AI_OUTPUT;
933 		adp->arg_size = bufsize;
934 		adp++;
935 
936 		adp->arg_value = 0;
937 		adp->arg_object = data2;
938 		adp->arg_type = AT_BYREF;
939 		adp->arg_inout = AI_OUTPUT;
940 		adp->arg_size = sizeof (hrtime_t);
941 		adp++;
942 
943 		adp->arg_value = 0;
944 		adp->arg_object = data3;
945 		adp->arg_type = AT_BYREF;
946 		adp->arg_inout = AI_OUTPUT;
947 		adp->arg_size = sizeof (uint64_t);
948 
949 		break;
950 	default:
951 		adp->arg_value = 0;
952 		adp->arg_object = 0;
953 		adp->arg_type = AT_BYVAL;
954 		adp->arg_inout = AI_INPUT;
955 		adp->arg_size = 0;
956 		adp++;
957 
958 		adp->arg_value = 0;
959 		adp->arg_object = 0;
960 		adp->arg_type = AT_BYVAL;
961 		adp->arg_inout = AI_INPUT;
962 		adp->arg_size = 0;
963 		adp++;
964 
965 		adp->arg_value = 0;
966 		adp->arg_object = 0;
967 		adp->arg_type = AT_BYVAL;
968 		adp->arg_inout = AI_INPUT;
969 		adp->arg_size = 0;
970 
971 		break;
972 	}
973 
974 	error = Psyscall(pctx->Pr, &rval, SYS_cpc, 5, &argd[0]);
975 
976 	if (error) {
977 		errno = error > 0 ? error : ENOSYS;
978 		return (-1);
979 	}
980 	return (rval.sys_rval1);
981 }
982 
/*
 * libcpc-private hook used to register a callback. The callback is used to
 * notify libcpc when a pctx handle is invalidated.
 *
 * A single library-wide callback is stored, so each call replaces the
 * previous one; passing NULL disables notification.
 */
void
__pctx_cpc_register_callback(void (*arg)(struct __cpc *, struct __pctx *))
{
	pctx_cpc_callback = arg;
}
992 
/*
 * Tell pctx_run to bail out immediately
 *
 * Only sets the context's terminate flag; pctx_run() polls the flag in
 * its event and timing loops and returns 0 once it sees it set.
 */
void
pctx_terminate(struct __pctx *pctx)
{
	pctx->terminate = 1;
}
1001