xref: /illumos-gate/usr/src/cmd/zlogin/zlogin.c (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * zlogin provides three types of login which allow users in the global
28  * zone to access non-global zones.
29  *
30  * - "interactive login" is similar to rlogin(1); for example, the user could
31  *   issue 'zlogin my-zone' or 'zlogin -e ^ -l me my-zone'.   The user is
32  *   granted a new pty (which is then shoved into the zone), and an I/O
33  *   loop between parent and child processes takes care of the interactive
34  *   session.  In this mode, login(1) (and its -c option, which means
35  *   "already authenticated") is employed to take care of the initialization
36  *   of the user's session.
37  *
38  * - "non-interactive login" is similar to su(1M); the user could issue
39  *   'zlogin my-zone ls -l' and the command would be run as specified.
40  *   In this mode, zlogin sets up pipes as the communication channel, and
41  *   'su' is used to do the login setup work.
42  *
43  * - "console login" is the equivalent to accessing the tip line for a
44  *   zone.  For example, the user can issue 'zlogin -C my-zone'.
45  *   In this mode, zlogin contacts the zoneadmd process via unix domain
46  *   socket.  If zoneadmd is not running, it starts it.  This allows the
47  *   console to be available anytime the zone is installed, regardless of
48  *   whether it is running.
49  */
50 
51 #include <sys/socket.h>
52 #include <sys/termios.h>
53 #include <sys/utsname.h>
54 #include <sys/stat.h>
55 #include <sys/types.h>
56 #include <sys/contract/process.h>
57 #include <sys/ctfs.h>
58 #include <sys/brand.h>
59 #include <sys/wait.h>
60 #include <alloca.h>
61 #include <assert.h>
62 #include <ctype.h>
63 #include <door.h>
64 #include <errno.h>
65 #include <nss_dbdefs.h>
66 #include <poll.h>
67 #include <priv.h>
68 #include <pwd.h>
69 #include <unistd.h>
70 #include <utmpx.h>
71 #include <sac.h>
72 #include <signal.h>
73 #include <stdarg.h>
74 #include <stdio.h>
75 #include <stdlib.h>
76 #include <string.h>
77 #include <strings.h>
78 #include <stropts.h>
79 #include <wait.h>
80 #include <zone.h>
81 #include <fcntl.h>
82 #include <libdevinfo.h>
83 #include <libintl.h>
84 #include <locale.h>
85 #include <libzonecfg.h>
86 #include <libcontract.h>
87 #include <libbrand.h>
88 
/* Master side of the pty, or the zoneadmd console socket in console mode. */
static int masterfd;
/* Terminal settings stashed for later restoration by reset_tty(). */
static struct termios save_termios;
/* Raw-mode settings with the original VEOF/VEOL values patched back in. */
static struct termios effective_termios;
/* The fd whose settings were stashed in save_termios by set_tty_rawmode(). */
static int save_fd;
/* Window size read from stdin; copied onto the slave pty. */
static struct winsize winsize;
/* Set by the SIGCHLD handler once the watched child has terminated. */
static volatile int dead;
/* pid of the child we watch and forward signals to; -1 means "none". */
static volatile pid_t child_pid = -1;
/* When zero, prefork_dropprivs() keeps the proc_* privileges it needs. */
static int interactive = 0;
/* Built by prefork_dropprivs(), applied by postfork_dropprivs(). */
static priv_set_t *dropprivs;

/* Nonzero disables escape-character handling in process_user_input(). */
static int nocmdchar = 0;
/* Nonzero makes most pty setup failures in init_slave_pty() non-fatal. */
static int failsafe = 0;
/* Escape character recognized at the beginning of a line. */
static char cmdchar = '~';

/* revents value recorded by doio() when poll() reports an unexpected event. */
static int pollerr = 0;

/* Program name used as a prefix for diagnostics; set by getpname(). */
static const char *pname;
106 
#if !defined(TEXT_DOMAIN)		/* should be defined by cc -D */
#define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it wasn't */
#endif

/*
 * Fixed paths and the default PATH value.
 * NOTE(review): the code that consumes these is outside the part of the
 * file reviewed here; presumably they are used when building the command
 * line and environment for the process exec'd inside the zone -- confirm.
 */
#define	SUPATH	"/usr/bin/su"
#define	FAILSAFESHELL	"/sbin/sh"
#define	DEFAULTSHELL	"/sbin/sh"
#define	DEF_PATH	"/usr/sbin:/usr/bin"

/*
 * The ZLOGIN_BUFSIZ is larger than PIPE_BUF so we can be sure we're clearing
 * out the pipe when the child is exiting.  The ZLOGIN_RDBUFSIZ must be less
 * than ZLOGIN_BUFSIZ (because we share the buffer in doio).  This value is
 * also chosen in conjunction with the HI_WATER setting to make sure we
 * don't fill up the pipe.  We can write FIFOHIWAT (16k) into the pipe before
 * blocking.  By having ZLOGIN_RDBUFSIZ set to 1k and HI_WATER set to 8k, we
 * know we can always write a ZLOGIN_RDBUFSIZ chunk into the pipe when there
 * is less than HI_WATER data already in the pipe.
 */
#define	ZLOGIN_BUFSIZ	8192
#define	ZLOGIN_RDBUFSIZ	1024
#define	HI_WATER	8192

/*
 * See canonify() below.  CANONIFY_LEN is the maximum length that a
 * "canonical" sequence will expand to (backslash, three octal digits, NUL).
 * Buffers handed to canonify() must be at least this large.
 */
#define	CANONIFY_LEN 5
135 
136 static void
137 usage(void)
138 {
139 	(void) fprintf(stderr, gettext("usage: %s [ -CES ] [ -e cmdchar ] "
140 	    "[-l user] zonename [command [args ...] ]\n"), pname);
141 	exit(2);
142 }
143 
144 static const char *
145 getpname(const char *arg0)
146 {
147 	const char *p = strrchr(arg0, '/');
148 
149 	if (p == NULL)
150 		p = arg0;
151 	else
152 		p++;
153 
154 	pname = p;
155 	return (p);
156 }
157 
158 static void
159 zerror(const char *fmt, ...)
160 {
161 	va_list alist;
162 
163 	(void) fprintf(stderr, "%s: ", pname);
164 	va_start(alist, fmt);
165 	(void) vfprintf(stderr, fmt, alist);
166 	va_end(alist);
167 	(void) fprintf(stderr, "\n");
168 }
169 
170 static void
171 zperror(const char *str)
172 {
173 	const char *estr;
174 
175 	if ((estr = strerror(errno)) != NULL)
176 		(void) fprintf(stderr, "%s: %s: %s\n", pname, str, estr);
177 	else
178 		(void) fprintf(stderr, "%s: %s: errno %d\n", pname, str, errno);
179 }
180 
181 /*
182  * The first part of our privilege dropping scheme needs to be called before
183  * fork(), since we must have it for security; we don't want to be surprised
184  * later that we couldn't allocate the privset.
185  */
186 static int
187 prefork_dropprivs()
188 {
189 	if ((dropprivs = priv_allocset()) == NULL)
190 		return (1);
191 	priv_emptyset(dropprivs);
192 
193 	/*
194 	 * We need these privileges in order to query session information and
195 	 * send signals.
196 	 */
197 	if (interactive == 0) {
198 		if (priv_addset(dropprivs, "proc_session") == -1)
199 			return (1);
200 		if (priv_addset(dropprivs, "proc_zone") == -1)
201 			return (1);
202 		if (priv_addset(dropprivs, "proc_owner") == -1)
203 			return (1);
204 	}
205 
206 	return (0);
207 }
208 
209 /*
210  * The second part of the privilege drop.  We are paranoid about being attacked
211  * by the zone, so we drop all privileges.  This should prevent a compromise
212  * which gets us to fork(), exec(), symlink(), etc.
213  */
214 static void
215 postfork_dropprivs()
216 {
217 	if ((setppriv(PRIV_SET, PRIV_PERMITTED, dropprivs)) == -1) {
218 		zperror(gettext("Warning: could not set permitted privileges"));
219 	}
220 	if ((setppriv(PRIV_SET, PRIV_LIMIT, dropprivs)) == -1) {
221 		zperror(gettext("Warning: could not set limit privileges"));
222 	}
223 	if ((setppriv(PRIV_SET, PRIV_INHERITABLE, dropprivs)) == -1) {
224 		zperror(gettext("Warning: could not set inheritable "
225 		    "privileges"));
226 	}
227 }
228 
229 /*
230  * Create the unix domain socket and call the zoneadmd server; handshake
231  * with it to determine whether it will allow us to connect.
232  */
233 static int
234 get_console_master(const char *zname)
235 {
236 	int sockfd = -1;
237 	struct sockaddr_un servaddr;
238 	char clientid[MAXPATHLEN];
239 	char handshake[MAXPATHLEN], c;
240 	int msglen;
241 	int i = 0, err = 0;
242 
243 	if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
244 		zperror(gettext("could not create socket"));
245 		return (-1);
246 	}
247 
248 	bzero(&servaddr, sizeof (servaddr));
249 	servaddr.sun_family = AF_UNIX;
250 	(void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
251 	    "%s/%s.console_sock", ZONES_TMPDIR, zname);
252 
253 	if (connect(sockfd, (struct sockaddr *)&servaddr,
254 	    sizeof (servaddr)) == -1) {
255 		zperror(gettext("Could not connect to zone console"));
256 		goto bad;
257 	}
258 	masterfd = sockfd;
259 
260 	msglen = snprintf(clientid, sizeof (clientid), "IDENT %lu %s\n",
261 	    getpid(), setlocale(LC_MESSAGES, NULL));
262 
263 	if (msglen >= sizeof (clientid) || msglen < 0) {
264 		zerror("protocol error");
265 		goto bad;
266 	}
267 
268 	if (write(masterfd, clientid, msglen) != msglen) {
269 		zerror("protocol error");
270 		goto bad;
271 	}
272 
273 	bzero(handshake, sizeof (handshake));
274 
275 	/*
276 	 * Take care not to accumulate more than our fill, and leave room for
277 	 * the NUL at the end.
278 	 */
279 	while ((err = read(masterfd, &c, 1)) == 1) {
280 		if (i >= (sizeof (handshake) - 1))
281 			break;
282 		if (c == '\n')
283 			break;
284 		handshake[i] = c;
285 		i++;
286 	}
287 
288 	/*
289 	 * If something went wrong during the handshake we bail; perhaps
290 	 * the server died off.
291 	 */
292 	if (err == -1) {
293 		zperror(gettext("Could not connect to zone console"));
294 		goto bad;
295 	}
296 
297 	if (strncmp(handshake, "OK", sizeof (handshake)) == 0)
298 		return (0);
299 
300 	zerror(gettext("Console is already in use by process ID %s."),
301 	    handshake);
302 bad:
303 	(void) close(sockfd);
304 	masterfd = -1;
305 	return (-1);
306 }
307 
308 
309 /*
310  * Routines to handle pty creation upon zone entry and to shuttle I/O back
311  * and forth between the two terminals.  We also compute and store the
312  * name of the slave terminal associated with the master side.
313  */
314 static int
315 get_master_pty()
316 {
317 	if ((masterfd = open("/dev/ptmx", O_RDWR|O_NONBLOCK)) < 0) {
318 		zperror(gettext("failed to obtain a pseudo-tty"));
319 		return (-1);
320 	}
321 	if (tcgetattr(STDIN_FILENO, &save_termios) == -1) {
322 		zperror(gettext("failed to get terminal settings from stdin"));
323 		return (-1);
324 	}
325 	(void) ioctl(STDIN_FILENO, TIOCGWINSZ, (char *)&winsize);
326 
327 	return (0);
328 }
329 
/*
 * This is a bit tricky; normally a pts device will belong to the zone it
 * is granted to.  But in the case of "entering" a zone, we need to establish
 * the pty before entering the zone so that we can vector I/O to and from it
 * from the global zone.
 *
 * We use the zonept() call to let the ptm driver know what we are up to;
 * the only other hairy bit is that the slave must be opened through the
 * zone's device tree, so its path is built below by prefixing devroot to
 * the name reported by ptsname().
 *
 * zoneid is the zone the pty is handed to; devroot is the path prefix under
 * which the zone's device nodes are reachable from here.
 *
 * Returns the open slave descriptor on success, or -1 on failure.  When
 * failsafe is set, failures to push modules, anchor the stream or apply the
 * terminal settings are reported but not treated as fatal.
 */
static int
init_slave_pty(zoneid_t zoneid, char *devroot)
{
	int slavefd = -1;
	char *slavename, zoneslavename[MAXPATHLEN];

	/*
	 * Set slave permissions and unlock it; zoning the pts happens last,
	 * once the slave has been opened and configured.
	 */
	if (grantpt(masterfd) != 0) {
		zperror(gettext("grantpt failed"));
		return (-1);
	}

	if (unlockpt(masterfd) != 0) {
		zperror(gettext("unlockpt failed"));
		return (-1);
	}

	/*
	 * We must open the slave side before zoning this pty; otherwise
	 * the kernel would refuse us the open-- zoning a pty makes it
	 * inaccessible to the global zone.  Note we are trying to open
	 * the device node via the $ZONEROOT/dev path for this pty.
	 *
	 * Later we'll close the slave out once we've opened it again
	 * from within the target zone.  Blarg.
	 */
	if ((slavename = ptsname(masterfd)) == NULL) {
		zperror(gettext("failed to get name for pseudo-tty"));
		return (-1);
	}

	(void) snprintf(zoneslavename, sizeof (zoneslavename), "%s%s",
	    devroot, slavename);

	if ((slavefd = open(zoneslavename, O_RDWR)) < 0) {
		zerror(gettext("failed to open %s: %s"), zoneslavename,
		    strerror(errno));
		return (-1);
	}

	/*
	 * Push hardware emulation (ptem), line discipline (ldterm),
	 * and V7/4BSD/Xenix compatibility (ttcompat) modules.
	 */
	if (ioctl(slavefd, I_PUSH, "ptem") == -1) {
		zperror(gettext("failed to push ptem module"));
		if (!failsafe)
			goto bad;
	}

	/*
	 * Anchor the stream to prevent malicious I_POPs; we prefer to do
	 * this prior to entering the zone so that we can detect any errors
	 * early, and so that we can set the anchor from the global zone.
	 */
	if (ioctl(slavefd, I_ANCHOR) == -1) {
		zperror(gettext("failed to set stream anchor"));
		if (!failsafe)
			goto bad;
	}

	if (ioctl(slavefd, I_PUSH, "ldterm") == -1) {
		zperror(gettext("failed to push ldterm module"));
		if (!failsafe)
			goto bad;
	}
	if (ioctl(slavefd, I_PUSH, "ttcompat") == -1) {
		zperror(gettext("failed to push ttcompat module"));
		if (!failsafe)
			goto bad;
	}

	/*
	 * Propagate terminal settings from the external term to the new one.
	 */
	if (tcsetattr(slavefd, TCSAFLUSH, &save_termios) == -1) {
		zperror(gettext("failed to set terminal settings"));
		if (!failsafe)
			goto bad;
	}
	/* Best effort: a failure to copy the window size is ignored. */
	(void) ioctl(slavefd, TIOCSWINSZ, (char *)&winsize);

	/* Hand the pty to the zone; this one is fatal even in failsafe mode. */
	if (zonept(masterfd, zoneid) != 0) {
		zperror(gettext("could not set zoneid of pty"));
		goto bad;
	}

	return (slavefd);

bad:
	(void) close(slavefd);
	return (-1);
}
435 
436 /*
437  * Place terminal into raw mode.
438  */
439 static int
440 set_tty_rawmode(int fd)
441 {
442 	struct termios term;
443 	if (tcgetattr(fd, &term) < 0) {
444 		zperror(gettext("failed to get user terminal settings"));
445 		return (-1);
446 	}
447 
448 	/* Stash for later, so we can revert back to previous mode */
449 	save_termios = term;
450 	save_fd = fd;
451 
452 	/* disable 8->7 bit strip, start/stop, enable any char to restart */
453 	term.c_iflag &= ~(ISTRIP|IXON|IXANY);
454 	/* disable NL->CR, CR->NL, ignore CR, UPPER->lower */
455 	term.c_iflag &= ~(INLCR|ICRNL|IGNCR|IUCLC);
456 	/* disable output post-processing */
457 	term.c_oflag &= ~OPOST;
458 	/* disable canonical mode, signal chars, echo & extended functions */
459 	term.c_lflag &= ~(ICANON|ISIG|ECHO|IEXTEN);
460 
461 	term.c_cc[VMIN] = 1;    /* byte-at-a-time */
462 	term.c_cc[VTIME] = 0;
463 
464 	if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &term)) {
465 		zperror(gettext("failed to set user terminal to raw mode"));
466 		return (-1);
467 	}
468 
469 	/*
470 	 * We need to know the value of VEOF so that we can properly process for
471 	 * client-side ~<EOF>.  But we have obliterated VEOF in term,
472 	 * because VMIN overloads the same array slot in non-canonical mode.
473 	 * Stupid @&^%!
474 	 *
475 	 * So here we construct the "effective" termios from the current
476 	 * terminal settings, and the corrected VEOF and VEOL settings.
477 	 */
478 	if (tcgetattr(STDIN_FILENO, &effective_termios) < 0) {
479 		zperror(gettext("failed to get user terminal settings"));
480 		return (-1);
481 	}
482 	effective_termios.c_cc[VEOF] = save_termios.c_cc[VEOF];
483 	effective_termios.c_cc[VEOL] = save_termios.c_cc[VEOL];
484 
485 	return (0);
486 }
487 
488 /*
489  * Copy terminal window size from our terminal to the pts.
490  */
491 /*ARGSUSED*/
492 static void
493 sigwinch(int s)
494 {
495 	struct winsize ws;
496 
497 	if (ioctl(0, TIOCGWINSZ, &ws) == 0)
498 		(void) ioctl(masterfd, TIOCSWINSZ, &ws);
499 }
500 
501 static volatile int close_on_sig = -1;
502 
503 static void
504 /*ARGSUSED*/
505 sigcld(int s)
506 {
507 	int status;
508 	pid_t pid;
509 
510 	/*
511 	 * Peek at the exit status.  If this isn't the process we cared
512 	 * about, then just reap it.
513 	 */
514 	if ((pid = waitpid(child_pid, &status, WNOHANG|WNOWAIT)) != -1) {
515 		if (pid == child_pid &&
516 		    (WIFEXITED(status) || WIFSIGNALED(status))) {
517 			dead = 1;
518 			if (close_on_sig != -1) {
519 				(void) write(close_on_sig, "a", 1);
520 				(void) close(close_on_sig);
521 				close_on_sig = -1;
522 			}
523 		} else {
524 			(void) waitpid(pid, &status, WNOHANG);
525 		}
526 	}
527 }
528 
529 /*
530  * Some signals (currently, SIGINT) must be forwarded on to the process
531  * group of the child process.
532  */
533 static void
534 sig_forward(int s)
535 {
536 	if (child_pid != -1) {
537 		pid_t pgid = getpgid(child_pid);
538 		if (pgid != -1)
539 			(void) sigsend(P_PGID, pgid, s);
540 	}
541 }
542 
543 /*
544  * reset terminal settings for global environment
545  */
546 static void
547 reset_tty()
548 {
549 	(void) tcsetattr(save_fd, TCSADRAIN, &save_termios);
550 }
551 
/*
 * Convert character to printable representation, for display with locally
 * echoed command characters (like when we need to display ~^D).
 *
 *   printable characters    -> the character itself
 *   control codes 0 .. 31   -> caret notation, '^@' through '^_'
 *   everything else         -> backslash followed by three octal digits
 *
 * cc must have room for at least five bytes (the longest form plus NUL).
 *
 * The character is treated as an unsigned byte throughout: passing a
 * negative char to isprint() is undefined, and shifting a negative value
 * yields the wrong octal digits for bytes with the high bit set.
 */
static void
canonify(char c, char *cc)
{
	unsigned char uc = (unsigned char)c;

	if (isprint(uc)) {
		cc[0] = c;
		cc[1] = '\0';
	} else if (uc <= 31) {	/* ^@ through ^_ */
		cc[0] = '^';
		cc[1] = (char)(uc + '@');
		cc[2] = '\0';
	} else {
		cc[0] = '\\';
		cc[1] = (char)(((uc >> 6) & 7) + '0');
		cc[2] = (char)(((uc >> 3) & 7) + '0');
		cc[3] = (char)((uc & 7) + '0');
		cc[4] = '\0';
	}
}
574 
575 /*
576  * process_user_input watches the input stream for the escape sequence for
577  * 'quit' (by default, tilde-period).  Because we might be fed just one
578  * keystroke at a time, state associated with the user input (are we at the
579  * beginning of the line?  are we locally echoing the next character?) is
580  * maintained by beginning_of_line and local_echo across calls to the routine.
581  * If the write to outfd fails, we'll try to read from infd in an attempt
582  * to prevent deadlock between the two processes.
583  *
584  * This routine returns -1 when the 'quit' escape sequence has been issued,
585  * or an error is encountered, 1 if stdin is EOF, and 0 otherwise.
586  */
587 static int
588 process_user_input(int outfd, int infd)
589 {
590 	static boolean_t beginning_of_line = B_TRUE;
591 	static boolean_t local_echo = B_FALSE;
592 	char ibuf[ZLOGIN_BUFSIZ];
593 	int nbytes;
594 	char *buf = ibuf;
595 	char c = *buf;
596 
597 	nbytes = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
598 	if (nbytes == -1 && (errno != EINTR || dead))
599 		return (-1);
600 
601 	if (nbytes == -1)	/* The read was interrupted. */
602 		return (0);
603 
604 	/* 0 read means EOF, close the pipe to the child */
605 	if (nbytes == 0)
606 		return (1);
607 
608 	for (c = *buf; nbytes > 0; c = *buf, --nbytes) {
609 		buf++;
610 		if (beginning_of_line && !nocmdchar) {
611 			beginning_of_line = B_FALSE;
612 			if (c == cmdchar) {
613 				local_echo = B_TRUE;
614 				continue;
615 			}
616 		} else if (local_echo) {
617 			local_echo = B_FALSE;
618 			if (c == '.' || c == effective_termios.c_cc[VEOF]) {
619 				char cc[CANONIFY_LEN];
620 
621 				canonify(c, cc);
622 				(void) write(STDOUT_FILENO, &cmdchar, 1);
623 				(void) write(STDOUT_FILENO, cc, strlen(cc));
624 				return (-1);
625 			}
626 		}
627 retry:
628 		if (write(outfd, &c, 1) <= 0) {
629 			/*
630 			 * Since the fd we are writing to is opened with
631 			 * O_NONBLOCK it is possible to get EAGAIN if the
632 			 * pipe is full.  One way this could happen is if we
633 			 * are writing a lot of data into the pipe in this loop
634 			 * and the application on the other end is echoing that
635 			 * data back out to its stdout.  The output pipe can
636 			 * fill up since we are stuck here in this loop and not
637 			 * draining the other pipe.  We can try to read some of
638 			 * the data to see if we can drain the pipe so that the
639 			 * application can continue to make progress.  The read
640 			 * is non-blocking so we won't hang here.  We also wait
641 			 * a bit before retrying since there could be other
642 			 * reasons why the pipe is full and we don't want to
643 			 * continuously retry.
644 			 */
645 			if (errno == EAGAIN) {
646 				struct timespec rqtp;
647 				int ln;
648 				char obuf[ZLOGIN_BUFSIZ];
649 
650 				if ((ln = read(infd, obuf, ZLOGIN_BUFSIZ)) > 0)
651 					(void) write(STDOUT_FILENO, obuf, ln);
652 
653 				/* sleep for 10 milliseconds */
654 				rqtp.tv_sec = 0;
655 				rqtp.tv_nsec = 10 * (NANOSEC / MILLISEC);
656 				(void) nanosleep(&rqtp, NULL);
657 				if (!dead)
658 					goto retry;
659 			}
660 
661 			return (-1);
662 		}
663 		beginning_of_line = (c == '\r' || c == '\n' ||
664 		    c == effective_termios.c_cc[VKILL] ||
665 		    c == effective_termios.c_cc[VEOL] ||
666 		    c == effective_termios.c_cc[VSUSP] ||
667 		    c == effective_termios.c_cc[VINTR]);
668 	}
669 	return (0);
670 }
671 
672 /*
673  * This function prevents deadlock between zlogin and the application in the
674  * zone that it is talking to.  This can happen when we read from zlogin's
675  * stdin and write the data down the pipe to the application.  If the pipe
676  * is full, we'll block in the write.  Because zlogin could be blocked in
677  * the write, it would never read the application's stdout/stderr so the
678  * application can then block on those writes (when the pipe fills up).  If the
679  * the application gets blocked this way, it can never get around to reading
680  * its stdin so that zlogin can unblock from its write.  Once in this state,
681  * the two processes are deadlocked.
682  *
683  * To prevent this, we want to verify that we can write into the pipe before we
684  * read from our stdin.  If the pipe already is pretty full, we bypass the read
685  * for now.  We'll circle back here again after the poll() so that we can
686  * try again.  When this function is called, we already know there is data
687  * ready to read on STDIN_FILENO.  We return -1 if there is a problem, 1 if
688  * stdin is EOF, and 0 if everything is ok (even though we might not have
689  * read/written any data into the pipe on this iteration).
690  */
691 static int
692 process_raw_input(int stdin_fd, int appin_fd)
693 {
694 	int cc;
695 	struct stat64 sb;
696 	char ibuf[ZLOGIN_RDBUFSIZ];
697 
698 	/* Check how much data is already in the pipe */
699 	if (fstat64(appin_fd, &sb) == -1) {
700 		perror("stat failed");
701 		return (-1);
702 	}
703 
704 	if (dead)
705 		return (-1);
706 
707 	/*
708 	 * The pipe already has a lot of data in it,  don't write any more
709 	 * right now.
710 	 */
711 	if (sb.st_size >= HI_WATER)
712 		return (0);
713 
714 	cc = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
715 	if (cc == -1 && (errno != EINTR || dead))
716 		return (-1);
717 
718 	if (cc == -1)	/* The read was interrupted. */
719 		return (0);
720 
721 	/* 0 read means EOF, close the pipe to the child */
722 	if (cc == 0)
723 		return (1);
724 
725 	/*
726 	 * stdin_fd is stdin of the target; so, the thing we'll write the user
727 	 * data *to*.
728 	 */
729 	if (write(stdin_fd, ibuf, cc) == -1)
730 		return (-1);
731 
732 	return (0);
733 }
734 
735 /*
736  * Write the output from the application running in the zone.  We can get
737  * a signal during the write (usually it would be SIGCHLD when the application
738  * has exited) so we loop to make sure we have written all of the data we read.
739  */
740 static int
741 process_output(int in_fd, int out_fd)
742 {
743 	int wrote = 0;
744 	int cc;
745 	char ibuf[ZLOGIN_BUFSIZ];
746 
747 	cc = read(in_fd, ibuf, ZLOGIN_BUFSIZ);
748 	if (cc == -1 && (errno != EINTR || dead))
749 		return (-1);
750 	if (cc == 0)	/* EOF */
751 		return (-1);
752 	if (cc == -1)	/* The read was interrupted. */
753 		return (0);
754 
755 	do {
756 		int len;
757 
758 		len = write(out_fd, ibuf + wrote, cc - wrote);
759 		if (len == -1 && errno != EINTR)
760 			return (-1);
761 		if (len != -1)
762 			wrote += len;
763 	} while (wrote < cc);
764 
765 	return (0);
766 }
767 
768 /*
769  * This is the main I/O loop, and is shared across all zlogin modes.
770  * Parameters:
771  * 	stdin_fd:  The fd representing 'stdin' for the slave side; input to
772  *		   the zone will be written here.
773  *
774  * 	appin_fd:  The fd representing the other end of the 'stdin' pipe (when
775  *		   we're running non-interactive); used in process_raw_input
776  *		   to ensure we don't fill up the application's stdin pipe.
777  *
778  *	stdout_fd: The fd representing 'stdout' for the slave side; output
779  *		   from the zone will arrive here.
780  *
781  *	stderr_fd: The fd representing 'stderr' for the slave side; output
782  *		   from the zone will arrive here.
783  *
784  *	raw_mode:  If TRUE, then no processing (for example, for '~.') will
785  *		   be performed on the input coming from STDIN.
786  *
787  * stderr_fd may be specified as -1 if there is no stderr (only non-interactive
788  * mode supplies a stderr).
789  *
790  */
791 static void
792 doio(int stdin_fd, int appin_fd, int stdout_fd, int stderr_fd, int sig_fd,
793     boolean_t raw_mode)
794 {
795 	struct pollfd pollfds[4];
796 	char ibuf[ZLOGIN_BUFSIZ];
797 	int cc, ret;
798 
799 	/* read from stdout of zone and write to stdout of global zone */
800 	pollfds[0].fd = stdout_fd;
801 	pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
802 
803 	/* read from stderr of zone and write to stderr of global zone */
804 	pollfds[1].fd = stderr_fd;
805 	pollfds[1].events = pollfds[0].events;
806 
807 	/* read from stdin of global zone and write to stdin of zone */
808 	pollfds[2].fd = STDIN_FILENO;
809 	pollfds[2].events = pollfds[0].events;
810 
811 	/* read from signalling pipe so we know when child dies */
812 	pollfds[3].fd = sig_fd;
813 	pollfds[3].events = pollfds[0].events;
814 
815 	for (;;) {
816 		pollfds[0].revents = pollfds[1].revents =
817 		    pollfds[2].revents = pollfds[3].revents = 0;
818 
819 		if (dead)
820 			break;
821 
822 		/*
823 		 * There is a race condition here where we can receive the
824 		 * child death signal, set the dead flag, but since we have
825 		 * passed the test above, we would go into poll and hang.
826 		 * To avoid this we use the sig_fd as an additional poll fd.
827 		 * The signal handler writes into the other end of this pipe
828 		 * when the child dies so that the poll will always see that
829 		 * input and proceed.  We just loop around at that point and
830 		 * then notice the dead flag.
831 		 */
832 
833 		ret = poll(pollfds,
834 		    sizeof (pollfds) / sizeof (struct pollfd), -1);
835 
836 		if (ret == -1 && errno != EINTR) {
837 			perror("poll failed");
838 			break;
839 		}
840 
841 		if (errno == EINTR && dead) {
842 			break;
843 		}
844 
845 		/* event from master side stdout */
846 		if (pollfds[0].revents) {
847 			if (pollfds[0].revents &
848 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
849 				if (process_output(stdout_fd, STDOUT_FILENO)
850 				    != 0)
851 					break;
852 			} else {
853 				pollerr = pollfds[0].revents;
854 				break;
855 			}
856 		}
857 
858 		/* event from master side stderr */
859 		if (pollfds[1].revents) {
860 			if (pollfds[1].revents &
861 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
862 				if (process_output(stderr_fd, STDERR_FILENO)
863 				    != 0)
864 					break;
865 			} else {
866 				pollerr = pollfds[1].revents;
867 				break;
868 			}
869 		}
870 
871 		/* event from user STDIN side */
872 		if (pollfds[2].revents) {
873 			if (pollfds[2].revents &
874 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
875 				/*
876 				 * stdin fd is stdin of the target; so,
877 				 * the thing we'll write the user data *to*.
878 				 *
879 				 * Also, unlike on the output side, we
880 				 * close the pipe on a zero-length message.
881 				 */
882 				int res;
883 
884 				if (raw_mode)
885 					res = process_raw_input(stdin_fd,
886 					    appin_fd);
887 				else
888 					res = process_user_input(stdin_fd,
889 					    stdout_fd);
890 
891 				if (res < 0)
892 					break;
893 				if (res > 0) {
894 					/* EOF (close) child's stdin_fd */
895 					pollfds[2].fd = -1;
896 					while ((res = close(stdin_fd)) != 0 &&
897 					    errno == EINTR)
898 						;
899 					if (res != 0)
900 						break;
901 				}
902 
903 			} else if (raw_mode && pollfds[2].revents & POLLHUP) {
904 				/*
905 				 * It's OK to get a POLLHUP on STDIN-- it
906 				 * always happens if you do:
907 				 *
908 				 * echo foo | zlogin <zone> <command>
909 				 *
910 				 * We reset fd to -1 in this case to clear
911 				 * the condition and close the pipe (EOF) to
912 				 * the other side in order to wrap things up.
913 				 */
914 				int res;
915 
916 				pollfds[2].fd = -1;
917 				while ((res = close(stdin_fd)) != 0 &&
918 				    errno == EINTR)
919 					;
920 				if (res != 0)
921 					break;
922 			} else {
923 				pollerr = pollfds[2].revents;
924 				break;
925 			}
926 		}
927 	}
928 
929 	/*
930 	 * We are in the midst of dying, but try to poll with a short
931 	 * timeout to see if we can catch the last bit of I/O from the
932 	 * children.
933 	 */
934 retry:
935 	pollfds[0].revents = pollfds[1].revents = 0;
936 	(void) poll(pollfds, 2, 100);
937 	if (pollfds[0].revents &
938 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
939 		if ((cc = read(stdout_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
940 			(void) write(STDOUT_FILENO, ibuf, cc);
941 			goto retry;
942 		}
943 	}
944 	if (pollfds[1].revents &
945 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
946 		if ((cc = read(stderr_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
947 			(void) write(STDERR_FILENO, ibuf, cc);
948 			goto retry;
949 		}
950 	}
951 }
952 
953 /*
954  * Fetch the user_cmd brand hook for getting a user's passwd(4) entry.
955  */
956 static const char *
957 zone_get_user_cmd(brand_handle_t bh, const char *login, char *user_cmd,
958     size_t len)
959 {
960 	bzero(user_cmd, sizeof (user_cmd));
961 	if (brand_get_user_cmd(bh, login, user_cmd, len) != 0)
962 		return (NULL);
963 
964 	return (user_cmd);
965 }
966 
967 /* From libc */
968 extern int str2passwd(const char *, int, void *, char *, int);
969 
970 /*
971  * exec() the user_cmd brand hook, and convert the output string to a
972  * struct passwd.  This is to be called after zone_enter().
973  *
974  */
975 static struct passwd *
976 zone_get_user_pw(const char *user_cmd, struct passwd *pwent, char *pwbuf,
977     int pwbuflen)
978 {
979 	char pwline[NSS_BUFLEN_PASSWD];
980 	char *cin = NULL;
981 	FILE *fin;
982 	int status;
983 
984 	assert(getzoneid() != GLOBAL_ZONEID);
985 
986 	if ((fin = popen(user_cmd, "r")) == NULL)
987 		return (NULL);
988 
989 	while (cin == NULL && !feof(fin))
990 		cin = fgets(pwline, sizeof (pwline), fin);
991 
992 	if (cin == NULL) {
993 		(void) pclose(fin);
994 		return (NULL);
995 	}
996 
997 	status = pclose(fin);
998 	if (!WIFEXITED(status))
999 		return (NULL);
1000 	if (WEXITSTATUS(status) != 0)
1001 		return (NULL);
1002 
1003 	if (str2passwd(pwline, sizeof (pwline), pwent, pwbuf, pwbuflen) == 0)
1004 		return (pwent);
1005 	else
1006 		return (NULL);
1007 }
1008 
1009 static char **
1010 zone_login_cmd(brand_handle_t bh, const char *login)
1011 {
1012 	static char result_buf[ARG_MAX];
1013 	char **new_argv, *ptr, *lasts;
1014 	int n, a;
1015 
1016 	/* Get the login command for the target zone. */
1017 	bzero(result_buf, sizeof (result_buf));
1018 	if (brand_get_login_cmd(bh, login,
1019 	    result_buf, sizeof (result_buf)) != 0)
1020 		return (NULL);
1021 
1022 	/*
1023 	 * We got back a string that we'd like to execute.  But since
1024 	 * we're not doing the execution via a shell we'll need to convert
1025 	 * the exec string to an array of strings.  We'll do that here
1026 	 * but we're going to be very simplistic about it and break stuff
1027 	 * up based on spaces.  We're not even going to support any kind
1028 	 * of quoting or escape characters.  It's truly amazing that
1029 	 * there is no library function in OpenSolaris to do this for us.
1030 	 */
1031 
1032 	/*
1033 	 * Be paranoid.  Since we're deliniating based on spaces make
1034 	 * sure there are no adjacent spaces.
1035 	 */
1036 	if (strstr(result_buf, "  ") != NULL)
1037 		return (NULL);
1038 
1039 	/* Remove any trailing whitespace.  */
1040 	n = strlen(result_buf);
1041 	if (result_buf[n - 1] == ' ')
1042 		result_buf[n - 1] = '\0';
1043 
1044 	/* Count how many elements there are in the exec string. */
1045 	ptr = result_buf;
1046 	for (n = 2; ((ptr = strchr(ptr + 1, (int)' ')) != NULL); n++)
1047 		;
1048 
1049 	/* Allocate the argv array that we're going to return. */
1050 	if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1051 		return (NULL);
1052 
1053 	/* Tokenize the exec string and return. */
1054 	a = 0;
1055 	new_argv[a++] = result_buf;
1056 	if (n > 2) {
1057 		(void) strtok_r(result_buf, " ", &lasts);
1058 		while ((new_argv[a++] = strtok_r(NULL, " ", &lasts)) != NULL)
1059 			;
1060 	} else {
1061 		new_argv[a++] = NULL;
1062 	}
1063 	assert(n == a);
1064 	return (new_argv);
1065 }
1066 
1067 /*
1068  * Prepare argv array for exec'd process; if we're passing commands to the
1069  * new process, then use su(1M) to do the invocation.  Otherwise, use
1070  * 'login -z <from_zonename> -f' (-z is an undocumented option which tells
1071  * login that we're coming from another zone, and to disregard its CONSOLE
1072  * checks).
1073  */
1074 static char **
1075 prep_args(brand_handle_t bh, const char *login, char **argv)
1076 {
1077 	int argc = 0, a = 0, i, n = -1;
1078 	char **new_argv;
1079 
1080 	if (argv != NULL) {
1081 		size_t subshell_len = 1;
1082 		char *subshell;
1083 
1084 		while (argv[argc] != NULL)
1085 			argc++;
1086 
1087 		for (i = 0; i < argc; i++) {
1088 			subshell_len += strlen(argv[i]) + 1;
1089 		}
1090 		if ((subshell = calloc(1, subshell_len)) == NULL)
1091 			return (NULL);
1092 
1093 		for (i = 0; i < argc; i++) {
1094 			(void) strcat(subshell, argv[i]);
1095 			(void) strcat(subshell, " ");
1096 		}
1097 
1098 		if (failsafe) {
1099 			n = 4;
1100 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1101 				return (NULL);
1102 
1103 			new_argv[a++] = FAILSAFESHELL;
1104 		} else {
1105 			n = 5;
1106 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1107 				return (NULL);
1108 
1109 			new_argv[a++] = SUPATH;
1110 			new_argv[a++] = (char *)login;
1111 		}
1112 		new_argv[a++] = "-c";
1113 		new_argv[a++] = subshell;
1114 		new_argv[a++] = NULL;
1115 		assert(a == n);
1116 	} else {
1117 		if (failsafe) {
1118 			n = 2;
1119 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1120 				return (NULL);
1121 			new_argv[a++] = FAILSAFESHELL;
1122 			new_argv[a++] = NULL;
1123 			assert(n == a);
1124 		} else {
1125 			new_argv = zone_login_cmd(bh, login);
1126 		}
1127 	}
1128 
1129 	return (new_argv);
1130 }
1131 
/*
 * Helper routine for building environment arrays: returns a newly
 * malloc'd "name=value" string, or NULL if the allocation fails.  The
 * caller owns the returned string.
 */
static char *
add_env(char *name, char *value)
{
	/* name + '=' + value + terminating NUL */
	size_t total = strlen(name) + 1 + strlen(value) + 1;
	char *entry = malloc(total);

	if (entry != NULL)
		(void) snprintf(entry, total, "%s=%s", name, value);

	return (entry);
}
1147 
1148 /*
1149  * Prepare envp array for exec'd process.
1150  */
1151 static char **
1152 prep_env()
1153 {
1154 	int e = 0, size = 1;
1155 	char **new_env, *estr;
1156 	char *term = getenv("TERM");
1157 
1158 	size++;	/* for $PATH */
1159 	if (term != NULL)
1160 		size++;
1161 
1162 	/*
1163 	 * In failsafe mode we set $HOME, since '-l' isn't valid in this mode.
1164 	 * We also set $SHELL, since neither login nor su will be around to do
1165 	 * it.
1166 	 */
1167 	if (failsafe)
1168 		size += 2;
1169 
1170 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1171 		return (NULL);
1172 
1173 	if ((estr = add_env("PATH", DEF_PATH)) == NULL)
1174 		return (NULL);
1175 	new_env[e++] = estr;
1176 
1177 	if (term != NULL) {
1178 		if ((estr = add_env("TERM", term)) == NULL)
1179 			return (NULL);
1180 		new_env[e++] = estr;
1181 	}
1182 
1183 	if (failsafe) {
1184 		if ((estr = add_env("HOME", "/")) == NULL)
1185 			return (NULL);
1186 		new_env[e++] = estr;
1187 
1188 		if ((estr = add_env("SHELL", FAILSAFESHELL)) == NULL)
1189 			return (NULL);
1190 		new_env[e++] = estr;
1191 	}
1192 
1193 	new_env[e++] = NULL;
1194 
1195 	assert(e == size);
1196 
1197 	return (new_env);
1198 }
1199 
1200 /*
1201  * Finish the preparation of the envp array for exec'd non-interactive
1202  * zlogins.  This is called in the child process *after* we zone_enter(), since
1203  * it derives things we can only know within the zone, such as $HOME, $SHELL,
1204  * etc.  We need only do this in the non-interactive, mode, since otherwise
1205  * login(1) will do it.  We don't do this in failsafe mode, since it presents
1206  * additional ways in which the command could fail, and we'd prefer to avoid
1207  * that.
1208  */
1209 static char **
1210 prep_env_noninteractive(const char *user_cmd, char **env)
1211 {
1212 	size_t size;
1213 	char **new_env;
1214 	int e, i;
1215 	char *estr;
1216 	char varmail[LOGNAME_MAX + 11]; /* strlen(/var/mail/) = 10, NUL */
1217 	char pwbuf[NSS_BUFLEN_PASSWD + 1];
1218 	struct passwd pwent;
1219 	struct passwd *pw = NULL;
1220 
1221 	assert(env != NULL);
1222 	assert(failsafe == 0);
1223 
1224 	/*
1225 	 * Exec the "user_cmd" brand hook to get a pwent for the
1226 	 * login user.  If this fails, HOME will be set to "/", SHELL
1227 	 * will be set to $DEFAULTSHELL, and we will continue to exec
1228 	 * SUPATH <login> -c <cmd>.
1229 	 */
1230 	pw = zone_get_user_pw(user_cmd, &pwent, pwbuf, sizeof (pwbuf));
1231 
1232 	/*
1233 	 * Get existing envp size.
1234 	 */
1235 	for (size = 0; env[size] != NULL; size++)
1236 		;
1237 
1238 	e = size;
1239 
1240 	/*
1241 	 * Finish filling out the environment; we duplicate the environment
1242 	 * setup described in login(1), for lack of a better precedent.
1243 	 */
1244 	if (pw != NULL)
1245 		size += 3;	/* LOGNAME, HOME, MAIL */
1246 	else
1247 		size += 1;	/* HOME */
1248 
1249 	size++;	/* always fill in SHELL */
1250 	size++; /* terminating NULL */
1251 
1252 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1253 		goto malloc_fail;
1254 
1255 	/*
1256 	 * Copy existing elements of env into new_env.
1257 	 */
1258 	for (i = 0; env[i] != NULL; i++) {
1259 		if ((new_env[i] = strdup(env[i])) == NULL)
1260 			goto malloc_fail;
1261 	}
1262 	assert(e == i);
1263 
1264 	if (pw != NULL) {
1265 		if ((estr = add_env("LOGNAME", pw->pw_name)) == NULL)
1266 			goto malloc_fail;
1267 		new_env[e++] = estr;
1268 
1269 		if ((estr = add_env("HOME", pw->pw_dir)) == NULL)
1270 			goto malloc_fail;
1271 		new_env[e++] = estr;
1272 
1273 		if (chdir(pw->pw_dir) != 0)
1274 			zerror(gettext("Could not chdir to home directory "
1275 			    "%s: %s"), pw->pw_dir, strerror(errno));
1276 
1277 		(void) snprintf(varmail, sizeof (varmail), "/var/mail/%s",
1278 		    pw->pw_name);
1279 		if ((estr = add_env("MAIL", varmail)) == NULL)
1280 			goto malloc_fail;
1281 		new_env[e++] = estr;
1282 	} else {
1283 		if ((estr = add_env("HOME", "/")) == NULL)
1284 			goto malloc_fail;
1285 		new_env[e++] = estr;
1286 	}
1287 
1288 	if (pw != NULL && strlen(pw->pw_shell) > 0) {
1289 		if ((estr = add_env("SHELL", pw->pw_shell)) == NULL)
1290 			goto malloc_fail;
1291 		new_env[e++] = estr;
1292 	} else {
1293 		if ((estr = add_env("SHELL", DEFAULTSHELL)) == NULL)
1294 			goto malloc_fail;
1295 		new_env[e++] = estr;
1296 	}
1297 
1298 	new_env[e++] = NULL;	/* add terminating NULL */
1299 
1300 	assert(e == size);
1301 	return (new_env);
1302 
1303 malloc_fail:
1304 	zperror(gettext("failed to allocate memory for process environment"));
1305 	return (NULL);
1306 }
1307 
/*
 * fdwalk(3C) callback: close every descriptor except the one whose number
 * is stored in the int that 'slavefd' points to.  Always returns 0.
 */
static int
close_func(void *slavefd, int fd)
{
	const int keep = *(int *)slavefd;

	if (fd == keep)
		return (0);

	(void) close(fd);
	return (0);
}
1315 
1316 static void
1317 set_cmdchar(char *cmdcharstr)
1318 {
1319 	char c;
1320 	long lc;
1321 
1322 	if ((c = *cmdcharstr) != '\\') {
1323 		cmdchar = c;
1324 		return;
1325 	}
1326 
1327 	c = cmdcharstr[1];
1328 	if (c == '\0' || c == '\\') {
1329 		cmdchar = '\\';
1330 		return;
1331 	}
1332 
1333 	if (c < '0' || c > '7') {
1334 		zerror(gettext("Unrecognized escape character option %s"),
1335 		    cmdcharstr);
1336 		usage();
1337 	}
1338 
1339 	lc = strtol(cmdcharstr + 1, NULL, 8);
1340 	if (lc < 0 || lc > 255) {
1341 		zerror(gettext("Octal escape character '%s' too large"),
1342 		    cmdcharstr);
1343 		usage();
1344 	}
1345 	cmdchar = (char)lc;
1346 }
1347 
1348 static int
1349 setup_utmpx(char *slavename)
1350 {
1351 	struct utmpx ut;
1352 
1353 	bzero(&ut, sizeof (ut));
1354 	(void) strncpy(ut.ut_user, ".zlogin", sizeof (ut.ut_user));
1355 	(void) strncpy(ut.ut_line, slavename, sizeof (ut.ut_line));
1356 	ut.ut_pid = getpid();
1357 	ut.ut_id[0] = 'z';
1358 	ut.ut_id[1] = ut.ut_id[2] = ut.ut_id[3] = (char)SC_WILDC;
1359 	ut.ut_type = LOGIN_PROCESS;
1360 	(void) time(&ut.ut_tv.tv_sec);
1361 
1362 	if (makeutx(&ut) == NULL) {
1363 		zerror(gettext("makeutx failed"));
1364 		return (-1);
1365 	}
1366 	return (0);
1367 }
1368 
/*
 * Give up the lock file obtained with grab_lock_file() by closing its
 * descriptor.  Any failure of close() is ignored.
 */
static void
release_lock_file(int lockfd)
{
	(void) close(lockfd);
}
1374 
1375 static int
1376 grab_lock_file(const char *zone_name, int *lockfd)
1377 {
1378 	char pathbuf[PATH_MAX];
1379 	struct flock flock;
1380 
1381 	if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
1382 		zerror(gettext("could not mkdir %s: %s"), ZONES_TMPDIR,
1383 		    strerror(errno));
1384 		return (-1);
1385 	}
1386 	(void) chmod(ZONES_TMPDIR, S_IRWXU);
1387 	(void) snprintf(pathbuf, sizeof (pathbuf), "%s/%s.zoneadm.lock",
1388 	    ZONES_TMPDIR, zone_name);
1389 
1390 	if ((*lockfd = open(pathbuf, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR)) < 0) {
1391 		zerror(gettext("could not open %s: %s"), pathbuf,
1392 		    strerror(errno));
1393 		return (-1);
1394 	}
1395 	/*
1396 	 * Lock the file to synchronize with other zoneadmds
1397 	 */
1398 	flock.l_type = F_WRLCK;
1399 	flock.l_whence = SEEK_SET;
1400 	flock.l_start = (off_t)0;
1401 	flock.l_len = (off_t)0;
1402 	if (fcntl(*lockfd, F_SETLKW, &flock) < 0) {
1403 		zerror(gettext("unable to lock %s: %s"), pathbuf,
1404 		    strerror(errno));
1405 		release_lock_file(*lockfd);
1406 		return (-1);
1407 	}
1408 	return (Z_OK);
1409 }
1410 
1411 static int
1412 start_zoneadmd(const char *zone_name)
1413 {
1414 	pid_t retval;
1415 	int pstatus = 0, error = -1, lockfd, doorfd;
1416 	struct door_info info;
1417 	char doorpath[MAXPATHLEN];
1418 
1419 	(void) snprintf(doorpath, sizeof (doorpath), ZONE_DOOR_PATH, zone_name);
1420 
1421 	if (grab_lock_file(zone_name, &lockfd) != Z_OK)
1422 		return (-1);
1423 	/*
1424 	 * We must do the door check with the lock held.  Otherwise, we
1425 	 * might race against another zoneadm/zlogin process and wind
1426 	 * up with two processes trying to start zoneadmd at the same
1427 	 * time.  zoneadmd will detect this, and fail, but we prefer this
1428 	 * to be as seamless as is practical, from a user perspective.
1429 	 */
1430 	if ((doorfd = open(doorpath, O_RDONLY)) < 0) {
1431 		if (errno != ENOENT) {
1432 			zerror("failed to open %s: %s", doorpath,
1433 			    strerror(errno));
1434 			goto out;
1435 		}
1436 	} else {
1437 		/*
1438 		 * Seems to be working ok.
1439 		 */
1440 		if (door_info(doorfd, &info) == 0 &&
1441 		    ((info.di_attributes & DOOR_REVOKED) == 0)) {
1442 			error = 0;
1443 			goto out;
1444 		}
1445 	}
1446 
1447 	if ((child_pid = fork()) == -1) {
1448 		zperror(gettext("could not fork"));
1449 		goto out;
1450 	} else if (child_pid == 0) {
1451 		/* child process */
1452 		(void) execl("/usr/lib/zones/zoneadmd", "zoneadmd", "-z",
1453 		    zone_name, NULL);
1454 		zperror(gettext("could not exec zoneadmd"));
1455 		_exit(1);
1456 	}
1457 
1458 	/* parent process */
1459 	do {
1460 		retval = waitpid(child_pid, &pstatus, 0);
1461 	} while (retval != child_pid);
1462 	if (WIFSIGNALED(pstatus) ||
1463 	    (WIFEXITED(pstatus) && WEXITSTATUS(pstatus) != 0)) {
1464 		zerror(gettext("could not start %s"), "zoneadmd");
1465 		goto out;
1466 	}
1467 	error = 0;
1468 out:
1469 	release_lock_file(lockfd);
1470 	(void) close(doorfd);
1471 	return (error);
1472 }
1473 
1474 static int
1475 init_template(void)
1476 {
1477 	int fd;
1478 	int err = 0;
1479 
1480 	fd = open64(CTFS_ROOT "/process/template", O_RDWR);
1481 	if (fd == -1)
1482 		return (-1);
1483 
1484 	/*
1485 	 * zlogin doesn't do anything with the contract.
1486 	 * Deliver no events, don't inherit, and allow it to be orphaned.
1487 	 */
1488 	err |= ct_tmpl_set_critical(fd, 0);
1489 	err |= ct_tmpl_set_informative(fd, 0);
1490 	err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
1491 	err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
1492 	if (err || ct_tmpl_activate(fd)) {
1493 		(void) close(fd);
1494 		return (-1);
1495 	}
1496 
1497 	return (fd);
1498 }
1499 
1500 static int
1501 noninteractive_login(char *zonename, const char *user_cmd, zoneid_t zoneid,
1502     char **new_args, char **new_env)
1503 {
1504 	pid_t retval;
1505 	int stdin_pipe[2], stdout_pipe[2], stderr_pipe[2], dead_child_pipe[2];
1506 	int child_status;
1507 	int tmpl_fd;
1508 	sigset_t block_cld;
1509 
1510 	if ((tmpl_fd = init_template()) == -1) {
1511 		reset_tty();
1512 		zperror(gettext("could not create contract"));
1513 		return (1);
1514 	}
1515 
1516 	if (pipe(stdin_pipe) != 0) {
1517 		zperror(gettext("could not create STDIN pipe"));
1518 		return (1);
1519 	}
1520 	/*
1521 	 * When the user types ^D, we get a zero length message on STDIN.
1522 	 * We need to echo that down the pipe to send it to the other side;
1523 	 * but by default, pipes don't propagate zero-length messages.  We
1524 	 * toggle that behavior off using I_SWROPT.  See streamio(7i).
1525 	 */
1526 	if (ioctl(stdin_pipe[0], I_SWROPT, SNDZERO) != 0) {
1527 		zperror(gettext("could not configure STDIN pipe"));
1528 		return (1);
1529 
1530 	}
1531 	if (pipe(stdout_pipe) != 0) {
1532 		zperror(gettext("could not create STDOUT pipe"));
1533 		return (1);
1534 	}
1535 	if (pipe(stderr_pipe) != 0) {
1536 		zperror(gettext("could not create STDERR pipe"));
1537 		return (1);
1538 	}
1539 
1540 	if (pipe(dead_child_pipe) != 0) {
1541 		zperror(gettext("could not create signalling pipe"));
1542 		return (1);
1543 	}
1544 	close_on_sig = dead_child_pipe[0];
1545 
1546 	/*
1547 	 * If any of the pipe FD's winds up being less than STDERR, then we
1548 	 * have a mess on our hands-- and we are lacking some of the I/O
1549 	 * streams we would expect anyway.  So we bail.
1550 	 */
1551 	if (stdin_pipe[0] <= STDERR_FILENO ||
1552 	    stdin_pipe[1] <= STDERR_FILENO ||
1553 	    stdout_pipe[0] <= STDERR_FILENO ||
1554 	    stdout_pipe[1] <= STDERR_FILENO ||
1555 	    stderr_pipe[0] <= STDERR_FILENO ||
1556 	    stderr_pipe[1] <= STDERR_FILENO ||
1557 	    dead_child_pipe[0] <= STDERR_FILENO ||
1558 	    dead_child_pipe[1] <= STDERR_FILENO) {
1559 		zperror(gettext("process lacks valid STDIN, STDOUT, STDERR"));
1560 		return (1);
1561 	}
1562 
1563 	if (prefork_dropprivs() != 0) {
1564 		zperror(gettext("could not allocate privilege set"));
1565 		return (1);
1566 	}
1567 
1568 	(void) sigset(SIGCLD, sigcld);
1569 	(void) sigemptyset(&block_cld);
1570 	(void) sigaddset(&block_cld, SIGCLD);
1571 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1572 
1573 	if ((child_pid = fork()) == -1) {
1574 		(void) ct_tmpl_clear(tmpl_fd);
1575 		(void) close(tmpl_fd);
1576 		zperror(gettext("could not fork"));
1577 		return (1);
1578 	} else if (child_pid == 0) { /* child process */
1579 		(void) ct_tmpl_clear(tmpl_fd);
1580 
1581 		/*
1582 		 * Do a dance to get the pipes hooked up as FD's 0, 1 and 2.
1583 		 */
1584 		(void) close(STDIN_FILENO);
1585 		(void) close(STDOUT_FILENO);
1586 		(void) close(STDERR_FILENO);
1587 		(void) dup2(stdin_pipe[1], STDIN_FILENO);
1588 		(void) dup2(stdout_pipe[1], STDOUT_FILENO);
1589 		(void) dup2(stderr_pipe[1], STDERR_FILENO);
1590 		(void) closefrom(STDERR_FILENO + 1);
1591 
1592 		(void) sigset(SIGCLD, SIG_DFL);
1593 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1594 		/*
1595 		 * In case any of stdin, stdout or stderr are streams,
1596 		 * anchor them to prevent malicious I_POPs.
1597 		 */
1598 		(void) ioctl(STDIN_FILENO, I_ANCHOR);
1599 		(void) ioctl(STDOUT_FILENO, I_ANCHOR);
1600 		(void) ioctl(STDERR_FILENO, I_ANCHOR);
1601 
1602 		if (zone_enter(zoneid) == -1) {
1603 			zerror(gettext("could not enter zone %s: %s"),
1604 			    zonename, strerror(errno));
1605 			_exit(1);
1606 		}
1607 
1608 		/*
1609 		 * For non-native zones, tell libc where it can find locale
1610 		 * specific getttext() messages.
1611 		 */
1612 		if (access("/.SUNWnative/usr/lib/locale", R_OK) == 0)
1613 			(void) bindtextdomain(TEXT_DOMAIN,
1614 			    "/.SUNWnative/usr/lib/locale");
1615 		else if (access("/native/usr/lib/locale", R_OK) == 0)
1616 			(void) bindtextdomain(TEXT_DOMAIN,
1617 			    "/native/usr/lib/locale");
1618 
1619 		if (!failsafe)
1620 			new_env = prep_env_noninteractive(user_cmd, new_env);
1621 
1622 		if (new_env == NULL) {
1623 			_exit(1);
1624 		}
1625 
1626 		/*
1627 		 * Move into a new process group; the zone_enter will have
1628 		 * placed us into zsched's session, and we want to be in
1629 		 * a unique process group.
1630 		 */
1631 		(void) setpgid(getpid(), getpid());
1632 
1633 		(void) execve(new_args[0], new_args, new_env);
1634 		zperror(gettext("exec failure"));
1635 		_exit(1);
1636 	}
1637 	/* parent */
1638 
1639 	/* close pipe sides written by child */
1640 	(void) close(stdout_pipe[1]);
1641 	(void) close(stderr_pipe[1]);
1642 
1643 	(void) sigset(SIGINT, sig_forward);
1644 
1645 	postfork_dropprivs();
1646 
1647 	(void) ct_tmpl_clear(tmpl_fd);
1648 	(void) close(tmpl_fd);
1649 
1650 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1651 	doio(stdin_pipe[0], stdin_pipe[1], stdout_pipe[0], stderr_pipe[0],
1652 	    dead_child_pipe[1], B_TRUE);
1653 	do {
1654 		retval = waitpid(child_pid, &child_status, 0);
1655 		if (retval == -1) {
1656 			child_status = 0;
1657 		}
1658 	} while (retval != child_pid && errno != ECHILD);
1659 
1660 	return (WEXITSTATUS(child_status));
1661 }
1662 
1663 int
1664 main(int argc, char **argv)
1665 {
1666 	int arg, console = 0;
1667 	zoneid_t zoneid;
1668 	zone_state_t st;
1669 	char *login = "root";
1670 	int lflag = 0;
1671 	char *zonename = NULL;
1672 	char **proc_args = NULL;
1673 	char **new_args, **new_env;
1674 	sigset_t block_cld;
1675 	char devroot[MAXPATHLEN];
1676 	char *slavename, slaveshortname[MAXPATHLEN];
1677 	priv_set_t *privset;
1678 	int tmpl_fd;
1679 	char zonebrand[MAXNAMELEN];
1680 	struct stat sb;
1681 	char kernzone[ZONENAME_MAX];
1682 	brand_handle_t bh;
1683 	char user_cmd[MAXPATHLEN];
1684 
1685 	(void) setlocale(LC_ALL, "");
1686 	(void) textdomain(TEXT_DOMAIN);
1687 
1688 	(void) getpname(argv[0]);
1689 
1690 	while ((arg = getopt(argc, argv, "ECR:Se:l:")) != EOF) {
1691 		switch (arg) {
1692 		case 'C':
1693 			console = 1;
1694 			break;
1695 		case 'E':
1696 			nocmdchar = 1;
1697 			break;
1698 		case 'R':	/* undocumented */
1699 			if (*optarg != '/') {
1700 				zerror(gettext("root path must be absolute."));
1701 				exit(2);
1702 			}
1703 			if (stat(optarg, &sb) == -1 || !S_ISDIR(sb.st_mode)) {
1704 				zerror(
1705 				    gettext("root path must be a directory."));
1706 				exit(2);
1707 			}
1708 			zonecfg_set_root(optarg);
1709 			break;
1710 		case 'S':
1711 			failsafe = 1;
1712 			break;
1713 		case 'e':
1714 			set_cmdchar(optarg);
1715 			break;
1716 		case 'l':
1717 			login = optarg;
1718 			lflag = 1;
1719 			break;
1720 		default:
1721 			usage();
1722 		}
1723 	}
1724 
1725 	if (console != 0 && lflag != 0) {
1726 		zerror(gettext("-l may not be specified for console login"));
1727 		usage();
1728 	}
1729 
1730 	if (console != 0 && failsafe != 0) {
1731 		zerror(gettext("-S may not be specified for console login"));
1732 		usage();
1733 	}
1734 
1735 	if (console != 0 && zonecfg_in_alt_root()) {
1736 		zerror(gettext("-R may not be specified for console login"));
1737 		exit(2);
1738 	}
1739 
1740 	if (failsafe != 0 && lflag != 0) {
1741 		zerror(gettext("-l may not be specified for failsafe login"));
1742 		usage();
1743 	}
1744 
1745 	if (optind == (argc - 1)) {
1746 		/*
1747 		 * zone name, no process name; this should be an interactive
1748 		 * as long as STDIN is really a tty.
1749 		 */
1750 		if (isatty(STDIN_FILENO))
1751 			interactive = 1;
1752 		zonename = argv[optind];
1753 	} else if (optind < (argc - 1)) {
1754 		if (console) {
1755 			zerror(gettext("Commands may not be specified for "
1756 			    "console login."));
1757 			usage();
1758 		}
1759 		/* zone name and process name, and possibly some args */
1760 		zonename = argv[optind];
1761 		proc_args = &argv[optind + 1];
1762 		interactive = 0;
1763 	} else {
1764 		usage();
1765 	}
1766 
1767 	if (getzoneid() != GLOBAL_ZONEID) {
1768 		zerror(gettext("'%s' may only be used from the global zone"),
1769 		    pname);
1770 		return (1);
1771 	}
1772 
1773 	if (strcmp(zonename, GLOBAL_ZONENAME) == 0) {
1774 		zerror(gettext("'%s' not applicable to the global zone"),
1775 		    pname);
1776 		return (1);
1777 	}
1778 
1779 	if (zone_get_state(zonename, &st) != Z_OK) {
1780 		zerror(gettext("zone '%s' unknown"), zonename);
1781 		return (1);
1782 	}
1783 
1784 	if (st < ZONE_STATE_INSTALLED) {
1785 		zerror(gettext("cannot login to a zone which is '%s'"),
1786 		    zone_state_str(st));
1787 		return (1);
1788 	}
1789 
1790 	/*
1791 	 * In both console and non-console cases, we require all privs.
1792 	 * In the console case, because we may need to startup zoneadmd.
1793 	 * In the non-console case in order to do zone_enter(2), zonept()
1794 	 * and other tasks.
1795 	 *
1796 	 * Future work: this solution is temporary.  Ultimately, we need to
1797 	 * move to a flexible system which allows the global admin to
1798 	 * designate that a particular user can zlogin (and probably zlogin
1799 	 * -C) to a particular zone.  This all-root business we have now is
1800 	 * quite sketchy.
1801 	 */
1802 	if ((privset = priv_allocset()) == NULL) {
1803 		zperror(gettext("priv_allocset failed"));
1804 		return (1);
1805 	}
1806 
1807 	if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1808 		zperror(gettext("getppriv failed"));
1809 		priv_freeset(privset);
1810 		return (1);
1811 	}
1812 
1813 	if (priv_isfullset(privset) == B_FALSE) {
1814 		zerror(gettext("You lack sufficient privilege to run "
1815 		    "this command (all privs required)"));
1816 		priv_freeset(privset);
1817 		return (1);
1818 	}
1819 	priv_freeset(privset);
1820 
1821 	/*
1822 	 * The console is a separate case from the rest of the code; handle
1823 	 * it first.
1824 	 */
1825 	if (console) {
1826 		/*
1827 		 * Ensure that zoneadmd for this zone is running.
1828 		 */
1829 		if (start_zoneadmd(zonename) == -1)
1830 			return (1);
1831 
1832 		/*
1833 		 * Make contact with zoneadmd.
1834 		 */
1835 		if (get_console_master(zonename) == -1)
1836 			return (1);
1837 
1838 		(void) printf(gettext("[Connected to zone '%s' console]\n"),
1839 		    zonename);
1840 
1841 		if (set_tty_rawmode(STDIN_FILENO) == -1) {
1842 			reset_tty();
1843 			zperror(gettext("failed to set stdin pty to raw mode"));
1844 			return (1);
1845 		}
1846 
1847 		(void) sigset(SIGWINCH, sigwinch);
1848 		(void) sigwinch(0);
1849 
1850 		/*
1851 		 * Run the I/O loop until we get disconnected.
1852 		 */
1853 		doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
1854 		reset_tty();
1855 		(void) printf(gettext("\n[Connection to zone '%s' console "
1856 		    "closed]\n"), zonename);
1857 
1858 		return (0);
1859 	}
1860 
1861 	if (st != ZONE_STATE_RUNNING && st != ZONE_STATE_MOUNTED) {
1862 		zerror(gettext("login allowed only to running zones "
1863 		    "(%s is '%s')."), zonename, zone_state_str(st));
1864 		return (1);
1865 	}
1866 
1867 	(void) strlcpy(kernzone, zonename, sizeof (kernzone));
1868 	if (zonecfg_in_alt_root()) {
1869 		FILE *fp = zonecfg_open_scratch("", B_FALSE);
1870 
1871 		if (fp == NULL || zonecfg_find_scratch(fp, zonename,
1872 		    zonecfg_get_root(), kernzone, sizeof (kernzone)) == -1) {
1873 			zerror(gettext("cannot find scratch zone %s"),
1874 			    zonename);
1875 			if (fp != NULL)
1876 				zonecfg_close_scratch(fp);
1877 			return (1);
1878 		}
1879 		zonecfg_close_scratch(fp);
1880 	}
1881 
1882 	if ((zoneid = getzoneidbyname(kernzone)) == -1) {
1883 		zerror(gettext("failed to get zoneid for zone '%s'"),
1884 		    zonename);
1885 		return (1);
1886 	}
1887 
1888 	/*
1889 	 * We need the zone root path only if we are setting up a pty.
1890 	 */
1891 	if (zone_get_devroot(zonename, devroot, sizeof (devroot)) == -1) {
1892 		zerror(gettext("could not get dev path for zone %s"),
1893 		    zonename);
1894 		return (1);
1895 	}
1896 
1897 	/* Get a handle to the brand info for this zone */
1898 	if ((zone_get_brand(zonename, zonebrand, sizeof (zonebrand)) != Z_OK) ||
1899 	    ((bh = brand_open(zonebrand)) == NULL)) {
1900 		zerror(gettext("could not get brand for zone %s"), zonename);
1901 		return (1);
1902 	}
1903 	if ((new_args = prep_args(bh, login, proc_args)) == NULL) {
1904 		zperror(gettext("could not assemble new arguments"));
1905 		brand_close(bh);
1906 		return (1);
1907 	}
1908 	/*
1909 	 * Get the brand specific user_cmd.  This command is used to get
1910 	 * a passwd(4) entry for login.
1911 	 */
1912 	if (!interactive && !failsafe) {
1913 		if (zone_get_user_cmd(bh, login, user_cmd,
1914 		    sizeof (user_cmd)) == NULL) {
1915 			zerror(gettext("could not get user_cmd for zone %s"),
1916 			    zonename);
1917 			brand_close(bh);
1918 			return (1);
1919 		}
1920 	}
1921 	brand_close(bh);
1922 
1923 	if ((new_env = prep_env()) == NULL) {
1924 		zperror(gettext("could not assemble new environment"));
1925 		return (1);
1926 	}
1927 
1928 	if (!interactive)
1929 		return (noninteractive_login(zonename, user_cmd, zoneid,
1930 		    new_args, new_env));
1931 
1932 	if (zonecfg_in_alt_root()) {
1933 		zerror(gettext("cannot use interactive login with scratch "
1934 		    "zone"));
1935 		return (1);
1936 	}
1937 
1938 	/*
1939 	 * Things are more complex in interactive mode; we get the
1940 	 * master side of the pty, then place the user's terminal into
1941 	 * raw mode.
1942 	 */
1943 	if (get_master_pty() == -1) {
1944 		zerror(gettext("could not setup master pty device"));
1945 		return (1);
1946 	}
1947 
1948 	/*
1949 	 * Compute the "short name" of the pts.  /dev/pts/2 --> pts/2
1950 	 */
1951 	if ((slavename = ptsname(masterfd)) == NULL) {
1952 		zperror(gettext("failed to get name for pseudo-tty"));
1953 		return (1);
1954 	}
1955 	if (strncmp(slavename, "/dev/", strlen("/dev/")) == 0)
1956 		(void) strlcpy(slaveshortname, slavename + strlen("/dev/"),
1957 		    sizeof (slaveshortname));
1958 	else
1959 		(void) strlcpy(slaveshortname, slavename,
1960 		    sizeof (slaveshortname));
1961 
1962 	(void) printf(gettext("[Connected to zone '%s' %s]\n"), zonename,
1963 	    slaveshortname);
1964 
1965 	if (set_tty_rawmode(STDIN_FILENO) == -1) {
1966 		reset_tty();
1967 		zperror(gettext("failed to set stdin pty to raw mode"));
1968 		return (1);
1969 	}
1970 
1971 	if (prefork_dropprivs() != 0) {
1972 		reset_tty();
1973 		zperror(gettext("could not allocate privilege set"));
1974 		return (1);
1975 	}
1976 
1977 	/*
1978 	 * We must mask SIGCLD until after we have coped with the fork
1979 	 * sufficiently to deal with it; otherwise we can race and receive the
1980 	 * signal before child_pid has been initialized (yes, this really
1981 	 * happens).
1982 	 */
1983 	(void) sigset(SIGCLD, sigcld);
1984 	(void) sigemptyset(&block_cld);
1985 	(void) sigaddset(&block_cld, SIGCLD);
1986 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1987 
1988 	/*
1989 	 * We activate the contract template at the last minute to
1990 	 * avoid intermediate functions that could be using fork(2)
1991 	 * internally.
1992 	 */
1993 	if ((tmpl_fd = init_template()) == -1) {
1994 		reset_tty();
1995 		zperror(gettext("could not create contract"));
1996 		return (1);
1997 	}
1998 
1999 	if ((child_pid = fork()) == -1) {
2000 		(void) ct_tmpl_clear(tmpl_fd);
2001 		reset_tty();
2002 		zperror(gettext("could not fork"));
2003 		return (1);
2004 	} else if (child_pid == 0) { /* child process */
2005 		int slavefd, newslave;
2006 
2007 		(void) ct_tmpl_clear(tmpl_fd);
2008 		(void) close(tmpl_fd);
2009 
2010 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2011 
2012 		if ((slavefd = init_slave_pty(zoneid, devroot)) == -1)
2013 			return (1);
2014 
2015 		/*
2016 		 * Close all fds except for the slave pty.
2017 		 */
2018 		(void) fdwalk(close_func, &slavefd);
2019 
2020 		/*
2021 		 * Temporarily dup slavefd to stderr; that way if we have
2022 		 * to print out that zone_enter failed, the output will
2023 		 * have somewhere to go.
2024 		 */
2025 		if (slavefd != STDERR_FILENO)
2026 			(void) dup2(slavefd, STDERR_FILENO);
2027 
2028 		if (zone_enter(zoneid) == -1) {
2029 			zerror(gettext("could not enter zone %s: %s"),
2030 			    zonename, strerror(errno));
2031 			return (1);
2032 		}
2033 
2034 		if (slavefd != STDERR_FILENO)
2035 			(void) close(STDERR_FILENO);
2036 
2037 		/*
2038 		 * We take pains to get this process into a new process
2039 		 * group, and subsequently a new session.  In this way,
2040 		 * we'll have a session which doesn't yet have a controlling
2041 		 * terminal.  When we open the slave, it will become the
2042 		 * controlling terminal; no PIDs concerning pgrps or sids
2043 		 * will leak inappropriately into the zone.
2044 		 */
2045 		(void) setpgrp();
2046 
2047 		/*
2048 		 * We need the slave pty to be referenced from the zone's
2049 		 * /dev in order to ensure that the devt's, etc are all
2050 		 * correct.  Otherwise we break ttyname and the like.
2051 		 */
2052 		if ((newslave = open(slavename, O_RDWR)) == -1) {
2053 			(void) close(slavefd);
2054 			return (1);
2055 		}
2056 		(void) close(slavefd);
2057 		slavefd = newslave;
2058 
2059 		/*
2060 		 * dup the slave to the various FDs, so that when the
2061 		 * spawned process does a write/read it maps to the slave
2062 		 * pty.
2063 		 */
2064 		(void) dup2(slavefd, STDIN_FILENO);
2065 		(void) dup2(slavefd, STDOUT_FILENO);
2066 		(void) dup2(slavefd, STDERR_FILENO);
2067 		if (slavefd != STDIN_FILENO && slavefd != STDOUT_FILENO &&
2068 		    slavefd != STDERR_FILENO) {
2069 			(void) close(slavefd);
2070 		}
2071 
2072 		/*
2073 		 * In failsafe mode, we don't use login(1), so don't try
2074 		 * setting up a utmpx entry.
2075 		 *
2076 		 * A branded zone may have very different utmpx semantics.
2077 		 * At the moment, we only have two brand types:
2078 		 * Solaris-like (native, sn1) and Linux.  In the Solaris
2079 		 * case, we know exactly how to do the necessary utmpx
2080 		 * setup.  Fortunately for us, the Linux /bin/login is
2081 		 * prepared to deal with a non-initialized utmpx entry, so
2082 		 * we can simply skip it.  If future brands don't fall into
2083 		 * either category, we'll have to add a per-brand utmpx
2084 		 * setup hook.
2085 		 */
2086 		if (!failsafe && (strcmp(zonebrand, "lx") != 0))
2087 			if (setup_utmpx(slaveshortname) == -1)
2088 				return (1);
2089 
2090 		(void) execve(new_args[0], new_args, new_env);
2091 		zperror(gettext("exec failure"));
2092 		return (1);
2093 	}
2094 	(void) ct_tmpl_clear(tmpl_fd);
2095 	(void) close(tmpl_fd);
2096 
2097 	/*
2098 	 * The rest is only for the parent process.
2099 	 */
2100 	(void) sigset(SIGWINCH, sigwinch);
2101 
2102 	postfork_dropprivs();
2103 
2104 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2105 	doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
2106 
2107 	reset_tty();
2108 	(void) fprintf(stderr,
2109 	    gettext("\n[Connection to zone '%s' %s closed]\n"), zonename,
2110 	    slaveshortname);
2111 
2112 	if (pollerr != 0) {
2113 		(void) fprintf(stderr, gettext("Error: connection closed due "
2114 		    "to unexpected pollevents=0x%x.\n"), pollerr);
2115 		return (1);
2116 	}
2117 
2118 	return (0);
2119 }
2120