xref: /illumos-gate/usr/src/uts/common/brand/solaris10/s10_brand.c (revision 56f33205c9ed776c3c909e07d52e94610a675740)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/errno.h>
27 #include <sys/exec.h>
28 #include <sys/file.h>
29 #include <sys/kmem.h>
30 #include <sys/modctl.h>
31 #include <sys/model.h>
32 #include <sys/proc.h>
33 #include <sys/syscall.h>
34 #include <sys/systm.h>
35 #include <sys/thread.h>
36 #include <sys/cmn_err.h>
37 #include <sys/archsystm.h>
38 #include <sys/pathname.h>
39 #include <sys/sunddi.h>
40 
41 #include <sys/machbrand.h>
42 #include <sys/brand.h>
43 #include "s10_brand.h"
44 
/*
 * Per-system-call emulation flags for the solaris10 brand.  _init()
 * allocates NSYSCALL zeroed bytes here and stores 1 in the slot of every
 * system call number the brand interposes on; _fini() frees the table.
 */
45 char *s10_emulation_table = NULL;
46 
47 void	s10_init_brand_data(zone_t *);
48 void	s10_free_brand_data(zone_t *);
49 void	s10_setbrand(proc_t *);
50 int	s10_getattr(zone_t *, int, void *, size_t *);
51 int	s10_setattr(zone_t *, int, void *, size_t);
52 int	s10_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
53 		uintptr_t, uintptr_t, uintptr_t);
54 void	s10_copy_procdata(proc_t *, proc_t *);
55 void	s10_proc_exit(struct proc *, klwp_t *);
56 void	s10_exec();
57 int	s10_initlwp(klwp_t *);
58 void	s10_forklwp(klwp_t *, klwp_t *);
59 void	s10_freelwp(klwp_t *);
60 void	s10_lwpexit(klwp_t *);
61 int	s10_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
62 	long *, int, caddr_t, cred_t *, int);
63 
64 /*
 * s10 brand: the operations vector that s10_brand (below) points at.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of struct brand_ops (presumably declared in <sys/brand.h>; it is
 * not visible in this file -- confirm before reordering).  Every entry
 * except lwp_setrval is a function defined in this file.
 */
65 struct brand_ops s10_brops = {
66 	s10_init_brand_data,
67 	s10_free_brand_data,
68 	s10_brandsys,
69 	s10_setbrand,
70 	s10_getattr,
71 	s10_setattr,
72 	s10_copy_procdata,
73 	s10_proc_exit,
74 	s10_exec,
75 	lwp_setrval,
76 	s10_initlwp,
77 	s10_forklwp,
78 	s10_freelwp,
79 	s10_lwpexit,
80 	s10_elfexec
81 };
82 
/*
 * Machine-dependent callback vector that s10_brand (below) points at.
 * Exactly one of three positional initializers is compiled:
 *
 *   sparc:		two callbacks
 *   amd64:		six slots, two of them NULL
 *   other (!amd64):	six slots, four of them NULL
 *
 * The callbacks themselves are not defined in this file, and the slot order
 * has to match struct brand_mach_ops (not visible here -- confirm against
 * its declaration before changing anything).
 */
83 #ifdef	sparc
84 
85 struct brand_mach_ops s10_mops = {
86 	s10_brand_syscall_callback,
87 	s10_brand_syscall32_callback
88 };
89 
90 #else	/* sparc */
91 
92 #ifdef	__amd64
93 
94 struct brand_mach_ops s10_mops = {
95 	s10_brand_sysenter_callback,
96 	NULL,
97 	s10_brand_int91_callback,
98 	s10_brand_syscall_callback,
99 	s10_brand_syscall32_callback,
100 	NULL
101 };
102 
103 #else	/* ! __amd64 */
104 
105 struct brand_mach_ops s10_mops = {
106 	s10_brand_sysenter_callback,
107 	NULL,
108 	NULL,
109 	s10_brand_syscall_callback,
110 	NULL,
111 	NULL
112 };
113 #endif	/* __amd64 */
114 
115 #endif	/* sparc */
116 
/*
 * Descriptor of the solaris10 brand: brand interface version, brand name,
 * and pointers to the operation vectors defined above.  The rest of this
 * file identifies branded zones and processes by comparing their brand
 * pointer against the address of this structure.
 */
117 struct brand	s10_brand = {
118 	BRAND_VER_1,
119 	"solaris10",
120 	&s10_brops,
121 	&s10_mops
122 };
123 
/*
 * Brand-module linkage record; referenced from modlinkage below.
 */
124 static struct modlbrand modlbrand = {
125 	&mod_brandops,		/* type of module */
126 	"Solaris 10 Brand",	/* description of module */
127 	&s10_brand		/* brand descriptor defined above */
128 };
129 
/*
 * Module linkage handed to mod_install(), mod_info() and mod_remove() by
 * _init(), _info() and _fini(); its only linkage entry is modlbrand.
 */
130 static struct modlinkage modlinkage = {
131 	MODREV_1, (void *)&modlbrand, NULL
132 };
133 
134 void
135 s10_setbrand(proc_t *p)
136 {
137 	ASSERT(p->p_brand == &s10_brand);
138 	ASSERT(p->p_brand_data == NULL);
139 
140 	/*
141 	 * We should only be called from exec(), when we know the process
142 	 * is single-threaded.
143 	 */
144 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
145 
146 	p->p_brand_data = kmem_zalloc(sizeof (s10_proc_data_t), KM_SLEEP);
147 	(void) s10_initlwp(p->p_tlist->t_lwp);
148 }
149 
150 /*ARGSUSED*/
151 int
152 s10_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize)
153 {
154 	ASSERT(zone->zone_brand == &s10_brand);
155 	if (attr == S10_EMUL_BITMAP) {
156 		if (buf == NULL || *bufsize != sizeof (s10_emul_bitmap_t))
157 			return (EINVAL);
158 		if (copyout(((s10_zone_data_t *)zone->zone_brand_data)->
159 		    emul_bitmap, buf, sizeof (s10_emul_bitmap_t)) != 0)
160 			return (EFAULT);
161 		return (0);
162 	}
163 
164 	return (EINVAL);
165 }
166 
167 int
168 s10_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
169 {
170 	ASSERT(zone->zone_brand == &s10_brand);
171 	if (attr == S10_EMUL_BITMAP) {
172 		if (buf == NULL || bufsize != sizeof (s10_emul_bitmap_t))
173 			return (EINVAL);
174 		if (copyin(buf, ((s10_zone_data_t *)zone->zone_brand_data)->
175 		    emul_bitmap, sizeof (s10_emul_bitmap_t)) != 0)
176 			return (EFAULT);
177 		return (0);
178 	}
179 
180 	return (EINVAL);
181 }
182 
183 #ifdef	__amd64
184 /*
185  * The Nevada kernel clears %fs for threads in 64-bit x86 processes but S10's
186  * libc expects %fs to be nonzero.  This causes some committed
187  * libc/libthread interfaces (e.g., thr_main()) to fail, which impacts several
188  * libraries, including libdoor.  This function sets the specified LWP's %fs
189  * register to the legacy S10 selector value (LWPFS_SEL).
190  *
191  * The best solution to the aforementioned problem is backporting CRs
192  * 6467491 to Solaris 10 so that 64-bit x86 Solaris 10 processes
193  * would accept zero for %fs.  Backporting the CRs is a requirement for running
194  * S10 Containers in PV domUs because 64-bit Xen clears %fsbase when %fs is
195  * nonzero.  Such behavior breaks 64-bit processes because Xen has to fetch the
196  * FS segments' base addresses from the LWPs' GDTs, which are only capable of
197  * 32-bit addressing.
198  */
199 /*ARGSUSED*/
200 static void
201 s10_amd64_correct_fsreg(klwp_t *l)
202 {
203 	if (lwp_getdatamodel(l) == DATAMODEL_NATIVE) {
204 		kpreempt_disable();
205 		l->lwp_pcb.pcb_fs = LWPFS_SEL;
206 		l->lwp_pcb.pcb_rupdate = 1;
207 		lwptot(l)->t_post_sys = 1;	/* Guarantee update_sregs() */
208 		kpreempt_enable();
209 	}
210 }
211 #endif	/* __amd64 */
212 
213 int
214 s10_native()
215 {
216 	struct user	*up = PTOU(curproc);
217 	char		*args_new, *comm_new, *p;
218 	int		len;
219 
220 	len = sizeof (S10_NATIVE_LINKER32 " ") - 1;
221 
222 	/*
223 	 * Make sure that the process' interpreter is the native dynamic linker.
224 	 * Convention dictates that native processes executing within solaris10-
225 	 * branded zones are interpreted by the native dynamic linker (the
226 	 * process and its arguments are specified as arguments to the dynamic
227 	 * linker).  If this convention is violated (i.e.,
228 	 * brandsys(B_S10_NATIVE, ...) is invoked by a process that shouldn't be
229 	 * native), then do nothing and silently indicate success.
230 	 */
231 	if (strcmp(up->u_comm, S10_LINKER_NAME) != 0)
232 		return (0);
233 	if (strncmp(up->u_psargs, S10_NATIVE_LINKER64 " /", len + 4) == 0)
234 		len += 3;		/* to account for "/64" in the path */
235 	else if (strncmp(up->u_psargs, S10_NATIVE_LINKER32 " /", len + 1) != 0)
236 		return (0);
237 
238 	args_new = strdup(&up->u_psargs[len]);
239 	if ((p = strchr(args_new, ' ')) != NULL)
240 		*p = '\0';
241 	if ((comm_new = strrchr(args_new, '/')) != NULL)
242 		comm_new = strdup(comm_new + 1);
243 	else
244 		comm_new = strdup(args_new);
245 	if (p != NULL)
246 		*p = ' ';
247 
248 	if ((strlen(args_new) != 0) && (strlen(comm_new) != 0)) {
249 		mutex_enter(&curproc->p_lock);
250 		(void) strlcpy(up->u_comm, comm_new, MAXCOMLEN+1);
251 		(void) strlcpy(up->u_psargs, args_new, PSARGSZ);
252 		mutex_exit(&curproc->p_lock);
253 	}
254 
255 	strfree(args_new);
256 	strfree(comm_new);
257 	return (0);
258 }
259 
260 /*
261  * Get the address of the user-space system call handler from the user
262  * process and attach it to the proc structure.
263  */
264 /*ARGSUSED*/
265 int
266 s10_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
267     uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
268 {
269 	s10_proc_data_t	*spd;
270 	s10_brand_reg_t	reg;
271 	proc_t		*p = curproc;
272 	int		err;
273 
274 	*rval = 0;
275 
276 	/*
277 	 * B_EXEC_BRAND is redundant
278 	 * since the kernel assumes a native process doing an exec
279 	 * in a branded zone is going to run a branded processes.
280 	 * hence we don't support this operation.
281 	 */
282 	if (cmd == B_EXEC_BRAND)
283 		return (ENOSYS);
284 
285 	if (cmd == B_S10_NATIVE)
286 		return (s10_native());
287 
288 	/* For all other operations this must be a branded process. */
289 	if (p->p_brand == &native_brand)
290 		return (ENOSYS);
291 
292 	ASSERT(p->p_brand == &s10_brand);
293 	ASSERT(p->p_brand_data != NULL);
294 
295 	spd = (s10_proc_data_t *)p->p_brand_data;
296 
297 	switch (cmd) {
298 	case B_EXEC_NATIVE:
299 		err = exec_common(
300 		    (char *)arg1, (const char **)arg2, (const char **)arg3,
301 		    EBA_NATIVE);
302 		return (err);
303 
304 	case B_REGISTER:
305 		if (p->p_model == DATAMODEL_NATIVE) {
306 			if (copyin((void *)arg1, &reg, sizeof (reg)) != 0)
307 				return (EFAULT);
308 #if defined(_LP64)
309 		} else {
310 			s10_brand_reg32_t reg32;
311 
312 			if (copyin((void *)arg1, &reg32, sizeof (reg32)) != 0)
313 				return (EFAULT);
314 			reg.sbr_version = reg32.sbr_version;
315 			reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler;
316 #endif /* _LP64 */
317 		}
318 
319 		if (reg.sbr_version != S10_VERSION)
320 			return (ENOTSUP);
321 		spd->spd_handler = reg.sbr_handler;
322 		return (0);
323 
324 	case B_ELFDATA:
325 		if (p->p_model == DATAMODEL_NATIVE) {
326 			if (copyout(&spd->spd_elf_data, (void *)arg1,
327 			    sizeof (s10_elf_data_t)) != 0)
328 				return (EFAULT);
329 #if defined(_LP64)
330 		} else {
331 			s10_elf_data32_t sed32;
332 
333 			sed32.sed_phdr = spd->spd_elf_data.sed_phdr;
334 			sed32.sed_phent = spd->spd_elf_data.sed_phent;
335 			sed32.sed_phnum = spd->spd_elf_data.sed_phnum;
336 			sed32.sed_entry = spd->spd_elf_data.sed_entry;
337 			sed32.sed_base = spd->spd_elf_data.sed_base;
338 			sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry;
339 			sed32.sed_lddata = spd->spd_elf_data.sed_lddata;
340 			if (copyout(&sed32, (void *)arg1, sizeof (sed32)) != 0)
341 				return (EFAULT);
342 #endif /* _LP64 */
343 		}
344 		return (0);
345 
346 	case B_S10_PIDINFO:
347 		/*
348 		 * The s10 brand needs to be able to get the pid of the
349 		 * current process and the pid of the zone's init, and it
350 		 * needs to do this on every process startup.  Early in
351 		 * brand startup, we can't call getpid() because calls to
352 		 * getpid() represent a magical signal to some old-skool
353 		 * debuggers.  By merging all of this into one call, we
354 		 * make this quite a bit cheaper and easier to handle in
355 		 * the brand module.
356 		 */
357 		if (copyout(&p->p_pid, (void *)arg1, sizeof (pid_t)) != 0)
358 			return (EFAULT);
359 		if (copyout(&p->p_zone->zone_proc_initpid, (void *)arg2,
360 		    sizeof (pid_t)) != 0)
361 			return (EFAULT);
362 		return (0);
363 
364 	case B_S10_TRUSS_POINT:
365 		/*
366 		 * This subcommand exists so that we can see truss output
367 		 * from interposed system calls that return without first
368 		 * calling any other system call, meaning they would be
369 		 * invisible to truss(1).
370 		 *
371 		 * If the second argument is set non-zero, set errno to that
372 		 * value as well.
373 		 *
374 		 * Arguments are:
375 		 *
376 		 *    arg1: syscall number
377 		 *    arg2: errno
378 		 */
379 		return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2));
380 
381 	case B_S10_ISFDXATTRDIR: {
382 		/*
383 		 * This subcommand enables the userland brand emulation library
384 		 * to determine whether a file descriptor refers to an extended
385 		 * file attributes directory.  There is no standard syscall or
386 		 * libc function that can make such a determination.
387 		 */
388 		file_t *dir_filep;
389 
390 		dir_filep = getf((int)arg1);
391 		if (dir_filep == NULL)
392 			return (EBADF);
393 		ASSERT(dir_filep->f_vnode != NULL);
394 		*rval = IS_XATTRDIR(dir_filep->f_vnode);
395 		releasef((int)arg1);
396 		return (0);
397 	}
398 
399 #ifdef	__amd64
400 	case B_S10_FSREGCORRECTION:
401 		/*
402 		 * This subcommand exists so that the SYS_lwp_private and
403 		 * SYS_lwp_create syscalls can manually set the current thread's
404 		 * %fs register to the legacy S10 selector value for 64-bit x86
405 		 * processes.
406 		 */
407 		s10_amd64_correct_fsreg(ttolwp(curthread));
408 		return (0);
409 #endif	/* __amd64 */
410 	}
411 
412 	return (EINVAL);
413 }
414 
415 /*
416  * Copy the per-process brand data from a parent proc to a child.
417  */
418 void
419 s10_copy_procdata(proc_t *child, proc_t *parent)
420 {
421 	s10_proc_data_t	*spd;
422 
423 	ASSERT(parent->p_brand == &s10_brand);
424 	ASSERT(child->p_brand == &s10_brand);
425 	ASSERT(parent->p_brand_data != NULL);
426 	ASSERT(child->p_brand_data == NULL);
427 
428 	/* Just duplicate all the proc data of the parent for the child */
429 	spd = kmem_alloc(sizeof (s10_proc_data_t), KM_SLEEP);
430 	bcopy(parent->p_brand_data, spd, sizeof (s10_proc_data_t));
431 	child->p_brand_data = spd;
432 }
433 
434 /*ARGSUSED*/
435 void
436 s10_proc_exit(struct proc *p, klwp_t *l)
437 {
438 	ASSERT(p->p_brand == &s10_brand);
439 	ASSERT(p->p_brand_data != NULL);
440 
441 	/*
442 	 * We should only be called from proc_exit(), when we know that
443 	 * process is single-threaded.
444 	 */
445 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
446 
447 	/* upon exit, free our lwp brand data */
448 	(void) s10_freelwp(ttolwp(curthread));
449 
450 	/* upon exit, free our proc brand data */
451 	kmem_free(p->p_brand_data, sizeof (s10_proc_data_t));
452 	p->p_brand_data = NULL;
453 }
454 
455 void
456 s10_exec()
457 {
458 	s10_proc_data_t	*spd = curproc->p_brand_data;
459 
460 	ASSERT(curproc->p_brand == &s10_brand);
461 	ASSERT(curproc->p_brand_data != NULL);
462 	ASSERT(ttolwp(curthread)->lwp_brand != NULL);
463 
464 	/*
465 	 * We should only be called from exec(), when we know the process
466 	 * is single-threaded.
467 	 */
468 	ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw);
469 
470 	/* Upon exec, reset our lwp brand data. */
471 	(void) s10_freelwp(ttolwp(curthread));
472 	(void) s10_initlwp(ttolwp(curthread));
473 
474 	/*
475 	 * Upon exec, reset all the proc brand data, except for the elf
476 	 * data associated with the executable we are exec'ing.
477 	 */
478 	spd->spd_handler = NULL;
479 }
480 
481 /*ARGSUSED*/
482 int
483 s10_initlwp(klwp_t *l)
484 {
485 	ASSERT(l->lwp_procp->p_brand == &s10_brand);
486 	ASSERT(l->lwp_procp->p_brand_data != NULL);
487 	ASSERT(l->lwp_brand == NULL);
488 	l->lwp_brand = (void *)-1;
489 	return (0);
490 }
491 
492 /*ARGSUSED*/
493 void
494 s10_forklwp(klwp_t *p, klwp_t *c)
495 {
496 	ASSERT(p->lwp_procp->p_brand == &s10_brand);
497 	ASSERT(c->lwp_procp->p_brand == &s10_brand);
498 
499 	ASSERT(p->lwp_procp->p_brand_data != NULL);
500 	ASSERT(c->lwp_procp->p_brand_data != NULL);
501 
502 	/* Both LWPs have already had been initialized via s10_initlwp() */
503 	ASSERT(p->lwp_brand != NULL);
504 	ASSERT(c->lwp_brand != NULL);
505 
506 #ifdef	__amd64
507 	/*
508 	 * Only correct the child's %fs register if the parent's %fs register
509 	 * is LWPFS_SEL.  If the parent's %fs register is zero, then the Solaris
510 	 * 10 environment that we're emulating uses a version of libc that
511 	 * works when %fs is zero (i.e., it contains backports of CRs 6467491
512 	 * and 6501650).
513 	 */
514 	if (p->lwp_pcb.pcb_fs == LWPFS_SEL)
515 		s10_amd64_correct_fsreg(c);
516 #endif	/* __amd64 */
517 }
518 
519 /*ARGSUSED*/
520 void
521 s10_freelwp(klwp_t *l)
522 {
523 	ASSERT(l->lwp_procp->p_brand == &s10_brand);
524 	ASSERT(l->lwp_procp->p_brand_data != NULL);
525 	ASSERT(l->lwp_brand != NULL);
526 	l->lwp_brand = NULL;
527 }
528 
529 /*ARGSUSED*/
530 void
531 s10_lwpexit(klwp_t *l)
532 {
533 	ASSERT(l->lwp_procp->p_brand == &s10_brand);
534 	ASSERT(l->lwp_procp->p_brand_data != NULL);
535 	ASSERT(l->lwp_brand != NULL);
536 
537 	/*
538 	 * We should never be called for the last thread in a process.
539 	 * (That case is handled by s10_proc_exit().)  There for this lwp
540 	 * must be exiting from a multi-threaded process.
541 	 */
542 	ASSERT(l->lwp_procp->p_tlist != l->lwp_procp->p_tlist->t_forw);
543 
544 	l->lwp_brand = NULL;
545 }
546 
547 void
548 s10_free_brand_data(zone_t *zone)
549 {
550 	kmem_free(zone->zone_brand_data, sizeof (s10_zone_data_t));
551 }
552 
553 void
554 s10_init_brand_data(zone_t *zone)
555 {
556 	ASSERT(zone->zone_brand == &s10_brand);
557 	ASSERT(zone->zone_brand_data == NULL);
558 	zone->zone_brand_data = kmem_zalloc(sizeof (s10_zone_data_t), KM_SLEEP);
559 }
560 
561 #if defined(_LP64)
562 static void
563 Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst)
564 {
565 	bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident));
566 	dst->e_type =		src->e_type;
567 	dst->e_machine =	src->e_machine;
568 	dst->e_version =	src->e_version;
569 	dst->e_entry =		src->e_entry;
570 	dst->e_phoff =		src->e_phoff;
571 	dst->e_shoff =		src->e_shoff;
572 	dst->e_flags =		src->e_flags;
573 	dst->e_ehsize =		src->e_ehsize;
574 	dst->e_phentsize =	src->e_phentsize;
575 	dst->e_phnum =		src->e_phnum;
576 	dst->e_shentsize =	src->e_shentsize;
577 	dst->e_shnum =		src->e_shnum;
578 	dst->e_shstrndx =	src->e_shstrndx;
579 }
580 #endif /* _LP64 */
581 
582 int
583 s10_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
584 	int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
585 	int brand_action)
586 {
587 	vnode_t		*nvp;
588 	Ehdr		ehdr;
589 	Addr		uphdr_vaddr;
590 	intptr_t	voffset;
591 	int		interp;
592 	int		i, err;
593 	struct execenv	env;
594 	struct user	*up = PTOU(curproc);
595 	s10_proc_data_t	*spd;
596 	s10_elf_data_t	sed, *sedp;
597 	char		*linker;
598 	uintptr_t	lddata; /* lddata of executable's linker */
599 
600 	ASSERT(curproc->p_brand == &s10_brand);
601 	ASSERT(curproc->p_brand_data != NULL);
602 
603 	spd = (s10_proc_data_t *)curproc->p_brand_data;
604 	sedp = &spd->spd_elf_data;
605 
606 	args->brandname = S10_BRANDNAME;
607 
608 	/*
609 	 * We will exec the brand library and then map in the target
610 	 * application and (optionally) the brand's default linker.
611 	 */
612 	if (args->to_model == DATAMODEL_NATIVE) {
613 		args->emulator = S10_LIB;
614 		linker = S10_LINKER;
615 #if defined(_LP64)
616 	} else {
617 		args->emulator = S10_LIB32;
618 		linker = S10_LINKER32;
619 #endif /* _LP64 */
620 	}
621 
622 	if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW, NULLVPP,
623 	    &nvp)) != 0) {
624 		uprintf("%s: not found.", args->emulator);
625 		return (err);
626 	}
627 
628 	if (args->to_model == DATAMODEL_NATIVE) {
629 		err = elfexec(nvp, uap, args, idatap, level + 1, execsz,
630 		    setid, exec_file, cred, brand_action);
631 #if defined(_LP64)
632 	} else {
633 		err = elf32exec(nvp, uap, args, idatap, level + 1, execsz,
634 		    setid, exec_file, cred, brand_action);
635 #endif /* _LP64 */
636 	}
637 	VN_RELE(nvp);
638 	if (err != 0)
639 		return (err);
640 
641 	/*
642 	 * The u_auxv vectors are set up by elfexec to point to the brand
643 	 * emulation library and linker.  Save these so they can be copied to
644 	 * the specific brand aux vectors.
645 	 */
646 	bzero(&sed, sizeof (sed));
647 	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
648 		switch (up->u_auxv[i].a_type) {
649 		case AT_SUN_LDDATA:
650 			sed.sed_lddata = up->u_auxv[i].a_un.a_val;
651 			break;
652 		case AT_BASE:
653 			sed.sed_base = up->u_auxv[i].a_un.a_val;
654 			break;
655 		case AT_ENTRY:
656 			sed.sed_entry = up->u_auxv[i].a_un.a_val;
657 			break;
658 		case AT_PHDR:
659 			sed.sed_phdr = up->u_auxv[i].a_un.a_val;
660 			break;
661 		case AT_PHENT:
662 			sed.sed_phent = up->u_auxv[i].a_un.a_val;
663 			break;
664 		case AT_PHNUM:
665 			sed.sed_phnum = up->u_auxv[i].a_un.a_val;
666 			break;
667 		default:
668 			break;
669 		}
670 	}
671 	/* Make sure the emulator has an entry point */
672 	ASSERT(sed.sed_entry != NULL);
673 	ASSERT(sed.sed_phdr != NULL);
674 
675 	bzero(&env, sizeof (env));
676 	if (args->to_model == DATAMODEL_NATIVE) {
677 		err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr, &voffset,
678 		    exec_file, &interp, &env.ex_bssbase, &env.ex_brkbase,
679 		    &env.ex_brksize, NULL);
680 #if defined(_LP64)
681 	} else {
682 		Elf32_Ehdr ehdr32;
683 		Elf32_Addr uphdr_vaddr32;
684 		err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
685 		    &voffset, exec_file, &interp, &env.ex_bssbase,
686 		    &env.ex_brkbase, &env.ex_brksize, NULL);
687 		Ehdr32to64(&ehdr32, &ehdr);
688 		if (uphdr_vaddr32 == (Elf32_Addr)-1)
689 			uphdr_vaddr = (Addr)-1;
690 		else
691 			uphdr_vaddr = uphdr_vaddr32;
692 #endif /* _LP64 */
693 	}
694 	if (err != 0)
695 		return (err);
696 
697 	/*
698 	 * Save off the important properties of the executable. The brand
699 	 * library will ask us for this data later, when it is initializing
700 	 * and getting ready to transfer control to the brand application.
701 	 */
702 	if (uphdr_vaddr == (Addr)-1)
703 		sedp->sed_phdr = voffset + ehdr.e_phoff;
704 	else
705 		sedp->sed_phdr = voffset + uphdr_vaddr;
706 	sedp->sed_entry = voffset + ehdr.e_entry;
707 	sedp->sed_phent = ehdr.e_phentsize;
708 	sedp->sed_phnum = ehdr.e_phnum;
709 
710 	if (interp) {
711 		if (ehdr.e_type == ET_DYN) {
712 			/*
713 			 * This is a shared object executable, so we need to
714 			 * pick a reasonable place to put the heap. Just don't
715 			 * use the first page.
716 			 */
717 			env.ex_brkbase = (caddr_t)PAGESIZE;
718 			env.ex_bssbase = (caddr_t)PAGESIZE;
719 		}
720 
721 		/*
722 		 * If the program needs an interpreter (most do), map it in and
723 		 * store relevant information about it in the aux vector, where
724 		 * the brand library can find it.
725 		 */
726 		if ((err = lookupname(linker, UIO_SYSSPACE,
727 		    FOLLOW, NULLVPP, &nvp)) != 0) {
728 			uprintf("%s: not found.", S10_LINKER);
729 			return (err);
730 		}
731 		if (args->to_model == DATAMODEL_NATIVE) {
732 			err = mapexec_brand(nvp, args, &ehdr,
733 			    &uphdr_vaddr, &voffset, exec_file, &interp,
734 			    NULL, NULL, NULL, &lddata);
735 #if defined(_LP64)
736 		} else {
737 			Elf32_Ehdr ehdr32;
738 			Elf32_Addr uphdr_vaddr32;
739 			err = mapexec32_brand(nvp, args, &ehdr32,
740 			    &uphdr_vaddr32, &voffset, exec_file, &interp,
741 			    NULL, NULL, NULL, &lddata);
742 			Ehdr32to64(&ehdr32, &ehdr);
743 			if (uphdr_vaddr32 == (Elf32_Addr)-1)
744 				uphdr_vaddr = (Addr)-1;
745 			else
746 				uphdr_vaddr = uphdr_vaddr32;
747 #endif /* _LP64 */
748 		}
749 		VN_RELE(nvp);
750 		if (err != 0)
751 			return (err);
752 
753 		/*
754 		 * Now that we know the base address of the brand's linker,
755 		 * place it in the aux vector.
756 		 */
757 		sedp->sed_base = voffset;
758 		sedp->sed_ldentry = voffset + ehdr.e_entry;
759 		sedp->sed_lddata = voffset + lddata;
760 	} else {
761 		/*
762 		 * This program has no interpreter. The brand library will
763 		 * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector,
764 		 * so in this case, put the entry point of the main executable
765 		 * there.
766 		 */
767 		if (ehdr.e_type == ET_EXEC) {
768 			/*
769 			 * An executable with no interpreter, this must be a
770 			 * statically linked executable, which means we loaded
771 			 * it at the address specified in the elf header, in
772 			 * which case the e_entry field of the elf header is an
773 			 * absolute address.
774 			 */
775 			sedp->sed_ldentry = ehdr.e_entry;
776 			sedp->sed_entry = ehdr.e_entry;
777 			sedp->sed_lddata = NULL;
778 			sedp->sed_base = NULL;
779 		} else {
780 			/*
781 			 * A shared object with no interpreter, we use the
782 			 * calculated address from above.
783 			 */
784 			sedp->sed_ldentry = sedp->sed_entry;
785 			sedp->sed_entry = NULL;
786 			sedp->sed_phdr = NULL;
787 			sedp->sed_phent = NULL;
788 			sedp->sed_phnum = NULL;
789 			sedp->sed_lddata = NULL;
790 			sedp->sed_base = voffset;
791 
792 			if (ehdr.e_type == ET_DYN) {
793 				/*
794 				 * Delay setting the brkbase until the first
795 				 * call to brk(); see elfexec() for details.
796 				 */
797 				env.ex_bssbase = (caddr_t)0;
798 				env.ex_brkbase = (caddr_t)0;
799 				env.ex_brksize = 0;
800 			}
801 		}
802 	}
803 
804 	env.ex_magic = elfmagic;
805 	env.ex_vp = vp;
806 	setexecenv(&env);
807 
808 	/*
809 	 * It's time to manipulate the process aux vectors.  First
810 	 * we need to update the AT_SUN_AUXFLAGS aux vector to set
811 	 * the AF_SUN_NOPLM flag.
812 	 */
813 	if (args->to_model == DATAMODEL_NATIVE) {
814 		auxv_t		auxflags_auxv;
815 
816 		if (copyin(args->auxp_auxflags, &auxflags_auxv,
817 		    sizeof (auxflags_auxv)) != 0)
818 			return (EFAULT);
819 
820 		ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS);
821 		auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM;
822 		if (copyout(&auxflags_auxv, args->auxp_auxflags,
823 		    sizeof (auxflags_auxv)) != 0)
824 			return (EFAULT);
825 #if defined(_LP64)
826 	} else {
827 		auxv32_t	auxflags_auxv32;
828 
829 		if (copyin(args->auxp_auxflags, &auxflags_auxv32,
830 		    sizeof (auxflags_auxv32)) != 0)
831 			return (EFAULT);
832 
833 		ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS);
834 		auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM;
835 		if (copyout(&auxflags_auxv32, args->auxp_auxflags,
836 		    sizeof (auxflags_auxv32)) != 0)
837 			return (EFAULT);
838 #endif /* _LP64 */
839 	}
840 
841 	/* Second, copy out the brand specific aux vectors. */
842 	if (args->to_model == DATAMODEL_NATIVE) {
843 		auxv_t s10_auxv[] = {
844 		    { AT_SUN_BRAND_AUX1, 0 },
845 		    { AT_SUN_BRAND_AUX2, 0 },
846 		    { AT_SUN_BRAND_AUX3, 0 }
847 		};
848 
849 		ASSERT(s10_auxv[0].a_type == AT_SUN_BRAND_S10_LDDATA);
850 		s10_auxv[0].a_un.a_val = sed.sed_lddata;
851 
852 		if (copyout(&s10_auxv, args->auxp_brand,
853 		    sizeof (s10_auxv)) != 0)
854 			return (EFAULT);
855 #if defined(_LP64)
856 	} else {
857 		auxv32_t s10_auxv32[] = {
858 		    { AT_SUN_BRAND_AUX1, 0 },
859 		    { AT_SUN_BRAND_AUX2, 0 },
860 		    { AT_SUN_BRAND_AUX3, 0 }
861 		};
862 
863 		ASSERT(s10_auxv32[0].a_type == AT_SUN_BRAND_S10_LDDATA);
864 		s10_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata;
865 		if (copyout(&s10_auxv32, args->auxp_brand,
866 		    sizeof (s10_auxv32)) != 0)
867 			return (EFAULT);
868 #endif /* _LP64 */
869 	}
870 
871 	/*
872 	 * Third, the the /proc aux vectors set up by elfexec() point to brand
873 	 * emulation library and it's linker.  Copy these to the /proc brand
874 	 * specific aux vector, and update the regular /proc aux vectors to
875 	 * point to the executable (and it's linker).  This will enable
876 	 * debuggers to access the executable via the usual /proc or elf notes
877 	 * aux vectors.
878 	 *
879 	 * The brand emulation library's linker will get it's aux vectors off
880 	 * the stack, and then update the stack with the executable's aux
881 	 * vectors before jumping to the executable's linker.
882 	 *
883 	 * Debugging the brand emulation library must be done from
884 	 * the global zone, where the librtld_db module knows how to fetch the
885 	 * brand specific aux vectors to access the brand emulation libraries
886 	 * linker.
887 	 */
888 	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
889 		ulong_t val;
890 
891 		switch (up->u_auxv[i].a_type) {
892 		case AT_SUN_BRAND_S10_LDDATA:
893 			up->u_auxv[i].a_un.a_val = sed.sed_lddata;
894 			continue;
895 		case AT_BASE:
896 			val = sedp->sed_base;
897 			break;
898 		case AT_ENTRY:
899 			val = sedp->sed_entry;
900 			break;
901 		case AT_PHDR:
902 			val = sedp->sed_phdr;
903 			break;
904 		case AT_PHENT:
905 			val = sedp->sed_phent;
906 			break;
907 		case AT_PHNUM:
908 			val = sedp->sed_phnum;
909 			break;
910 		case AT_SUN_LDDATA:
911 			val = sedp->sed_lddata;
912 			break;
913 		default:
914 			continue;
915 		}
916 
917 		up->u_auxv[i].a_un.a_val = val;
918 		if (val == NULL) {
919 			/* Hide the entry for static binaries */
920 			up->u_auxv[i].a_type = AT_IGNORE;
921 		}
922 	}
923 
924 	/*
925 	 * The last thing we do here is clear spd->spd_handler.  This is
926 	 * important because if we're already a branded process and if this
927 	 * exec succeeds, there is a window between when the exec() first
928 	 * returns to the userland of the new process and when our brand
929 	 * library get's initialized, during which we don't want system
930 	 * calls to be re-directed to our brand library since it hasn't
931 	 * been initialized yet.
932 	 */
933 	spd->spd_handler = NULL;
934 
935 	return (0);
936 }
937 
938 
939 int
940 _init(void)
941 {
942 	int err;
943 
944 	/*
945 	 * Set up the table indicating which system calls we want to
946 	 * interpose on.  We should probably build this automatically from
947 	 * a list of system calls that is shared with the user-space
948 	 * library.
949 	 */
950 	s10_emulation_table = kmem_zalloc(NSYSCALL, KM_SLEEP);
951 	s10_emulation_table[SYS_exec] = 1;			/*  11 */
952 	s10_emulation_table[SYS_ioctl] = 1;			/*  54 */
953 	s10_emulation_table[SYS_execve] = 1;			/*  59 */
954 	s10_emulation_table[SYS_acctctl] = 1;			/*  71 */
955 	s10_emulation_table[S10_SYS_issetugid] = 1;		/*  75 */
956 	s10_emulation_table[SYS_getdents] = 1;			/*  81 */
957 	s10_emulation_table[SYS_uname] = 1;			/* 135 */
958 	s10_emulation_table[SYS_systeminfo] = 1;		/* 139 */
959 #ifdef	__amd64
960 	s10_emulation_table[SYS_lwp_create] = 1;		/* 159 */
961 	s10_emulation_table[SYS_lwp_private] = 1;		/* 166 */
962 #endif	/* __amd64 */
963 	s10_emulation_table[SYS_pwrite] = 1;			/* 174 */
964 	s10_emulation_table[SYS_auditsys] = 1;			/* 186 */
965 	s10_emulation_table[SYS_sigqueue] = 1;			/* 190 */
966 	s10_emulation_table[SYS_lwp_mutex_timedlock] = 1;	/* 210 */
967 	s10_emulation_table[SYS_getdents64] = 1;		/* 213 */
968 	s10_emulation_table[SYS_pwrite64] = 1;			/* 223 */
969 	s10_emulation_table[SYS_zone] = 1;			/* 227 */
970 	s10_emulation_table[SYS_lwp_mutex_trylock] = 1;		/* 251 */
971 
972 	err = mod_install(&modlinkage);
973 	if (err) {
974 		cmn_err(CE_WARN, "Couldn't install brand module");
975 		kmem_free(s10_emulation_table, NSYSCALL);
976 	}
977 
978 	return (err);
979 }
980 
981 int
982 _info(struct modinfo *modinfop)
983 {
984 	return (mod_info(&modlinkage, modinfop));
985 }
986 
987 int
988 _fini(void)
989 {
990 	int err;
991 
992 	/*
993 	 * If there are any zones using this brand, we can't allow it to be
994 	 * unloaded.
995 	 */
996 	if (brand_zone_count(&s10_brand))
997 		return (EBUSY);
998 
999 	kmem_free(s10_emulation_table, NSYSCALL);
1000 	s10_emulation_table = NULL;
1001 
1002 	err = mod_remove(&modlinkage);
1003 	if (err)
1004 		cmn_err(CE_WARN, "Couldn't unload s10 brand module");
1005 
1006 	return (err);
1007 }
1008