xref: /illumos-gate/usr/src/uts/common/exec/elf/elf_notes.c (revision 5f82aa32fbc5dc2c59bca6ff315f44a4c4c9ea86)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright 2012 DEY Storage Systems, Inc.  All rights reserved.
29  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/thread.h>
35 #include <sys/sysmacros.h>
36 #include <sys/signal.h>
37 #include <sys/cred.h>
38 #include <sys/priv.h>
39 #include <sys/user.h>
40 #include <sys/file.h>
41 #include <sys/errno.h>
42 #include <sys/vnode.h>
43 #include <sys/mode.h>
44 #include <sys/vfs.h>
45 #include <sys/mman.h>
46 #include <sys/kmem.h>
47 #include <sys/proc.h>
48 #include <sys/pathname.h>
49 #include <sys/cmn_err.h>
50 #include <sys/systm.h>
51 #include <sys/elf.h>
52 #include <sys/vmsystm.h>
53 #include <sys/debug.h>
54 #include <sys/procfs.h>
55 #include <sys/regset.h>
56 #include <sys/auxv.h>
57 #include <sys/exec.h>
58 #include <sys/prsystm.h>
59 #include <sys/utsname.h>
60 #include <sys/zone.h>
61 #include <vm/as.h>
62 #include <vm/rm.h>
63 #include <sys/modctl.h>
64 #include <sys/systeminfo.h>
65 #include <sys/machelf.h>
66 #include <sys/sunddi.h>
67 #include "elf_impl.h"
68 #if defined(__i386) || defined(__i386_COMPAT)
69 #include <sys/sysi86.h>
70 #endif
71 
72 void
73 setup_note_header(Phdr *v, proc_t *p)
74 {
75 	int nlwp = p->p_lwpcnt;
76 	int nzomb = p->p_zombcnt;
77 	int nfd;
78 	size_t size;
79 	prcred_t *pcrp;
80 	uf_info_t *fip;
81 	uf_entry_t *ufp;
82 	int fd;
83 
84 	fip = P_FINFO(p);
85 	nfd = 0;
86 	mutex_enter(&fip->fi_lock);
87 	for (fd = 0; fd < fip->fi_nfiles; fd++) {
88 		UF_ENTER(ufp, fip, fd);
89 		if ((ufp->uf_file != NULL) && (ufp->uf_file->f_count > 0))
90 			nfd++;
91 		UF_EXIT(ufp);
92 	}
93 	mutex_exit(&fip->fi_lock);
94 
95 	v[0].p_type = PT_NOTE;
96 	v[0].p_flags = PF_R;
97 	v[0].p_filesz = (sizeof (Note) * (10 + 2 * nlwp + nzomb + nfd))
98 	    + roundup(sizeof (psinfo_t), sizeof (Word))
99 	    + roundup(sizeof (pstatus_t), sizeof (Word))
100 	    + roundup(prgetprivsize(), sizeof (Word))
101 	    + roundup(priv_get_implinfo_size(), sizeof (Word))
102 	    + roundup(strlen(platform) + 1, sizeof (Word))
103 	    + roundup(strlen(p->p_zone->zone_name) + 1, sizeof (Word))
104 	    + roundup(__KERN_NAUXV_IMPL * sizeof (aux_entry_t), sizeof (Word))
105 	    + roundup(sizeof (utsname), sizeof (Word))
106 	    + roundup(sizeof (core_content_t), sizeof (Word))
107 	    + roundup(sizeof (prsecflags_t), sizeof (Word))
108 	    + (nlwp + nzomb) * roundup(sizeof (lwpsinfo_t), sizeof (Word))
109 	    + nlwp * roundup(sizeof (lwpstatus_t), sizeof (Word))
110 	    + nfd * roundup(sizeof (prfdinfo_t), sizeof (Word));
111 
112 	if (curproc->p_agenttp != NULL) {
113 		v[0].p_filesz += sizeof (Note) +
114 		    roundup(sizeof (psinfo_t), sizeof (Word));
115 	}
116 
117 	size = sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1);
118 	pcrp = kmem_alloc(size, KM_SLEEP);
119 	prgetcred(p, pcrp);
120 	if (pcrp->pr_ngroups != 0) {
121 		v[0].p_filesz += sizeof (Note) + roundup(sizeof (prcred_t) +
122 		    sizeof (gid_t) * (pcrp->pr_ngroups - 1), sizeof (Word));
123 	} else {
124 		v[0].p_filesz += sizeof (Note) +
125 		    roundup(sizeof (prcred_t), sizeof (Word));
126 	}
127 	kmem_free(pcrp, size);
128 
129 
130 #if defined(__i386) || defined(__i386_COMPAT)
131 	mutex_enter(&p->p_ldtlock);
132 	size = prnldt(p) * sizeof (struct ssd);
133 	mutex_exit(&p->p_ldtlock);
134 	if (size != 0)
135 		v[0].p_filesz += sizeof (Note) + roundup(size, sizeof (Word));
136 #endif	/* __i386 || __i386_COMPAT */
137 
138 	if ((size = prhasx(p)? prgetprxregsize(p) : 0) != 0)
139 		v[0].p_filesz += nlwp * sizeof (Note)
140 		    + nlwp * roundup(size, sizeof (Word));
141 
142 #if defined(__sparc)
143 	/*
144 	 * Figure out the number and sizes of register windows.
145 	 */
146 	{
147 		kthread_t *t = p->p_tlist;
148 		do {
149 			if ((size = prnwindows(ttolwp(t))) != 0) {
150 				size = sizeof (gwindows_t) -
151 				    (SPARC_MAXREGWINDOW - size) *
152 				    sizeof (struct rwindow);
153 				v[0].p_filesz += sizeof (Note) +
154 				    roundup(size, sizeof (Word));
155 			}
156 		} while ((t = t->t_forw) != p->p_tlist);
157 	}
158 	/*
159 	 * Space for the Ancillary State Registers.
160 	 */
161 	if (p->p_model == DATAMODEL_LP64)
162 		v[0].p_filesz += nlwp * sizeof (Note)
163 		    + nlwp * roundup(sizeof (asrset_t), sizeof (Word));
164 #endif /* __sparc */
165 }
166 
167 int
168 write_elfnotes(proc_t *p, int sig, vnode_t *vp, offset_t offset,
169     rlim64_t rlimit, cred_t *credp, core_content_t content)
170 {
171 	union {
172 		psinfo_t	psinfo;
173 		pstatus_t	pstatus;
174 		lwpsinfo_t	lwpsinfo;
175 		lwpstatus_t	lwpstatus;
176 #if defined(__sparc)
177 		gwindows_t	gwindows;
178 		asrset_t	asrset;
179 #endif /* __sparc */
180 		char		xregs[1];
181 		aux_entry_t	auxv[__KERN_NAUXV_IMPL];
182 		prcred_t	pcred;
183 		prpriv_t	ppriv;
184 		priv_impl_info_t prinfo;
185 		struct utsname	uts;
186 		prsecflags_t	psecflags;
187 	} *bigwad;
188 
189 	size_t xregsize = prhasx(p)? prgetprxregsize(p) : 0;
190 	size_t crsize = sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1);
191 	size_t psize = prgetprivsize();
192 	size_t bigsize = MAX(psize, MAX(sizeof (*bigwad),
193 	    MAX(xregsize, crsize)));
194 
195 	priv_impl_info_t *prii;
196 
197 	lwpdir_t *ldp;
198 	lwpent_t *lep;
199 	kthread_t *t;
200 	klwp_t *lwp;
201 	user_t *up;
202 	int i;
203 	int nlwp;
204 	int nzomb;
205 	int error;
206 	uchar_t oldsig;
207 	uf_info_t *fip;
208 	int fd;
209 	vnode_t *vroot;
210 
211 #if defined(__i386) || defined(__i386_COMPAT)
212 	struct ssd *ssd;
213 	size_t ssdsize;
214 #endif	/* __i386 || __i386_COMPAT */
215 
216 	bigsize = MAX(bigsize, priv_get_implinfo_size());
217 
218 	bigwad = kmem_alloc(bigsize, KM_SLEEP);
219 
220 	/*
221 	 * The order of the elfnote entries should be same here
222 	 * and in the gcore(1) command.  Synchronization is
223 	 * needed between the kernel and gcore(1).
224 	 */
225 
226 	/*
227 	 * Get the psinfo, and set the wait status to indicate that a core was
228 	 * dumped.  We have to forge this since p->p_wcode is not set yet.
229 	 */
230 	mutex_enter(&p->p_lock);
231 	prgetpsinfo(p, &bigwad->psinfo);
232 	mutex_exit(&p->p_lock);
233 	bigwad->psinfo.pr_wstat = wstat(CLD_DUMPED, sig);
234 
235 	error = elfnote(vp, &offset, NT_PSINFO, sizeof (bigwad->psinfo),
236 	    (caddr_t)&bigwad->psinfo, rlimit, credp);
237 	if (error)
238 		goto done;
239 
240 	/*
241 	 * Modify t_whystop and lwp_cursig so it appears that the current LWP
242 	 * is stopped after faulting on the signal that caused the core dump.
243 	 * As a result, prgetstatus() will record that signal, the saved
244 	 * lwp_siginfo, and its signal handler in the core file status.  We
245 	 * restore lwp_cursig in case a subsequent signal was received while
246 	 * dumping core.
247 	 */
248 	mutex_enter(&p->p_lock);
249 	lwp = ttolwp(curthread);
250 
251 	oldsig = lwp->lwp_cursig;
252 	lwp->lwp_cursig = (uchar_t)sig;
253 	curthread->t_whystop = PR_FAULTED;
254 
255 	prgetstatus(p, &bigwad->pstatus, p->p_zone);
256 	bigwad->pstatus.pr_lwp.pr_why = 0;
257 
258 	curthread->t_whystop = 0;
259 	lwp->lwp_cursig = oldsig;
260 	mutex_exit(&p->p_lock);
261 
262 	error = elfnote(vp, &offset, NT_PSTATUS, sizeof (bigwad->pstatus),
263 	    (caddr_t)&bigwad->pstatus, rlimit, credp);
264 	if (error)
265 		goto done;
266 
267 	error = elfnote(vp, &offset, NT_PLATFORM, strlen(platform) + 1,
268 	    platform, rlimit, credp);
269 	if (error)
270 		goto done;
271 
272 	up = PTOU(p);
273 	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
274 		bigwad->auxv[i].a_type = up->u_auxv[i].a_type;
275 		bigwad->auxv[i].a_un.a_val = up->u_auxv[i].a_un.a_val;
276 	}
277 	error = elfnote(vp, &offset, NT_AUXV, sizeof (bigwad->auxv),
278 	    (caddr_t)bigwad->auxv, rlimit, credp);
279 	if (error)
280 		goto done;
281 
282 	bcopy(&utsname, &bigwad->uts, sizeof (struct utsname));
283 	if (!INGLOBALZONE(p)) {
284 		bcopy(p->p_zone->zone_nodename, &bigwad->uts.nodename,
285 		    _SYS_NMLN);
286 	}
287 	error = elfnote(vp, &offset, NT_UTSNAME, sizeof (struct utsname),
288 	    (caddr_t)&bigwad->uts, rlimit, credp);
289 	if (error)
290 		goto done;
291 
292 	prgetsecflags(p, &bigwad->psecflags);
293 	error = elfnote(vp, &offset, NT_SECFLAGS, sizeof (prsecflags_t),
294 	    (caddr_t)&bigwad->psecflags, rlimit, credp);
295 	if (error)
296 		goto done;
297 
298 	prgetcred(p, &bigwad->pcred);
299 
300 	if (bigwad->pcred.pr_ngroups != 0) {
301 		crsize = sizeof (prcred_t) +
302 		    sizeof (gid_t) * (bigwad->pcred.pr_ngroups - 1);
303 	} else
304 		crsize = sizeof (prcred_t);
305 
306 	error = elfnote(vp, &offset, NT_PRCRED, crsize,
307 	    (caddr_t)&bigwad->pcred, rlimit, credp);
308 	if (error)
309 		goto done;
310 
311 	error = elfnote(vp, &offset, NT_CONTENT, sizeof (core_content_t),
312 	    (caddr_t)&content, rlimit, credp);
313 	if (error)
314 		goto done;
315 
316 	prgetpriv(p, &bigwad->ppriv);
317 
318 	error = elfnote(vp, &offset, NT_PRPRIV, psize,
319 	    (caddr_t)&bigwad->ppriv, rlimit, credp);
320 	if (error)
321 		goto done;
322 
323 	prii = priv_hold_implinfo();
324 	error = elfnote(vp, &offset, NT_PRPRIVINFO, priv_get_implinfo_size(),
325 	    (caddr_t)prii, rlimit, credp);
326 	priv_release_implinfo();
327 	if (error)
328 		goto done;
329 
330 	/* zone can't go away as long as process exists */
331 	error = elfnote(vp, &offset, NT_ZONENAME,
332 	    strlen(p->p_zone->zone_name) + 1, p->p_zone->zone_name,
333 	    rlimit, credp);
334 	if (error)
335 		goto done;
336 
337 
338 	/* open file table */
339 	vroot = PTOU(p)->u_rdir;
340 	if (vroot == NULL)
341 		vroot = rootdir;
342 
343 	VN_HOLD(vroot);
344 
345 	fip = P_FINFO(p);
346 
347 	for (fd = 0; fd < fip->fi_nfiles; fd++) {
348 		uf_entry_t *ufp;
349 		vnode_t *fvp;
350 		struct file *fp;
351 		vattr_t vattr;
352 		prfdinfo_t fdinfo;
353 
354 		bzero(&fdinfo, sizeof (fdinfo));
355 
356 		mutex_enter(&fip->fi_lock);
357 		UF_ENTER(ufp, fip, fd);
358 		if (((fp = ufp->uf_file) == NULL) || (fp->f_count < 1)) {
359 			UF_EXIT(ufp);
360 			mutex_exit(&fip->fi_lock);
361 			continue;
362 		}
363 
364 		fdinfo.pr_fd = fd;
365 		fdinfo.pr_fdflags = ufp->uf_flag;
366 		fdinfo.pr_fileflags = fp->f_flag2;
367 		fdinfo.pr_fileflags <<= 16;
368 		fdinfo.pr_fileflags |= fp->f_flag;
369 		if ((fdinfo.pr_fileflags & (FSEARCH | FEXEC)) == 0)
370 			fdinfo.pr_fileflags += FOPEN;
371 		fdinfo.pr_offset = fp->f_offset;
372 
373 
374 		fvp = fp->f_vnode;
375 		VN_HOLD(fvp);
376 		UF_EXIT(ufp);
377 		mutex_exit(&fip->fi_lock);
378 
379 		/*
380 		 * There are some vnodes that have no corresponding
381 		 * path.  Its reasonable for this to fail, in which
382 		 * case the path will remain an empty string.
383 		 */
384 		(void) vnodetopath(vroot, fvp, fdinfo.pr_path,
385 		    sizeof (fdinfo.pr_path), credp);
386 
387 		if (VOP_GETATTR(fvp, &vattr, 0, credp, NULL) != 0) {
388 			/*
389 			 * Try to write at least a subset of information
390 			 */
391 			fdinfo.pr_major = 0;
392 			fdinfo.pr_minor = 0;
393 			fdinfo.pr_ino = 0;
394 			fdinfo.pr_mode = 0;
395 			fdinfo.pr_uid = (uid_t)-1;
396 			fdinfo.pr_gid = (gid_t)-1;
397 			fdinfo.pr_rmajor = 0;
398 			fdinfo.pr_rminor = 0;
399 			fdinfo.pr_size = -1;
400 
401 			error = elfnote(vp, &offset, NT_FDINFO,
402 			    sizeof (fdinfo), &fdinfo, rlimit, credp);
403 			VN_RELE(fvp);
404 			if (error) {
405 				VN_RELE(vroot);
406 				goto done;
407 			}
408 			continue;
409 		}
410 
411 		if (fvp->v_type == VSOCK)
412 			fdinfo.pr_fileflags |= sock_getfasync(fvp);
413 
414 		VN_RELE(fvp);
415 
416 		/*
417 		 * This logic mirrors fstat(), which we cannot use
418 		 * directly, as it calls copyout().
419 		 */
420 		fdinfo.pr_major = getmajor(vattr.va_fsid);
421 		fdinfo.pr_minor = getminor(vattr.va_fsid);
422 		fdinfo.pr_ino = (ino64_t)vattr.va_nodeid;
423 		fdinfo.pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode;
424 		fdinfo.pr_uid = vattr.va_uid;
425 		fdinfo.pr_gid = vattr.va_gid;
426 		fdinfo.pr_rmajor = getmajor(vattr.va_rdev);
427 		fdinfo.pr_rminor = getminor(vattr.va_rdev);
428 		fdinfo.pr_size = (off64_t)vattr.va_size;
429 
430 		error = elfnote(vp, &offset, NT_FDINFO,
431 		    sizeof (fdinfo), &fdinfo, rlimit, credp);
432 		if (error) {
433 			VN_RELE(vroot);
434 			goto done;
435 		}
436 	}
437 
438 	VN_RELE(vroot);
439 
440 #if defined(__i386) || defined(__i386_COMPAT)
441 	mutex_enter(&p->p_ldtlock);
442 	ssdsize = prnldt(p) * sizeof (struct ssd);
443 	if (ssdsize != 0) {
444 		ssd = kmem_alloc(ssdsize, KM_SLEEP);
445 		prgetldt(p, ssd);
446 		error = elfnote(vp, &offset, NT_LDT, ssdsize,
447 		    (caddr_t)ssd, rlimit, credp);
448 		kmem_free(ssd, ssdsize);
449 	}
450 	mutex_exit(&p->p_ldtlock);
451 	if (error)
452 		goto done;
453 #endif	/* __i386 || defined(__i386_COMPAT) */
454 
455 	nlwp = p->p_lwpcnt;
456 	nzomb = p->p_zombcnt;
457 	/* for each entry in the lwp directory ... */
458 	for (ldp = p->p_lwpdir; nlwp + nzomb != 0; ldp++) {
459 
460 		if ((lep = ldp->ld_entry) == NULL)	/* empty slot */
461 			continue;
462 
463 		if ((t = lep->le_thread) != NULL) {	/* active lwp */
464 			ASSERT(nlwp != 0);
465 			nlwp--;
466 			lwp = ttolwp(t);
467 			mutex_enter(&p->p_lock);
468 			prgetlwpsinfo(t, &bigwad->lwpsinfo);
469 			mutex_exit(&p->p_lock);
470 		} else {				/* zombie lwp */
471 			ASSERT(nzomb != 0);
472 			nzomb--;
473 			bzero(&bigwad->lwpsinfo, sizeof (bigwad->lwpsinfo));
474 			bigwad->lwpsinfo.pr_lwpid = lep->le_lwpid;
475 			bigwad->lwpsinfo.pr_state = SZOMB;
476 			bigwad->lwpsinfo.pr_sname = 'Z';
477 			bigwad->lwpsinfo.pr_start.tv_sec = lep->le_start;
478 		}
479 		error = elfnote(vp, &offset, NT_LWPSINFO,
480 		    sizeof (bigwad->lwpsinfo), (caddr_t)&bigwad->lwpsinfo,
481 		    rlimit, credp);
482 		if (error)
483 			goto done;
484 		if (t == NULL)		/* nothing more to do for a zombie */
485 			continue;
486 
487 		mutex_enter(&p->p_lock);
488 		if (t == curthread) {
489 			/*
490 			 * Modify t_whystop and lwp_cursig so it appears that
491 			 * the current LWP is stopped after faulting on the
492 			 * signal that caused the core dump.  As a result,
493 			 * prgetlwpstatus() will record that signal, the saved
494 			 * lwp_siginfo, and its signal handler in the core file
495 			 * status.  We restore lwp_cursig in case a subsequent
496 			 * signal was received while dumping core.
497 			 */
498 			oldsig = lwp->lwp_cursig;
499 			lwp->lwp_cursig = (uchar_t)sig;
500 			t->t_whystop = PR_FAULTED;
501 
502 			prgetlwpstatus(t, &bigwad->lwpstatus, p->p_zone);
503 			bigwad->lwpstatus.pr_why = 0;
504 
505 			t->t_whystop = 0;
506 			lwp->lwp_cursig = oldsig;
507 		} else {
508 			prgetlwpstatus(t, &bigwad->lwpstatus, p->p_zone);
509 		}
510 		mutex_exit(&p->p_lock);
511 		error = elfnote(vp, &offset, NT_LWPSTATUS,
512 		    sizeof (bigwad->lwpstatus), (caddr_t)&bigwad->lwpstatus,
513 		    rlimit, credp);
514 		if (error)
515 			goto done;
516 
517 #if defined(__sparc)
518 		/*
519 		 * Unspilled SPARC register windows.
520 		 */
521 		{
522 			size_t size = prnwindows(lwp);
523 
524 			if (size != 0) {
525 				size = sizeof (gwindows_t) -
526 				    (SPARC_MAXREGWINDOW - size) *
527 				    sizeof (struct rwindow);
528 				prgetwindows(lwp, &bigwad->gwindows);
529 				error = elfnote(vp, &offset, NT_GWINDOWS,
530 				    size, (caddr_t)&bigwad->gwindows,
531 				    rlimit, credp);
532 				if (error)
533 					goto done;
534 			}
535 		}
536 		/*
537 		 * Ancillary State Registers.
538 		 */
539 		if (p->p_model == DATAMODEL_LP64) {
540 			prgetasregs(lwp, bigwad->asrset);
541 			error = elfnote(vp, &offset, NT_ASRS,
542 			    sizeof (asrset_t), (caddr_t)bigwad->asrset,
543 			    rlimit, credp);
544 			if (error)
545 				goto done;
546 		}
547 #endif /* __sparc */
548 
549 		if (xregsize) {
550 			prgetprxregs(lwp, bigwad->xregs);
551 			error = elfnote(vp, &offset, NT_PRXREG,
552 			    xregsize, bigwad->xregs, rlimit, credp);
553 			if (error)
554 				goto done;
555 		}
556 
557 		if (t->t_lwp->lwp_spymaster != NULL) {
558 			void *psaddr = t->t_lwp->lwp_spymaster;
559 #ifdef _ELF32_COMPAT
560 			/*
561 			 * On a 64-bit kernel with 32-bit ELF compatibility,
562 			 * this file is compiled into two different objects:
563 			 * one is compiled normally, and the other is compiled
564 			 * with _ELF32_COMPAT set -- and therefore with a
565 			 * psinfo_t defined to be a psinfo32_t.  However, the
566 			 * psinfo_t denoting our spymaster is always of the
567 			 * native type; if we are in the _ELF32_COMPAT case,
568 			 * we need to explicitly convert it.
569 			 */
570 			if (p->p_model == DATAMODEL_ILP32) {
571 				psinfo_kto32(psaddr, &bigwad->psinfo);
572 				psaddr = &bigwad->psinfo;
573 			}
574 #endif
575 
576 			error = elfnote(vp, &offset, NT_SPYMASTER,
577 			    sizeof (psinfo_t), psaddr, rlimit, credp);
578 			if (error)
579 				goto done;
580 		}
581 	}
582 	ASSERT(nlwp == 0);
583 
584 done:
585 	kmem_free(bigwad, bigsize);
586 	return (error);
587 }
588