xref: /illumos-gate/usr/src/cmd/sgs/gprof/common/readelf.c (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include	"gprof.h"
30 #include	<stdlib.h>
31 #include	<sys/file.h>
32 #include	<fcntl.h>
33 #include	<unistd.h>
34 #include	<string.h>
35 #include	<sysexits.h>
36 #include	<libelf.h>
37 #include 	"gelf.h"
38 
#ifdef DEBUG
static void	debug_dup_del(nltype *, nltype *);

/*
 * Debug tracing helpers, active only when the ELFDEBUG bit is set in
 * the global debug mask.  Each macro expands to exactly one statement
 * (do { } while (0)), so it can be used safely as the body of an
 * unbraced if/else and must be followed by a semicolon at the call site.
 */
#define	DPRINTF(msg, file)	do { \
					if (debug & ELFDEBUG) \
						(void) printf(msg, file); \
				} while (0)

#define	PRINTF(msg)		do { \
					if (debug & ELFDEBUG) \
						(void) printf(msg); \
				} while (0)

#define	DEBUG_DUP_DEL(keeper, louser)	do { \
					if (debug & ELFDEBUG) \
						debug_dup_del(keeper, louser); \
				} while (0)

#else
/* Non-debug builds: the tracing macros are single no-op expressions. */
#define	DPRINTF(msg, file)		((void)0)
#define	PRINTF(msg)			((void)0)
#define	DEBUG_DUP_DEL(keeper, louser)	((void)0)
#endif
56 
57 size_t	textbegin, textsize;
58 
59 /* Prototype definitions first */
60 
61 static void	process(char *filename, int fd);
62 static void	get_symtab(Elf *elf, mod_info_t *module);
63 static void	get_textseg(Elf *elf, int fd);
64 static void	save_aout_info(char *);
65 
/*
 * Print `error' on stderr together with the text libelf associates
 * with its most recent error, then terminate with EX_SOFTWARE.
 * This function does not return.
 */
static void
fatal_error(char *error)
{
	const char	*elf_detail = elf_errmsg(-1);

	(void) fprintf(stderr, "Fatal ELF error: %s (%s)\n",
	    error, elf_detail);
	exit(EX_SOFTWARE);
}
73 
74 bool
75 is_shared_obj(char *name)
76 {
77 	int		fd;
78 	Elf		*elf;
79 	GElf_Ehdr	ehdr;
80 
81 	if ((fd = open(name, O_RDONLY)) == -1) {
82 		(void) fprintf(stderr, "%s: can't open `%s'\n", whoami, name);
83 		exit(EX_NOINPUT);
84 	}
85 
86 	if (elf_version(EV_CURRENT) == EV_NONE)
87 		fatal_error("libelf is out of date");
88 
89 	if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
90 		fatal_error("can't read as ELF file");
91 
92 	if (gelf_getehdr(elf, &ehdr) == NULL)
93 		fatal_error("can't read ehdr");
94 
95 	(void) elf_end(elf);
96 	(void) close(fd);
97 
98 	if (ehdr.e_type == ET_DYN)
99 		return (TRUE);
100 	else
101 		return (FALSE);
102 }
103 
104 static void
105 save_aout_info(char *aoutname)
106 {
107 	struct stat		buf;
108 	extern fl_info_t	aout_info;
109 
110 	if (stat(aoutname, &buf) == -1) {
111 		(void) fprintf(stderr, "%s: can't get info on `%s'\n",
112 							whoami, aoutname);
113 		exit(EX_NOINPUT);
114 	}
115 
116 	aout_info.dev = buf.st_dev;
117 	aout_info.ino = buf.st_ino;
118 	aout_info.mtime = buf.st_mtime;
119 	aout_info.size = buf.st_size;
120 }
121 
122 void
123 getnfile(char *aoutname)
124 {
125 	int	fd;
126 
127 	DPRINTF(" Attempting to open %s  \n", aoutname);
128 	if ((fd = open((aoutname), O_RDONLY)) == -1) {
129 		(void) fprintf(stderr, "%s: can't open `%s'\n",
130 							whoami, aoutname);
131 		exit(EX_NOINPUT);
132 	}
133 	process(aoutname, fd);
134 	save_aout_info(aoutname);
135 
136 	(void) close(fd);
137 }
138 
139 static GElf_Addr
140 get_txtorigin(Elf *elf)
141 {
142 	GElf_Ehdr	ehdr;
143 	GElf_Phdr	phdr;
144 	GElf_Half	ndx;
145 	GElf_Addr	txt_origin = 0;
146 	bool		first_load_seg = TRUE;
147 
148 	if (gelf_getehdr(elf, &ehdr) == NULL)
149 		fatal_error("can't read ehdr");
150 
151 	for (ndx = 0; ndx < ehdr.e_phnum; ndx++) {
152 		if (gelf_getphdr(elf, ndx, &phdr) == NULL)
153 			continue;
154 
155 		if ((phdr.p_type == PT_LOAD) && !(phdr.p_flags & PF_W)) {
156 			if (first_load_seg || phdr.p_vaddr < txt_origin)
157 				txt_origin = phdr.p_vaddr;
158 
159 			if (first_load_seg)
160 				first_load_seg = FALSE;
161 		}
162 	}
163 
164 	return (txt_origin);
165 }
166 
167 void
168 process_namelist(mod_info_t *module)
169 {
170 	int		fd;
171 	Elf		*elf;
172 
173 	if ((fd = open(module->name, O_RDONLY)) == -1) {
174 		(void) fprintf(stderr, "%s: can't read %s\n",
175 							whoami, module->name);
176 		(void) fprintf(stderr, "Exiting due to error(s)...\n");
177 		exit(EX_NOINPUT);
178 	}
179 
180 	/*
181 	 * libelf's version already verified in processing a.out,
182 	 * so directly do elf_begin()
183 	 */
184 	if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
185 		fatal_error("can't read as ELF file");
186 
187 	module->next = NULL;
188 	module->txt_origin = get_txtorigin(elf);
189 	get_symtab(elf, module);
190 	module->active = TRUE;
191 }
192 
193 /*
194  * Get the ELF header and,  if it exists, call get_symtab()
195  * to begin processing of the file; otherwise, return from
196  * processing the file with a warning.
197  */
198 static void
199 process(char *filename, int fd)
200 {
201 	Elf			*elf;
202 	extern bool		cflag;
203 	extern bool		Bflag;
204 
205 	if (elf_version(EV_CURRENT) == EV_NONE)
206 		fatal_error("libelf is out of date");
207 
208 	if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
209 		fatal_error("can't read as ELF file");
210 
211 	if (gelf_getclass(elf) == ELFCLASS64)
212 		Bflag = TRUE;
213 
214 	/*
215 	 * Initialize active modules list. Note that we set the end
216 	 * address while reading the symbol table, in get_symtab
217 	 */
218 	modules.id = 1;
219 	modules.next = NULL;
220 	modules.txt_origin = get_txtorigin(elf);
221 	modules.load_base = modules.txt_origin;
222 	if ((modules.name = malloc(strlen(filename) + 1)) == NULL) {
223 		(void) fprintf(stderr, "%s: can't malloc %d bytes",
224 					    whoami, strlen(filename) + 1);
225 		exit(EX_UNAVAILABLE);
226 	}
227 	(void) strcpy(modules.name, filename);
228 
229 	get_symtab(elf, &modules);
230 
231 	modules.load_end = modules.data_end;
232 	modules.active = TRUE;
233 	n_modules = 1;
234 
235 	if (cflag)
236 		get_textseg(elf, fd);
237 }
238 
239 static void
240 get_textseg(Elf *elf, int fd)
241 {
242 	GElf_Ehdr ehdr;
243 	GElf_Phdr phdr;
244 	GElf_Half i;
245 
246 	if (gelf_getehdr(elf, &ehdr) == NULL)
247 		fatal_error("can't read ehdr");
248 
249 	for (i = 0; i < ehdr.e_phnum; i++) {
250 
251 		if (gelf_getphdr(elf, i, &phdr) == NULL)
252 			continue;
253 
254 		if (!(phdr.p_flags & PF_W) && (phdr.p_filesz > textsize)) {
255 			size_t chk;
256 
257 			/*
258 			 * We could have multiple loadable text segments;
259 			 * keep the largest we find.
260 			 */
261 			if (textspace)
262 				free(textspace);
263 
264 			/*
265 			 * gprof is a 32-bit program;  if this text segment
266 			 * has a > 32-bit offset or length, it's too big.
267 			 */
268 			chk = (size_t)phdr.p_vaddr + (size_t)phdr.p_filesz;
269 			if (phdr.p_vaddr + phdr.p_filesz != (GElf_Xword)chk)
270 				fatal_error("text segment too large for -c");
271 
272 			textbegin = (size_t)phdr.p_vaddr;
273 			textsize = (size_t)phdr.p_filesz;
274 
275 			textspace = malloc(textsize);
276 
277 			if (lseek(fd, (off_t)phdr.p_offset, SEEK_SET) !=
278 			    (off_t)phdr.p_offset)
279 				fatal_error("cannot seek to text section");
280 
281 			if (read(fd, textspace, textsize) != textsize)
282 				fatal_error("cannot read text");
283 		}
284 	}
285 
286 	if (textsize == 0)
287 		fatal_error("can't find text segment");
288 }
289 
#ifdef DEBUG
/*
 * Debug trace for remove_dup_syms(): print the name of the symbol
 * being discarded (`louser') and of the one retained (`keeper').
 */
static void
debug_dup_del(nltype *keeper, nltype *louser)
{
	const char	*dropped = louser->name;
	const char	*kept = keeper->name;

	(void) printf("remove_dup_syms: discarding sym %s over sym %s\n",
	    dropped, kept);
}
#endif /* DEBUG */
298 
299 static void
300 remove_dup_syms(nltype *nl, sztype *sym_count)
301 {
302 	int	i;
303 	int	index;
304 	int	nextsym;
305 
306 	nltype *	orig_list;
307 	if ((orig_list = malloc(sizeof (nltype) * *sym_count)) == NULL) {
308 		(void) fprintf(stderr,
309 		    "gprof: remove_dup_syms: malloc failed\n");
310 		(void) fprintf(stderr, "Exiting due to error(s)...\n");
311 		exit(EX_UNAVAILABLE);
312 	}
313 	(void) memcpy(orig_list, nl, sizeof (nltype) * *sym_count);
314 
315 	for (i = 0, index = 0, nextsym = 1; nextsym < *sym_count; nextsym++) {
316 		int	i_type;
317 		int	n_bind;
318 		int	n_type;
319 
320 		/*
321 		 * If orig_list[nextsym] points to a new symvalue, then we
322 		 * will copy our keeper and move on to the next symbol.
323 		 */
324 		if ((orig_list + i)->value < (orig_list + nextsym)->value) {
325 			*(nl + index++) = *(orig_list +i);
326 			i = nextsym;
327 			continue;
328 		}
329 
330 		/*
331 		 * If these two symbols have the same info, then we
332 		 * keep the first and keep checking for dups.
333 		 */
334 		if ((orig_list + i)->syminfo ==
335 		    (orig_list + nextsym)->syminfo) {
336 			DEBUG_DUP_DEL(orig_list + i, orig_list + nextsym);
337 			continue;
338 		}
339 		n_bind = ELF32_ST_BIND((orig_list + nextsym)->syminfo);
340 		i_type = ELF32_ST_TYPE((orig_list + i)->syminfo);
341 		n_type = ELF32_ST_TYPE((orig_list + nextsym)->syminfo);
342 
343 		/*
344 		 * If they have the same type we take the stronger
345 		 * bound function.
346 		 */
347 		if (i_type == n_type) {
348 			if (n_bind == STB_WEAK) {
349 				DEBUG_DUP_DEL((orig_list + i),
350 				    (orig_list + nextsym));
351 				continue;
352 			}
353 			DEBUG_DUP_DEL((orig_list + nextsym),
354 			    (orig_list + i));
355 			i = nextsym;
356 			continue;
357 		}
358 
359 		/*
360 		 * If the first symbol isn't of type NOTYPE then it must
361 		 * be the keeper.
362 		 */
363 		if (i_type != STT_NOTYPE) {
364 			DEBUG_DUP_DEL((orig_list + i),
365 			    (orig_list + nextsym));
366 			continue;
367 		}
368 
369 		/*
370 		 * Throw away the first one and take the new
371 		 * symbol
372 		 */
373 		DEBUG_DUP_DEL((orig_list + nextsym), (orig_list + i));
374 		i = nextsym;
375 	}
376 
377 	if ((orig_list + i)->value > (nl + index - 1)->value)
378 		*(nl + index++) = *(orig_list +i);
379 
380 	*sym_count = index;
381 }
382 
383 /*
384  * compare either by name or by value for sorting.
385  * This is the comparison function called by qsort to
386  * sort the symbols either by name or value when requested.
387  */
388 static int
389 compare(const void *arg1, const void *arg2)
390 {
391 	nltype *a = (nltype *)arg1;
392 	nltype *b = (nltype *)arg2;
393 
394 	if (a->value > b->value)
395 		return (1);
396 	else
397 		return ((a->value == b->value) - 1);
398 }
399 
400 static int
401 is_function(Elf *elf, GElf_Sym *sym)
402 {
403 	Elf_Scn *scn;
404 	GElf_Shdr shdr;
405 
406 	/*
407 	 * With shared objects, it is possible we come across a function
408 	 * that's global, but is undefined. The definition is probably
409 	 * elsewhere, so we'll have to skip it as far as this object is
410 	 * concerned.
411 	 */
412 	if (sym->st_shndx == SHN_UNDEF)
413 		return (0);
414 
415 	if (GELF_ST_TYPE(sym->st_info) == STT_FUNC) {
416 		if (GELF_ST_BIND(sym->st_info) == STB_GLOBAL)
417 			return (1);
418 
419 		if (GELF_ST_BIND(sym->st_info) == STB_WEAK)
420 			return (1);
421 
422 		if (!aflag && GELF_ST_BIND(sym->st_info) == STB_LOCAL)
423 			return (1);
424 	}
425 
426 	/*
427 	 * It's not a function; determine if it's in an executable section.
428 	 */
429 	if (GELF_ST_TYPE(sym->st_info) != STT_NOTYPE)
430 		return (0);
431 
432 	/*
433 	 * If it isn't global, and it isn't weak, and it either isn't
434 	 * local or the "all flag" isn't set, then get out.
435 	 */
436 	if (GELF_ST_BIND(sym->st_info) != STB_GLOBAL &&
437 	    GELF_ST_BIND(sym->st_info) != STB_WEAK &&
438 	    (GELF_ST_BIND(sym->st_info) != STB_LOCAL || aflag))
439 		return (0);
440 
441 	if (sym->st_shndx >= SHN_LORESERVE)
442 		return (0);
443 
444 	scn = elf_getscn(elf, sym->st_shndx);
445 	(void) gelf_getshdr(scn, &shdr);
446 
447 	if (!(shdr.sh_flags & SHF_EXECINSTR))
448 		return (0);
449 
450 	return (1);
451 }
452 
453 static void
454 get_symtab(Elf *elf, mod_info_t *module)
455 {
456 	Elf_Scn		*scn = NULL, *sym_pri = NULL, *sym_aux = NULL;
457 	GElf_Word	strndx = 0;
458 	sztype		nsyms, i;
459 	Elf_Data	*symdata_pri;
460 	Elf_Data	*symdata_aux;
461 	GElf_Xword	nsyms_pri, nsyms_aux = 0;
462 	nltype		*etext = NULL;
463 	nltype		*l_nl, *l_npe;
464 	sztype		l_nname;
465 	extern sztype	total_names;
466 	int		symtab_found = 0;
467 
468 
469 	/*
470 	 * Scan the section headers looking for a symbol table. Our
471 	 * preference is to use .symtab, because it contains the full
472 	 * set of symbols. If we find it, we stop looking immediately
473 	 * and use it. In the absence of a .symtab section, we are
474 	 * willing to use the dynamic symbol table (.dynsym), possibly
475 	 * augmented by the .SUNW_ldynsym, which contains local symbols.
476 	 */
477 	while ((symtab_found == 0) && ((scn = elf_nextscn(elf, scn)) != NULL)) {
478 		GElf_Shdr shdr;
479 
480 		if (gelf_getshdr(scn, &shdr) == NULL)
481 			continue;
482 
483 		switch (shdr.sh_type) {
484 		case SHT_SYMTAB:
485 			nsyms_pri = shdr.sh_size / shdr.sh_entsize;
486 			strndx = shdr.sh_link;
487 			sym_pri = scn;
488 			/* Throw away .SUNW_ldynsym. It is for .dynsym only */
489 			nsyms_aux = 0;
490 			sym_aux = NULL;
491 			/* We have found the best symbol table. Stop looking */
492 			symtab_found = 1;
493 			break;
494 
495 		case SHT_DYNSYM:
496 			/* We will use .dynsym if no .symtab is found */
497 			nsyms_pri = shdr.sh_size / shdr.sh_entsize;
498 			strndx = shdr.sh_link;
499 			sym_pri = scn;
500 			break;
501 
502 		case SHT_SUNW_LDYNSYM:
503 			/* Auxiliary table, used with .dynsym */
504 			nsyms_aux = shdr.sh_size / shdr.sh_entsize;
505 			sym_aux = scn;
506 			break;
507 		}
508 	}
509 
510 	if (sym_pri == NULL || strndx == 0)
511 		fatal_error("can't find symbol table.\n");
512 
513 	nsyms = (sztype)(nsyms_pri + nsyms_aux);
514 	if ((nsyms_pri + nsyms_aux) != (GElf_Xword)nsyms)
515 		fatal_error(
516 		    "32-bit gprof cannot handle more than 2^32 symbols");
517 
518 	if ((symdata_pri = elf_getdata(sym_pri, NULL)) == NULL)
519 		fatal_error("can't read symbol data.\n");
520 
521 	if ((sym_aux != NULL) &&
522 	    ((symdata_aux = elf_getdata(sym_aux, NULL)) == NULL))
523 		fatal_error("can't read .SUNW_ldynsym symbol data.\n");
524 
525 	if ((l_nl = l_npe = (nltype *)calloc(nsyms + PRF_SYMCNT,
526 	    sizeof (nltype))) == NULL)
527 		fatal_error("cannot allocate symbol data.\n");
528 
529 	/*
530 	 * Now we need to cruise through the symbol table eliminating
531 	 * all non-functions from consideration, and making strings
532 	 * real.
533 	 */
534 	l_nname = 0;
535 
536 	for (i = 1; i < nsyms; i++) {
537 		GElf_Sym gsym;
538 		char *name;
539 
540 		/*
541 		 * Look up the symbol. In the case where we have a
542 		 * .SUNW_ldynsym/.dynsym pair, we treat them as a single
543 		 * logical table, with the data from .SUNW_ldynsym coming
544 		 * before the data in .dynsym.
545 		 */
546 		if (i >= nsyms_aux)
547 			(void) gelf_getsym(symdata_pri, i - nsyms_aux, &gsym);
548 		else
549 			(void) gelf_getsym(symdata_aux, i, &gsym);
550 
551 		name = elf_strptr(elf, strndx, gsym.st_name);
552 
553 		/*
554 		 * We're interested in this symbol if it's a function or
555 		 * if it's the symbol "_etext"
556 		 */
557 		if (is_function(elf, &gsym) || strcmp(name, PRF_ETEXT) == 0) {
558 
559 			l_npe->name = name;
560 			l_npe->value = gsym.st_value;
561 			l_npe->sz = gsym.st_size;
562 			l_npe->syminfo = gsym.st_info;
563 			l_npe->module = module;
564 
565 			if (strcmp(name, PRF_ETEXT) == 0)
566 				etext = l_npe;
567 
568 			if (lflag == TRUE &&
569 			    GELF_ST_BIND(gsym.st_info) == STB_LOCAL) {
570 				/*
571 				 * If the "locals only" flag is on, then
572 				 * we add the local symbols to the
573 				 * exclusion lists.
574 				 */
575 				addlist(Elist, name);
576 				addlist(elist, name);
577 			}
578 			DPRINTF("Index %lld:", l_nname);
579 			DPRINTF("\tValue: 0x%llx\t", l_npe->value);
580 			DPRINTF("Name: %s \n", l_npe->name);
581 			l_npe++;
582 			l_nname++;
583 		}
584 
585 		if (strcmp(name, PRF_END) == 0)
586 			module->data_end = gsym.st_value;
587 	}
588 
589 	if (l_npe == l_nl)
590 		fatal_error("no valid functions found");
591 
592 	/*
593 	 * Finally, we need to construct some dummy entries.
594 	 */
595 	if (etext) {
596 		l_npe->name = PRF_EXTSYM;
597 		l_npe->value = etext->value + 1;
598 		l_npe->syminfo = GELF_ST_INFO(STB_GLOBAL, STT_FUNC);
599 		l_npe->module = module;
600 		l_npe++;
601 		l_nname++;
602 	}
603 
604 	l_npe->name = PRF_MEMTERM;
605 	l_npe->value = (pctype)-1;
606 	l_npe->syminfo = GELF_ST_INFO(STB_GLOBAL, STT_FUNC);
607 	l_npe->module = module;
608 	l_npe++;
609 	l_nname++;
610 
611 	/*
612 	 * We're almost done;  all we need to do is sort the symbols
613 	 * and then remove the duplicates.
614 	 */
615 	qsort(l_nl, (size_t)l_nname, sizeof (nltype), compare);
616 	remove_dup_syms(l_nl, &l_nname);
617 
618 	module->nl = l_nl;
619 	module->npe = l_npe;
620 	module->nname = l_nname;
621 
622 	total_names += l_nname;
623 }
624