xref: /illumos-gate/usr/src/lib/libdemangle/common/demangle.c (revision f96a0cef040313f6281fbc014a0b63d5c5cc760f)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2021 Jason King
14  * Copyright 2019, Joyent, Inc.
15  */
16 
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <errno.h>
21 #include <limits.h>
22 #include <pthread.h>
23 #include <sys/ctype.h>
24 #include <sys/debug.h>
25 #include <sys/sysmacros.h>
26 #include <stdarg.h>
27 #include "demangle-sys.h"
28 #include "demangle_int.h"
29 #include "strview.h"
30 
/* Name of the environment variable check_debug() looks for. */
#define	DEMANGLE_DEBUG	"DEMANGLE_DEBUG"

/* Passed to pthread_once() in sysdemangle() so check_debug() runs once. */
static pthread_once_t debug_once = PTHREAD_ONCE_INIT;
/* Set to B_TRUE by check_debug() when DEMANGLE_DEBUG is in the environment. */
volatile boolean_t demangle_debug;
/* Stream that demdebug() writes its output to; initially stderr. */
FILE *debugf = stderr;
36 
37 static struct {
38 	const char	*str;
39 	sysdem_lang_t	lang;
40 } lang_tbl[] = {
41 	{ "auto", SYSDEM_LANG_AUTO },
42 	{ "c++", SYSDEM_LANG_CPP },
43 	{ "rust", SYSDEM_LANG_RUST },
44 };
45 
46 static const char *
47 langstr(sysdem_lang_t lang)
48 {
49 	size_t i;
50 
51 	for (i = 0; i < ARRAY_SIZE(lang_tbl); i++) {
52 		if (lang == lang_tbl[i].lang)
53 			return (lang_tbl[i].str);
54 	}
55 	return ("invalid");
56 }
57 
58 boolean_t
59 sysdem_parse_lang(const char *str, sysdem_lang_t *langp)
60 {
61 	size_t i;
62 
63 	for (i = 0; i < ARRAY_SIZE(lang_tbl); i++) {
64 		if (strcmp(str, lang_tbl[i].str) == 0) {
65 			*langp = lang_tbl[i].lang;
66 			return (B_TRUE);
67 		}
68 	}
69 
70 	return (B_FALSE);
71 }
72 
73 /*
74  * A quick check if str can possibly be a mangled string. Currently, that
75  * means it must start with _Z or __Z.
76  */
77 static boolean_t
78 is_mangled(const char *str, size_t n)
79 {
80 	strview_t sv;
81 
82 	sv_init_str(&sv, str, str + n);
83 
84 	if (!sv_consume_if_c(&sv, '_'))
85 		return (B_FALSE);
86 	(void) sv_consume_if_c(&sv, '_');
87 	if (sv_consume_if_c(&sv, 'Z'))
88 		return (B_TRUE);
89 
90 	return (B_FALSE);
91 }
92 
93 static void
94 check_debug(void)
95 {
96 	if (getenv(DEMANGLE_DEBUG))
97 		demangle_debug = B_TRUE;
98 }
99 
100 char *
101 sysdemangle(const char *str, sysdem_lang_t lang, sysdem_ops_t *ops)
102 {
103 	char *res = NULL;
104 	/*
105 	 * While the language specific demangler code can handle non-NUL
106 	 * terminated strings, we currently don't expose this to consumers.
107 	 * Consumers should still pass in a NUL-terminated string.
108 	 */
109 	size_t slen;
110 
111 	VERIFY0(pthread_once(&debug_once, check_debug));
112 
113 	DEMDEBUG("name = '%s'", (str == NULL) ? "(NULL)" : str);
114 	DEMDEBUG("lang = %s (%d)", langstr(lang), lang);
115 
116 	if (str == NULL) {
117 		errno = EINVAL;
118 		return (NULL);
119 	}
120 
121 	slen = strlen(str);
122 
123 	switch (lang) {
124 		case SYSDEM_LANG_AUTO:
125 		case SYSDEM_LANG_CPP:
126 		case SYSDEM_LANG_RUST:
127 			break;
128 		default:
129 			errno = EINVAL;
130 			return (NULL);
131 	}
132 
133 	if (ops == NULL)
134 		ops = sysdem_ops_default;
135 
136 	/*
137 	 * If we were given an explicit language to demangle, we always
138 	 * use that. If not, we try to demangle as rust, then c++. Any
139 	 * mangled C++ symbol that manages to successfully demangle as a
140 	 * legacy rust symbol _should_ look the same as it can really
141 	 * only be a very simple C++ symbol. Otherwise, the rust demangling
142 	 * should fail and we can try C++.
143 	 */
144 	switch (lang) {
145 	case SYSDEM_LANG_CPP:
146 		return (cpp_demangle(str, slen, ops));
147 	case SYSDEM_LANG_RUST:
148 		return (rust_demangle(str, slen, ops));
149 	case SYSDEM_LANG_AUTO:
150 		break;
151 	}
152 
153 	/*
154 	 * To save us some potential work, if the symbol cannot
155 	 * possibly be a rust or C++ mangled name, we don't
156 	 * even attempt to demangle either.
157 	 */
158 	if (!is_mangled(str, slen)) {
159 		/*
160 		 * This does mean if we somehow get a string > 2GB
161 		 * the debugging output will be truncated, but that
162 		 * seems an acceptable tradeoff.
163 		 */
164 		int len = slen > INT_MAX ? INT_MAX : slen;
165 
166 		DEMDEBUG("ERROR: '%.*s' cannot be a mangled string", len, str);
167 		errno = EINVAL;
168 		return (NULL);
169 	}
170 
171 	DEMDEBUG("trying rust");
172 	res = rust_demangle(str, slen, ops);
173 
174 	IMPLY(ret != NULL, errno == 0);
175 	if (res != NULL)
176 		return (res);
177 
178 	DEMDEBUG("trying C++");
179 	return (cpp_demangle(str, slen, ops));
180 }
181 
182 int
183 demdebug(const char *fmt, ...)
184 {
185 	va_list ap;
186 
187 	flockfile(debugf);
188 	(void) fprintf(debugf, "LIBDEMANGLE: ");
189 	va_start(ap, fmt);
190 	(void) vfprintf(debugf, fmt, ap);
191 	(void) fputc('\n', debugf);
192 	(void) fflush(debugf);
193 	va_end(ap);
194 	funlockfile(debugf);
195 
196 	return (0);
197 }
198