xref: /illumos-gate/usr/src/tools/smatch/src/smatch_mtag.c (revision f52943a93040563107b95bccb9db87d9971ef47d)
1 /*
2  * Copyright (C) 2017 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, see http://www.gnu.org/copyleft/gpl.txt
16  */
17 
18 /*
19  * One problem that I have is that it's really hard to track how pointers are
20  * passed around.  For example, it would be nice to know that the probe() and
21  * remove() functions get the same pci_dev pointer.  It would be good to know
22  * what pointers we're passing to the open() and close() functions.  But that
23  * information gets lost in a call tree full of function pointer calls.
24  *
25  * I think the first step is to start naming specific pointers.  So when a
26  * pointer is allocated, then it gets a tag.  So calls to kmalloc() generate a
27  * tag.  But we might not use that, because there might be a better name like
28  * framebuffer_alloc(). The framebuffer_alloc() is interesting because there is
29  * one per driver and it's passed around to all the file operations.
30  *
31  * Perhaps we could make a list of functions like framebuffer_alloc() which take
32  * a size and say that those are the interesting alloc functions.
33  *
34  * Another place where we would maybe name the pointer is when they are passed
35  * to the probe().  Because that's an important pointer, since there is one
36  * per driver (sort of).
37  *
38  * My vision is that you could take a pointer and trace it back to a global.  So
39  * I'm going to track that pointer_tag - 28 bytes takes you to another pointer
40  * tag.  You could follow that one back and so on.  Also when we pass a pointer
41  * to a function that would be recorded as sort of a link or path or something.
42  *
43  */
44 
45 #include "smatch.h"
46 #include "smatch_slist.h"
47 #include "smatch_extra.h"
48 
49 #include <openssl/md5.h>
50 
51 static int my_id;
52 
53 static mtag_t str_to_tag(const char *str)
54 {
55 	unsigned char c[MD5_DIGEST_LENGTH];
56 	unsigned long long *tag = (unsigned long long *)&c;
57 	MD5_CTX mdContext;
58 	int len;
59 
60 	len = strlen(str);
61 	MD5_Init(&mdContext);
62 	MD5_Update(&mdContext, str, len);
63 	MD5_Final(c, &mdContext);
64 
65 	*tag &= ~MTAG_ALIAS_BIT;
66 	*tag &= ~MTAG_OFFSET_MASK;
67 
68 	return *tag;
69 }
70 
/*
 * Allocation functions recognised by is_mtag_call().  A call only counts
 * when the argument at index size_arg (zero based) has an implied value.
 */
const struct {
	const char *name;	/* name of the called function */
	int size_arg;		/* index of the argument that is looked up */
} allocator_info[] = {
	{ .name = "kmalloc",      .size_arg = 0 },
	{ .name = "kzalloc",      .size_arg = 0 },
	{ .name = "devm_kmalloc", .size_arg = 1 },
	{ .name = "devm_kzalloc", .size_arg = 1 },
};
80 
81 static bool is_mtag_call(struct expression *expr)
82 {
83 	struct expression *arg;
84 	int i;
85 	sval_t sval;
86 
87 	if (expr->type != EXPR_CALL ||
88 	    expr->fn->type != EXPR_SYMBOL ||
89 	    !expr->fn->symbol)
90 		return false;
91 
92 	for (i = 0; i < ARRAY_SIZE(allocator_info); i++) {
93 		if (strcmp(expr->fn->symbol->ident->name, allocator_info[i].name) == 0)
94 			break;
95 	}
96 	if (i == ARRAY_SIZE(allocator_info))
97 		return false;
98 
99 	arg = get_argument_from_call_expr(expr->args, allocator_info[i].size_arg);
100 	if (!get_implied_value(arg, &sval))
101 		return false;
102 
103 	return true;
104 }
105 
106 struct smatch_state *swap_mtag_return(struct expression *expr, struct smatch_state *state)
107 {
108 	struct expression *left, *right;
109 	char *left_name, *right_name;
110 	struct symbol *left_sym;
111 	struct range_list *rl;
112 	char buf[256];
113 	mtag_t tag;
114 	sval_t tag_sval;
115 
116 	if (!expr || expr->type != EXPR_ASSIGNMENT || expr->op != '=')
117 		return state;
118 
119 	if (!estate_rl(state) || strcmp(state->name, "0,4096-ptr_max") != 0)
120 		return state;
121 
122 	left = strip_expr(expr->left);
123 	right = strip_expr(expr->right);
124 
125 	if (!is_mtag_call(right))
126 		return state;
127 
128 	left_name = expr_to_str_sym(left, &left_sym);
129 	if (!left_name || !left_sym)
130 		return state;
131 	right_name = expr_to_str(right);
132 
133 	snprintf(buf, sizeof(buf), "%s %s %s %s", get_filename(), get_function(),
134 		 left_name, right_name);
135 	tag = str_to_tag(buf);
136 	tag_sval.type = estate_type(state);
137 	tag_sval.uvalue = tag;
138 
139 	rl = rl_filter(estate_rl(state), valid_ptr_rl);
140 	rl = clone_rl(rl);
141 	add_range(&rl, tag_sval, tag_sval);
142 
143 	sql_insert_mtag_about(tag, left_name, buf);
144 
145 	free_string(left_name);
146 	free_string(right_name);
147 
148 	return alloc_estate_rl(rl);
149 }
150 
151 int get_string_mtag(struct expression *expr, mtag_t *tag)
152 {
153 	mtag_t xor;
154 
155 	if (expr->type != EXPR_STRING || !expr->string)
156 		return 0;
157 
158 	/* I was worried about collisions so I added a xor */
159 	xor = str_to_tag("__smatch string");
160 	*tag = str_to_tag(expr->string->data);
161 	*tag = *tag ^ xor;
162 
163 	return 1;
164 }
165 
166 int get_toplevel_mtag(struct symbol *sym, mtag_t *tag)
167 {
168 	char buf[256];
169 
170 	if (!sym)
171 		return 0;
172 
173 	if (!sym->ident ||
174 	    !(sym->ctype.modifiers & MOD_TOPLEVEL))
175 		return 0;
176 
177 	snprintf(buf, sizeof(buf), "%s %s",
178 		 (sym->ctype.modifiers & MOD_STATIC) ? get_filename() : "extern",
179 		 sym->ident->name);
180 	*tag = str_to_tag(buf);
181 	return 1;
182 }
183 
184 bool get_symbol_mtag(struct symbol *sym, mtag_t *tag)
185 {
186 	char buf[256];
187 
188 	if (!sym || !sym->ident)
189 		return false;
190 
191 	if (get_toplevel_mtag(sym, tag))
192 		return true;
193 
194 	if (get_param_num_from_sym(sym) >= 0)
195 		return false;
196 
197 	snprintf(buf, sizeof(buf), "%s %s %s",
198 		 get_filename(), get_function(), sym->ident->name);
199 	*tag = str_to_tag(buf);
200 	return true;
201 }
202 
203 static void global_variable(struct symbol *sym)
204 {
205 	mtag_t tag;
206 
207 	if (!get_toplevel_mtag(sym, &tag))
208 		return;
209 
210 	sql_insert_mtag_about(tag,
211 			      sym->ident->name,
212 			      (sym->ctype.modifiers & MOD_STATIC) ? get_filename() : "extern");
213 }
214 
215 static int get_array_mtag_offset(struct expression *expr, mtag_t *tag, int *offset)
216 {
217 	struct expression *array, *offset_expr;
218 	struct symbol *type;
219 	sval_t sval;
220 	int start_offset;
221 
222 	if (!is_array(expr))
223 		return 0;
224 
225 	array = get_array_base(expr);
226 	if (!array)
227 		return 0;
228 	type = get_type(array);
229 	if (!type || type->type != SYM_ARRAY)
230 		return 0;
231 	type = get_real_base_type(type);
232 	if (!type_bytes(type))
233 		return 0;
234 
235 	if (!expr_to_mtag_offset(array, tag, &start_offset))
236 		return 0;
237 
238 	offset_expr = get_array_offset(expr);
239 	if (!get_value(offset_expr, &sval))
240 		return 0;
241 	*offset = start_offset + sval.value * type_bytes(type);
242 
243 	return 1;
244 }
245 
246 struct range_list *swap_mtag_seed(struct expression *expr, struct range_list *rl)
247 {
248 	char buf[256];
249 	char *name;
250 	sval_t sval;
251 	mtag_t tag;
252 
253 	if (!rl_to_sval(rl, &sval))
254 		return rl;
255 	if (sval.type->type != SYM_PTR || sval.uvalue != MTAG_SEED)
256 		return rl;
257 
258 	name = expr_to_str(expr);
259 	snprintf(buf, sizeof(buf), "%s %s %s", get_filename(), get_function(), name);
260 	free_string(name);
261 	tag = str_to_tag(buf);
262 	sval.value = tag;
263 	return alloc_rl(sval, sval);
264 }
265 
266 int create_mtag_alias(mtag_t tag, struct expression *expr, mtag_t *new)
267 {
268 	char buf[256];
269 	int lines_from_start;
270 	char *str;
271 
272 	/*
273 	 * We need the alias to be unique.  It's not totally required that it
274 	 * be the same from one DB build to then next, but it makes debugging
275 	 * a bit simpler.
276 	 *
277 	 */
278 
279 	if (!cur_func_sym)
280 		return 0;
281 
282 	lines_from_start = expr->pos.line - cur_func_sym->pos.line;
283 	str = expr_to_str(expr);
284 	snprintf(buf, sizeof(buf), "%lld %d %s", tag, lines_from_start, str);
285 	free_string(str);
286 
287 	*new = str_to_tag(buf);
288 	sql_insert_mtag_alias(tag, *new);
289 
290 	return 1;
291 }
292 
293 static int get_implied_mtag_offset(struct expression *expr, mtag_t *tag, int *offset)
294 {
295 	struct smatch_state *state;
296 	struct symbol *type;
297 	sval_t sval;
298 
299 	type = get_type(expr);
300 	if (!type_is_ptr(type))
301 		return 0;
302 	state = get_extra_state(expr);
303 	if (!state || !estate_get_single_value(state, &sval) || sval.value == 0)
304 		return 0;
305 
306 	*tag = sval.uvalue & ~MTAG_OFFSET_MASK;
307 	*offset = sval.uvalue & MTAG_OFFSET_MASK;
308 	return 1;
309 }
310 
311 /*
312  * The point of this function is to give you the mtag and the offset so
313  * you can look up the data in the DB.  It takes an expression.
314  *
315  * So say you give it "foo->bar".  Then it would give you the offset of "bar"
316  * and the implied value of "foo".  Or if you lookup "*foo" then the offset is
317  * zero and we look up the implied value of "foo.  But if the expression is
318  * foo, then if "foo" is a global variable, then we get the mtag and the offset
319  * is zero.  If "foo" is a local variable, then there is nothing to look up in
320  * the mtag_data table because that's handled by smatch_extra.c to this returns
321  * false.
322  *
323  */
324 int expr_to_mtag_offset(struct expression *expr, mtag_t *tag, int *offset)
325 {
326 	*tag = 0;
327 	*offset = 0;
328 
329 	if (bits_in_pointer != 64)
330 		return 0;
331 
332 	expr = strip_expr(expr);
333 	if (!expr)
334 		return 0;
335 
336 	if (is_array(expr))
337 		return get_array_mtag_offset(expr, tag, offset);
338 
339 	if (expr->type == EXPR_PREOP && expr->op == '*') {
340 		expr = strip_expr(expr->unop);
341 		return get_implied_mtag_offset(expr, tag, offset);
342 	} else if (expr->type == EXPR_DEREF) {
343 		int tmp, tmp_offset = 0;
344 
345 		while (expr->type == EXPR_DEREF) {
346 			tmp = get_member_offset_from_deref(expr);
347 			if (tmp < 0)
348 				return 0;
349 			tmp_offset += tmp;
350 			expr = strip_expr(expr->deref);
351 		}
352 		*offset = tmp_offset;
353 		if (expr->type == EXPR_PREOP && expr->op == '*') {
354 			expr = strip_expr(expr->unop);
355 
356 			if (get_implied_mtag_offset(expr, tag, &tmp_offset)) {
357 				// FIXME:  look it up recursively?
358 				if (tmp_offset)
359 					return 0;
360 				return 1;
361 			}
362 			return 0;
363 		} else if (expr->type == EXPR_SYMBOL) {
364 			return get_symbol_mtag(expr->symbol, tag);
365 		}
366 		return 0;
367 	} else if (expr->type == EXPR_SYMBOL) {
368 		return get_symbol_mtag(expr->symbol, tag);
369 	}
370 	return 0;
371 }
372 
373 /*
374  * This function takes an address and returns an sval.  Let's take some
375  * example things you might pass to it:
376  * foo->bar:
377  *   If we were only called from smatch_math, we wouldn't need to bother with
378  *   this because it's already been looked up in smatch_extra.c but this is
379  *   also called from other places so we have to check smatch_extra.c.
380  * &foo
381  *   If "foo" is global return the mtag for "foo".
382  * &foo.bar
383  *   If "foo" is global return the mtag for "foo" + the offset of ".bar".
384  * It also handles string literals.
385  *
386  */
387 int get_mtag_sval(struct expression *expr, sval_t *sval)
388 {
389 	struct symbol *type;
390 	mtag_t tag;
391 	int offset = 0;
392 
393 	if (bits_in_pointer != 64)
394 		return 0;
395 
396 	expr = strip_expr(expr);
397 
398 	type = get_type(expr);
399 	if (!type_is_ptr(type))
400 		return 0;
401 	/*
402 	 * There are several options:
403 	 *
404 	 * If the expr is a string literal, that's an address/mtag.
405 	 * SYM_ARRAY and SYM_FN are mtags.  There are "&foo" type addresses.
406 	 * And there are saved pointers "p = &foo;"
407 	 *
408 	 */
409 
410 	if (expr->type == EXPR_STRING && get_string_mtag(expr, &tag))
411 		goto found;
412 
413 	if (expr->type == EXPR_SYMBOL &&
414 	    (type->type == SYM_ARRAY || type->type == SYM_FN) &&
415 	    get_toplevel_mtag(expr->symbol, &tag))
416 		goto found;
417 
418 	if (expr->type == EXPR_PREOP && expr->op == '&') {
419 		expr = strip_expr(expr->unop);
420 		if (expr_to_mtag_offset(expr, &tag, &offset))
421 			goto found;
422 		return 0;
423 	}
424 
425 	if (get_implied_mtag_offset(expr, &tag, &offset))
426 		goto found;
427 
428 	return 0;
429 found:
430 	if (offset >= MTAG_OFFSET_MASK)
431 		return 0;
432 
433 	sval->type = type;
434 	sval->uvalue = tag | offset;
435 
436 	return 1;
437 }
438 
439 void register_mtag(int id)
440 {
441 	my_id = id;
442 
443 
444 	/*
445 	 * The mtag stuff only works on 64 systems because we store the
446 	 * information in the pointer itself.
447 	 * bit 63   : set for alias mtags
448 	 * bit 62-12: mtag hash
449 	 * bit 11-0 : offset
450 	 *
451 	 */
452 
453 	add_hook(&global_variable, BASE_HOOK);
454 }
455