xref: /illumos-gate/usr/src/tools/smatch/src/smatch_mtag.c (revision c94be9439c4f0773ef60e2cec21d548359cfea20)
1 /*
2  * Copyright (C) 2017 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, see http://www.gnu.org/copyleft/gpl.txt
16  */
17 
18 /*
19  * One problem that I have is that it's really hard to track how pointers are
20  * passed around.  For example, it would be nice to know that the probe() and
21  * remove() functions get the same pci_dev pointer.  It would be good to know
22  * what pointers we're passing to the open() and close() functions.  But that
23  * information gets lost in a call tree full of function pointer calls.
24  *
25  * I think the first step is to start naming specific pointers.  So when a
26  * pointer is allocated, then it gets a tag.  So calls to kmalloc() generate a
27  * tag.  But we might not use that, because there might be a better name like
28  * framebuffer_alloc(). The framebuffer_alloc() is interesting because there is
29  * one per driver and it's passed around to all the file operations.
30  *
31  * Perhaps we could make a list of functions like framebuffer_alloc() which take
32  * a size and say that those are the interesting alloc functions.
33  *
34  * Another place where we would maybe name the pointer is when they are passed
35  * to the probe().  Because that's an important pointer, since there is one
36  * per driver (sort of).
37  *
38  * My vision is that you could take a pointer and trace it back to a global.  So
39  * I'm going to track that pointer_tag - 28 bytes takes you to another pointer
40  * tag.  You could follow that one back and so on.  Also when we pass a pointer
41  * to a function that would be recorded as sort of a link or path or something.
42  *
43  */
44 
45 #include "smatch.h"
46 #include "smatch_slist.h"
47 #include "smatch_extra.h"
48 
49 #include <openssl/md5.h>
50 
51 static int my_id;
52 
53 mtag_t str_to_mtag(const char *str)
54 {
55 	unsigned char c[MD5_DIGEST_LENGTH];
56 	unsigned long long *tag = (unsigned long long *)&c;
57 	MD5_CTX mdContext;
58 	int len;
59 
60 	len = strlen(str);
61 	MD5_Init(&mdContext);
62 	MD5_Update(&mdContext, str, len);
63 	MD5_Final(c, &mdContext);
64 
65 	*tag &= ~MTAG_ALIAS_BIT;
66 	*tag &= ~MTAG_OFFSET_MASK;
67 
68 	return *tag;
69 }
70 
/*
 * Row callback handed to run_sql() by get_allocator_info_from_tag().
 *
 * _allocator points at a "char *" that accumulates the result:
 *   - on the first row it is set to a copy of column 0;
 *   - further rows with the same value leave it alone;
 *   - a row with a different value replaces it with "unknown".
 *
 * Rows that have no column 0, or whose column 0 is a NULL pointer, are
 * ignored so that strcmp() is never handed a NULL pointer.
 * NOTE(review): the signature matches an sqlite3_exec() style callback,
 * where NULL column values arrive as NULL pointers -- confirm against
 * run_sql().
 *
 * Always returns 0.
 */
static int save_allocator(void *_allocator, int argc, char **argv, char **azColName)
{
	char **allocator = _allocator;

	if (argc < 1 || !argv || !argv[0])
		return 0;

	if (!*allocator) {
		*allocator = alloc_string(argv[0]);
		return 0;
	}

	if (strcmp(*allocator, argv[0]) == 0)
		return 0;

	/* should be impossible */
	free_string(*allocator);
	*allocator = alloc_string("unknown");
	return 0;
}
86 
87 char *get_allocator_info_from_tag(mtag_t tag)
88 {
89 	char *allocator = NULL;
90 
91 	run_sql(save_allocator, &allocator,
92 		"select value from mtag_info where tag = %lld and type = %d;",
93 		tag, ALLOCATOR);
94 
95 	return allocator;
96 }
97 
98 static char *get_allocator_info(struct expression *expr, struct smatch_state *state)
99 {
100 	sval_t sval;
101 
102 	if (expr->type != EXPR_ASSIGNMENT)
103 		return NULL;
104 	if (estate_get_single_value(state, &sval))
105 		return get_allocator_info_from_tag(sval.value);
106 
107 	expr = strip_expr(expr->right);
108 	if (expr->type != EXPR_CALL ||
109 	    !expr->fn ||
110 	    expr->fn->type != EXPR_SYMBOL)
111 		return NULL;
112 	return expr_to_str(expr->fn);
113 }
114 
115 static void update_mtag_info(struct expression *expr, mtag_t tag,
116 			     const char *left_name, const char *tag_info,
117 			     struct smatch_state *state)
118 {
119 	char *allocator;
120 
121 	sql_insert_mtag_about(tag, left_name, tag_info);
122 
123 	allocator = get_allocator_info(expr, state);
124 	if (allocator)
125 		sql_insert_mtag_info(tag, ALLOCATOR, allocator);
126 }
127 
128 struct smatch_state *get_mtag_return(struct expression *expr, struct smatch_state *state)
129 {
130 	struct expression *left, *right;
131 	char *left_name, *right_name;
132 	struct symbol *left_sym;
133 	struct range_list *rl;
134 	char buf[256];
135 	mtag_t tag;
136 	sval_t tag_sval;
137 
138 	if (!expr || expr->type != EXPR_ASSIGNMENT || expr->op != '=')
139 		return NULL;
140 	if (!is_fresh_alloc(expr->right))
141 		return NULL;
142 	if (!rl_intersection(estate_rl(state), valid_ptr_rl))
143 		return NULL;
144 
145 	left = strip_expr(expr->left);
146 	right = strip_expr(expr->right);
147 
148 	left_name = expr_to_str_sym(left, &left_sym);
149 	if (!left_name || !left_sym)
150 		return NULL;
151 	right_name = expr_to_str(right);
152 
153 	snprintf(buf, sizeof(buf), "%s %s %s %s", get_filename(), get_function(),
154 		 left_name, right_name);
155 	tag = str_to_mtag(buf);
156 	tag_sval.type = estate_type(state);
157 	tag_sval.uvalue = tag;
158 
159 	rl = rl_filter(estate_rl(state), valid_ptr_rl);
160 	rl = clone_rl(rl);
161 	add_range(&rl, tag_sval, tag_sval);
162 
163 	update_mtag_info(expr, tag, left_name, buf, state);
164 
165 	free_string(left_name);
166 	free_string(right_name);
167 
168 	return alloc_estate_rl(rl);
169 }
170 
171 int get_string_mtag(struct expression *expr, mtag_t *tag)
172 {
173 	mtag_t xor;
174 
175 	if (expr->type != EXPR_STRING || !expr->string)
176 		return 0;
177 
178 	/* I was worried about collisions so I added a xor */
179 	xor = str_to_mtag("__smatch string");
180 	*tag = str_to_mtag(expr->string->data);
181 	*tag = *tag ^ xor;
182 
183 	return 1;
184 }
185 
186 int get_toplevel_mtag(struct symbol *sym, mtag_t *tag)
187 {
188 	char buf[256];
189 
190 	if (!sym)
191 		return 0;
192 
193 	if (!sym->ident ||
194 	    !(sym->ctype.modifiers & MOD_TOPLEVEL))
195 		return 0;
196 
197 	snprintf(buf, sizeof(buf), "%s %s",
198 		 (sym->ctype.modifiers & MOD_STATIC) ? get_filename() : "extern",
199 		 sym->ident->name);
200 	*tag = str_to_mtag(buf);
201 	return 1;
202 }
203 
204 bool get_symbol_mtag(struct symbol *sym, mtag_t *tag)
205 {
206 	char buf[256];
207 
208 	if (!sym || !sym->ident)
209 		return false;
210 
211 	if (get_toplevel_mtag(sym, tag))
212 		return true;
213 
214 	if (get_param_num_from_sym(sym) >= 0)
215 		return false;
216 
217 	snprintf(buf, sizeof(buf), "%s %s %s",
218 		 get_filename(), get_function(), sym->ident->name);
219 	*tag = str_to_mtag(buf);
220 	return true;
221 }
222 
223 static void global_variable(struct symbol *sym)
224 {
225 	mtag_t tag;
226 
227 	if (!get_toplevel_mtag(sym, &tag))
228 		return;
229 
230 	sql_insert_mtag_about(tag,
231 			      sym->ident->name,
232 			      (sym->ctype.modifiers & MOD_STATIC) ? get_filename() : "extern");
233 }
234 
235 static int get_array_mtag_offset(struct expression *expr, mtag_t *tag, int *offset)
236 {
237 	struct expression *array, *offset_expr;
238 	struct symbol *type;
239 	sval_t sval;
240 	int start_offset;
241 
242 	if (!is_array(expr))
243 		return 0;
244 
245 	array = get_array_base(expr);
246 	if (!array)
247 		return 0;
248 	type = get_type(array);
249 	if (!type || type->type != SYM_ARRAY)
250 		return 0;
251 	type = get_real_base_type(type);
252 	if (!type_bytes(type))
253 		return 0;
254 
255 	if (!expr_to_mtag_offset(array, tag, &start_offset))
256 		return 0;
257 
258 	offset_expr = get_array_offset(expr);
259 	if (!get_value(offset_expr, &sval))
260 		return 0;
261 	*offset = start_offset + sval.value * type_bytes(type);
262 
263 	return 1;
264 }
265 
266 struct range_list *swap_mtag_seed(struct expression *expr, struct range_list *rl)
267 {
268 	char buf[256];
269 	char *name;
270 	sval_t sval;
271 	mtag_t tag;
272 
273 	if (!rl_to_sval(rl, &sval))
274 		return rl;
275 	if (sval.type->type != SYM_PTR || sval.uvalue != MTAG_SEED)
276 		return rl;
277 
278 	name = expr_to_str(expr);
279 	snprintf(buf, sizeof(buf), "%s %s %s", get_filename(), get_function(), name);
280 	free_string(name);
281 	tag = str_to_mtag(buf);
282 	sval.value = tag;
283 	return alloc_rl(sval, sval);
284 }
285 
286 int create_mtag_alias(mtag_t tag, struct expression *expr, mtag_t *new)
287 {
288 	char buf[256];
289 	int lines_from_start;
290 	char *str;
291 
292 	/*
293 	 * We need the alias to be unique.  It's not totally required that it
294 	 * be the same from one DB build to then next, but it makes debugging
295 	 * a bit simpler.
296 	 *
297 	 */
298 
299 	if (!cur_func_sym)
300 		return 0;
301 
302 	lines_from_start = expr->pos.line - cur_func_sym->pos.line;
303 	str = expr_to_str(expr);
304 	snprintf(buf, sizeof(buf), "%lld %d %s", tag, lines_from_start, str);
305 	free_string(str);
306 
307 	*new = str_to_mtag(buf);
308 	sql_insert_mtag_alias(tag, *new);
309 
310 	return 1;
311 }
312 
313 static int get_implied_mtag_offset(struct expression *expr, mtag_t *tag, int *offset)
314 {
315 	struct smatch_state *state;
316 	struct symbol *type;
317 	sval_t sval;
318 
319 	type = get_type(expr);
320 	if (!type_is_ptr(type))
321 		return 0;
322 	state = get_extra_state(expr);
323 	if (!state || !estate_get_single_value(state, &sval) || sval.value == 0)
324 		return 0;
325 
326 	*tag = sval.uvalue & ~MTAG_OFFSET_MASK;
327 	*offset = sval.uvalue & MTAG_OFFSET_MASK;
328 	return 1;
329 }
330 
331 /*
332  * The point of this function is to give you the mtag and the offset so
333  * you can look up the data in the DB.  It takes an expression.
334  *
335  * So say you give it "foo->bar".  Then it would give you the offset of "bar"
336  * and the implied value of "foo".  Or if you lookup "*foo" then the offset is
337  * zero and we look up the implied value of "foo.  But if the expression is
338  * foo, then if "foo" is a global variable, then we get the mtag and the offset
339  * is zero.  If "foo" is a local variable, then there is nothing to look up in
340  * the mtag_data table because that's handled by smatch_extra.c to this returns
341  * false.
342  *
343  */
344 int expr_to_mtag_offset(struct expression *expr, mtag_t *tag, int *offset)
345 {
346 	*tag = 0;
347 	*offset = 0;
348 
349 	if (bits_in_pointer != 64)
350 		return 0;
351 
352 	expr = strip_expr(expr);
353 	if (!expr)
354 		return 0;
355 
356 	if (is_array(expr))
357 		return get_array_mtag_offset(expr, tag, offset);
358 
359 	if (expr->type == EXPR_PREOP && expr->op == '*') {
360 		expr = strip_expr(expr->unop);
361 		return get_implied_mtag_offset(expr, tag, offset);
362 	} else if (expr->type == EXPR_DEREF) {
363 		int tmp, tmp_offset = 0;
364 
365 		while (expr->type == EXPR_DEREF) {
366 			tmp = get_member_offset_from_deref(expr);
367 			if (tmp < 0)
368 				return 0;
369 			tmp_offset += tmp;
370 			expr = strip_expr(expr->deref);
371 		}
372 		*offset = tmp_offset;
373 		if (expr->type == EXPR_PREOP && expr->op == '*') {
374 			expr = strip_expr(expr->unop);
375 
376 			if (get_implied_mtag_offset(expr, tag, &tmp_offset)) {
377 				// FIXME:  look it up recursively?
378 				if (tmp_offset)
379 					return 0;
380 				return 1;
381 			}
382 			return 0;
383 		} else if (expr->type == EXPR_SYMBOL) {
384 			return get_symbol_mtag(expr->symbol, tag);
385 		}
386 		return 0;
387 	} else if (expr->type == EXPR_SYMBOL) {
388 		return get_symbol_mtag(expr->symbol, tag);
389 	}
390 	return 0;
391 }
392 
393 /*
394  * This function takes an address and returns an sval.  Let's take some
395  * example things you might pass to it:
396  * foo->bar:
397  *   If we were only called from smatch_math, we wouldn't need to bother with
398  *   this because it's already been looked up in smatch_extra.c but this is
399  *   also called from other places so we have to check smatch_extra.c.
400  * &foo
401  *   If "foo" is global return the mtag for "foo".
402  * &foo.bar
403  *   If "foo" is global return the mtag for "foo" + the offset of ".bar".
404  * It also handles string literals.
405  *
406  */
407 int get_mtag_sval(struct expression *expr, sval_t *sval)
408 {
409 	struct symbol *type;
410 	mtag_t tag;
411 	int offset = 0;
412 
413 	if (bits_in_pointer != 64)
414 		return 0;
415 
416 	expr = strip_expr(expr);
417 
418 	type = get_type(expr);
419 	if (!type_is_ptr(type))
420 		return 0;
421 	/*
422 	 * There are several options:
423 	 *
424 	 * If the expr is a string literal, that's an address/mtag.
425 	 * SYM_ARRAY and SYM_FN are mtags.  There are "&foo" type addresses.
426 	 * And there are saved pointers "p = &foo;"
427 	 *
428 	 */
429 
430 	if (expr->type == EXPR_STRING && get_string_mtag(expr, &tag))
431 		goto found;
432 
433 	if (expr->type == EXPR_SYMBOL &&
434 	    (type->type == SYM_ARRAY || type->type == SYM_FN) &&
435 	    get_toplevel_mtag(expr->symbol, &tag))
436 		goto found;
437 
438 	if (expr->type == EXPR_PREOP && expr->op == '&') {
439 		expr = strip_expr(expr->unop);
440 		if (expr_to_mtag_offset(expr, &tag, &offset))
441 			goto found;
442 		return 0;
443 	}
444 
445 	if (get_implied_mtag_offset(expr, &tag, &offset))
446 		goto found;
447 
448 	return 0;
449 found:
450 	if (offset >= MTAG_OFFSET_MASK)
451 		return 0;
452 
453 	sval->type = type;
454 	sval->uvalue = tag | offset;
455 
456 	return 1;
457 }
458 
459 void register_mtag(int id)
460 {
461 	my_id = id;
462 
463 
464 	/*
465 	 * The mtag stuff only works on 64 systems because we store the
466 	 * information in the pointer itself.
467 	 * bit 63   : set for alias mtags
468 	 * bit 62-12: mtag hash
469 	 * bit 11-0 : offset
470 	 *
471 	 */
472 
473 	add_hook(&global_variable, BASE_HOOK);
474 }
475