xref: /linux/include/linux/tracepoint.h (revision 164666fa66669d437bdcc8d5f1744a2aee73be41)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 #ifndef _LINUX_TRACEPOINT_H
3 #define _LINUX_TRACEPOINT_H
4 
5 /*
6  * Kernel Tracepoint API.
7  *
8  * See Documentation/trace/tracepoints.rst.
9  *
10  * Copyright (C) 2008-2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
11  *
12  * Heavily inspired from the Linux Kernel Markers.
13  */
14 
15 #include <linux/smp.h>
16 #include <linux/srcu.h>
17 #include <linux/errno.h>
18 #include <linux/types.h>
19 #include <linux/cpumask.h>
20 #include <linux/rcupdate.h>
21 #include <linux/tracepoint-defs.h>
22 #include <linux/static_call.h>
23 
24 struct module;
25 struct tracepoint;
26 struct notifier_block;
27 
/*
 * struct trace_eval_map - one (system, string, value) triple.
 *
 * NOTE(review): presumably associates a symbolic name with its numeric value
 * for a given trace system; the users live outside this header - confirm.
 */
struct trace_eval_map {
	const char *system;		/* system name string */
	const char *eval_string;	/* textual form of the entry */
	unsigned long eval_value;	/* numeric form of the entry */
};
33 
/* Priority passed by tracepoint_probe_register_may_exist(). */
#define TRACEPOINT_DEFAULT_PRIO	10

/* SRCU domain taken by the rcuidle tracepoint path; defined elsewhere. */
extern struct srcu_struct tracepoint_srcu;

/*
 * Probe (un)registration entry points, defined outside this header.
 * @tp is the tracepoint, @probe the callback (type-erased to void *) and
 * @data the cookie stored alongside it; the *_prio variants additionally take
 * an explicit priority.
 */
int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe,
				   void *data, int prio);
int tracepoint_probe_register_prio_may_exist(struct tracepoint *tp,
					     void *probe, void *data,
					     int prio);
int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data);
48 static inline int
49 tracepoint_probe_register_may_exist(struct tracepoint *tp, void *probe,
50 				    void *data)
51 {
52 	return tracepoint_probe_register_prio_may_exist(tp, probe, data,
53 							TRACEPOINT_DEFAULT_PRIO);
54 }
/*
 * Declared here, defined elsewhere.
 * NOTE(review): presumably calls @fct once per kernel tracepoint, handing
 * @priv through unchanged - confirm against the implementation.
 */
void for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
				void *priv);
58 
#ifdef CONFIG_MODULES
/* List node that carries a module pointer. */
struct tp_module {
	struct list_head list;
	struct module *mod;
};

/* Module-aware helpers; implemented outside this header. */
bool trace_module_has_bad_taint(struct module *mod);
int register_tracepoint_module_notifier(struct notifier_block *nb);
int unregister_tracepoint_module_notifier(struct notifier_block *nb);
#else /* !CONFIG_MODULES */
/* Without module support the helpers collapse to constant stubs. */
static inline bool trace_module_has_bad_taint(struct module *mod)
{
	return false;
}

static inline int register_tracepoint_module_notifier(struct notifier_block *nb)
{
	return 0;
}

static inline int unregister_tracepoint_module_notifier(struct notifier_block *nb)
{
	return 0;
}
#endif /* CONFIG_MODULES */
84 
/*
 * tracepoint_synchronize_unregister() has to run after the last tracepoint
 * probe has been unregistered and before module exit completes, so that no
 * caller can still be executing a probe when it is freed.
 *
 * With CONFIG_TRACEPOINTS it waits on tracepoint_srcu and then on RCU;
 * without it there is nothing to wait for and the function is empty.
 */
#ifndef CONFIG_TRACEPOINTS
static inline void tracepoint_synchronize_unregister(void)
{
}
#else
static inline void tracepoint_synchronize_unregister(void)
{
	synchronize_srcu(&tracepoint_srcu);
	synchronize_rcu();
}
#endif /* CONFIG_TRACEPOINTS */
100 
#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
/* Syscall tracepoint (un)registration hooks; defined outside this header. */
int syscall_regfunc(void);
void syscall_unregfunc(void);
#endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */
105 
/* Forward a comma-separated list as a single macro argument. */
#if !defined(PARAMS)
#define PARAMS(args...) args
#endif

/* Both expand to nothing in this header. */
#define TRACE_DEFINE_ENUM(x)
#define TRACE_DEFINE_SIZEOF(x)
112 
113 #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
114 static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
115 {
116 	return offset_to_ptr(p);
117 }
118 
119 #define __TRACEPOINT_ENTRY(name)					\
120 	asm("	.section \"__tracepoints_ptrs\", \"a\"		\n"	\
121 	    "	.balign 4					\n"	\
122 	    "	.long 	__tracepoint_" #name " - .		\n"	\
123 	    "	.previous					\n")
124 #else
125 static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
126 {
127 	return *p;
128 }
129 
130 #define __TRACEPOINT_ENTRY(name)					 \
131 	static tracepoint_ptr_t __tracepoint_ptr_##name __used		 \
132 	__section("__tracepoints_ptrs") = &__tracepoint_##name
133 #endif
134 
135 #endif /* _LINUX_TRACEPOINT_H */
136 
137 /*
138  * Note: we keep the TRACE_EVENT and DECLARE_TRACE outside the include
139  *  file ifdef protection.
140  *  This is due to the way trace events work. If a file includes two
141  *  trace event headers under one "CREATE_TRACE_POINTS" the first include
142  *  will override the TRACE_EVENT and break the second include.
143  */
144 
145 #ifndef DECLARE_TRACE
146 
/* Pass-through wrappers that only label their argument lists. */
#define TP_PROTO(args...)	args
#define TP_ARGS(args...)	args
#define TP_CONDITION(args...)	args

/*
 * Individual subsystems may have a separate configuration to enable their
 * tracepoints. By default, this file creates the tracepoints whenever
 * CONFIG_TRACEPOINTS is defined. A subsystem that wants to keep its
 * tracepoints from being created can define NOTRACE before including the
 * tracepoint headers.
 */
#ifdef CONFIG_TRACEPOINTS
#ifndef NOTRACE
#define TRACEPOINTS_ENABLED
#endif
#endif
161 
162 #ifdef TRACEPOINTS_ENABLED
163 
/*
 * __DO_TRACE_CALL - hand @args to the probes of tracepoint @name.
 *
 * With static calls: load ->funcs of the tracepoint and, if it is non-NULL,
 * call static_call(tp_func_<name>) with the first entry's ->data followed by
 * @args. Without static calls: call __traceiter_<name>() with NULL data.
 */
#ifdef CONFIG_HAVE_STATIC_CALL
#define __DO_TRACE_CALL(name, args)					\
	do {								\
		struct tracepoint_func *it_func_ptr =			\
			rcu_dereference_raw((&__tracepoint_##name)->funcs); \
		void *__data;						\
									\
		if (it_func_ptr) {					\
			__data = (it_func_ptr)->data;			\
			static_call(tp_func_##name)(__data, args);	\
		}							\
	} while (0)
#else /* !CONFIG_HAVE_STATIC_CALL */
#define __DO_TRACE_CALL(name, args)	__traceiter_##name(NULL, args)
#endif /* CONFIG_HAVE_STATIC_CALL */
179 
/*
 * __DO_TRACE - run the probes of tracepoint @name when @cond holds.
 *
 * Note that a false @cond makes the macro return from the *enclosing*
 * function. Preemption is disabled around the call; when @rcuidle is set the
 * call is additionally bracketed by an SRCU read section on tracepoint_srcu
 * and rcu_irq_enter_irqson()/rcu_irq_exit_irqson().
 *
 * it_func[0] is never NULL because there is at least one element in the array
 * when the array itself is non NULL.
 */
#define __DO_TRACE(name, args, cond, rcuidle)				\
	do {								\
		int __maybe_unused __idx = 0;				\
									\
		if (!(cond))						\
			return;						\
									\
		/* SRCU cannot be used from NMI context */		\
		WARN_ON_ONCE((rcuidle) && in_nmi());			\
									\
		/* keep the SRCU and sched-RCU usage consistent */	\
		preempt_disable_notrace();				\
									\
		/*							\
		 * sched-RCU does not work from the idle path, so	\
		 * rcuidle callers go through SRCU instead.		\
		 */							\
		if (rcuidle) {						\
			__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
			rcu_irq_enter_irqson();				\
		}							\
									\
		__DO_TRACE_CALL(name, TP_ARGS(args));			\
									\
		/* undo the rcuidle setup in reverse order */		\
		if (rcuidle) {						\
			rcu_irq_exit_irqson();				\
			srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
		}							\
									\
		preempt_enable_notrace();				\
	} while (0)
215 
/*
 * __DECLARE_TRACE_RCU - emit trace_<name>_rcuidle(), which runs __DO_TRACE
 * with rcuidle set to 1 when the tracepoint's static key is on. Expands to
 * nothing when MODULE is defined.
 */
#ifdef MODULE
#define __DECLARE_TRACE_RCU(name, proto, args, cond)
#else
#define __DECLARE_TRACE_RCU(name, proto, args, cond)			\
	static inline void trace_##name##_rcuidle(proto)		\
	{								\
		if (static_key_false(&__tracepoint_##name.key)) {	\
			__DO_TRACE(name,				\
				TP_ARGS(args),				\
				TP_CONDITION(cond), 1);			\
		}							\
	}
#endif /* MODULE */
228 
/*
 * Make sure the alignment of the structure in the __tracepoints section will
 * not add unwanted padding between the beginning of the section and the
 * structure. Force alignment to the same alignment as the section start.
 *
 * __DECLARE_TRACE emits the inline API of tracepoint @name: trace_<name>(),
 * the optional trace_<name>_rcuidle(), register_trace_<name>(),
 * register_trace_prio_<name>(), unregister_trace_<name>(),
 * check_trace_callback_type_<name>() and trace_<name>_enabled().
 *
 * When lockdep is enabled, trace_<name>() always performs the RCU portion of
 * the tracepoint code, whether or not tracing is on - but only while the
 * condition is true, because of the interaction with idle instrumentation.
 * This exposes RCU issues triggered with tracepoints even when the
 * tracepoint is off; that code has no purpose other than poking RCU a bit.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
	extern int __traceiter_##name(data_proto);			\
	DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name);	\
	extern struct tracepoint __tracepoint_##name;			\
									\
	static inline void trace_##name(proto)				\
	{								\
		if (static_key_false(&__tracepoint_##name.key)) {	\
			__DO_TRACE(name,				\
				TP_ARGS(args),				\
				TP_CONDITION(cond), 0);			\
		}							\
		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {		\
			rcu_read_lock_sched_notrace();			\
			rcu_dereference_sched(__tracepoint_##name.funcs);\
			rcu_read_unlock_sched_notrace();		\
		}							\
	}								\
									\
	__DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args),		\
			    PARAMS(cond))				\
									\
	static inline int						\
	register_trace_##name(void (*probe)(data_proto), void *data)	\
	{								\
		return tracepoint_probe_register(&__tracepoint_##name,	\
						(void *)probe, data);	\
	}								\
									\
	static inline int						\
	register_trace_prio_##name(void (*probe)(data_proto), void *data,\
				   int prio)				\
	{								\
		return tracepoint_probe_register_prio(&__tracepoint_##name, \
					      (void *)probe, data, prio); \
	}								\
									\
	static inline int						\
	unregister_trace_##name(void (*probe)(data_proto), void *data)	\
	{								\
		return tracepoint_probe_unregister(&__tracepoint_##name,\
						(void *)probe, data);	\
	}								\
									\
	/* empty body: exists only to type-check a callback */		\
	static inline void						\
	check_trace_callback_type_##name(void (*cb)(data_proto))	\
	{								\
	}								\
									\
	static inline bool						\
	trace_##name##_enabled(void)					\
	{								\
		return static_key_false(&__tracepoint_##name.key);	\
	}
287 
/*
 * We have no guarantee that gcc and the linker won't up-align the tracepoint
 * structures, so we create an array of pointers that will be used for iteration
 * on the tracepoints.
 *
 * DEFINE_TRACE_FN defines, for tracepoint @_name: its name string, the
 * struct tracepoint itself (with @_reg/@_unreg as regfunc/unregfunc), its
 * __tracepoints_ptrs entry, the __traceiter_<name>() iterator and the
 * tp_func_<name> static call.
 *
 * The iterator walks the ->funcs array, calling each entry's ->func with
 * that entry's ->data followed by @args, until it reaches an entry whose
 * ->func is NULL. It always returns 0.
 */
#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args)		\
	static const char __tpstrtab_##_name[]				\
	__section("__tracepoints_strings") = #_name;			\
	extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name);	\
	int __traceiter_##_name(void *__data, proto);			\
	struct tracepoint __tracepoint_##_name	__used			\
	__section("__tracepoints") = {					\
		.name = __tpstrtab_##_name,				\
		.key = STATIC_KEY_INIT_FALSE,				\
		.static_call_key = &STATIC_CALL_KEY(tp_func_##_name),	\
		.static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \
		.iterator = &__traceiter_##_name,			\
		.regfunc = _reg,					\
		.unregfunc = _unreg,					\
		.funcs = NULL };					\
	__TRACEPOINT_ENTRY(_name);					\
	int __traceiter_##_name(void *__data, proto)			\
	{								\
		struct tracepoint_func *it_func_ptr;			\
		void *it_func;						\
									\
		it_func_ptr =						\
			rcu_dereference_raw((&__tracepoint_##_name)->funcs); \
		/* no probes attached: nothing to call */		\
		if (!it_func_ptr)					\
			return 0;					\
									\
		do {							\
			it_func = READ_ONCE((it_func_ptr)->func);	\
			__data = (it_func_ptr)->data;			\
			((void(*)(void *, proto))(it_func))(__data, args); \
		} while ((++it_func_ptr)->func);			\
									\
		return 0;						\
	}								\
	DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name);
326 
/* DEFINE_TRACE - DEFINE_TRACE_FN with NULL regfunc/unregfunc hooks. */
#define DEFINE_TRACE(name, proto, args)					\
	DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));

/*
 * Export everything that belongs to tracepoint @name: the struct tracepoint,
 * its iterator and its static call, either GPL-only or unrestricted.
 */
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)				\
	EXPORT_SYMBOL_GPL(__tracepoint_##name);				\
	EXPORT_SYMBOL_GPL(__traceiter_##name);				\
	EXPORT_STATIC_CALL_GPL(tp_func_##name)

#define EXPORT_TRACEPOINT_SYMBOL(name)					\
	EXPORT_SYMBOL(__tracepoint_##name);				\
	EXPORT_SYMBOL(__traceiter_##name);				\
	EXPORT_STATIC_CALL(tp_func_##name)

338 
339 
340 #else /* !TRACEPOINTS_ENABLED */
/*
 * Tracepoints compiled out: every generated helper becomes an inline stub.
 *
 * trace_<name>() and trace_<name>_rcuidle() do nothing, the registration
 * helpers fail with -ENOSYS, check_trace_callback_type_<name>() is empty and
 * trace_<name>_enabled() is always false.
 *
 * register_trace_prio_<name>() is stubbed as well so that the set of
 * generated helpers matches the tracepoints-enabled variant and callers of
 * the priority API still build when tracepoints are disabled.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
	static inline void trace_##name(proto)				\
	{ }								\
	static inline void trace_##name##_rcuidle(proto)		\
	{ }								\
	static inline int						\
	register_trace_##name(void (*probe)(data_proto),		\
			      void *data)				\
	{								\
		return -ENOSYS;						\
	}								\
	static inline int						\
	register_trace_prio_##name(void (*probe)(data_proto),		\
				   void *data, int prio)		\
	{								\
		return -ENOSYS;						\
	}								\
	static inline int						\
	unregister_trace_##name(void (*probe)(data_proto),		\
				void *data)				\
	{								\
		return -ENOSYS;						\
	}								\
	static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \
	{								\
	}								\
	static inline bool						\
	trace_##name##_enabled(void)					\
	{								\
		return false;						\
	}

/* Nothing to define or export when tracepoints are compiled out. */
#define DEFINE_TRACE_FN(name, reg, unreg, proto, args)
#define DEFINE_TRACE(name, proto, args)
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
#define EXPORT_TRACEPOINT_SYMBOL(name)
371 
372 #endif /* TRACEPOINTS_ENABLED */
373 
#ifdef CONFIG_TRACING
/* Attributes that keep a variable and place it in "__tracepoint_str". */
#define __tracepoint_string	__used __section("__tracepoint_str")

/**
 * tracepoint_string - register constant persistent string to trace system
 * @str: a constant persistent string that will be referenced in tracepoints
 *
 * If constant strings are being used in tracepoints, it is faster and
 * more efficient to just save the pointer to the string and reference
 * that with a printf "%s" instead of saving the string in the ring buffer
 * and wasting space and time.
 *
 * The problem with the above approach is that userspace tools that read
 * the binary output of the trace buffers do not have access to the string.
 * Instead they just show the address of the string which is not very
 * useful to users.
 *
 * With tracepoint_string(), the string will be registered to the tracing
 * system and exported to userspace via the debugfs/tracing/printk_formats
 * file that maps the string address to the string text. This way userspace
 * tools that read the binary buffers have a way to map the pointers to
 * the ASCII strings they represent.
 *
 * The @str used must be a constant string and persistent as it would not
 * make sense to show a string that no longer exists. But it is still fine
 * to be used with modules, because when modules are unloaded, if they
 * had tracepoints, the ring buffers are cleared too. As long as the string
 * does not change during the life of the module, it is fine to use
 * tracepoint_string() within a module.
 *
 * Evaluates to the pointer stored in a static variable tagged with
 * __tracepoint_string.
 */
#define tracepoint_string(str)						\
	({								\
		static const char *___tp_str __tracepoint_string = str; \
		___tp_str;						\
	})
#else /* !CONFIG_TRACING */
/*
 * tracepoint_string() is used to save the string address for userspace
 * tracing tools. When tracing isn't configured, there's no need to save
 * anything: the macro is just its argument and the attribute is empty.
 */
# define __tracepoint_string
# define tracepoint_string(str) str
#endif /* CONFIG_TRACING */
417 
/*
 * DECLARE_TRACE - declare tracepoint @name through __DECLARE_TRACE. The
 * condition handed down is that the current CPU is online, and the probe
 * prototype is "void *__data" followed by @proto.
 */
#define DECLARE_TRACE(name, proto, args)				\
	__DECLARE_TRACE(name,						\
			PARAMS(proto),					\
			PARAMS(args),					\
			cpu_online(raw_smp_processor_id()),		\
			PARAMS(void *__data, proto))

/* As DECLARE_TRACE, with @cond and-ed onto the CPU-online check. */
#define DECLARE_TRACE_CONDITION(name, proto, args, cond)		\
	__DECLARE_TRACE(name,						\
			PARAMS(proto),					\
			PARAMS(args),					\
			cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \
			PARAMS(void *__data, proto))

/* Both expand to nothing in this header. */
#define TRACE_EVENT_FLAGS(event, flag)

#define TRACE_EVENT_PERF_PERM(event, expr...)
431 
432 #endif /* DECLARE_TRACE */
433 
434 #ifndef TRACE_EVENT
435 /*
436  * For use with the TRACE_EVENT macro:
437  *
438  * We define a tracepoint, its arguments, its printk format
439  * and its 'fast binary record' layout.
440  *
441  * Firstly, name your tracepoint via TRACE_EVENT(name, ...): the
442  * 'subsystem_event' notation is fine.
443  *
444  * Think about this whole construct as the
445  * 'trace_sched_switch() function' from now on.
446  *
447  *
448  *  TRACE_EVENT(sched_switch,
449  *
450  *	*
451  *	* A function has a regular function arguments
452  *	* prototype, declare it via TP_PROTO():
453  *	*
454  *
455  *	TP_PROTO(struct rq *rq, struct task_struct *prev,
456  *		 struct task_struct *next),
457  *
458  *	*
459  *	* Define the call signature of the 'function'.
460  *	* (Design sidenote: we use this instead of a
461  *	*  TP_PROTO1/TP_PROTO2/TP_PROTO3 ugliness.)
462  *	*
463  *
464  *	TP_ARGS(rq, prev, next),
465  *
466  *	*
467  *	* Fast binary tracing: define the trace record via
468  *	* TP_STRUCT__entry(). You can think about it like a
469  *	* regular C structure local variable definition.
470  *	*
471  *	* This is how the trace record is structured and will
472  *	* be saved into the ring buffer. These are the fields
473  *	* that will be exposed to user-space in
474  *	* /sys/kernel/debug/tracing/events/<*>/format.
475  *	*
476  *	* The declared 'local variable' is called '__entry'
477  *	*
478  *	* __field(pid_t, prev_pid) is equivalent to a standard declaration:
479  *	*
480  *	*	pid_t	prev_pid;
481  *	*
482  *	* __array(char, prev_comm, TASK_COMM_LEN) is equivalent to:
483  *	*
484  *	*	char	prev_comm[TASK_COMM_LEN];
485  *	*
486  *
487  *	TP_STRUCT__entry(
488  *		__array(	char,	prev_comm,	TASK_COMM_LEN	)
489  *		__field(	pid_t,	prev_pid			)
490  *		__field(	int,	prev_prio			)
491  *		__array(	char,	next_comm,	TASK_COMM_LEN	)
492  *		__field(	pid_t,	next_pid			)
493  *		__field(	int,	next_prio			)
494  *	),
495  *
496  *	*
497  *	* Assign the entry into the trace record, by embedding
498  *	* a full C statement block into TP_fast_assign(). You
499  *	* can refer to the trace record as '__entry' -
500  *	* otherwise you can put arbitrary C code in here.
501  *	*
502  *	* Note: this C code will execute every time a trace event
503  *	* happens, on an active tracepoint.
504  *	*
505  *
506  *	TP_fast_assign(
507  *		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
508  *		__entry->prev_pid	= prev->pid;
509  *		__entry->prev_prio	= prev->prio;
510  *		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
511  *		__entry->next_pid	= next->pid;
512  *		__entry->next_prio	= next->prio;
513  *	),
514  *
515  *	*
516  *	* Formatted output of a trace record via TP_printk().
517  *	* This is how the tracepoint will appear under ftrace
518  *	* plugins that make use of this tracepoint.
519  *	*
520  *	* (raw-binary tracing won't actually perform this step.)
521  *	*
522  *
523  *	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
524  *		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
525  *		__entry->next_comm, __entry->next_pid, __entry->next_prio),
526  *
527  * );
528  *
529  * This macro construct is thus used for the regular printk format
530  * tracing setup, it is used to construct a function pointer based
531  * tracepoint callback (this is used by programmatic plugins and
532  * can also be used by generic instrumentation like SystemTap), and
533  * it is also used to expose a structured trace record in
534  * /sys/kernel/debug/tracing/events/.
535  *
536  * A set of (un)registration functions can be passed to the variant
537  * TRACE_EVENT_FN to perform any (un)registration work.
538  */
539 
/*
 * In this header an event class produces nothing, and every event-defining
 * macro reduces to a plain tracepoint declaration: the struct/assign/print
 * and reg/unreg arguments are dropped, and only the *_CONDITION / *_COND
 * forms forward their condition.
 */
#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)

#define DEFINE_EVENT(template, name, proto, args)			\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)	\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

#define DEFINE_EVENT_PRINT(template, name, proto, args, print)		\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond)	\
	DECLARE_TRACE_CONDITION(name, PARAMS(proto),			\
				PARAMS(args), PARAMS(cond))

#define TRACE_EVENT(name, proto, args, struct, assign, print)		\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

#define TRACE_EVENT_FN(name, proto, args, struct,			\
		       assign, print, reg, unreg)			\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

#define TRACE_EVENT_FN_COND(name, proto, args, cond, struct,		\
			    assign, print, reg, unreg)			\
	DECLARE_TRACE_CONDITION(name, PARAMS(proto),			\
				PARAMS(args), PARAMS(cond))

#define TRACE_EVENT_CONDITION(name, proto, args, cond,			\
			      struct, assign, print)			\
	DECLARE_TRACE_CONDITION(name, PARAMS(proto),			\
				PARAMS(args), PARAMS(cond))

/* Both expand to nothing in this header. */
#define TRACE_EVENT_FLAGS(event, flag)

#define TRACE_EVENT_PERF_PERM(event, expr...)
569 
/*
 * DECLARE_EVENT_NOP - declare a no-op tracepoint: trace_<name>() has an
 * empty body and trace_<name>_enabled() always returns false.
 */
#define DECLARE_EVENT_NOP(name, proto, args)				\
	static inline void trace_##name(proto)				\
	{								\
	}								\
	static inline bool trace_##name##_enabled(void)			\
	{								\
		return false;						\
	}

/* No-op counterparts of the event macros; only name and proto matter. */
#define TRACE_EVENT_NOP(name, proto, args, struct, assign, print)	\
	DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))

#define DECLARE_EVENT_CLASS_NOP(name, proto, args, tstruct, assign, print)

#define DEFINE_EVENT_NOP(template, name, proto, args)			\
	DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))

584 
585 #endif /* ifndef TRACE_EVENT (see note above) */
586