要理解kernel的trace_event机制,最好的方式是读懂 samples/trace_events/trace_events_sample的例子代码。这个例子的代码非常难以读懂。我是按照以下三个阶段读懂的。怎么申明和定义的,怎么注册到系统中的,怎么使能的。
在这个例子里面有一个非常有用的宏,这一个宏展开即完成了trace_event的申明和定义,看例子时,我们只看foo_bar这个trace_event的流程。
首先,我们需要看trace-events-sample.h这个头文件,这个头文件比较怪异。可以被多次包含。在头文件的最后还包含了另外一个头文件trace/define_trace.h。包含这个头文件是魔法开始的地方。
下面开始分析trace-events-sample.h头文件
trace-events-sample.h头文件在开始linux/tracepoint.h头文件,注意这个头文件只能被包含一次。不能重复包含 被 ifndef endif保护。
紧接着写了一个宏如下。
TRACE_EVENT(foo_bar,
TP_PROTO(const char *foo, int bar, const int *lst,
const char *string, const struct cpumask *mask),
TP_ARGS(foo, bar, lst, string, mask),
TP_STRUCT__entry(
__array( char, foo, 10 )
__field( int, bar )
__dynamic_array(int, list, __length_of(lst))
__string( str, string )
__bitmask( cpus, num_possible_cpus() )
),
TP_fast_assign(
strlcpy(__entry->foo, foo, 10);
__entry->bar = bar;
memcpy(__get_dynamic_array(list), lst,
__length_of(lst) * sizeof(int));
__assign_str(str, string);
__assign_bitmask(cpus, cpumask_bits(mask), num_possible_cpus());
),
TP_printk("foo %s %d %s %s %s %s (%s)", __entry->foo, __entry->bar,
/*
* Notice here the use of some helper functions. This includes:
*
* __print_symbolic( variable, { value, "string" }, ... ),
*
* The variable is tested against each value of the { } pair. If
* the variable matches one of the values, then it will print the
* string in that pair. If non are matched, it returns a string
* version of the number (if __entry->bar == 7 then "7" is returned).
*/
__print_symbolic(__entry->bar,
{ 0, "zero" },
{ TRACE_SAMPLE_FOO, "TWO" },
{ TRACE_SAMPLE_BAR, "FOUR" },
{ TRACE_SAMPLE_ZOO, "EIGHT" },
{ 10, "TEN" }
),
/*
* __print_flags( variable, "delim", { value, "flag" }, ... ),
*
* This is similar to __print_symbolic, except that it tests the bits
* of the value. If ((FLAG & variable) == FLAG) then the string is
* printed. If more than one flag matches, then each one that does is
* also printed with delim in between them.
* If not all bits are accounted for, then the not found bits will be
* added in hex format: 0x506 will show BIT2|BIT4|0x500
*/
__print_flags(__entry->bar, "|",
{ 1, "BIT1" },
{ 2, "BIT2" },
{ 4, "BIT3" },
{ 8, "BIT4" }
),
/*
* __print_array( array, len, element_size )
*
* This prints out the array that is defined by __array in a nice format.
*/
__print_array(__get_dynamic_array(list),
__get_dynamic_array_len(list) / sizeof(int),
sizeof(int)),
__get_str(str), __get_bitmask(cpus))
);
在linux/tracepoint.h头文件中对这个宏有定义,宏展开方式按照linux/tracepoint.h中定义的展开
#define TRACE_EVENT(name, proto, args, struct, assign, print) \
DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
再看DECLARE_TRACE宏展开
#define DECLARE_TRACE(name, proto, args) \
__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \
cpu_online(raw_smp_processor_id()), \
PARAMS(void *__data, proto), \
PARAMS(__data, args))
看 __DECLARE_TRACE展开
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
extern struct tracepoint __tracepoint_##name; \
static inline void trace_##name(proto) \ //这个函数是在需要trace的地方插入,例如例子中在需要trace的地方写了trace_foo_bar
{ \
if (static_key_false(&__tracepoint_##name.key)) \
__DO_TRACE(&__tracepoint_##name, \
TP_PROTO(data_proto), \
TP_ARGS(data_args), \
TP_CONDITION(cond), 0); \
if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \
rcu_read_lock_sched_notrace(); \
rcu_dereference_sched(__tracepoint_##name.funcs);\
rcu_read_unlock_sched_notrace(); \
} \
} \
__DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \
PARAMS(cond), PARAMS(data_proto), PARAMS(data_args)) \
static inline int \
register_trace_##name(void (*probe)(data_proto), void *data) \ //注册探测函数,tracepoint会用到 trace_event中不用这个方式注册
{ \
return tracepoint_probe_register(&__tracepoint_##name, \
(void *)probe, data); \
} \
static inline int \
register_trace_prio_##name(void (*probe)(data_proto), void *data,\
int prio) \
{ \
return tracepoint_probe_register_prio(&__tracepoint_##name, \
(void *)probe, data, prio); \
} \
static inline int \
unregister_trace_##name(void (*probe)(data_proto), void *data) \
{ \
return tracepoint_probe_unregister(&__tracepoint_##name,\
(void *)probe, data); \
} \
static inline void \
check_trace_callback_type_##name(void (*cb)(data_proto)) \
{ \
} \
static inline bool \
trace_##name##_enabled(void) \//使能探测,同样单独使用tracepoint时需要,在使用trace_event时不需要使用这个函数。
{ \
return static_key_false(&__tracepoint_##name.key); \
}
#define DEFINE_TRACE_FN(name, reg, unreg) \
static const char __tpstrtab_##name[] \
__attribute__((section("__tracepoints_strings"))) = #name; \
struct tracepoint __tracepoint_##name \ //tracepoint 结构体定义。
__attribute__((section("__tracepoints"))) = \
{ __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\
static struct tracepoint * const __tracepoint_ptr_##name __used \
__attribute__((section("__tracepoints_ptrs"))) = \
&__tracepoint_##name;
#define DEFINE_TRACE(name) \
DEFINE_TRACE_FN(name, NULL, NULL);
以上都很常规,真正魔法开始的地方在下面
#define TRACE_INCLUDE_PATH . //定义头文件路径,在define_trace.h中会用到
/*
* TRACE_INCLUDE_FILE is not needed if the filename and TRACE_SYSTEM are equal
*/
#define TRACE_INCLUDE_FILE trace-events-sample //定义头文件名称,在define_trace.h中会用到
#include <trace/define_trace.h> //包含trace/define_trace.h
trace/define_trace.h文件在开始就对TRACE_EVENT宏进行了重定义
#undef TRACE_EVENT
#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
DEFINE_TRACE(name)
接下来这一段如下,
#ifndef TRACE_INCLUDE_PATH
# define __TRACE_INCLUDE(system) <trace/events/system.h>
# define UNDEF_TRACE_INCLUDE_PATH
#else
# define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h)
#endif
# define TRACE_INCLUDE(system) __TRACE_INCLUDE(system)
/* Let the trace headers be reread */
#define TRACE_HEADER_MULTI_READ
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
这段代码的大意 就是重新包含TRACE_INCLUDE_FIELE也就是重新包含trace-events-sample.h,重新展开TRACE_EVENT宏。这段展开是tracepoint定义
define_trace.h包trace_events.h 这个文件是trace_event的核心。
#include <trace/trace_events.h>
在trace_events.h 头文件中,在一开始又重新定义了 TRACE_EVENT宏。
#undef TRACE_EVENT
#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
DECLARE_EVENT_CLASS(name, \
PARAMS(proto), \
PARAMS(args), \
PARAMS(tstruct), \
PARAMS(assign), \
PARAMS(print)); \
DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
在剩下的代码中会数次重新定义DECLARE_EVENT_CLASS 和 DEFINE_EVENT宏,然后重新包含trace-events-sample.h。对TRACE_EVENT宏进行7次展开。中间的展开很简单就不一一说明了。
直接看最后一次展开。
#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
_TRACE_PERF_PROTO(call, PARAMS(proto)); \
static char print_fmt_##call[] = print; \
static struct trace_event_class __used __refdata event_class_##call = { \
.system = TRACE_SYSTEM_STRING, \
.define_fields = trace_event_define_fields_##call, \
.fields = LIST_HEAD_INIT(event_class_##call.fields),\
.raw_init = trace_event_raw_init, \
.probe = trace_event_raw_event_##call, \
.reg = trace_event_reg, \
_TRACE_PERF_INIT(call) \
};
#undef DEFINE_EVENT
#define DEFINE_EVENT(template, call, proto, args) \
\
static struct trace_event_call __used event_##call = { \
.class = &event_class_##template, \
{ \
.tp = &__tracepoint_##call, \
}, \
.event.funcs = &trace_event_type_funcs_##template, \
.print_fmt = print_fmt_##template, \
.flags = TRACE_EVENT_FL_TRACEPOINT, \
}; \
static struct trace_event_call __used \
__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call
这一次宏展开,定义了一个trace_event_call的结构体。并且定义编译时将结构体存放在section("_ftrace_events"))段。当模块被加载时,内核会读取该段的数据,获取trace_event_call结构体。然后将trace_event_call注册到系统中。 下篇介绍,如何注册及使能。