分类
未分类

kernel trace_event 宏展开注解

要理解kernel的trace_event机制,最好的方式是读懂 samples/trace_events/ 目录下 trace-events-sample 的例子代码。这个例子的代码非常难以读懂,我是按照以下三个阶段读懂的:怎么声明和定义的,怎么注册到系统中的,怎么使能的。
在这个例子里面有一个非常有用的宏,这一个宏展开即完成了trace_event的声明和定义。看例子时,我们只看foo_bar这个trace_event的流程。
首先,我们需要看trace-events-sample.h这个头文件,这个头文件比较怪异。可以被多次包含。在头文件的最后还包含了另外一个头文件trace/define_trace.h。包含这个头文件是魔法开始的地方。
下面开始分析trace-events-sample.h头文件
trace-events-sample.h头文件在开始处包含了linux/tracepoint.h头文件。注意linux/tracepoint.h这个头文件被 #ifndef / #endif 保护,只能被包含一次,不能重复包含。
紧接着写了一个宏如下。

/*
 * Sample event "foo_bar".
 *
 * Takes (foo, bar, lst, string, mask). The recorded entry holds a
 * 10-byte copy of foo, the int bar, a dynamic int array copied from lst,
 * the string, and a CPU bitmask sized by num_possible_cpus().
 * TP_printk formats all of them; the format has seven conversions and
 * seven arguments follow it.
 */
TRACE_EVENT(foo_bar,

        TP_PROTO(const char *foo, int bar, const int *lst,
                 const char *string, const struct cpumask *mask),

        TP_ARGS(foo, bar, lst, string, mask),

        TP_STRUCT__entry(
                __array(        char,   foo,    10              )
                __field(        int,    bar                     )
                __dynamic_array(int,    list,   __length_of(lst))
                __string(       str,    string                  )
                __bitmask(      cpus,   num_possible_cpus()     )
        ),

        TP_fast_assign(
                strlcpy(__entry->foo, foo, 10);
                __entry->bar    = bar;
                memcpy(__get_dynamic_array(list), lst,
                       __length_of(lst) * sizeof(int));
                __assign_str(str, string);
                __assign_bitmask(cpus, cpumask_bits(mask), num_possible_cpus());
        ),

        TP_printk("foo %s %d %s %s %s %s (%s)", __entry->foo, __entry->bar,

/*
 * Notice here the use of some helper functions. This includes:
 *
 *  __print_symbolic( variable, { value, "string" }, ... ),
 *
 *    The variable is tested against each value of the { } pair. If
 *    the variable matches one of the values, then it will print the
 *    string in that pair. If none are matched, it returns a string
 *    version of the number (if __entry->bar == 7 then "7" is returned).
 */
                  __print_symbolic(__entry->bar,
                                   { 0, "zero" },
                                   { TRACE_SAMPLE_FOO, "TWO" },
                                   { TRACE_SAMPLE_BAR, "FOUR" },
                                   { TRACE_SAMPLE_ZOO, "EIGHT" },
                                   { 10, "TEN" }
                          ),

/*
 *  __print_flags( variable, "delim", { value, "flag" }, ... ),
 *
 *    This is similar to __print_symbolic, except that it tests the bits
 *    of the value. If ((FLAG & variable) == FLAG) then the string is
 *    printed. If more than one flag matches, then each one that does is
 *    also printed with delim in between them.
 *    If not all bits are accounted for, then the not found bits will be
 *    added in hex format. With the table below (2 -> "BIT2", 4 -> "BIT3"),
 *    0x506 = 0x500 | 0x4 | 0x2 will show BIT2|BIT3|0x500
 */
                  __print_flags(__entry->bar, "|",
                                { 1, "BIT1" },
                                { 2, "BIT2" },
                                { 4, "BIT3" },
                                { 8, "BIT4" }
                          ),
/*
 *  __print_array( array, len, element_size )
 *
 *    This prints out the given array in a nice format. Here it is applied
 *    to the dynamic array "list"; the element count is the byte length
 *    from __get_dynamic_array_len() divided by sizeof(int).
 */
                  __print_array(__get_dynamic_array(list),
                                __get_dynamic_array_len(list) / sizeof(int),
                                sizeof(int)),
                  __get_str(str), __get_bitmask(cpus))
);

在linux/tracepoint.h头文件中对这个宏有定义,宏展开方式按照linux/tracepoint.h中定义的展开

/*
 * TRACE_EVENT as defined for the first pass over the event header:
 * only name, proto and args are forwarded to DECLARE_TRACE. The struct,
 * assign and print arguments are not used by this expansion.
 */
#define TRACE_EVENT(name, proto, args, struct, assign, print)   \
        DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

再看DECLARE_TRACE宏展开

/*
 * DECLARE_TRACE - forward to __DECLARE_TRACE with the extra arguments filled in:
 *   cond       = cpu_online(raw_smp_processor_id())
 *   data_proto = proto with "void *__data" prepended
 *   data_args  = args with "__data" prepended
 */
#define DECLARE_TRACE(name, proto, args)                                \
        __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),              \
                        cpu_online(raw_smp_processor_id()),             \
                        PARAMS(void *__data, proto),                    \
                        PARAMS(__data, args))

看 __DECLARE_TRACE展开

/*
 * __DECLARE_TRACE - declare tracepoint "name" and emit its inline helpers.
 *
 * Expands to an extern declaration of __tracepoint_<name> plus:
 *   trace_<name>()                      call-site hook
 *   register_trace_<name>()             wraps tracepoint_probe_register()
 *   register_trace_prio_<name>()        wraps tracepoint_probe_register_prio()
 *   unregister_trace_<name>()           wraps tracepoint_probe_unregister()
 *   check_trace_callback_type_<name>()  empty body
 *   trace_<name>_enabled()              returns the state of the static key
 *
 * Annotations are written as block comments placed before the line
 * continuation backslash. A "//" comment after the backslash would stop
 * the line from being continued and cut the macro definition short.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
        extern struct tracepoint __tracepoint_##name;                   \
        /* Call-site hook: insert it where the event should fire, */    \
        /* e.g. trace_foo_bar() in the sample. */                       \
        static inline void trace_##name(proto)                          \
        {                                                               \
                if (static_key_false(&__tracepoint_##name.key))         \
                        __DO_TRACE(&__tracepoint_##name,                \
                                TP_PROTO(data_proto),                   \
                                TP_ARGS(data_args),                     \
                                TP_CONDITION(cond), 0);                 \
                if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {             \
                        rcu_read_lock_sched_notrace();                  \
                        rcu_dereference_sched(__tracepoint_##name.funcs);\
                        rcu_read_unlock_sched_notrace();                \
                }                                                       \
        }                                                               \
        __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args),          \
                PARAMS(cond), PARAMS(data_proto), PARAMS(data_args))    \
        /* Registers a probe function. Used when working with a bare */ \
        /* tracepoint; trace_event does not register this way. */       \
        static inline int                                               \
        register_trace_##name(void (*probe)(data_proto), void *data)    \
        {                                                               \
                return tracepoint_probe_register(&__tracepoint_##name,  \
                                                (void *)probe, data);   \
        }                                                               \
        static inline int                                               \
        register_trace_prio_##name(void (*probe)(data_proto), void *data,\
                                   int prio)                            \
        {                                                               \
                return tracepoint_probe_register_prio(&__tracepoint_##name, \
                                              (void *)probe, data, prio); \
        }                                                               \
        static inline int                                               \
        unregister_trace_##name(void (*probe)(data_proto), void *data)  \
        {                                                               \
                return tracepoint_probe_unregister(&__tracepoint_##name,\
                                                (void *)probe, data);   \
        }                                                               \
        static inline void                                              \
        check_trace_callback_type_##name(void (*cb)(data_proto))        \
        {                                                               \
        }                                                               \
        /* Returns whether the tracepoint's static key is set; it */    \
        /* does not enable anything itself. Per the author's note it */ \
        /* is for bare tracepoint use and not needed with trace_event. */ \
        static inline bool                                              \
        trace_##name##_enabled(void)                                    \
        {                                                               \
                return static_key_false(&__tracepoint_##name.key);      \
        }
/*
 * DEFINE_TRACE_FN - define the storage behind tracepoint "name".
 *
 * Emits three objects, each placed in its own linker section:
 *   __tpstrtab_<name>        the name string, in "__tracepoints_strings"
 *   __tracepoint_<name>      the struct tracepoint, in "__tracepoints"
 *   __tracepoint_ptr_<name>  a pointer to that struct, in "__tracepoints_ptrs"
 *
 * reg and unreg are stored in the struct tracepoint initializer.
 *
 * The annotation is a block comment placed before the line continuation
 * backslash. A "//" comment after the backslash would stop the line from
 * being continued and cut the macro definition short.
 */
#define DEFINE_TRACE_FN(name, reg, unreg)                                \
        static const char __tpstrtab_##name[]                            \
        __attribute__((section("__tracepoints_strings"))) = #name;       \
        /* Definition of the struct tracepoint itself. */                \
        struct tracepoint __tracepoint_##name                            \
        __attribute__((section("__tracepoints"))) =                      \
                { __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\
        static struct tracepoint * const __tracepoint_ptr_##name __used  \
        __attribute__((section("__tracepoints_ptrs"))) =                 \
                &__tracepoint_##name;

/* DEFINE_TRACE - DEFINE_TRACE_FN with NULL passed for both reg and unreg. */
#define DEFINE_TRACE(name)                                              \
        DEFINE_TRACE_FN(name, NULL, NULL);

以上都很常规,真正魔法开始的地方在下面

#define TRACE_INCLUDE_PATH .        //directory of this header; consumed by define_trace.h
/*
 * TRACE_INCLUDE_FILE is not needed if the filename and TRACE_SYSTEM are equal
 */
#define TRACE_INCLUDE_FILE trace-events-sample          //name of this header; consumed by define_trace.h
#include <trace/define_trace.h>           //pull in trace/define_trace.h

trace/define_trace.h文件在开始就对TRACE_EVENT宏进行了重定义

/*
 * Drop the previous TRACE_EVENT and redefine it so that each event now
 * expands to DEFINE_TRACE(name). Every argument other than name is
 * ignored by this expansion.
 */
#undef TRACE_EVENT
#define TRACE_EVENT(name, proto, args, tstruct, assign, print)  \
        DEFINE_TRACE(name)

接下来这一段如下,

/*
 * Build the #include operand for the event header.
 * Without TRACE_INCLUDE_PATH the header is looked up as
 * <trace/events/<system>.h>, and UNDEF_TRACE_INCLUDE_PATH is defined as a
 * marker. With TRACE_INCLUDE_PATH the operand is the stringified
 * TRACE_INCLUDE_PATH/<system>.h.
 */
#ifndef TRACE_INCLUDE_PATH
# define __TRACE_INCLUDE(system) <trace/events/system.h>
# define UNDEF_TRACE_INCLUDE_PATH
#else
# define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h)
#endif

/* Extra macro level so the argument is expanded before __TRACE_INCLUDE uses it. */
# define TRACE_INCLUDE(system) __TRACE_INCLUDE(system)

/* Let the trace headers be reread */
#define TRACE_HEADER_MULTI_READ

/* Include the header named by TRACE_INCLUDE_FILE again, under the current TRACE_EVENT definition. */
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)

这段代码的大意就是重新包含TRACE_INCLUDE_FILE,也就是重新包含trace-events-sample.h,重新展开TRACE_EVENT宏。这一次展开得到的是tracepoint的定义。
随后define_trace.h包含trace_events.h,这个文件是trace_event的核心。

#include <trace/trace_events.h>

在trace_events.h 头文件中,在一开始又重新定义了 TRACE_EVENT宏。

/*
 * Redefine TRACE_EVENT as a class/event pair: DECLARE_EVENT_CLASS
 * receives all six arguments, then DEFINE_EVENT is invoked with the
 * event name used as both the template and the call name.
 */
#undef TRACE_EVENT
#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
        DECLARE_EVENT_CLASS(name,                              \
                             PARAMS(proto),                    \
                             PARAMS(args),                     \
                             PARAMS(tstruct),                  \
                             PARAMS(assign),                   \
                             PARAMS(print));                   \
        DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));

在剩下的代码中会数次重新定义DECLARE_EVENT_CLASS 和 DEFINE_EVENT宏,然后重新包含trace-events-sample.h。对TRACE_EVENT宏进行7次展开。中间的展开很简单就不一一说明了。
直接看最后一次展开。

 /*
  * DECLARE_EVENT_CLASS as redefined for the last pass.
  * Emits print_fmt_<call>, initialized from the print argument, and a
  * struct trace_event_class named event_class_<call>. The class refers to
  * trace_event_define_fields_<call> and trace_event_raw_event_<call>,
  * which this snippet does not define.
  */
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)         \
 _TRACE_PERF_PROTO(call, PARAMS(proto));                                                \
 static char print_fmt_##call[] = print;                                                                \
 static struct trace_event_class __used __refdata event_class_##call = {                \
                .system                         = TRACE_SYSTEM_STRING,                      \
                .define_fields                  = trace_event_define_fields_##call,                 \
                .fields                         = LIST_HEAD_INIT(event_class_##call.fields),\
                .raw_init                           = trace_event_raw_init,                             \
                .probe                              = trace_event_raw_event_##call,             \
                .reg                                    = trace_event_reg,                                      \
                _TRACE_PERF_INIT(call)                                                                          \
 };

/*
 * DEFINE_EVENT as redefined for the last pass.
 * Emits a struct trace_event_call named event_<call> that points at
 * event_class_<template>, __tracepoint_<call>,
 * trace_event_type_funcs_<template> and print_fmt_<template>.
 * It also emits the pointer __event_<call>, which refers to that struct.
 * The section attribute is applied to the pointer, so the pointer is what
 * lands in the "_ftrace_events" section.
 */
#undef DEFINE_EVENT
#define DEFINE_EVENT(template, call, proto, args)                       \
                                                                        \
static struct trace_event_call __used event_##call = {                  \
        .class                  = &event_class_##template,              \
        {                                                               \
                .tp                     = &__tracepoint_##call,         \
        },                                                              \
        .event.funcs            = &trace_event_type_funcs_##template,   \
        .print_fmt              = print_fmt_##template,                 \
        .flags                  = TRACE_EVENT_FL_TRACEPOINT,            \
};                                                                      \
static struct trace_event_call __used                                   \
__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call

这一次宏展开,定义了一个trace_event_call结构体event_##call,并通过__attribute__((section("_ftrace_events")))指定在编译时把指向该结构体的指针__event_##call存放在_ftrace_events段。当模块被加载时,内核会读取该段的数据,通过这些指针获取trace_event_call结构体,然后将trace_event_call注册到系统中。下篇介绍如何注册及使能。

发表评论

电子邮件地址不会被公开。 必填项已用*标注