本文共 12550 字,大约阅读时间需要 41 分钟。
MIPS在使用ftrace功能之前,需要添加-pg编译选项。该选项使编译器在每个函数的入口处插入对_mcount()函数的调用;_mcount函数的具体实现由各个架构自行提供。随后,在链接过程中,这些调用被解析到_mcount函数的地址。
MIPS中,_mcount函数的定义如下:
#ifdef CONFIG_DYNAMIC_FTRACE

/*
 * ftrace_caller / _mcount: one body, two names.  The _mcount label sits on
 * the first instruction of ftrace_caller, so both symbols share an address.
 *
 * NOTE: the #endif that closes CONFIG_DYNAMIC_FTRACE is outside this excerpt.
 */
NESTED(ftrace_caller, PT_SIZE, ra)
	.globl _mcount
_mcount:
EXPORT_SYMBOL(_mcount)
	b	ftrace_stub		/* as built: branch straight to the return stub */
#ifdef CONFIG_32BIT
	 addiu sp,sp,8			/* instruction following the branch (32-bit) */
#else
	 nop				/* instruction following the branch (64-bit) */
#endif

	/* ftrace_caller + 8: execution continues here once the two instructions above are skipped */
	MCOUNT_SAVE_REGS
#ifdef KBUILD_MCOUNT_RA_ADDRESS
	PTR_S	MCOUNT_RA_ADDRESS_REG, PT_R12(sp)
#endif

	PTR_SUBU a0, ra, 8		/* a0 = ra - 8 */
	PTR_LA	t1, _stext
	sltu	t2, a0, t1		/* t2 = (a0 < _stext) */
	PTR_LA	t1, _etext
	sltu	t3, t1, a0		/* t3 = (_etext < a0) */
	or	t1, t2, t3		/* t1 != 0 when a0 is outside [_stext, _etext] */
	beqz	t1, ftrace_call		/* inside that range: keep a0 as computed */
	 nop
	/* outside that range: move a0 back by a further 16 or 12 bytes */
#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT)
	PTR_SUBU a0, a0, 16
#else
	PTR_SUBU a0, a0, 12
#endif

	.globl ftrace_call
ftrace_call:
	nop				/* placeholder instruction at the ftrace_call label */
	 move	a1, AT			/* a1 = AT */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	.globl ftrace_graph_call
ftrace_graph_call:
	nop				/* placeholder instruction at the ftrace_graph_call label */
	 nop
#endif

	MCOUNT_RESTORE_REGS
	.globl ftrace_stub
ftrace_stub:
	RETURN_BACK
	END(ftrace_caller)
可以看到,_mcount的定义与ftrace_caller的定义位于同一入口。因此,内核编译完成后,反汇编代码会出现:
ffffffff802019b0:get_system_type():
ffffffff802019b0: 03e0082d move at,ra
ffffffff802019b4: 0c088904 jal ffffffff80222410
通过System.map可发现_mcount与ftrace_caller的地址一致。
内核在启动过程中会对ftrace进行初始化,入口为:start_kernel() -> ftrace_init()(见init/main.c)。
void __init ftrace_init(void){ extern unsigned long __start_mcount_loc[]; extern unsigned long __stop_mcount_loc[]; //__start_mcount_loc与__stp_mcount_loc变量由内核编译的过程中进行赋值 unsigned long count, flags; int ret; local_irq_save(flags); ret = ftrace_dyn_arch_init(); //该接口主要用来创建两条指令码,以及将_mcount()函数入口的第一条指令设置为nop指令 local_irq_restore(flags); if (ret) goto failed; count = __stop_mcount_loc - __start_mcount_loc; //计算内核代码中共有多少位置为jal ftrace_caller指令 if (!count) { pr_info("ftrace: No functions to be traced?\n"); goto failed; } pr_info("ftrace: allocating %ld entries in %ld pages\n", count, count / ENTRIES_PER_PAGE + 1); last_ftrace_enabled = ftrace_enabled = 1; ret = ftrace_process_locs(NULL, __start_mcount_loc, __stop_mcount_loc); //记录_mcount函数所在的位置,主要通过ftrace_page以及dyn_ftrace结构体来完成 set_ftrace_early_filters(); return ;failed: ftrace_disabled = 1;}int __init ftrace_dyn_arch_init(void){ ftrace_dyn_arch_init_insns(); //创建两条指令码,分别为:la v1, _mcount; jal ftrace_caller+8 ftrace_modify_code(MCOUNT_ADDR, INSN_NOP); //#define MCOUNT_ADDR ((unsigned long)(_mcount));MCOUNT_ADDR为_mcount()函数的入口地址 //#define INSN_NOP 0x00000000 //将_mcount()函数入口指令b ftrace_stub修改为nop指令 return 0;}//全局变量static unsigned int insn_jal_ftrace_caller __read_mostly;static unsigned int insn_la_mcount[2] __read_mostly;static inline void ftrace_dyn_arch_init_insns(void){ u32 *buf; unsigned int v1; v1 = 3; buf = (u32 *)&insn_la_mcount[0]; UASM_i_LA(&buf, v1, MCOUNT_ADDR); //buf存放指令码:la v1, _mcount buf = (u32 *)&insn_jal_ftrace_caller; uasm_i_jal(&buf, (FTRAE_ADDR + 8) & JUMP_RANGE_MASK); //#define FTRACE_ADDR ((unsigned long)ftrace_caller);FTRACE_ADDR为ftrace_caller()函数的入口地址 //buf中存放指令码:jal ftrace_caller+8}/*//addr为_mcount()函数的入口地址,rs为寄存器编号void UASM_i_LA(u32 **buf, unsigned int rs, long addr){ UASM_i_LA_mostly(buf, rs, addr); if (uasm_rel_lo(addr)) { if (!uasm_in_compat_space_p(addr)) uasm_i_daddiu(buf, rs, rs, uasm_rel_lo(addr)); else uasm_i_addiu(buf, rs, rs, uasm_rel_lo(addr)); }}//void UASM_i_LA_mostly(u32 **buf, unsigned 
int rs, long addr){ if (!uasm_in_compat_space_p(addr)) { //如果地址为64位,则按照该分支处理 uasm_i_lui(buf, rs, uasm_rel_highest(addr)); if (uasm_rel_higher(addr)) uasm_i_daddiu(buf, rs, rs, uasm_rel_higher(addr)); if (uasm_rel_hi(addr)) { uasm_i_dsll(buf, rs, rs, 16); uasm_i_daddiu(buf, rs, rs, uasm_rel_hi(addr)); uasm_i_dsll(buf, rs, rs, 16); } else uasm_i_dsll32(buf, rs, rs, 0); } else //如果地址为32位,则按照该分支处理 uasm_i_lui(buf, rs, uasm_rel_hi(addr));}int uasm_i_compat_space_p(long addr){ //检测地址是否为32位地址,是返回1,否返回0 return addr == (int)addr;}//关于上述接口中所调用的uasm_i_xxx()接口的声明如下:#define Ip_u1s2(op) void uasm_i##op(u32 **buf, unsigned int a, unsigned int b)#define I_u1s2(op) Ip_u1s2(op) { build_insn(buf, insn##op, a, b); } \ UASM_EXPORT_SYMBOL(uasm_i##op);#define UASM_EXPORT_SYMBOL EXPORT_SYMBOL//因此,uasm_i_lui接口的实际原型为:void uasm_i_lui(u32 **buf, unsigned int a, unsigned int b){ build_insn(buf, insn_lui, a, b);}EXPORT_SYMBOL(uasm_i_lui);//宏定义表示为:I_u1s2(_lui);//由上可知,实际执行函数为build_insn()接口,该接口操作前,需实例化数组对象insn_table,如下://枚举类型,即为每条操作码设置一个索引号enum opcode { ..., insn_lui, ...};struct insn { u32 match; enum fields fields;};//insn_table数组对象static const struct insn insn_table[insn_invalid] = { ..., [insn_lui] = {M(lui_op, 0, 0, 0, 0, 0), RT | SIMM}; ...};#define M(a, b ,c, d, e, f) ((a) << OP_SH | (b) << RS_SH | (c) << RT_SH | (d) << RD_SH | (e) << RE_SH | (f) << FUNC_SH)//假设当前指令操作码为lui,则该指令为:struct insn insn_table[insn_lui];&insn_table[insn_lui]->match = 15 << 26;//match属性用于存放操作码&insn_table[insn_lui]->fields = RT | SIMM;//fields属性用于存放src与dst//构建指令码,即opc dst/src src/dststatic void build_insn(u32 **buf, enum opcode opc, ...){ const struct insn *ip; va_list ap; u32 op; if (opc < 0 || opc >= insn_invalid || (opc == insn_daddiu && r4k_daddiu_bug()) || (insn_table[opc].match == 0 && insn_table[opc].fields == 0)) panic("Unsupported Micro-assembler instruction %d", opc); ip = &insn_table[opc]; //获取insn_table数组中opc索引所对应的struct insn结构体对象 op = ip->match; //操作码与dst或src进行组合,拼成指令码 va_start(ap, opc); ... 
if (ip->fields & RT) op |= build_rt(va_arg(ap, u32)); ... if (ip->fields & SIMM) op |= build_simm(va_arg(ap, u32)); ... va_end(ap); **buf = op; //赋值操作 (*buf)++;}static inline u32 build_rt(u32 arg){ WARN(arg & ~RT_MASK, KERN_WARNING "Micro-assembler filed overflow\n"); return (arg & RT_MASK) << RT_SH; //#define RT_MASK 0x1f //#define RT_SH 16}static inline u32 build_simm(s32 arg){ WARN(arg > 0x7fff || arg < -0x8000, KERN_WARNING "Micro-assembler field overflow\n"); return arg & 0xffff;}*///综上,可知替换指令已经创建完成//替换指令static int ftrace_modify_code(unsigned long ip, unsigned int new_code){ int faulted; mm_segment_t old_fs; safe_store_code(new_code, ip, faulted); if (unlikely(faulted)) return -EFAULT; old_fs = get_fs(); set_fs(get_ds()); flush_icache_range(ip, ip + 8); set_fs(old_fs); return 0;}#define safe_store_code(src, dst, error) safe_store(STR(sw), src, dst, error)#define safe_store(store, src, dst, error)do { asm volatile ( "1: " store " %[tmp_src], 0(%[tmp_dst])\n" //store变量为sw指令,该指令作用为: //%[tmp_dst] = %[tmp_src] //%[addr],取addr地址处的值 " li %[tmp_err], 0\n" //将0写入tmp_err地址处 "2: .insn\n" ".section .fixup, \"ax\"\n" "3: li %[tmp_err], 1\n" " j 2b\n" ".previous\n" ".section\t__ex_table,\"a\"\n\t" STR(PTR) "\t1b, 3b\n\t" //#define PTR .word ".previous\n" : [tmp_err] "=r" (error) //输出 : [tmp_dst] "r" (dst), [tmp_src] "r" (src) //输入 : "memory" //操作均在内存中完成 );} while (0)//注意,汇编中src实际为&INSN_NOP//flush_icache_range():static void local_loongson3_flush_icache_range(unsigned long start, unsigned long end){ asm volatile ("\tsynci 0($0)\n"::); //同步指令数据缓存}static int ftrace_process_locs(struct module *mod, unsigned long *start, unsigned long *end){ struct ftrace_page *start_pg; struct ftrace_page *pg; struct dyn_ftrace *rec; unsigned long count; unsigned long *p; unsigned long addr; unsigned long flags = 0; int ret = -ENOMEM; count = end - start; if (!count) return 0; sort(start, count, sizeof(*start), ftrace_cmp_ips, NULL); //对记录的_mcount地址进行排序 start_pg = ftrace_allocate_pages(count); 
//创建ftrace_page结构体对象 //该过程中,创建的组织结构可能为: /* *|-------------| *| ftrace_page | *|+++++++++++++| *| next |------>|-------------| *|+++++++++++++| | ftrace_page | *| records |-->| *|+++++++++++++| | *| | |---------------------------------| * | dyn_ftrace0 | dtn_ftrace1 | ... | * |---------------------------------| */ //dyn_ftrace结构体记录_mcount函数被调用的位置 if (!start_pg) return -ENOMEM; mutex_lock(&ftrace_lock); if (!mod) { WARN_ON(ftrace_pages || ftrace_pages_start); ftrace_pages = ftrace_pages_start = start_pg; } else { if (!ftrace_pages) goto out; if (WARN_ON(ftrace_pages->next)) { while (ftrace_pages->next) ftrace_pages = ftrace_pages->next; } ftrace_pages->next = start_pg; //单向循环链表 } p = start; pg = start_pg; while (p < end) { addr = ftrace_call_adjust(*p++); if (!addr) continue; if (pg->index == pg->size) { //当前ftrace_page空间无法存放所有_macount信息时,将使用下一个ftrace_page if (WARN_ON(!pg->next)) break; pg = pg->next; } rec = &pg->records[pg->index++]; //从索引0开始填充ftrac_page结构体中的dyn_ftrace结构体对象 rec->ip = addr; //记录_mcount函数被调用的位置 } WARN_ON(pg->next); ftrace_pages = pg; if (!mod) local_irq_save(flags); ftrace_update_code(mod, start_pg); //替换_mcount函数被调用的位置的指令 if (!mod) local_irq_restore(flags); ret = 0;out: mutex_unlock(&ftrace_lock); return ret;}static struct ftrace_page *ftrace_allocate_pages(unsigned long num_to_init){ struct ftrace_page *start_pg; struct ftrace_page *pg; int order; int cnt; if (!num_to_init) return 0; start_pg = pg = kzalloc(szieof(*pg), GFP_KERNEL); if (!pg) return NULL; for (;;) { cnt = ftrace_allocate_records(pg, num_to_init); //计算可包含records个数,该过程中以连续的空闲页分配地址空间 //cnt <= num_to_init if (cnt < 0) goto free_pages; num_to_init -= cnt; if (!num_to_init) break; //判断当前ftrace_page是否包含所有records,如果没有则创建新的ftrace_page来包含 pg->next = kzalloc(sizeof(*pg), GFP_KERNEL); if (!pg->next) goto free_pages; pg = pg->next; } return start_pg;free_pages: pg = start_pg; while (pg) { order = get_count_order(pg->size / ENTRIES_PER_PAGE); free_pages((unsigned long)pg->records, order); 
start_pg = pg->next; kfree(pg); pg =start_pg; } pr_info("ftrace: FAILED to allocate memory for functions\n"); return NULL;}static int ftrace_allocate_records(struct ftrace_page *pg, int count){ int order; int cnt; if (WARN_ON(!count)) return -EINVAL; order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE)); //获取幂数 while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE) order--; //对幂数进行调整again: pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); //分配空闲页,ftrace_page结构体中records指向该页首地址 if (!pg->records) { if (!order) return -ENOMEM; order >>= 1; goto again; } cnt = (PAGE_SIZE << order) / ENTRY_SIZE; pg->size = cnt; if (cnt > count) cnt = count; return cnt;}static inline unsigned long ftrace_call_adjust(unsigned long addr){ return addr;}static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs){ struct ftrace_page *pg; struct dyn_ftrace *p; u64 start, stop; unsigned long update_cnt = 0; unsigned long rec_flags = 0; int i; start = ftrace_now(raw_smp_processor_id()); if (mod) rec_flags |= FTRACE_FL_DISABLED; for (pg = new_pgs; pg; pg = pg->next) { //遍历所有的ftrace_page for (i = 0; i < pg->next; i++) { //遍历ftrace_page中所有的dyn_ftrace结构体 if (unlikely(ftrace_disabled)) return -1; p = &pg->records[i]; p->flags = rec_flags; if (!__is_defined(CC_USING_NOP_MCOUNT) && !ftrace_code_disable(mod, p)) //将调用_mcout()接口的指令修改为nop指令 break; update_cnt++; } } stop = ftrace_now(raw_smp_processor_id()); ftrace_update_time = stop - start; ftrace_update_tot_cnt += update_cnt; return 0;}static int ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec){ int ret; if (unlikely(ftrace_disabled)) return 0; ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR); //将调用_mcount函数的指令替换为nop指令 if (ret) { ftrace_bug_type = FTRACE_BUG_INIT; ftrace_bug(ret, rec); return 0; } return 1;}int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr){ unsigned int new; unsigned long ip = rec->ip; new = core_kernel_text(ip) ? 
INSN_NOP : INSN_B_1F; //判断ip地址的位置,来选择替换的指令 //如果ip地址处于内核而非模块中,则使用INSN_NOP,否则使用后者 #ifdef CONFIG_64BIT return ftrace_modify_code(ip, new);#else return ftrace_modify_code_2(ip, new, INSN_NOP);#endif}int notrace core_kernel_text(unsigned long addr){ if (addr >= (unsigned long)_stext && add < (unsigned long)_etext) return 1; if (system_state < SYSTEM_RUNNING && init_kernel_text(addr)) return 1; return 0;}int init_kernel_text(unsigned long addr){ if (addr >= (unsigned long)_sinittext && addr < (unsigned long)_einittext) return 1; return 0;}//利用ftrace设置启动阶段时的过滤点/*static void __init set_ftrace_early_filters(void){ if (ftrace_filter_buf[0]) ftrace_set_early_filter(&global_ops, ftrace_filter_buf, 1); ...}static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;static int __init set_ftrace_filter(char *str){ ftrace_filter_param = true; strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); return 1;}__setup("ftrace_filter=", set_ftrace_filter);*///该类过滤点需要在启动命令行中设置,否则为空
转载地址:http://ppxii.baihongyu.com/