kernel/kernel/trace/bpf_trace.c

   1 /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
   2  *
   3  * This program is free software; you can redistribute it and/or
   4  * modify it under the terms of version 2 of the GNU General Public
   5  * License as published by the Free Software Foundation.
   6  */
   7 #include <linux/kernel.h>
   8 #include <linux/types.h>
   9 #include <linux/slab.h>
  10 #include <linux/bpf.h>
  11 #include <linux/filter.h>
  12 #include <linux/uaccess.h>
  13 #include <linux/ctype.h>
  14 #include "trace.h"
  15
  16 static DEFINE_PER_CPU(int, bpf_prog_active);
  17
  18 /**
  19  * trace_call_bpf - invoke BPF program
  20  * @prog: BPF program
  21  * @ctx: opaque context pointer
  22  *
  23  * kprobe handlers execute BPF programs via this helper.
  24  * Can be used from static tracepoints in the future.
  25  *
  26  * Return: BPF programs always return an integer which is interpreted by
  27  * kprobe handler as:
  28  * 0 - return from kprobe (event is filtered out)
  29  * 1 - store kprobe event into ring buffer
  30  * Other values are reserved and currently alias to 1
  31  */
  32 unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
  33 {
  34         unsigned int ret;
  35
  36         if (in_nmi()) /* not supported yet */
  37                 return 1;
  38
  39         preempt_disable();
  40
  41         if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
  42                 /*
  43                  * since some bpf program is already running on this cpu,
  44                  * don't call into another bpf program (same or different)
  45                  * and don't send kprobe event into ring-buffer,
  46                  * so return zero here
  47                  */
  48                 ret = 0;
  49                 goto out;
  50         }
  51
  52         rcu_read_lock();
  53         ret = BPF_PROG_RUN(prog, ctx);
  54         rcu_read_unlock();
  55
  56  out:
  57         __this_cpu_dec(bpf_prog_active);
  58         preempt_enable();
  59
  60         return ret;
  61 }
  62 EXPORT_SYMBOL_GPL(trace_call_bpf);
  63
  64 static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
  65 {
  66         void *dst = (void *) (long) r1;
  67         int size = (int) r2;
  68         void *unsafe_ptr = (void *) (long) r3;
  69
  70         return probe_kernel_read(dst, unsafe_ptr, size);
  71 }
  72
  73 static const struct bpf_func_proto bpf_probe_read_proto = {
  74         .func           = bpf_probe_read,
  75         .gpl_only       = true,
  76         .ret_type       = RET_INTEGER,
  77         .arg1_type      = ARG_PTR_TO_STACK,
  78         .arg2_type      = ARG_CONST_STACK_SIZE,
  79         .arg3_type      = ARG_ANYTHING,
  80 };
  81
  82 static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
  83 {
  84         /* NMI safe access to clock monotonic */
  85         return ktime_get_mono_fast_ns();
  86 }
  87
  88 static const struct bpf_func_proto bpf_ktime_get_ns_proto = {
  89         .func           = bpf_ktime_get_ns,
  90         .gpl_only       = true,
  91         .ret_type       = RET_INTEGER,
  92 };
  93
  94 /*
  95  * limited trace_printk()
  96  * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed
  97  */
  98 static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
  99 {
 100         char *fmt = (char *) (long) r1;
 101         int mod[3] = {};
 102         int fmt_cnt = 0;
 103         int i;
 104
 105         /*
 106          * bpf_check()->check_func_arg()->check_stack_boundary()
 107          * guarantees that fmt points to bpf program stack,
 108          * fmt_size bytes of it were initialized and fmt_size > 0
 109          */
 110         if (fmt[--fmt_size] != 0)
 111                 return -EINVAL;
 112
 113         /* check format string for allowed specifiers */
 114         for (i = 0; i < fmt_size; i++) {
 115                 if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
 116                         return -EINVAL;
 117
 118                 if (fmt[i] != '%')
 119                         continue;
 120
 121                 if (fmt_cnt >= 3)
 122                         return -EINVAL;
 123
 124                 /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
 125                 i++;
 126                 if (fmt[i] == 'l') {
 127                         mod[fmt_cnt]++;
 128                         i++;
 129                 } else if (fmt[i] == 'p') {
 130                         mod[fmt_cnt]++;
 131                         i++;
 132                         if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0)
 133                                 return -EINVAL;
 134                         fmt_cnt++;
 135                         continue;
 136                 }
 137
 138                 if (fmt[i] == 'l') {
 139                         mod[fmt_cnt]++;
 140                         i++;
 141                 }
 142
 143                 if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x')
 144                         return -EINVAL;
 145                 fmt_cnt++;
 146         }
 147
 148         return __trace_printk(1/* fake ip will not be printed */, fmt,
 149                               mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3,
 150                               mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4,
 151                               mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5);
 152 }
 153
 154 static const struct bpf_func_proto bpf_trace_printk_proto = {
 155         .func           = bpf_trace_printk,
 156         .gpl_only       = true,
 157         .ret_type       = RET_INTEGER,
 158         .arg1_type      = ARG_PTR_TO_STACK,
 159         .arg2_type      = ARG_CONST_STACK_SIZE,
 160 };
 161
 162 static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
 163 {
 164         switch (func_id) {
 165         case BPF_FUNC_map_lookup_elem:
 166                 return &bpf_map_lookup_elem_proto;
 167         case BPF_FUNC_map_update_elem:
 168                 return &bpf_map_update_elem_proto;
 169         case BPF_FUNC_map_delete_elem:
 170                 return &bpf_map_delete_elem_proto;
 171         case BPF_FUNC_probe_read:
 172                 return &bpf_probe_read_proto;
 173         case BPF_FUNC_ktime_get_ns:
 174                 return &bpf_ktime_get_ns_proto;
 175
 176         case BPF_FUNC_trace_printk:
 177                 /*
 178                  * this program might be calling bpf_trace_printk,
 179                  * so allocate per-cpu printk buffers
 180                  */
 181                 trace_printk_init_buffers();
 182
 183                 return &bpf_trace_printk_proto;
 184         default:
 185                 return NULL;
 186         }
 187 }
 188
 189 /* bpf+kprobe programs can access fields of 'struct pt_regs' */
 190 static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type)
 191 {
 192         /* check bounds */
 193         if (off < 0 || off >= sizeof(struct pt_regs))
 194                 return false;
 195
 196         /* only read is allowed */
 197         if (type != BPF_READ)
 198                 return false;
 199
 200         /* disallow misaligned access */
 201         if (off % size != 0)
 202                 return false;
 203
 204         return true;
 205 }
 206
 207 static struct bpf_verifier_ops kprobe_prog_ops = {
 208         .get_func_proto  = kprobe_prog_func_proto,
 209         .is_valid_access = kprobe_prog_is_valid_access,
 210 };
 211
 212 static struct bpf_prog_type_list kprobe_tl = {
 213         .ops    = &kprobe_prog_ops,
 214         .type   = BPF_PROG_TYPE_KPROBE,
 215 };
 216
 217 static int __init register_kprobe_prog_ops(void)
 218 {
 219         bpf_register_prog_type(&kprobe_tl);
 220         return 0;
 221 }
 222 late_initcall(register_kprobe_prog_ops);