These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / kernel / bpf / verifier.c
index 47dcd3a..2e7f7ab 100644 (file)
@@ -199,6 +199,7 @@ struct verifier_env {
        struct verifier_state_list **explored_states; /* search pruning optimization */
        struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
        u32 used_map_cnt;               /* number of used maps */
+       bool allow_ptr_leaks;
 };
 
 /* verbose verifier prints what it's seeing
@@ -213,7 +214,7 @@ static DEFINE_MUTEX(bpf_verifier_lock);
  * verbose() is used to dump the verification trace to the log, so the user
  * can figure out what's wrong with the program
  */
-static void verbose(const char *fmt, ...)
+static __printf(1, 2) void verbose(const char *fmt, ...)
 {
        va_list args;
 
@@ -238,6 +239,15 @@ static const char * const reg_type_str[] = {
        [CONST_IMM]             = "imm",
 };
 
+static const struct {
+       int map_type;
+       int func_id;
+} func_limit[] = {
+       {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
+       {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
+       {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output},
+};
+
 static void print_verifier_state(struct verifier_env *env)
 {
        enum bpf_reg_type t;
@@ -275,7 +285,7 @@ static const char *const bpf_class_string[] = {
        [BPF_ALU64] = "alu64",
 };
 
-static const char *const bpf_alu_string[] = {
+static const char *const bpf_alu_string[16] = {
        [BPF_ADD >> 4]  = "+=",
        [BPF_SUB >> 4]  = "-=",
        [BPF_MUL >> 4]  = "*=",
@@ -299,7 +309,7 @@ static const char *const bpf_ldst_string[] = {
        [BPF_DW >> 3] = "u64",
 };
 
-static const char *const bpf_jmp_string[] = {
+static const char *const bpf_jmp_string[16] = {
        [BPF_JA >> 4]   = "jmp",
        [BPF_JEQ >> 4]  = "==",
        [BPF_JGT >> 4]  = ">",
@@ -530,6 +540,21 @@ static int bpf_size_to_bytes(int bpf_size)
                return -EINVAL;
 }
 
+static bool is_spillable_regtype(enum bpf_reg_type type)
+{
+       switch (type) {
+       case PTR_TO_MAP_VALUE:
+       case PTR_TO_MAP_VALUE_OR_NULL:
+       case PTR_TO_STACK:
+       case PTR_TO_CTX:
+       case FRAME_PTR:
+       case CONST_PTR_TO_MAP:
+               return true;
+       default:
+               return false;
+       }
+}
+
 /* check_stack_read/write functions track spill/fill of registers,
  * stack boundary and alignment are checked in check_mem_access()
  */
@@ -542,9 +567,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
         */
 
        if (value_regno >= 0 &&
-           (state->regs[value_regno].type == PTR_TO_MAP_VALUE ||
-            state->regs[value_regno].type == PTR_TO_STACK ||
-            state->regs[value_regno].type == PTR_TO_CTX)) {
+           is_spillable_regtype(state->regs[value_regno].type)) {
 
                /* register containing pointer is being spilled into stack */
                if (size != BPF_REG_SIZE) {
@@ -635,6 +658,20 @@ static int check_ctx_access(struct verifier_env *env, int off, int size,
        return -EACCES;
 }
 
+static bool is_pointer_value(struct verifier_env *env, int regno)
+{
+       if (env->allow_ptr_leaks)
+               return false;
+
+       switch (env->cur_state.regs[regno].type) {
+       case UNKNOWN_VALUE:
+       case CONST_IMM:
+               return false;
+       default:
+               return true;
+       }
+}
+
 /* check whether memory at (regno + off) is accessible for t = (read | write)
  * if t==write, value_regno is a register which value is stored into memory
  * if t==read, value_regno is a register which will receive the value from memory
@@ -648,6 +685,9 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
        struct verifier_state *state = &env->cur_state;
        int size, err = 0;
 
+       if (state->regs[regno].type == PTR_TO_STACK)
+               off += state->regs[regno].imm;
+
        size = bpf_size_to_bytes(bpf_size);
        if (size < 0)
                return size;
@@ -658,24 +698,42 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
        }
 
        if (state->regs[regno].type == PTR_TO_MAP_VALUE) {
+               if (t == BPF_WRITE && value_regno >= 0 &&
+                   is_pointer_value(env, value_regno)) {
+                       verbose("R%d leaks addr into map\n", value_regno);
+                       return -EACCES;
+               }
                err = check_map_access(env, regno, off, size);
                if (!err && t == BPF_READ && value_regno >= 0)
                        mark_reg_unknown_value(state->regs, value_regno);
 
        } else if (state->regs[regno].type == PTR_TO_CTX) {
+               if (t == BPF_WRITE && value_regno >= 0 &&
+                   is_pointer_value(env, value_regno)) {
+                       verbose("R%d leaks addr into ctx\n", value_regno);
+                       return -EACCES;
+               }
                err = check_ctx_access(env, off, size, t);
                if (!err && t == BPF_READ && value_regno >= 0)
                        mark_reg_unknown_value(state->regs, value_regno);
 
-       } else if (state->regs[regno].type == FRAME_PTR) {
+       } else if (state->regs[regno].type == FRAME_PTR ||
+                  state->regs[regno].type == PTR_TO_STACK) {
                if (off >= 0 || off < -MAX_BPF_STACK) {
                        verbose("invalid stack off=%d size=%d\n", off, size);
                        return -EACCES;
                }
-               if (t == BPF_WRITE)
+               if (t == BPF_WRITE) {
+                       if (!env->allow_ptr_leaks &&
+                           state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL &&
+                           size != BPF_REG_SIZE) {
+                               verbose("attempt to corrupt spilled pointer on stack\n");
+                               return -EACCES;
+                       }
                        err = check_stack_write(state, off, size, value_regno);
-               else
+               } else {
                        err = check_stack_read(state, off, size, value_regno);
+               }
        } else {
                verbose("R%d invalid mem access '%s'\n",
                        regno, reg_type_str[state->regs[regno].type]);
@@ -763,8 +821,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
                return -EACCES;
        }
 
-       if (arg_type == ARG_ANYTHING)
+       if (arg_type == ARG_ANYTHING) {
+               if (is_pointer_value(env, regno)) {
+                       verbose("R%d leaks addr into helper function\n", regno);
+                       return -EACCES;
+               }
                return 0;
+       }
 
        if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY ||
            arg_type == ARG_PTR_TO_MAP_VALUE) {
@@ -833,6 +896,28 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
        return err;
 }
 
+static int check_map_func_compatibility(struct bpf_map *map, int func_id)
+{
+       bool bool_map, bool_func;
+       int i;
+
+       if (!map)
+               return 0;
+
+       for (i = 0; i < ARRAY_SIZE(func_limit); i++) {
+               bool_map = (map->map_type == func_limit[i].map_type);
+               bool_func = (func_id == func_limit[i].func_id);
+               /* only when map & func pair match it can continue.
+                * don't allow any other map type to be passed into
+                * the special func;
+                */
+               if (bool_func && bool_map != bool_func)
+                       return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int check_call(struct verifier_env *env, int func_id)
 {
        struct verifier_state *state = &env->cur_state;
@@ -907,12 +992,18 @@ static int check_call(struct verifier_env *env, int func_id)
                        fn->ret_type, func_id);
                return -EINVAL;
        }
+
+       err = check_map_func_compatibility(map, func_id);
+       if (err)
+               return err;
+
        return 0;
 }
 
 /* check validity of 32-bit and 64-bit arithmetic operations */
-static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn)
+static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
 {
+       struct reg_state *regs = env->cur_state.regs;
        u8 opcode = BPF_OP(insn->code);
        int err;
 
@@ -937,6 +1028,12 @@ static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn)
                if (err)
                        return err;
 
+               if (is_pointer_value(env, insn->dst_reg)) {
+                       verbose("R%d pointer arithmetic prohibited\n",
+                               insn->dst_reg);
+                       return -EACCES;
+               }
+
                /* check dest operand */
                err = check_reg_arg(regs, insn->dst_reg, DST_OP);
                if (err)
@@ -973,6 +1070,11 @@ static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn)
                                 */
                                regs[insn->dst_reg] = regs[insn->src_reg];
                        } else {
+                               if (is_pointer_value(env, insn->src_reg)) {
+                                       verbose("R%d partial copy of pointer\n",
+                                               insn->src_reg);
+                                       return -EACCES;
+                               }
                                regs[insn->dst_reg].type = UNKNOWN_VALUE;
                                regs[insn->dst_reg].map_ptr = NULL;
                        }
@@ -1019,11 +1121,31 @@ static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn)
                        return -EINVAL;
                }
 
+               if ((opcode == BPF_LSH || opcode == BPF_RSH ||
+                    opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
+                       int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
+
+                       if (insn->imm < 0 || insn->imm >= size) {
+                               verbose("invalid shift %d\n", insn->imm);
+                               return -EINVAL;
+                       }
+               }
+
                /* pattern match 'bpf_add Rx, imm' instruction */
                if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 &&
                    regs[insn->dst_reg].type == FRAME_PTR &&
-                   BPF_SRC(insn->code) == BPF_K)
+                   BPF_SRC(insn->code) == BPF_K) {
                        stack_relative = true;
+               } else if (is_pointer_value(env, insn->dst_reg)) {
+                       verbose("R%d pointer arithmetic prohibited\n",
+                               insn->dst_reg);
+                       return -EACCES;
+               } else if (BPF_SRC(insn->code) == BPF_X &&
+                          is_pointer_value(env, insn->src_reg)) {
+                       verbose("R%d pointer arithmetic prohibited\n",
+                               insn->src_reg);
+                       return -EACCES;
+               }
 
                /* check dest operand */
                err = check_reg_arg(regs, insn->dst_reg, DST_OP);
@@ -1062,6 +1184,12 @@ static int check_cond_jmp_op(struct verifier_env *env,
                err = check_reg_arg(regs, insn->src_reg, SRC_OP);
                if (err)
                        return err;
+
+               if (is_pointer_value(env, insn->src_reg)) {
+                       verbose("R%d pointer comparison prohibited\n",
+                               insn->src_reg);
+                       return -EACCES;
+               }
        } else {
                if (insn->src_reg != BPF_REG_0) {
                        verbose("BPF_JMP uses reserved fields\n");
@@ -1116,6 +1244,9 @@ static int check_cond_jmp_op(struct verifier_env *env,
                        regs[insn->dst_reg].type = CONST_IMM;
                        regs[insn->dst_reg].imm = 0;
                }
+       } else if (is_pointer_value(env, insn->dst_reg)) {
+               verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
+               return -EACCES;
        } else if (BPF_SRC(insn->code) == BPF_K &&
                   (opcode == BPF_JEQ || opcode == BPF_JNE)) {
 
@@ -1619,7 +1750,7 @@ static int do_check(struct verifier_env *env)
                }
 
                if (class == BPF_ALU || class == BPF_ALU64) {
-                       err = check_alu_op(regs, insn);
+                       err = check_alu_op(env, insn);
                        if (err)
                                return err;
 
@@ -1675,6 +1806,8 @@ static int do_check(struct verifier_env *env)
                        }
 
                } else if (class == BPF_STX) {
+                       enum bpf_reg_type dst_reg_type;
+
                        if (BPF_MODE(insn->code) == BPF_XADD) {
                                err = check_xadd(env, insn);
                                if (err)
@@ -1683,11 +1816,6 @@ static int do_check(struct verifier_env *env)
                                continue;
                        }
 
-                       if (BPF_MODE(insn->code) != BPF_MEM ||
-                           insn->imm != 0) {
-                               verbose("BPF_STX uses reserved fields\n");
-                               return -EINVAL;
-                       }
                        /* check src1 operand */
                        err = check_reg_arg(regs, insn->src_reg, SRC_OP);
                        if (err)
@@ -1697,6 +1825,8 @@ static int do_check(struct verifier_env *env)
                        if (err)
                                return err;
 
+                       dst_reg_type = regs[insn->dst_reg].type;
+
                        /* check that memory (dst_reg + off) is writeable */
                        err = check_mem_access(env, insn->dst_reg, insn->off,
                                               BPF_SIZE(insn->code), BPF_WRITE,
@@ -1704,6 +1834,15 @@ static int do_check(struct verifier_env *env)
                        if (err)
                                return err;
 
+                       if (insn->imm == 0) {
+                               insn->imm = dst_reg_type;
+                       } else if (dst_reg_type != insn->imm &&
+                                  (dst_reg_type == PTR_TO_CTX ||
+                                   insn->imm == PTR_TO_CTX)) {
+                               verbose("same insn cannot be used with different pointers\n");
+                               return -EINVAL;
+                       }
+
                } else if (class == BPF_ST) {
                        if (BPF_MODE(insn->code) != BPF_MEM ||
                            insn->src_reg != BPF_REG_0) {
@@ -1769,6 +1908,11 @@ static int do_check(struct verifier_env *env)
                                if (err)
                                        return err;
 
+                               if (is_pointer_value(env, BPF_REG_0)) {
+                                       verbose("R0 leaks addr as return value\n");
+                                       return -EACCES;
+                               }
+
 process_bpf_exit:
                                insn_idx = pop_stack(env, &prev_insn_idx);
                                if (insn_idx < 0) {
@@ -1822,12 +1966,18 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
 
        for (i = 0; i < insn_cnt; i++, insn++) {
                if (BPF_CLASS(insn->code) == BPF_LDX &&
-                   (BPF_MODE(insn->code) != BPF_MEM ||
-                    insn->imm != 0)) {
+                   (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
                        verbose("BPF_LDX uses reserved fields\n");
                        return -EINVAL;
                }
 
+               if (BPF_CLASS(insn->code) == BPF_STX &&
+                   ((BPF_MODE(insn->code) != BPF_MEM &&
+                     BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
+                       verbose("BPF_STX uses reserved fields\n");
+                       return -EINVAL;
+               }
+
                if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
                        struct bpf_map *map;
                        struct fd f;
@@ -1849,8 +1999,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
                        }
 
                        f = fdget(insn->imm);
-
-                       map = bpf_map_get(f);
+                       map = __bpf_map_get(f);
                        if (IS_ERR(map)) {
                                verbose("fd %d is not pointing to valid bpf_map\n",
                                        insn->imm);
@@ -1882,8 +2031,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
                         * will be used by the valid program until it's unloaded
                         * and all maps are released in free_bpf_prog_info()
                         */
-                       atomic_inc(&map->refcnt);
-
+                       bpf_map_inc(map, false);
                        fdput(f);
 next_insn:
                        insn++;
@@ -1934,7 +2082,7 @@ static void adjust_branches(struct bpf_prog *prog, int pos, int delta)
                /* adjust offset of jmps if necessary */
                if (i < pos && i + insn->off + 1 > pos)
                        insn->off += delta;
-               else if (i > pos && i + insn->off + 1 < pos)
+               else if (i > pos + delta && i + insn->off + 1 <= pos + delta)
                        insn->off -= delta;
        }
 }
@@ -1950,12 +2098,17 @@ static int convert_ctx_accesses(struct verifier_env *env)
        struct bpf_prog *new_prog;
        u32 cnt;
        int i;
+       enum bpf_access_type type;
 
        if (!env->prog->aux->ops->convert_ctx_access)
                return 0;
 
        for (i = 0; i < insn_cnt; i++, insn++) {
-               if (insn->code != (BPF_LDX | BPF_MEM | BPF_W))
+               if (insn->code == (BPF_LDX | BPF_MEM | BPF_W))
+                       type = BPF_READ;
+               else if (insn->code == (BPF_STX | BPF_MEM | BPF_W))
+                       type = BPF_WRITE;
+               else
                        continue;
 
                if (insn->imm != PTR_TO_CTX) {
@@ -1965,8 +2118,8 @@ static int convert_ctx_accesses(struct verifier_env *env)
                }
 
                cnt = env->prog->aux->ops->
-                       convert_ctx_access(insn->dst_reg, insn->src_reg,
-                                          insn->off, insn_buf);
+                       convert_ctx_access(type, insn->dst_reg, insn->src_reg,
+                                          insn->off, insn_buf, env->prog);
                if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
                        verbose("bpf verifier is misconfigured\n");
                        return -EINVAL;
@@ -2086,6 +2239,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
        if (ret < 0)
                goto skip_full_check;
 
+       env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
+
        ret = do_check(env);
 
 skip_full_check: