These changes are the raw update to the linux-4.4.6-rt14 kernel sources in kvmfornfv.git.
diff --git a/kernel/net/core/filter.c b/kernel/net/core/filter.c
index bf831a8..37157c4 100644
--- a/kernel/net/core/filter.c
+++ b/kernel/net/core/filter.c
@@ -36,6 +36,7 @@
 #include <net/netlink.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
+#include <net/flow_dissector.h>
 #include <linux/errno.h>
 #include <linux/timer.h>
 #include <asm/uaccess.h>
 #include <linux/seccomp.h>
 #include <linux/if_vlan.h>
 #include <linux/bpf.h>
+#include <net/sch_generic.h>
+#include <net/cls_cgroup.h>
+#include <net/dst_metadata.h>
+#include <net/dst.h>
 
 /**
  *     sk_filter - run a packet through a socket filter
  *     @sk: sock associated with &sk_buff
  *     @skb: buffer to filter
  *
- * Run the filter code and then cut skb->data to correct size returned by
- * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller
+ * Run the eBPF program and then cut skb->data to correct size returned by
+ * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
  * than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to SK_RUN_FILTER. It returns 0 if the packet should
+ * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
  * be accepted or -EPERM if the packet should be tossed.
  *
  */
@@ -78,7 +83,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
        rcu_read_lock();
        filter = rcu_dereference(sk->sk_filter);
        if (filter) {
-               unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
+               unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
 
                err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
        }
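
For context, sk_filter() is the receive-path hook that runs whatever program a socket has attached, typically installed from user space via SO_ATTACH_FILTER (classic BPF) or SO_ATTACH_BPF (eBPF fd). A minimal user-space sketch of the classic path this wrapper serves; the fixed 64-byte truncation is illustrative only:

#include <linux/filter.h>
#include <sys/socket.h>

/* Attach a classic BPF program that returns 64 for every packet, so
 * sk_filter() above trims each accepted skb to its first 64 bytes.
 * Error handling is left to the caller.
 */
static int attach_trim_filter(int fd)
{
	struct sock_filter insns[] = {
		BPF_STMT(BPF_RET | BPF_K, 64),	/* pkt_len = 64, accept */
	};
	struct sock_fprog prog = {
		.len    = sizeof(insns) / sizeof(insns[0]),
		.filter = insns,
	};

	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
			  &prog, sizeof(prog));
}
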
@@ -144,12 +149,6 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
        return raw_smp_processor_id();
 }
 
-/* note that this only generates 32-bit random numbers */
-static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
-{
-       return prandom_u32();
-}
-
 static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
                              struct bpf_insn *insn_buf)
 {
@@ -308,7 +307,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
                        *insn = BPF_EMIT_CALL(__get_raw_cpu_id);
                        break;
                case SKF_AD_OFF + SKF_AD_RANDOM:
-                       *insn = BPF_EMIT_CALL(__get_random_u32);
+                       *insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
+                       bpf_user_rnd_init_once();
                        break;
                }
                break;
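
The SKF_AD_RANDOM ancillary load is now backed by bpf_user_rnd_u32(), with the PRNG seeded once at conversion time. A hedged sketch of a classic program that exercises this extension, accepting roughly one packet in four (the sampling policy is illustrative):

#include <linux/filter.h>

/* Load a pseudo-random 32-bit word via SKF_AD_RANDOM, keep the packet
 * when the low two bits are zero (~1 in 4), otherwise drop it.
 */
struct sock_filter sample_one_in_four[] = {
	BPF_STMT(BPF_LD  | BPF_W   | BPF_ABS, SKF_AD_OFF + SKF_AD_RANDOM),
	BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 3),
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1),
	BPF_STMT(BPF_RET | BPF_K, 0xffffffff),		/* accept */
	BPF_STMT(BPF_RET | BPF_K, 0),			/* drop */
};
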
@@ -355,8 +355,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
  * for socket filters: ctx == 'struct sk_buff *', for seccomp:
  * ctx == 'struct seccomp_data *'.
  */
-int bpf_convert_filter(struct sock_filter *prog, int len,
-                      struct bpf_insn *new_prog, int *new_len)
+static int bpf_convert_filter(struct sock_filter *prog, int len,
+                             struct bpf_insn *new_prog, int *new_len)
 {
        int new_flen = 0, pass = 0, target, i;
        struct bpf_insn *new_insn;
@@ -371,7 +371,8 @@ int bpf_convert_filter(struct sock_filter *prog, int len,
                return -EINVAL;
 
        if (new_prog) {
-               addrs = kcalloc(len, sizeof(*addrs), GFP_KERNEL);
+               addrs = kcalloc(len, sizeof(*addrs),
+                               GFP_KERNEL | __GFP_NOWARN);
                if (!addrs)
                        return -ENOMEM;
        }
@@ -473,9 +474,9 @@ do_pass:
                                bpf_src = BPF_X;
                        } else {
                                insn->dst_reg = BPF_REG_A;
-                               insn->src_reg = BPF_REG_X;
                                insn->imm = fp->k;
                                bpf_src = BPF_SRC(fp->code);
+                               insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
                        }
 
                        /* Common case where 'jump_false' is next insn. */
@@ -751,7 +752,8 @@ static bool chk_code_allowed(u16 code_to_probe)
  *
  * Returns 0 if the rule set is legal or -EINVAL if not.
  */
-int bpf_check_classic(const struct sock_filter *filter, unsigned int flen)
+static int bpf_check_classic(const struct sock_filter *filter,
+                            unsigned int flen)
 {
        bool anc_found;
        int pc;
@@ -775,6 +777,11 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen)
                        if (ftest->k == 0)
                                return -EINVAL;
                        break;
+               case BPF_ALU | BPF_LSH | BPF_K:
+               case BPF_ALU | BPF_RSH | BPF_K:
+                       if (ftest->k >= 32)
+                               return -EINVAL;
+                       break;
                case BPF_LD | BPF_MEM:
                case BPF_LDX | BPF_MEM:
                case BPF_ST:
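
The two new cases reject constant shifts of 32 or more, which are undefined on the 32-bit classic accumulator. For illustration, a filter that bpf_check_classic() now refuses with -EINVAL:

#include <linux/filter.h>

/* Rejected since this change: a constant left shift by 32 bits. */
struct sock_filter bad_shift[] = {
	BPF_STMT(BPF_LD  | BPF_W   | BPF_LEN, 0),	/* A = skb->len */
	BPF_STMT(BPF_ALU | BPF_LSH | BPF_K, 32),	/* k >= 32 -> -EINVAL */
	BPF_STMT(BPF_RET | BPF_A, 0),
};
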
@@ -825,7 +832,6 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen)
 
        return -EINVAL;
 }
-EXPORT_SYMBOL(bpf_check_classic);
 
 static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
                                      const struct sock_fprog *fprog)
@@ -839,7 +845,9 @@ static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
 
        fkprog = fp->orig_prog;
        fkprog->len = fprog->len;
-       fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL);
+
+       fkprog->filter = kmemdup(fp->insns, fsize,
+                                GFP_KERNEL | __GFP_NOWARN);
        if (!fkprog->filter) {
                kfree(fp->orig_prog);
                return -ENOMEM;
@@ -941,7 +949,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
         * pass. At this time, the user BPF is stored in fp->insns.
         */
        old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
-                          GFP_KERNEL);
+                          GFP_KERNEL | __GFP_NOWARN);
        if (!old_prog) {
                err = -ENOMEM;
                goto out_err;
@@ -988,12 +996,13 @@ out_err:
        return ERR_PTR(err);
 }
 
-static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp)
+static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
+                                          bpf_aux_classic_check_t trans)
 {
        int err;
 
        fp->bpf_func = NULL;
-       fp->jited = false;
+       fp->jited = 0;
 
        err = bpf_check_classic(fp->insns, fp->len);
        if (err) {
@@ -1001,6 +1010,17 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp)
                return ERR_PTR(err);
        }
 
+       /* There might be additional checks and transformations
+        * needed on classic filters, f.e. in case of seccomp.
+        */
+       if (trans) {
+               err = trans(fp->insns, fp->len);
+               if (err) {
+                       __bpf_prog_release(fp);
+                       return ERR_PTR(err);
+               }
+       }
+
        /* Probe if we can JIT compile the filter and if so, do
         * the compilation of the filter.
         */
@@ -1050,7 +1070,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
        /* bpf_prepare_filter() already takes care of freeing
         * memory in case something goes wrong.
         */
-       fp = bpf_prepare_filter(fp);
+       fp = bpf_prepare_filter(fp, NULL);
        if (IS_ERR(fp))
                return PTR_ERR(fp);
 
@@ -1059,6 +1079,60 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_create);
 
+/**
+ *     bpf_prog_create_from_user - create an unattached filter from user buffer
+ *     @pfp: the unattached filter that is created
+ *     @fprog: the filter program
+ *     @trans: post-classic verifier transformation handler
+ *     @save_orig: save classic BPF program
+ *
+ * This function effectively does the same as bpf_prog_create(), only
+ * that it builds up its insns buffer from user space provided buffer.
+ * It also allows for passing a bpf_aux_classic_check_t handler.
+ */
+int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
+                             bpf_aux_classic_check_t trans, bool save_orig)
+{
+       unsigned int fsize = bpf_classic_proglen(fprog);
+       struct bpf_prog *fp;
+       int err;
+
+       /* Make sure new filter is there and in the right amounts. */
+       if (fprog->filter == NULL)
+               return -EINVAL;
+
+       fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
+       if (!fp)
+               return -ENOMEM;
+
+       if (copy_from_user(fp->insns, fprog->filter, fsize)) {
+               __bpf_prog_free(fp);
+               return -EFAULT;
+       }
+
+       fp->len = fprog->len;
+       fp->orig_prog = NULL;
+
+       if (save_orig) {
+               err = bpf_prog_store_orig_filter(fp, fprog);
+               if (err) {
+                       __bpf_prog_free(fp);
+                       return -ENOMEM;
+               }
+       }
+
+       /* bpf_prepare_filter() already takes care of freeing
+        * memory in case something goes wrong.
+        */
+       fp = bpf_prepare_filter(fp, trans);
+       if (IS_ERR(fp))
+               return PTR_ERR(fp);
+
+       *pfp = fp;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);
+
 void bpf_prog_destroy(struct bpf_prog *fp)
 {
        __bpf_prog_release(fp);
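
A hedged sketch of how a kernel-side caller would use the new export, reusing the illustrative no_packet_loads handler from the sketch above and asking for the original instructions to be kept for later dumping; note that fprog->filter must still point at the user buffer, since the helper does the copy_from_user() itself:

#include <linux/filter.h>
#include <linux/err.h>

/* Build an unattached filter from a sock_fprog whose ->filter still
 * points at user memory; released later with bpf_prog_destroy().
 */
static struct bpf_prog *build_checked_filter(struct sock_fprog *ufprog)
{
	struct bpf_prog *prog;
	int err;

	err = bpf_prog_create_from_user(&prog, ufprog, no_packet_loads, true);
	if (err)
		return ERR_PTR(err);

	return prog;
}
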
@@ -1135,7 +1209,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
        /* bpf_prepare_filter() already takes care of freeing
         * memory in case something goes wrong.
         */
-       prog = bpf_prepare_filter(prog);
+       prog = bpf_prepare_filter(prog, NULL);
        if (IS_ERR(prog))
                return PTR_ERR(prog);
 
@@ -1175,21 +1249,6 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
        return 0;
 }
 
-/**
- *     bpf_skb_clone_not_writable - is the header of a clone not writable
- *     @skb: buffer to check
- *     @len: length up to which to write, can be negative
- *
- *     Returns true if modifying the header part of the cloned buffer
- *     does require the data to be copied. I.e. this version works with
- *     negative lengths needed for eBPF case!
- */
-static bool bpf_skb_clone_unwritable(const struct sk_buff *skb, int len)
-{
-       return skb_header_cloned(skb) ||
-              (int) skb_headroom(skb) + len > skb->hdr_len;
-}
-
 #define BPF_RECOMPUTE_CSUM(flags)      ((flags) & 1)
 
 static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
@@ -1212,9 +1271,8 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
        if (unlikely((u32) offset > 0xffff || len > sizeof(buf)))
                return -EFAULT;
 
-       offset -= skb->data - skb_mac_header(skb);
        if (unlikely(skb_cloned(skb) &&
-                    bpf_skb_clone_unwritable(skb, offset + len)))
+                    !skb_clone_writable(skb, offset + len)))
                return -EFAULT;
 
        ptr = skb_header_pointer(skb, offset, len, buf);
@@ -1258,9 +1316,8 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
        if (unlikely((u32) offset > 0xffff))
                return -EFAULT;
 
-       offset -= skb->data - skb_mac_header(skb);
        if (unlikely(skb_cloned(skb) &&
-                    bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
+                    !skb_clone_writable(skb, offset + sizeof(sum))))
                return -EFAULT;
 
        ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1299,16 +1356,15 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = {
 static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
 {
        struct sk_buff *skb = (struct sk_buff *) (long) r1;
-       u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags);
+       bool is_pseudo = !!BPF_IS_PSEUDO_HEADER(flags);
        int offset = (int) r2;
        __sum16 sum, *ptr;
 
        if (unlikely((u32) offset > 0xffff))
                return -EFAULT;
 
-       offset -= skb->data - skb_mac_header(skb);
        if (unlikely(skb_cloned(skb) &&
-                    bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
+                    !skb_clone_writable(skb, offset + sizeof(sum))))
                return -EFAULT;
 
        ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1344,6 +1400,233 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = {
        .arg5_type      = ARG_ANYTHING,
 };
 
+#define BPF_IS_REDIRECT_INGRESS(flags) ((flags) & 1)
+
+static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
+{
+       struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2;
+       struct net_device *dev;
+
+       dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
+       if (unlikely(!dev))
+               return -EINVAL;
+
+       skb2 = skb_clone(skb, GFP_ATOMIC);
+       if (unlikely(!skb2))
+               return -ENOMEM;
+
+       if (BPF_IS_REDIRECT_INGRESS(flags))
+               return dev_forward_skb(dev, skb2);
+
+       skb2->dev = dev;
+       skb_sender_cpu_clear(skb2);
+       return dev_queue_xmit(skb2);
+}
+
+const struct bpf_func_proto bpf_clone_redirect_proto = {
+       .func           = bpf_clone_redirect,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_ANYTHING,
+};
+
+struct redirect_info {
+       u32 ifindex;
+       u32 flags;
+};
+
+static DEFINE_PER_CPU(struct redirect_info, redirect_info);
+static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5)
+{
+       struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+
+       ri->ifindex = ifindex;
+       ri->flags = flags;
+       return TC_ACT_REDIRECT;
+}
+
+int skb_do_redirect(struct sk_buff *skb)
+{
+       struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+       struct net_device *dev;
+
+       dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
+       ri->ifindex = 0;
+       if (unlikely(!dev)) {
+               kfree_skb(skb);
+               return -EINVAL;
+       }
+
+       if (BPF_IS_REDIRECT_INGRESS(ri->flags))
+               return dev_forward_skb(dev, skb);
+
+       skb->dev = dev;
+       skb_sender_cpu_clear(skb);
+       return dev_queue_xmit(skb);
+}
+
+const struct bpf_func_proto bpf_redirect_proto = {
+       .func           = bpf_redirect,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_ANYTHING,
+       .arg2_type      = ARG_ANYTHING,
+};
+
+static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+       return task_get_classid((struct sk_buff *) (unsigned long) r1);
+}
+
+static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
+       .func           = bpf_get_cgroup_classid,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+};
+
+static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+#ifdef CONFIG_IP_ROUTE_CLASSID
+       const struct dst_entry *dst;
+
+       dst = skb_dst((struct sk_buff *) (unsigned long) r1);
+       if (dst)
+               return dst->tclassid;
+#endif
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_get_route_realm_proto = {
+       .func           = bpf_get_route_realm,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+};
+
+static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
+{
+       struct sk_buff *skb = (struct sk_buff *) (long) r1;
+       __be16 vlan_proto = (__force __be16) r2;
+
+       if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
+                    vlan_proto != htons(ETH_P_8021AD)))
+               vlan_proto = htons(ETH_P_8021Q);
+
+       return skb_vlan_push(skb, vlan_proto, vlan_tci);
+}
+
+const struct bpf_func_proto bpf_skb_vlan_push_proto = {
+       .func           = bpf_skb_vlan_push,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_ANYTHING,
+};
+EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto);
+
+static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+       struct sk_buff *skb = (struct sk_buff *) (long) r1;
+
+       return skb_vlan_pop(skb);
+}
+
+const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
+       .func           = bpf_skb_vlan_pop,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+};
+EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto);
+
+bool bpf_helper_changes_skb_data(void *func)
+{
+       if (func == bpf_skb_vlan_push)
+               return true;
+       if (func == bpf_skb_vlan_pop)
+               return true;
+       return false;
+}
+
+static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
+{
+       struct sk_buff *skb = (struct sk_buff *) (long) r1;
+       struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2;
+       struct ip_tunnel_info *info = skb_tunnel_info(skb);
+
+       if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags || !info))
+               return -EINVAL;
+       if (ip_tunnel_info_af(info) != AF_INET)
+               return -EINVAL;
+
+       to->tunnel_id = be64_to_cpu(info->key.tun_id);
+       to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
+
+       return 0;
+}
+
+const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
+       .func           = bpf_skb_get_tunnel_key,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_PTR_TO_STACK,
+       .arg3_type      = ARG_CONST_STACK_SIZE,
+       .arg4_type      = ARG_ANYTHING,
+};
+
+static struct metadata_dst __percpu *md_dst;
+
+static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
+{
+       struct sk_buff *skb = (struct sk_buff *) (long) r1;
+       struct bpf_tunnel_key *from = (struct bpf_tunnel_key *) (long) r2;
+       struct metadata_dst *md = this_cpu_ptr(md_dst);
+       struct ip_tunnel_info *info;
+
+       if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags))
+               return -EINVAL;
+
+       skb_dst_drop(skb);
+       dst_hold((struct dst_entry *) md);
+       skb_dst_set(skb, (struct dst_entry *) md);
+
+       info = &md->u.tun_info;
+       info->mode = IP_TUNNEL_INFO_TX;
+       info->key.tun_flags = TUNNEL_KEY;
+       info->key.tun_id = cpu_to_be64(from->tunnel_id);
+       info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
+
+       return 0;
+}
+
+const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
+       .func           = bpf_skb_set_tunnel_key,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_PTR_TO_STACK,
+       .arg3_type      = ARG_CONST_STACK_SIZE,
+       .arg4_type      = ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto *bpf_get_skb_set_tunnel_key_proto(void)
+{
+       if (!md_dst) {
+               /* race is not possible, since it's called from
+                * verifier that is holding verifier mutex
+                */
+               md_dst = metadata_dst_alloc_percpu(0, GFP_KERNEL);
+               if (!md_dst)
+                       return NULL;
+       }
+       return &bpf_skb_set_tunnel_key_proto;
+}
+
 static const struct bpf_func_proto *
 sk_filter_func_proto(enum bpf_func_id func_id)
 {
@@ -1358,6 +1641,13 @@ sk_filter_func_proto(enum bpf_func_id func_id)
                return &bpf_get_prandom_u32_proto;
        case BPF_FUNC_get_smp_processor_id:
                return &bpf_get_smp_processor_id_proto;
+       case BPF_FUNC_tail_call:
+               return &bpf_tail_call_proto;
+       case BPF_FUNC_ktime_get_ns:
+               return &bpf_ktime_get_ns_proto;
+       case BPF_FUNC_trace_printk:
+               if (capable(CAP_SYS_ADMIN))
+                       return bpf_get_trace_printk_proto();
        default:
                return NULL;
        }
@@ -1373,18 +1663,29 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
                return &bpf_l3_csum_replace_proto;
        case BPF_FUNC_l4_csum_replace:
                return &bpf_l4_csum_replace_proto;
+       case BPF_FUNC_clone_redirect:
+               return &bpf_clone_redirect_proto;
+       case BPF_FUNC_get_cgroup_classid:
+               return &bpf_get_cgroup_classid_proto;
+       case BPF_FUNC_skb_vlan_push:
+               return &bpf_skb_vlan_push_proto;
+       case BPF_FUNC_skb_vlan_pop:
+               return &bpf_skb_vlan_pop_proto;
+       case BPF_FUNC_skb_get_tunnel_key:
+               return &bpf_skb_get_tunnel_key_proto;
+       case BPF_FUNC_skb_set_tunnel_key:
+               return bpf_get_skb_set_tunnel_key_proto();
+       case BPF_FUNC_redirect:
+               return &bpf_redirect_proto;
+       case BPF_FUNC_get_route_realm:
+               return &bpf_get_route_realm_proto;
        default:
                return sk_filter_func_proto(func_id);
        }
 }
 
-static bool sk_filter_is_valid_access(int off, int size,
-                                     enum bpf_access_type type)
+static bool __is_valid_access(int off, int size, enum bpf_access_type type)
 {
-       /* only read is allowed */
-       if (type != BPF_READ)
-               return false;
-
        /* check bounds */
        if (off < 0 || off >= sizeof(struct __sk_buff))
                return false;
@@ -1400,8 +1701,50 @@ static bool sk_filter_is_valid_access(int off, int size,
        return true;
 }
 
-static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
-                                       struct bpf_insn *insn_buf)
+static bool sk_filter_is_valid_access(int off, int size,
+                                     enum bpf_access_type type)
+{
+       if (off == offsetof(struct __sk_buff, tc_classid))
+               return false;
+
+       if (type == BPF_WRITE) {
+               switch (off) {
+               case offsetof(struct __sk_buff, cb[0]) ...
+                       offsetof(struct __sk_buff, cb[4]):
+                       break;
+               default:
+                       return false;
+               }
+       }
+
+       return __is_valid_access(off, size, type);
+}
+
+static bool tc_cls_act_is_valid_access(int off, int size,
+                                      enum bpf_access_type type)
+{
+       if (off == offsetof(struct __sk_buff, tc_classid))
+               return type == BPF_WRITE ? true : false;
+
+       if (type == BPF_WRITE) {
+               switch (off) {
+               case offsetof(struct __sk_buff, mark):
+               case offsetof(struct __sk_buff, tc_index):
+               case offsetof(struct __sk_buff, priority):
+               case offsetof(struct __sk_buff, cb[0]) ...
+                       offsetof(struct __sk_buff, cb[4]):
+                       break;
+               default:
+                       return false;
+               }
+       }
+       return __is_valid_access(off, size, type);
+}
+
+static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+                                     int src_reg, int ctx_off,
+                                     struct bpf_insn *insn_buf,
+                                     struct bpf_prog *prog)
 {
        struct bpf_insn *insn = insn_buf;
 
@@ -1430,12 +1773,49 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
        case offsetof(struct __sk_buff, priority):
                BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4);
 
+               if (type == BPF_WRITE)
+                       *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+                                             offsetof(struct sk_buff, priority));
+               else
+                       *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+                                             offsetof(struct sk_buff, priority));
+               break;
+
+       case offsetof(struct __sk_buff, ingress_ifindex):
+               BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4);
+
                *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
-                                     offsetof(struct sk_buff, priority));
+                                     offsetof(struct sk_buff, skb_iif));
+               break;
+
+       case offsetof(struct __sk_buff, ifindex):
+               BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
+
+               *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+                                     dst_reg, src_reg,
+                                     offsetof(struct sk_buff, dev));
+               *insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1);
+               *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg,
+                                     offsetof(struct net_device, ifindex));
+               break;
+
+       case offsetof(struct __sk_buff, hash):
+               BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
+
+               *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+                                     offsetof(struct sk_buff, hash));
                break;
 
        case offsetof(struct __sk_buff, mark):
-               return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn);
+               BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+
+               if (type == BPF_WRITE)
+                       *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+                                             offsetof(struct sk_buff, mark));
+               else
+                       *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+                                             offsetof(struct sk_buff, mark));
+               break;
 
        case offsetof(struct __sk_buff, pkt_type):
                return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn);
@@ -1450,6 +1830,47 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
        case offsetof(struct __sk_buff, vlan_tci):
                return convert_skb_access(SKF_AD_VLAN_TAG,
                                          dst_reg, src_reg, insn);
+
+       case offsetof(struct __sk_buff, cb[0]) ...
+               offsetof(struct __sk_buff, cb[4]):
+               BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
+
+               prog->cb_access = 1;
+               ctx_off -= offsetof(struct __sk_buff, cb[0]);
+               ctx_off += offsetof(struct sk_buff, cb);
+               ctx_off += offsetof(struct qdisc_skb_cb, data);
+               if (type == BPF_WRITE)
+                       *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
+               else
+                       *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
+               break;
+
+       case offsetof(struct __sk_buff, tc_classid):
+               ctx_off -= offsetof(struct __sk_buff, tc_classid);
+               ctx_off += offsetof(struct sk_buff, cb);
+               ctx_off += offsetof(struct qdisc_skb_cb, tc_classid);
+               WARN_ON(type != BPF_WRITE);
+               *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
+               break;
+
+       case offsetof(struct __sk_buff, tc_index):
+#ifdef CONFIG_NET_SCHED
+               BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
+
+               if (type == BPF_WRITE)
+                       *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg,
+                                             offsetof(struct sk_buff, tc_index));
+               else
+                       *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+                                             offsetof(struct sk_buff, tc_index));
+               break;
+#else
+               if (type == BPF_WRITE)
+                       *insn++ = BPF_MOV64_REG(dst_reg, dst_reg);
+               else
+                       *insn++ = BPF_MOV64_IMM(dst_reg, 0);
+               break;
+#endif
        }
 
        return insn - insn_buf;
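
bpf_net_convert_ctx_access() rewrites every __sk_buff access into the real sk_buff (or qdisc_skb_cb) offset at verification time, so tc programs simply dereference the mirror struct. A hedged restricted-C sketch of the newly writable fields; section naming and compilation follow the usual cls_bpf conventions:

#include <linux/bpf.h>
#include <linux/pkt_cls.h>

/* Writes to mark and cb[] are now allowed for tc programs; the verifier
 * converts them into stores to sk_buff->mark and qdisc_skb_cb->data.
 */
__attribute__((section("classifier"), used))
int mark_and_pass(struct __sk_buff *skb)
{
	skb->cb[0] = skb->hash;		/* scratch space cb[0]..cb[4] */
	skb->mark  = 0x1;

	return TC_ACT_OK;
}
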
@@ -1458,13 +1879,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
 static const struct bpf_verifier_ops sk_filter_ops = {
        .get_func_proto = sk_filter_func_proto,
        .is_valid_access = sk_filter_is_valid_access,
-       .convert_ctx_access = sk_filter_convert_ctx_access,
+       .convert_ctx_access = bpf_net_convert_ctx_access,
 };
 
 static const struct bpf_verifier_ops tc_cls_act_ops = {
        .get_func_proto = tc_cls_act_func_proto,
-       .is_valid_access = sk_filter_is_valid_access,
-       .convert_ctx_access = sk_filter_convert_ctx_access,
+       .is_valid_access = tc_cls_act_is_valid_access,
+       .convert_ctx_access = bpf_net_convert_ctx_access,
 };
 
 static struct bpf_prog_type_list sk_filter_type __read_mostly = {
@@ -1526,9 +1947,13 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
                goto out;
 
        /* We're copying the filter that has been originally attached,
-        * so no conversion/decode needed anymore.
+        * so no conversion/decode needed anymore. eBPF programs that
+        * have no original program cannot be dumped through this.
         */
+       ret = -EACCES;
        fprog = filter->prog->orig_prog;
+       if (!fprog)
+               goto out;
 
        ret = fprog->len;
        if (!len)
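
With the guard above, dumping the attached program through the SO_GET_FILTER socket option (an alias of SO_ATTACH_FILTER on read) fails with EACCES when only an eBPF program, and hence no orig_prog, is attached. A hedged user-space sketch of that dump path; note the kernel interprets optlen as a count of instructions, not bytes:

#include <linux/filter.h>
#include <sys/socket.h>
#include <stdio.h>

/* Retrieve the classic filter attached to fd; for SO_ATTACH_BPF-only
 * sockets this now fails with EACCES instead of dumping nothing useful.
 */
static void dump_filter(int fd)
{
	struct sock_filter insns[BPF_MAXINSNS];
	socklen_t optlen = BPF_MAXINSNS;	/* number of instructions */

	if (getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, insns, &optlen) < 0)
		perror("SO_GET_FILTER");
	else
		printf("classic filter, %u instructions\n",
		       (unsigned int) optlen);
}
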