These changes are the raw update to linux-4.4.6-rt14.
[kvmfornfv.git] kernel/net/ipv6/netfilter/ip6_tables.c
index 62f5b0d..99425cf 100644
@@ -117,7 +117,7 @@ ip6_packet_match(const struct sk_buff *skb,
        if (FWINV(ret != 0, IP6T_INV_VIA_IN)) {
                dprintf("VIA in mismatch (%s vs %s).%s\n",
                        indev, ip6info->iniface,
-                       ip6info->invflags&IP6T_INV_VIA_IN ?" (INV)":"");
+                       ip6info->invflags & IP6T_INV_VIA_IN ? " (INV)" : "");
                return false;
        }
 
@@ -126,14 +126,14 @@ ip6_packet_match(const struct sk_buff *skb,
        if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) {
                dprintf("VIA out mismatch (%s vs %s).%s\n",
                        outdev, ip6info->outiface,
-                       ip6info->invflags&IP6T_INV_VIA_OUT ?" (INV)":"");
+                       ip6info->invflags & IP6T_INV_VIA_OUT ? " (INV)" : "");
                return false;
        }
 
 /* ... might want to do something with class and flowlabel here ... */
 
        /* look for the desired protocol header */
-       if((ip6info->flags & IP6T_F_PROTO)) {
+       if (ip6info->flags & IP6T_F_PROTO) {
                int protohdr;
                unsigned short _frag_off;
 
@@ -151,9 +151,9 @@ ip6_packet_match(const struct sk_buff *skb,
                                ip6info->proto);
 
                if (ip6info->proto == protohdr) {
-                       if(ip6info->invflags & IP6T_INV_PROTO) {
+                       if (ip6info->invflags & IP6T_INV_PROTO)
                                return false;
-                       }
+
                        return true;
                }
 
@@ -275,7 +275,8 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e,
        return 0;
 }
 
-static void trace_packet(const struct sk_buff *skb,
+static void trace_packet(struct net *net,
+                        const struct sk_buff *skb,
                         unsigned int hook,
                         const struct net_device *in,
                         const struct net_device *out,
@@ -283,15 +284,12 @@ static void trace_packet(const struct sk_buff *skb,
                         const struct xt_table_info *private,
                         const struct ip6t_entry *e)
 {
-       const void *table_base;
        const struct ip6t_entry *root;
        const char *hookname, *chainname, *comment;
        const struct ip6t_entry *iter;
        unsigned int rulenum = 0;
-       struct net *net = dev_net(in ? in : out);
 
-       table_base = private->entries[smp_processor_id()];
-       root = get_entry(table_base, private->hook_entry[hook]);
+       root = get_entry(private->entries, private->hook_entry[hook]);
 
        hookname = chainname = hooknames[hook];
        comment = comments[NF_IP6_TRACE_COMMENT_RULE];
@@ -307,7 +305,7 @@ static void trace_packet(const struct sk_buff *skb,
 }
 #endif
 
-static inline __pure struct ip6t_entry *
+static inline struct ip6t_entry *
 ip6t_next_entry(const struct ip6t_entry *entry)
 {
        return (void *)entry + entry->next_offset;
@@ -316,22 +314,23 @@ ip6t_next_entry(const struct ip6t_entry *entry)
 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
 unsigned int
 ip6t_do_table(struct sk_buff *skb,
-             unsigned int hook,
              const struct nf_hook_state *state,
              struct xt_table *table)
 {
+       unsigned int hook = state->hook;
        static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
        /* Initializing verdict to NF_DROP keeps gcc happy. */
        unsigned int verdict = NF_DROP;
        const char *indev, *outdev;
        const void *table_base;
        struct ip6t_entry *e, **jumpstack;
-       unsigned int *stackptr, origptr, cpu;
+       unsigned int stackidx, cpu;
        const struct xt_table_info *private;
        struct xt_action_param acpar;
        unsigned int addend;
 
        /* Initialization */
+       stackidx = 0;
        indev = state->in ? state->in->name : nulldevname;
        outdev = state->out ? state->out->name : nulldevname;
        /* We handle fragments by dealing with the first fragment as
@@ -341,6 +340,7 @@ ip6t_do_table(struct sk_buff *skb,
         * rule is also a fragment-specific rule, non-fragments won't
         * match it. */
        acpar.hotdrop = false;
+       acpar.net     = state->net;
        acpar.in      = state->in;
        acpar.out     = state->out;
        acpar.family  = NFPROTO_IPV6;
@@ -357,16 +357,25 @@ ip6t_do_table(struct sk_buff *skb,
         */
        smp_read_barrier_depends();
        cpu        = smp_processor_id();
-       table_base = private->entries[cpu];
+       table_base = private->entries;
        jumpstack  = (struct ip6t_entry **)private->jumpstack[cpu];
-       stackptr   = per_cpu_ptr(private->stackptr, cpu);
-       origptr    = *stackptr;
+
+       /* Switch to alternate jumpstack if we're being invoked via TEE.
+        * TEE issues XT_CONTINUE verdict on original skb so we must not
+        * clobber the jumpstack.
+        *
+        * For recursion via REJECT or SYNPROXY the stack will be clobbered
+        * but it is no problem since absolute verdict is issued by these.
+        */
+       if (static_key_false(&xt_tee_enabled))
+               jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
 
        e = get_entry(table_base, private->hook_entry[hook]);
 
        do {
                const struct xt_entry_target *t;
                const struct xt_entry_match *ematch;
+               struct xt_counters *counter;
 
                IP_NF_ASSERT(e);
                acpar.thoff = 0;
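
For context on the TEE comment above: in this kernel series each per-cpu jumpstack is allocated with room for two full call chains, so the duplicated skb's traversal can use the upper half without clobbering the original's stack. An abbreviated sketch of the allocation side, roughly as in net/netfilter/x_tables.c of the same series:

	static int xt_jumpstack_alloc(struct xt_table_info *i)
	{
		unsigned int size;
		int cpu;

		/* ... allocation of the per-cpu pointer array elided ... */

		/* Jumpstack needs to be able to record two full callchains:
		 * one from the first ruleset traversal, plus one table
		 * reentrancy via -j TEE.
		 */
		size = sizeof(void *) * i->stacksize * 2u;

		for_each_possible_cpu(cpu) {
			i->jumpstack[cpu] = kmalloc_node(size, GFP_KERNEL,
							 cpu_to_node(cpu));
			if (i->jumpstack[cpu] == NULL)
				return -ENOMEM;
		}
		return 0;
	}
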
@@ -384,7 +393,8 @@ ip6t_do_table(struct sk_buff *skb,
                                goto no_match;
                }
 
-               ADD_COUNTER(e->counters, skb->len, 1);
+               counter = xt_get_this_cpu_counter(&e->counters);
+               ADD_COUNTER(*counter, skb->len, 1);
 
                t = ip6t_get_target_c(e);
                IP_NF_ASSERT(t->u.kernel.target);
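
The ADD_COUNTER change pairs with a new per-cpu indirection: on SMP kernels e->counters.pcnt now holds a per-cpu pointer (set up in find_check_entry() further down), and the hot path resolves it for the local cpu. Roughly, from include/linux/netfilter/x_tables.h of this series:

	static inline struct xt_counters *
	xt_get_this_cpu_counter(struct xt_counters *cnt)
	{
		if (nr_cpu_ids > 1)
			return this_cpu_ptr((void __percpu *) (unsigned long) cnt->pcnt);

		return cnt;	/* UP: use the counters embedded in the rule */
	}
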
@@ -392,8 +402,8 @@ ip6t_do_table(struct sk_buff *skb,
 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
                /* The packet is traced: log it */
                if (unlikely(skb->nf_trace))
-                       trace_packet(skb, hook, state->in, state->out,
-                                    table->name, private, e);
+                       trace_packet(state->net, skb, hook, state->in,
+                                    state->out, table->name, private, e);
 #endif
                /* Standard target? */
                if (!t->u.kernel.target->target) {
@@ -406,20 +416,16 @@ ip6t_do_table(struct sk_buff *skb,
                                        verdict = (unsigned int)(-v) - 1;
                                        break;
                                }
-                               if (*stackptr <= origptr)
+                               if (stackidx == 0)
                                        e = get_entry(table_base,
                                            private->underflow[hook]);
                                else
-                                       e = ip6t_next_entry(jumpstack[--*stackptr]);
+                                       e = ip6t_next_entry(jumpstack[--stackidx]);
                                continue;
                        }
                        if (table_base + v != ip6t_next_entry(e) &&
                            !(e->ipv6.flags & IP6T_F_GOTO)) {
-                               if (*stackptr >= private->stacksize) {
-                                       verdict = NF_DROP;
-                                       break;
-                               }
-                               jumpstack[(*stackptr)++] = e;
+                               jumpstack[stackidx++] = e;
                        }
 
                        e = get_entry(table_base, v);
@@ -437,10 +443,8 @@ ip6t_do_table(struct sk_buff *skb,
                        break;
        } while (!acpar.hotdrop);
 
-       *stackptr = origptr;
-
-       xt_write_recseq_end(addend);
-       local_bh_enable();
+       xt_write_recseq_end(addend);
+       local_bh_enable();
 
 #ifdef DEBUG_ALLOW_ALL
        return NF_ACCEPT;
@@ -557,7 +561,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
                                pos = newpos;
                        }
                }
-               next:
+next:
                duprintf("Finished chain %u\n", hook);
        }
        return 1;
@@ -679,6 +683,10 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
        if (ret)
                return ret;
 
+       e->counters.pcnt = xt_percpu_counter_alloc();
+       if (IS_ERR_VALUE(e->counters.pcnt))
+               return -ENOMEM;
+
        j = 0;
        mtpar.net       = net;
        mtpar.table     = name;
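
The IS_ERR_VALUE() test above works because the allocator returns either a per-cpu address, 0 (UP case, where the embedded counters are used directly), or -ENOMEM cast to unsigned long. A sketch of the helper pair from the same header:

	static inline unsigned long xt_percpu_counter_alloc(void)
	{
		if (nr_cpu_ids > 1) {
			void __percpu *res = __alloc_percpu(sizeof(struct xt_counters),
							    sizeof(struct xt_counters));

			if (res == NULL)
				return -ENOMEM;

			return (__force unsigned long) res;
		}

		return 0;
	}

	static inline void xt_percpu_counter_free(u64 pcnt)
	{
		if (nr_cpu_ids > 1)
			free_percpu((void __percpu *) (unsigned long) pcnt);
	}
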
@@ -714,6 +722,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
                        break;
                cleanup_match(ematch, net);
        }
+
+       xt_percpu_counter_free(e->counters.pcnt);
+
        return ret;
 }
 
@@ -797,13 +808,15 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net)
        if (par.target->destroy != NULL)
                par.target->destroy(&par);
        module_put(par.target->me);
+
+       xt_percpu_counter_free(e->counters.pcnt);
 }
 
 /* Checks and translates the user-supplied table segment (held in
    newinfo) */
 static int
 translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
-                const struct ip6t_replace *repl)
+               const struct ip6t_replace *repl)
 {
        struct ip6t_entry *iter;
        unsigned int i;
@@ -879,12 +892,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
                return ret;
        }
 
-       /* And one copy for every other CPU */
-       for_each_possible_cpu(i) {
-               if (newinfo->entries[i] && newinfo->entries[i] != entry0)
-                       memcpy(newinfo->entries[i], entry0, newinfo->size);
-       }
-
        return ret;
 }
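
The deleted copy loop reflects the layout change in struct xt_table_info: the nr_cpu_ids rule copies are replaced by a single shared blob, with only the counters remaining per-cpu. Roughly:

	/* before: one rule blob per possible CPU */
	void *entries[1];		/* indexed by raw_smp_processor_id() */

	/* after (this series): one shared blob appended to the struct */
	unsigned char entries[0] __aligned(8);
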
 
@@ -900,14 +907,16 @@ get_counters(const struct xt_table_info *t,
                seqcount_t *s = &per_cpu(xt_recseq, cpu);
 
                i = 0;
-               xt_entry_foreach(iter, t->entries[cpu], t->size) {
+               xt_entry_foreach(iter, t->entries, t->size) {
+                       struct xt_counters *tmp;
                        u64 bcnt, pcnt;
                        unsigned int start;
 
+                       tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
                        do {
                                start = read_seqcount_begin(s);
-                               bcnt = iter->counters.bcnt;
-                               pcnt = iter->counters.pcnt;
+                               bcnt = tmp->bcnt;
+                               pcnt = tmp->pcnt;
                        } while (read_seqcount_retry(s, start));
 
                        ADD_COUNTER(counters[i], bcnt, pcnt);
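
When summing, get_counters() needs a specific cpu's slot rather than the local one; that variant of the lookup is, roughly:

	static inline struct xt_counters *
	xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
	{
		if (nr_cpu_ids > 1)
			return per_cpu_ptr((void __percpu *) (unsigned long) cnt->pcnt, cpu);

		return cnt;
	}
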
@@ -952,11 +961,7 @@ copy_entries_to_user(unsigned int total_size,
        if (IS_ERR(counters))
                return PTR_ERR(counters);
 
-       /* choose the copy that is on our node/cpu, ...
-        * This choice is lazy (because current thread is
-        * allowed to migrate to another cpu)
-        */
-       loc_cpu_entry = private->entries[raw_smp_processor_id()];
+       loc_cpu_entry = private->entries;
        if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
                ret = -EFAULT;
                goto free_counters;
@@ -1064,16 +1069,16 @@ static int compat_table_info(const struct xt_table_info *info,
                             struct xt_table_info *newinfo)
 {
        struct ip6t_entry *iter;
-       void *loc_cpu_entry;
+       const void *loc_cpu_entry;
        int ret;
 
        if (!newinfo || !info)
                return -EINVAL;
 
-       /* we dont care about newinfo->entries[] */
+       /* we dont care about newinfo->entries */
        memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
        newinfo->initial_entries = 0;
-       loc_cpu_entry = info->entries[raw_smp_processor_id()];
+       loc_cpu_entry = info->entries;
        xt_compat_init_offsets(AF_INET6, info->number);
        xt_entry_foreach(iter, loc_cpu_entry, info->size) {
                ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
@@ -1085,7 +1090,7 @@ static int compat_table_info(const struct xt_table_info *info,
 #endif
 
 static int get_info(struct net *net, void __user *user,
-                    const int *len, int compat)
+                   const int *len, int compat)
 {
        char name[XT_TABLE_MAXNAMELEN];
        struct xt_table *t;
@@ -1147,7 +1152,7 @@ static int get_info(struct net *net, void __user *user,
 
 static int
 get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
-            const int *len)
+           const int *len)
 {
        int ret;
        struct ip6t_get_entries get;
@@ -1194,7 +1199,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
        struct xt_table *t;
        struct xt_table_info *oldinfo;
        struct xt_counters *counters;
-       const void *loc_cpu_old_entry;
        struct ip6t_entry *iter;
 
        ret = 0;
@@ -1237,8 +1241,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
        get_counters(oldinfo, counters);
 
        /* Decrease module usage counts and free resource */
-       loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
-       xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
+       xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
                cleanup_entry(iter, net);
 
        xt_free_table_info(oldinfo);
@@ -1284,8 +1287,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
        if (!newinfo)
                return -ENOMEM;
 
-       /* choose the copy that is on our node/cpu */
-       loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+       loc_cpu_entry = newinfo->entries;
        if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
                           tmp.size) != 0) {
                ret = -EFAULT;
@@ -1316,7 +1318,7 @@ static int
 do_add_counters(struct net *net, const void __user *user, unsigned int len,
                int compat)
 {
-       unsigned int i, curcpu;
+       unsigned int i;
        struct xt_counters_info tmp;
        struct xt_counters *paddc;
        unsigned int num_counters;
@@ -1326,7 +1328,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
        struct xt_table *t;
        const struct xt_table_info *private;
        int ret = 0;
-       const void *loc_cpu_entry;
        struct ip6t_entry *iter;
        unsigned int addend;
 #ifdef CONFIG_COMPAT
@@ -1374,7 +1375,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
                goto free;
        }
 
-
        local_bh_disable();
        private = t->private;
        if (private->number != num_counters) {
@@ -1383,16 +1383,15 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
        }
 
        i = 0;
-       /* Choose the copy that is on our node */
-       curcpu = smp_processor_id();
        addend = xt_write_recseq_begin();
-       loc_cpu_entry = private->entries[curcpu];
-       xt_entry_foreach(iter, loc_cpu_entry, private->size) {
-               ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
+       xt_entry_foreach(iter, private->entries, private->size) {
+               struct xt_counters *tmp;
+
+               tmp = xt_get_this_cpu_counter(&iter->counters);
+               ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
                ++i;
        }
        xt_write_recseq_end(addend);
-
  unlock_up_free:
        local_bh_enable();
        xt_table_unlock(t);
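
The xt_write_recseq_begin()/xt_write_recseq_end() bracket is what lets these counter updates stay lockless: the seqcount readers in get_counters() simply retry if they race with a writer. The write side, roughly as in x_tables.h of this series:

	static inline unsigned int xt_write_recseq_begin(void)
	{
		unsigned int addend;

		/* Low order bit of sequence is set if we already
		 * called xt_write_recseq_begin().
		 */
		addend = (__this_cpu_read(xt_recseq.sequence) + 1) & 1;

		/* addend is 0 or 1; adding it unconditionally avoids a
		 * test and branch, since addend is most likely 1.
		 */
		__this_cpu_add(xt_recseq.sequence, addend);
		smp_wmb();

		return addend;
	}

	static inline void xt_write_recseq_end(unsigned int addend)
	{
		/* a write_seqcount_end() of sorts, but addend is 0 or 1 */
		smp_wmb();
		__this_cpu_add(xt_recseq.sequence, addend);
	}
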
@@ -1459,7 +1458,6 @@ static int
 compat_find_calc_match(struct xt_entry_match *m,
                       const char *name,
                       const struct ip6t_ip6 *ipv6,
-                      unsigned int hookmask,
                       int *size)
 {
        struct xt_match *match;
@@ -1528,8 +1526,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
        entry_offset = (void *)e - (void *)base;
        j = 0;
        xt_ematch_foreach(ematch, e) {
-               ret = compat_find_calc_match(ematch, name,
-                                            &e->ipv6, e->comefrom, &off);
+               ret = compat_find_calc_match(ematch, name, &e->ipv6, &off);
                if (ret != 0)
                        goto release_matches;
                ++j;
@@ -1623,6 +1620,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net,
        struct xt_mtchk_param mtpar;
        struct xt_entry_match *ematch;
 
+       e->counters.pcnt = xt_percpu_counter_alloc();
+       if (IS_ERR_VALUE(e->counters.pcnt))
+               return -ENOMEM;
        j = 0;
        mtpar.net       = net;
        mtpar.table     = name;
@@ -1647,6 +1647,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net,
                        break;
                cleanup_match(ematch, net);
        }
+
+       xt_percpu_counter_free(e->counters.pcnt);
+
        return ret;
 }
 
@@ -1731,7 +1734,7 @@ translate_compat_table(struct net *net,
                newinfo->hook_entry[i] = info->hook_entry[i];
                newinfo->underflow[i] = info->underflow[i];
        }
-       entry1 = newinfo->entries[raw_smp_processor_id()];
+       entry1 = newinfo->entries;
        pos = entry1;
        size = total_size;
        xt_entry_foreach(iter0, entry0, total_size) {
@@ -1783,11 +1786,6 @@ translate_compat_table(struct net *net,
                return ret;
        }
 
-       /* And one copy for every other CPU */
-       for_each_possible_cpu(i)
-               if (newinfo->entries[i] && newinfo->entries[i] != entry1)
-                       memcpy(newinfo->entries[i], entry1, newinfo->size);
-
        *pinfo = newinfo;
        *pentry0 = entry1;
        xt_free_table_info(info);
@@ -1834,8 +1832,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
        if (!newinfo)
                return -ENOMEM;
 
-       /* choose the copy that is on our node/cpu */
-       loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+       loc_cpu_entry = newinfo->entries;
        if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
                           tmp.size) != 0) {
                ret = -EFAULT;
@@ -1906,7 +1903,6 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
        void __user *pos;
        unsigned int size;
        int ret = 0;
-       const void *loc_cpu_entry;
        unsigned int i = 0;
        struct ip6t_entry *iter;
 
@@ -1914,14 +1910,9 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
        if (IS_ERR(counters))
                return PTR_ERR(counters);
 
-       /* choose the copy that is on our node/cpu, ...
-        * This choice is lazy (because current thread is
-        * allowed to migrate to another cpu)
-        */
-       loc_cpu_entry = private->entries[raw_smp_processor_id()];
        pos = userptr;
        size = total_size;
-       xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+       xt_entry_foreach(iter, private->entries, total_size) {
                ret = compat_copy_entry_to_user(iter, &pos,
                                                &size, counters, i++);
                if (ret != 0)
@@ -2096,8 +2087,7 @@ struct xt_table *ip6t_register_table(struct net *net,
                goto out;
        }
 
-       /* choose the copy on our node/cpu, but dont care about preemption */
-       loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+       loc_cpu_entry = newinfo->entries;
        memcpy(loc_cpu_entry, repl->entries, repl->size);
 
        ret = translate_table(net, newinfo, loc_cpu_entry, repl);
@@ -2127,7 +2117,7 @@ void ip6t_unregister_table(struct net *net, struct xt_table *table)
        private = xt_unregister_table(table);
 
        /* Decrease module usage counts and free resources */
-       loc_cpu_entry = private->entries[raw_smp_processor_id()];
+       loc_cpu_entry = private->entries;
        xt_entry_foreach(iter, loc_cpu_entry, private->size)
                cleanup_entry(iter, net);
        if (private->number > private->initial_entries)