These changes are the raw update to linux-4.4.6-rt14. Kernel sources are taken from kernel.org, and the rt patch from the rt wiki download page.
[kvmfornfv.git] kernel/net/ipv4/netfilter/ip_tables.c
index 2d0e265..b99affa 100644
@@ -102,7 +102,7 @@ ip_packet_match(const struct iphdr *ip,
        if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
                dprintf("VIA in mismatch (%s vs %s).%s\n",
                        indev, ipinfo->iniface,
-                       ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
+                       ipinfo->invflags & IPT_INV_VIA_IN ? " (INV)" : "");
                return false;
        }
 
@@ -111,7 +111,7 @@ ip_packet_match(const struct iphdr *ip,
        if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
                dprintf("VIA out mismatch (%s vs %s).%s\n",
                        outdev, ipinfo->outiface,
-                       ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
+                       ipinfo->invflags & IPT_INV_VIA_OUT ? " (INV)" : "");
                return false;
        }
 
@@ -120,7 +120,7 @@ ip_packet_match(const struct iphdr *ip,
            FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
                dprintf("Packet protocol %hi does not match %hi.%s\n",
                        ip->protocol, ipinfo->proto,
-                       ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
+                       ipinfo->invflags & IPT_INV_PROTO ? " (INV)" : "");
                return false;
        }
 
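
A note on the three hunks above: they only normalize the spacing around the
FWINV() expressions in the dprintf() calls; behaviour is unchanged. For
context, FWINV() XORs a raw match result with the rule's inversion flag so
that "! -i eth0"-style rules reuse the same test. It is defined at the top of
ip_packet_match() in this file, roughly:

    #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
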
@@ -246,7 +246,8 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
        return 0;
 }
 
-static void trace_packet(const struct sk_buff *skb,
+static void trace_packet(struct net *net,
+                        const struct sk_buff *skb,
                         unsigned int hook,
                         const struct net_device *in,
                         const struct net_device *out,
@@ -254,15 +255,12 @@ static void trace_packet(const struct sk_buff *skb,
                         const struct xt_table_info *private,
                         const struct ipt_entry *e)
 {
-       const void *table_base;
        const struct ipt_entry *root;
        const char *hookname, *chainname, *comment;
        const struct ipt_entry *iter;
        unsigned int rulenum = 0;
-       struct net *net = dev_net(in ? in : out);
 
-       table_base = private->entries[smp_processor_id()];
-       root = get_entry(table_base, private->hook_entry[hook]);
+       root = get_entry(private->entries, private->hook_entry[hook]);
 
        hookname = chainname = hooknames[hook];
        comment = comments[NF_IP_TRACE_COMMENT_RULE];
@@ -278,7 +276,7 @@ static void trace_packet(const struct sk_buff *skb,
 }
 #endif
 
-static inline __pure
+static inline
 struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
 {
        return (void *)entry + entry->next_offset;
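
Two things change in trace_packet() and its helper above. First, the network
namespace is now passed in by the caller instead of being derived via
dev_net(in ? in : out), which only worked while at least one device pointer
was guaranteed non-NULL. Second, the __pure marker is dropped from
ipt_next_entry(); it expands to __attribute__((pure)), i.e. a promise to GCC
that the result depends only on the arguments and readable global state:

    /* equivalent spelling of the removed annotation (sketch) */
    static inline struct ipt_entry *__attribute__((pure))
    ipt_next_entry(const struct ipt_entry *entry);

For a trivial static inline like this the hint buys nothing, so removing it
does not change the generated code in practice.
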
@@ -287,10 +285,10 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
 unsigned int
 ipt_do_table(struct sk_buff *skb,
-            unsigned int hook,
             const struct nf_hook_state *state,
             struct xt_table *table)
 {
+       unsigned int hook = state->hook;
        static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
        const struct iphdr *ip;
        /* Initializing verdict to NF_DROP keeps gcc happy. */
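
The explicit hook argument is gone because the hook number now travels in the
nf_hook_state that the netfilter core hands to every hook function. A sketch
of that structure as of this kernel series (fields abridged; see
include/linux/netfilter.h):

    struct nf_hook_state {
            unsigned int hook;             /* NF_INET_* hook number */
            u_int8_t pf;                   /* protocol family */
            struct net_device *in, *out;   /* ingress/egress devices */
            struct sock *sk;
            struct net *net;               /* namespace the hook fired in */
            int (*okfn)(struct net *, struct sock *, struct sk_buff *);
    };
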
@@ -298,12 +296,13 @@ ipt_do_table(struct sk_buff *skb,
        const char *indev, *outdev;
        const void *table_base;
        struct ipt_entry *e, **jumpstack;
-       unsigned int *stackptr, origptr, cpu;
+       unsigned int stackidx, cpu;
        const struct xt_table_info *private;
        struct xt_action_param acpar;
        unsigned int addend;
 
        /* Initialization */
+       stackidx = 0;
        ip = ip_hdr(skb);
        indev = state->in ? state->in->name : nulldevname;
        outdev = state->out ? state->out->name : nulldevname;
@@ -316,6 +315,7 @@ ipt_do_table(struct sk_buff *skb,
        acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
        acpar.thoff   = ip_hdrlen(skb);
        acpar.hotdrop = false;
+       acpar.net     = state->net;
        acpar.in      = state->in;
        acpar.out     = state->out;
        acpar.family  = NFPROTO_IPV4;
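
struct xt_action_param likewise grew a net field in this series, filled here
straight from the hook state, so match and target extensions can read
par->net instead of re-deriving the namespace from the devices. A
hypothetical match callback (illustrative only, not part of this patch) would
consume it as:

    static bool foo_mt(const struct sk_buff *skb, struct xt_action_param *par)
    {
            struct net *net = par->net;    /* set by ipt_do_table() above */

            return net != NULL;            /* a real match does work here */
    }
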
@@ -331,20 +331,29 @@ ipt_do_table(struct sk_buff *skb,
         * pointer.
         */
        smp_read_barrier_depends();
-       table_base = private->entries[cpu];
+       table_base = private->entries;
        jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
-       stackptr   = per_cpu_ptr(private->stackptr, cpu);
-       origptr    = *stackptr;
+
+       /* Switch to alternate jumpstack if we're being invoked via TEE.
+        * TEE issues XT_CONTINUE verdict on original skb so we must not
+        * clobber the jumpstack.
+        *
+        * For recursion via REJECT or SYNPROXY the stack will be clobbered
+        * but it is no problem since absolute verdict is issued by these.
+        */
+       if (static_key_false(&xt_tee_enabled))
+               jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
 
        e = get_entry(table_base, private->hook_entry[hook]);
 
-       pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n",
-                table->name, hook, origptr,
+       pr_debug("Entering %s(hook %u), UF %p\n",
+                table->name, hook,
                 get_entry(table_base, private->underflow[hook]));
 
        do {
                const struct xt_entry_target *t;
                const struct xt_entry_match *ematch;
+               struct xt_counters *counter;
 
                IP_NF_ASSERT(e);
                if (!ip_packet_match(ip, indev, outdev,
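
The alternate-stack trick above works because the jumpstack allocator
reserves room for two complete call chains per CPU, and nf_skb_duplicated is
a per-cpu bool that nf_dup_ipv4() sets while it reinjects the cloned skb, so
it reads as 1 exactly when the table is re-entered via TEE. The sizing in
xt_jumpstack_alloc() in net/netfilter/x_tables.c is, in sketch form:

    /* per-cpu stack: one chain for normal traversal, one for TEE re-entry */
    size = sizeof(void **) * i->stacksize * 2u;
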
@@ -361,7 +370,8 @@ ipt_do_table(struct sk_buff *skb,
                                goto no_match;
                }
 
-               ADD_COUNTER(e->counters, skb->len, 1);
+               counter = xt_get_this_cpu_counter(&e->counters);
+               ADD_COUNTER(*counter, skb->len, 1);
 
                t = ipt_get_target(e);
                IP_NF_ASSERT(t->u.kernel.target);
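
With per-cpu counters, e->counters.pcnt no longer holds a packet count on SMP
but a percpu allocation handle, and xt_get_this_cpu_counter() resolves it for
the executing CPU. Roughly, per include/linux/netfilter/x_tables.h in this
series:

    static inline struct xt_counters *
    xt_get_this_cpu_counter(struct xt_counters *cnt)
    {
            if (nr_cpu_ids > 1)
                    return this_cpu_ptr((void __percpu *)
                                        (unsigned long) cnt->pcnt);

            return cnt;   /* UP: keep using the struct embedded in the rule */
    }
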
@@ -369,8 +379,8 @@ ipt_do_table(struct sk_buff *skb,
 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
                /* The packet is traced: log it */
                if (unlikely(skb->nf_trace))
-                       trace_packet(skb, hook, state->in, state->out,
-                                    table->name, private, e);
+                       trace_packet(state->net, skb, hook, state->in,
+                                    state->out, table->name, private, e);
 #endif
                /* Standard target? */
                if (!t->u.kernel.target->target) {
@@ -383,28 +393,24 @@ ipt_do_table(struct sk_buff *skb,
                                        verdict = (unsigned int)(-v) - 1;
                                        break;
                                }
-                               if (*stackptr <= origptr) {
+                               if (stackidx == 0) {
                                        e = get_entry(table_base,
                                            private->underflow[hook]);
                                        pr_debug("Underflow (this is normal) "
                                                 "to %p\n", e);
                                } else {
-                                       e = jumpstack[--*stackptr];
+                                       e = jumpstack[--stackidx];
                                        pr_debug("Pulled %p out from pos %u\n",
-                                                e, *stackptr);
+                                                e, stackidx);
                                        e = ipt_next_entry(e);
                                }
                                continue;
                        }
                        if (table_base + v != ipt_next_entry(e) &&
                            !(e->ip.flags & IPT_F_GOTO)) {
-                               if (*stackptr >= private->stacksize) {
-                                       verdict = NF_DROP;
-                                       break;
-                               }
-                               jumpstack[(*stackptr)++] = e;
+                               jumpstack[stackidx++] = e;
                                pr_debug("Pushed %p into pos %u\n",
-                                        e, *stackptr - 1);
+                                        e, stackidx - 1);
                        }
 
                        e = get_entry(table_base, v);
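
The removed stack-overflow check is safe to drop because the stack is now
sized for the worst case up front: mark_source_chains() already rejects
cyclic rulesets, so a traversal can push at most one return address per
chain, and the jumpstack is allocated to cover that depth. As an invariant:

    /* assuming the jumpstack sizing in this series' x_tables.c:
     *   pushes per traversal <= chain count <= private->stacksize,
     * so jumpstack[stackidx++] cannot run past the allocation.
     */
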
@@ -423,11 +429,10 @@ ipt_do_table(struct sk_buff *skb,
                        /* Verdict */
                        break;
        } while (!acpar.hotdrop);
-       pr_debug("Exiting %s; resetting sp from %u to %u\n",
-                __func__, *stackptr, origptr);
-       *stackptr = origptr;
-       xt_write_recseq_end(addend);
-       local_bh_enable();
+       pr_debug("Exiting %s; sp at %u\n", __func__, stackidx);
+
+       xt_write_recseq_end(addend);
+       local_bh_enable();
 
 #ifdef DEBUG_ALLOW_ALL
        return NF_ACCEPT;
@@ -479,7 +484,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
                                unsigned int oldpos, size;
 
                                if ((strcmp(t->target.u.user.name,
-                                           XT_STANDARD_TARGET) == 0) &&
+                                           XT_STANDARD_TARGET) == 0) &&
                                    t->verdict < -NF_MAX_VERDICT - 1) {
                                        duprintf("mark_source_chains: bad "
                                                "negative verdict (%i)\n",
@@ -544,7 +549,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
                                pos = newpos;
                        }
                }
-               next:
+next:
                duprintf("Finished chain %u\n", hook);
        }
        return 1;
@@ -665,6 +670,10 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
        if (ret)
                return ret;
 
+       e->counters.pcnt = xt_percpu_counter_alloc();
+       if (IS_ERR_VALUE(e->counters.pcnt))
+               return -ENOMEM;
+
        j = 0;
        mtpar.net       = net;
        mtpar.table     = name;
@@ -691,6 +700,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
        ret = check_target(e, net, name);
        if (ret)
                goto err;
+
        return 0;
  err:
        module_put(t->u.kernel.target->me);
@@ -700,6 +710,9 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
                        break;
                cleanup_match(ematch, net);
        }
+
+       xt_percpu_counter_free(e->counters.pcnt);
+
        return ret;
 }
 
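
xt_percpu_counter_alloc()/xt_percpu_counter_free() back the new per-rule
counters: on SMP they hand out a percpu object whose (opaque) address is
stored in e->counters.pcnt, hence the IS_ERR_VALUE() test on the returned
value; on UP they return 0 and the embedded struct keeps being used.
Roughly, per x_tables.h in this series:

    static inline u64 xt_percpu_counter_alloc(void)
    {
            if (nr_cpu_ids > 1) {
                    void __percpu *res =
                            __alloc_percpu(sizeof(struct xt_counters),
                                           sizeof(struct xt_counters));

                    if (res == NULL)
                            return (u64) -ENOMEM;

                    return (u64) (__force unsigned long) res;
            }

            return 0;
    }

    static inline void xt_percpu_counter_free(u64 pcnt)
    {
            if (nr_cpu_ids > 1)
                    free_percpu((void __percpu *) (unsigned long) pcnt);
    }
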
@@ -784,13 +797,14 @@ cleanup_entry(struct ipt_entry *e, struct net *net)
        if (par.target->destroy != NULL)
                par.target->destroy(&par);
        module_put(par.target->me);
+       xt_percpu_counter_free(e->counters.pcnt);
 }
 
 /* Checks and translates the user-supplied table segment (held in
    newinfo) */
 static int
 translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
-                const struct ipt_replace *repl)
+               const struct ipt_replace *repl)
 {
        struct ipt_entry *iter;
        unsigned int i;
@@ -866,12 +880,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
                return ret;
        }
 
-       /* And one copy for every other CPU */
-       for_each_possible_cpu(i) {
-               if (newinfo->entries[i] && newinfo->entries[i] != entry0)
-                       memcpy(newinfo->entries[i], entry0, newinfo->size);
-       }
-
        return ret;
 }
 
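
This removal (and its twin in translate_compat_table() further down) is the
payoff of the shared-ruleset rework: the translated blob used to be
replicated once per possible CPU. As a ballpark, a 3 MB ruleset on a 16-CPU
machine previously pinned 16 * 3 MB = 48 MB; with one shared copy it costs
3 MB plus the percpu counters, i.e. about 16 bytes (sizeof(struct
xt_counters)) per rule per CPU.
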
@@ -887,14 +895,16 @@ get_counters(const struct xt_table_info *t,
                seqcount_t *s = &per_cpu(xt_recseq, cpu);
 
                i = 0;
-               xt_entry_foreach(iter, t->entries[cpu], t->size) {
+               xt_entry_foreach(iter, t->entries, t->size) {
+                       struct xt_counters *tmp;
                        u64 bcnt, pcnt;
                        unsigned int start;
 
+                       tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
                        do {
                                start = read_seqcount_begin(s);
-                               bcnt = iter->counters.bcnt;
-                               pcnt = iter->counters.pcnt;
+                               bcnt = tmp->bcnt;
+                               pcnt = tmp->pcnt;
                        } while (read_seqcount_retry(s, start));
 
                        ADD_COUNTER(counters[i], bcnt, pcnt);
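
get_counters() now walks the single shared blob and, for every rule, sums the
percpu slots of all CPUs, snapshotting each bcnt/pcnt pair under that CPU's
xt_recseq sequence counter so a concurrent writer cannot tear the 64-bit
reads. The accessor is the per-CPU sibling of the fast-path helper; sketch,
same assumptions:

    static inline struct xt_counters *
    xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
    {
            if (nr_cpu_ids > 1)
                    return per_cpu_ptr((void __percpu *)
                                       (unsigned long) cnt->pcnt, cpu);

            return cnt;
    }
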
@@ -939,11 +949,7 @@ copy_entries_to_user(unsigned int total_size,
        if (IS_ERR(counters))
                return PTR_ERR(counters);
 
-       /* choose the copy that is on our node/cpu, ...
-        * This choice is lazy (because current thread is
-        * allowed to migrate to another cpu)
-        */
-       loc_cpu_entry = private->entries[raw_smp_processor_id()];
+       loc_cpu_entry = private->entries;
        if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
                ret = -EFAULT;
                goto free_counters;
@@ -1051,16 +1057,16 @@ static int compat_table_info(const struct xt_table_info *info,
                             struct xt_table_info *newinfo)
 {
        struct ipt_entry *iter;
-       void *loc_cpu_entry;
+       const void *loc_cpu_entry;
        int ret;
 
        if (!newinfo || !info)
                return -EINVAL;
 
-       /* we dont care about newinfo->entries[] */
+       /* we dont care about newinfo->entries */
        memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
        newinfo->initial_entries = 0;
-       loc_cpu_entry = info->entries[raw_smp_processor_id()];
+       loc_cpu_entry = info->entries;
        xt_compat_init_offsets(AF_INET, info->number);
        xt_entry_foreach(iter, loc_cpu_entry, info->size) {
                ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
@@ -1072,7 +1078,7 @@ static int compat_table_info(const struct xt_table_info *info,
 #endif
 
 static int get_info(struct net *net, void __user *user,
-                    const int *len, int compat)
+                   const int *len, int compat)
 {
        char name[XT_TABLE_MAXNAMELEN];
        struct xt_table *t;
@@ -1181,7 +1187,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
        struct xt_table *t;
        struct xt_table_info *oldinfo;
        struct xt_counters *counters;
-       void *loc_cpu_old_entry;
        struct ipt_entry *iter;
 
        ret = 0;
@@ -1224,8 +1229,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
        get_counters(oldinfo, counters);
 
        /* Decrease module usage counts and free resource */
-       loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
-       xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
+       xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
                cleanup_entry(iter, net);
 
        xt_free_table_info(oldinfo);
@@ -1271,8 +1275,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
        if (!newinfo)
                return -ENOMEM;
 
-       /* choose the copy that is on our node/cpu */
-       loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+       loc_cpu_entry = newinfo->entries;
        if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
                           tmp.size) != 0) {
                ret = -EFAULT;
@@ -1301,9 +1304,9 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
 
 static int
 do_add_counters(struct net *net, const void __user *user,
-                unsigned int len, int compat)
+               unsigned int len, int compat)
 {
-       unsigned int i, curcpu;
+       unsigned int i;
        struct xt_counters_info tmp;
        struct xt_counters *paddc;
        unsigned int num_counters;
@@ -1313,7 +1316,6 @@ do_add_counters(struct net *net, const void __user *user,
        struct xt_table *t;
        const struct xt_table_info *private;
        int ret = 0;
-       void *loc_cpu_entry;
        struct ipt_entry *iter;
        unsigned int addend;
 #ifdef CONFIG_COMPAT
@@ -1369,12 +1371,12 @@ do_add_counters(struct net *net, const void __user *user,
        }
 
        i = 0;
-       /* Choose the copy that is on our node */
-       curcpu = smp_processor_id();
-       loc_cpu_entry = private->entries[curcpu];
        addend = xt_write_recseq_begin();
-       xt_entry_foreach(iter, loc_cpu_entry, private->size) {
-               ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
+       xt_entry_foreach(iter, private->entries, private->size) {
+               struct xt_counters *tmp;
+
+               tmp = xt_get_this_cpu_counter(&iter->counters);
+               ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
                ++i;
        }
        xt_write_recseq_end(addend);
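
Note that counters supplied by userspace are credited to whichever CPU runs
this loop: the write lands in that CPU's percpu slot via
xt_get_this_cpu_counter(). Per rule i, the effect is only:

    /* on the executing CPU's slot; what ADD_COUNTER() expands to here */
    tmp->bcnt += paddc[i].bcnt;
    tmp->pcnt += paddc[i].pcnt;

Since get_counters() folds all slots together on read-out, the user-visible
totals match the old shared-counter behaviour.
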
@@ -1444,7 +1446,6 @@ static int
 compat_find_calc_match(struct xt_entry_match *m,
                       const char *name,
                       const struct ipt_ip *ip,
-                      unsigned int hookmask,
                       int *size)
 {
        struct xt_match *match;
@@ -1513,8 +1514,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
        entry_offset = (void *)e - (void *)base;
        j = 0;
        xt_ematch_foreach(ematch, e) {
-               ret = compat_find_calc_match(ematch, name,
-                                            &e->ip, e->comefrom, &off);
+               ret = compat_find_calc_match(ematch, name, &e->ip, &off);
                if (ret != 0)
                        goto release_matches;
                ++j;
@@ -1610,6 +1610,10 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
        unsigned int j;
        int ret = 0;
 
+       e->counters.pcnt = xt_percpu_counter_alloc();
+       if (IS_ERR_VALUE(e->counters.pcnt))
+               return -ENOMEM;
+
        j = 0;
        mtpar.net       = net;
        mtpar.table     = name;
@@ -1634,6 +1638,9 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
                        break;
                cleanup_match(ematch, net);
        }
+
+       xt_percpu_counter_free(e->counters.pcnt);
+
        return ret;
 }
 
@@ -1718,7 +1725,7 @@ translate_compat_table(struct net *net,
                newinfo->hook_entry[i] = info->hook_entry[i];
                newinfo->underflow[i] = info->underflow[i];
        }
-       entry1 = newinfo->entries[raw_smp_processor_id()];
+       entry1 = newinfo->entries;
        pos = entry1;
        size = total_size;
        xt_entry_foreach(iter0, entry0, total_size) {
@@ -1770,11 +1777,6 @@ translate_compat_table(struct net *net,
                return ret;
        }
 
-       /* And one copy for every other CPU */
-       for_each_possible_cpu(i)
-               if (newinfo->entries[i] && newinfo->entries[i] != entry1)
-                       memcpy(newinfo->entries[i], entry1, newinfo->size);
-
        *pinfo = newinfo;
        *pentry0 = entry1;
        xt_free_table_info(info);
@@ -1821,8 +1823,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
        if (!newinfo)
                return -ENOMEM;
 
-       /* choose the copy that is on our node/cpu */
-       loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+       loc_cpu_entry = newinfo->entries;
        if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
                           tmp.size) != 0) {
                ret = -EFAULT;
@@ -1893,7 +1894,6 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
        void __user *pos;
        unsigned int size;
        int ret = 0;
-       const void *loc_cpu_entry;
        unsigned int i = 0;
        struct ipt_entry *iter;
 
@@ -1901,14 +1901,9 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
        if (IS_ERR(counters))
                return PTR_ERR(counters);
 
-       /* choose the copy that is on our node/cpu, ...
-        * This choice is lazy (because current thread is
-        * allowed to migrate to another cpu)
-        */
-       loc_cpu_entry = private->entries[raw_smp_processor_id()];
        pos = userptr;
        size = total_size;
-       xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+       xt_entry_foreach(iter, private->entries, total_size) {
                ret = compat_copy_entry_to_user(iter, &pos,
                                                &size, counters, i++);
                if (ret != 0)
@@ -2083,8 +2078,7 @@ struct xt_table *ipt_register_table(struct net *net,
                goto out;
        }
 
-       /* choose the copy on our node/cpu, but dont care about preemption */
-       loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+       loc_cpu_entry = newinfo->entries;
        memcpy(loc_cpu_entry, repl->entries, repl->size);
 
        ret = translate_table(net, newinfo, loc_cpu_entry, repl);
@@ -2115,7 +2109,7 @@ void ipt_unregister_table(struct net *net, struct xt_table *table)
        private = xt_unregister_table(table);
 
        /* Decrease module usage counts and free resources */
-       loc_cpu_entry = private->entries[raw_smp_processor_id()];
+       loc_cpu_entry = private->entries;
        xt_entry_foreach(iter, loc_cpu_entry, private->size)
                cleanup_entry(iter, net);
        if (private->number > private->initial_entries)