These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / net / netfilter / nfnetlink_queue.c
similarity index 91%
rename from kernel/net/netfilter/nfnetlink_queue_core.c
rename to kernel/net/netfilter/nfnetlink_queue.c
index 11c7682..861c661 100644 (file)
 #include <linux/netfilter_bridge.h>
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_queue.h>
+#include <linux/netfilter/nf_conntrack_common.h>
 #include <linux/list.h>
 #include <net/sock.h>
 #include <net/tcp_states.h>
 #include <net/netfilter/nf_queue.h>
 #include <net/netns/generic.h>
-#include <net/netfilter/nfnetlink_queue.h>
 
 #include <linux/atomic.h>
 
@@ -278,13 +278,30 @@ nla_put_failure:
        return -1;
 }
 
+/*
+ * Look up the security context for the secmark carried by @skb.
+ *
+ * Returns the length that security_secid_to_secctx() stored in seclen, or 0
+ * when CONFIG_NETWORK_SECMARK is not enabled, @skb is NULL, skb->sk is not
+ * a full socket, or skb->secmark is zero.  @secdata is only forwarded to
+ * security_secid_to_secctx(); it is not written on the early-return paths.
+ *
+ * The lookup runs with skb->sk->sk_callback_lock read-held (BH disabled).
+ *
+ * NOTE(review): the return value of security_secid_to_secctx() is ignored;
+ * this relies on seclen staying 0 if that call fails -- confirm.
+ */
+static u32 nfqnl_get_sk_secctx(struct sk_buff *skb, char **secdata)
+{
+       u32 seclen = 0;
+#if IS_ENABLED(CONFIG_NETWORK_SECMARK)
+       if (!skb || !sk_fullsock(skb->sk))
+               return 0;
+
+       read_lock_bh(&skb->sk->sk_callback_lock);
+
+       if (skb->secmark)
+               security_secid_to_secctx(skb->secmark, secdata, &seclen);
+
+       read_unlock_bh(&skb->sk->sk_callback_lock);
+#endif
+       return seclen;
+}
+
 static struct sk_buff *
 nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
                           struct nf_queue_entry *entry,
                           __be32 **packet_id_ptr)
 {
        size_t size;
-       size_t data_len = 0, cap_len = 0;
+       size_t data_len = 0, cap_len = 0, rem_len = 0;
        unsigned int hlen = 0;
        struct sk_buff *skb;
        struct nlattr *nla;
@@ -296,7 +313,10 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
        struct net_device *outdev;
        struct nf_conn *ct = NULL;
        enum ip_conntrack_info uninitialized_var(ctinfo);
+       struct nfnl_ct_hook *nfnl_ct;
        bool csum_verify;
+       char *secdata = NULL;
+       u32 seclen = 0;
 
        size =    nlmsg_total_size(sizeof(struct nfgenmsg))
                + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
@@ -341,18 +361,32 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
                hlen = min_t(unsigned int, hlen, data_len);
                size += sizeof(struct nlattr) + hlen;
                cap_len = entskb->len;
+               rem_len = data_len - hlen;
                break;
        }
 
-       if (queue->flags & NFQA_CFG_F_CONNTRACK)
-               ct = nfqnl_ct_get(entskb, &size, &ctinfo);
+       nfnl_ct = rcu_dereference(nfnl_ct_hook);
+
+       if (queue->flags & NFQA_CFG_F_CONNTRACK) {
+               if (nfnl_ct != NULL) {
+                       ct = nfnl_ct->get_ct(entskb, &ctinfo);
+                       if (ct != NULL)
+                               size += nfnl_ct->build_size(ct);
+               }
+       }
 
        if (queue->flags & NFQA_CFG_F_UID_GID) {
                size +=  (nla_total_size(sizeof(u_int32_t))     /* uid */
                        + nla_total_size(sizeof(u_int32_t)));   /* gid */
        }
 
-       skb = nfnetlink_alloc_skb(net, size, queue->peer_portid,
+       if ((queue->flags & NFQA_CFG_F_SECCTX) && entskb->sk) {
+               seclen = nfqnl_get_sk_secctx(entskb, &secdata);
+               if (seclen)
+                       size += nla_total_size(seclen);
+       }
+
+       skb = __netlink_alloc_skb(net->nfnl, size, rem_len, queue->peer_portid,
                                  GFP_ATOMIC);
        if (!skb) {
                skb_tx_error(entskb);
@@ -467,9 +501,10 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 
        if (entskb->tstamp.tv64) {
                struct nfqnl_msg_packet_timestamp ts;
-               struct timeval tv = ktime_to_timeval(entskb->tstamp);
-               ts.sec = cpu_to_be64(tv.tv_sec);
-               ts.usec = cpu_to_be64(tv.tv_usec);
+               struct timespec64 kts = ktime_to_timespec64(skb->tstamp);
+
+               ts.sec = cpu_to_be64(kts.tv_sec);
+               ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
 
                if (nla_put(skb, NFQA_TIMESTAMP, sizeof(ts), &ts))
                        goto nla_put_failure;
@@ -479,7 +514,10 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
            nfqnl_put_sk_uidgid(skb, entskb->sk) < 0)
                goto nla_put_failure;
 
-       if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
+       if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata))
+               goto nla_put_failure;
+
+       if (ct && nfnl_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0)
                goto nla_put_failure;
 
        if (cap_len > data_len &&
@@ -569,12 +607,9 @@ static struct nf_queue_entry *
 nf_queue_entry_dup(struct nf_queue_entry *e)
 {
        struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
-       if (entry) {
-               if (nf_queue_entry_get_refs(entry))
-                       return entry;
-               kfree(entry);
-       }
-       return NULL;
+       if (entry)
+               nf_queue_entry_get_refs(entry);
+       return entry;
 }
 
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
@@ -641,8 +676,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
        struct nfqnl_instance *queue;
        struct sk_buff *skb, *segs;
        int err = -ENOBUFS;
-       struct net *net = dev_net(entry->state.in ?
-                                 entry->state.in : entry->state.out);
+       struct net *net = entry->state.net;
        struct nfnl_queue_net *q = nfnl_queue_pernet(net);
 
        /* rcu_read_lock()ed by nf_hook_slow() */
@@ -670,7 +704,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
        nf_bridge_adjust_skb_data(skb);
        segs = skb_gso_segment(skb, 0);
        /* Does not use PTR_ERR to limit the number of error codes that can be
-        * returned by nf_queue.  For instance, callers rely on -ECANCELED to
+        * returned by nf_queue.  For instance, callers rely on -ESRCH to
         * mean 'ignore this hook'.
         */
        if (IS_ERR_OR_NULL(segs))
@@ -806,8 +840,6 @@ nfqnl_dev_drop(struct net *net, int ifindex)
        rcu_read_unlock();
 }
 
-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
-
 static int
 nfqnl_rcv_dev_event(struct notifier_block *this,
                    unsigned long event, void *ptr)
@@ -824,6 +856,27 @@ static struct notifier_block nfqnl_dev_notifier = {
        .notifier_call  = nfqnl_rcv_dev_event,
 };
 
+/*
+ * Comparison callback handed to nfqnl_flush() by nfqnl_nf_hook_drop().
+ *
+ * @ops_ptr carries a struct nf_hook_ops pointer cast to unsigned long.
+ * Returns non-zero when @entry->elem is that same pointer, 0 otherwise.
+ */
+static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long ops_ptr)
+{
+       return entry->elem == (struct nf_hook_ops *)ops_ptr;
+}
+
+/*
+ * nf_hook_drop callback of the queue handler (see nfqh).
+ *
+ * Walks every bucket of the per-netns instance table for @net under
+ * rcu_read_lock() and calls nfqnl_flush() on each queue instance with
+ * nf_hook_cmp as the match callback and @hook as its argument.
+ *
+ * NOTE(review): presumably nfqnl_flush() disposes of the queued entries for
+ * which the callback returns non-zero -- confirm against nfqnl_flush().
+ */
+static void nfqnl_nf_hook_drop(struct net *net, struct nf_hook_ops *hook)
+{
+       struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+       int i;
+
+       rcu_read_lock();
+       for (i = 0; i < INSTANCE_BUCKETS; i++) {
+               struct nfqnl_instance *inst;
+               struct hlist_head *head = &q->instance_table[i];
+
+               hlist_for_each_entry_rcu(inst, head, hlist)
+                       nfqnl_flush(inst, nf_hook_cmp, (unsigned long)hook);
+       }
+       rcu_read_unlock();
+}
+
 static int
 nfqnl_rcv_nl_event(struct notifier_block *this,
                   unsigned long event, void *ptr)
@@ -954,6 +1007,28 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
        return 0;
 }
 
+/*
+ * Hand the NFQA_CT (and, if present, NFQA_EXP) attributes of a verdict to
+ * the conntrack hook for the connection found on entry->skb.
+ *
+ * @nfnl_ct: conntrack hook ops; dereferenced unconditionally, so the caller
+ *           must pass a non-NULL pointer.
+ * @nlh:     netlink header, used only for nlmsg_report().
+ * @nfqa:    attribute table; nfqa[NFQA_CT] is passed to ->parse() without a
+ *           NULL check here.
+ * @entry:   queue entry whose skb is looked up via ->get_ct().
+ * @ctinfo:  passed through to ->get_ct().
+ *
+ * Returns the conntrack, or NULL when ->get_ct() finds none or ->parse()
+ * returns a negative value.  The result of ->attach_expect() is ignored.
+ *
+ * NOTE(review): the port id is read from NETLINK_CB(entry->skb), not from
+ * the netlink message that carried the verdict -- confirm this is intended.
+ */
+static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct,
+                                     const struct nlmsghdr *nlh,
+                                     const struct nlattr * const nfqa[],
+                                     struct nf_queue_entry *entry,
+                                     enum ip_conntrack_info *ctinfo)
+{
+       struct nf_conn *ct;
+
+       ct = nfnl_ct->get_ct(entry->skb, ctinfo);
+       if (ct == NULL)
+               return NULL;
+
+       if (nfnl_ct->parse(nfqa[NFQA_CT], ct) < 0)
+               return NULL;
+
+       if (nfqa[NFQA_EXP])
+               nfnl_ct->attach_expect(nfqa[NFQA_EXP], ct,
+                                     NETLINK_CB(entry->skb).portid,
+                                     nlmsg_report(nlh));
+       return ct;
+}
+
 static int
 nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
                   const struct nlmsghdr *nlh,
@@ -967,6 +1042,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
        unsigned int verdict;
        struct nf_queue_entry *entry;
        enum ip_conntrack_info uninitialized_var(ctinfo);
+       struct nfnl_ct_hook *nfnl_ct;
        struct nf_conn *ct = NULL;
 
        struct net *net = sock_net(ctnl);
@@ -989,13 +1065,12 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
        if (entry == NULL)
                return -ENOENT;
 
+       /* rcu lock already held from nfnl->call_rcu. */
+       nfnl_ct = rcu_dereference(nfnl_ct_hook);
+
        if (nfqa[NFQA_CT]) {
-               ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo);
-               if (ct && nfqa[NFQA_EXP]) {
-                       nfqnl_attach_expect(ct, nfqa[NFQA_EXP],
-                                           NETLINK_CB(skb).portid,
-                                           nlmsg_report(nlh));
-               }
+               if (nfnl_ct != NULL)
+                       ct = nfqnl_ct_parse(nfnl_ct, nlh, nfqa, entry, &ctinfo);
        }
 
        if (nfqa[NFQA_PAYLOAD]) {
@@ -1006,8 +1081,8 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
                                 payload_len, entry, diff) < 0)
                        verdict = NF_DROP;
 
-               if (ct)
-                       nfqnl_ct_seq_adjust(entry->skb, ct, ctinfo, diff);
+               if (ct && diff)
+                       nfnl_ct->seq_adjust(entry->skb, ct, ctinfo, diff);
        }
 
        if (nfqa[NFQA_MARK])
@@ -1031,7 +1106,8 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
 };
 
+/* Queue handler callbacks: packet enqueue and per-hook flush. */
 static const struct nf_queue_handler nfqh = {
-       .outfn  = &nfqnl_enqueue_packet,
+       .outfn          = &nfqnl_enqueue_packet,
+       .nf_hook_drop   = &nfqnl_nf_hook_drop,
 };
 
 static int
@@ -1142,7 +1218,12 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
                        ret = -EOPNOTSUPP;
                        goto err_out_unlock;
                }
-
+#if !IS_ENABLED(CONFIG_NETWORK_SECMARK)
+               if (flags & mask & NFQA_CFG_F_SECCTX) {
+                       ret = -EOPNOTSUPP;
+                       goto err_out_unlock;
+               }
+#endif
                spin_lock_bh(&queue->lock);
                queue->flags &= ~mask;
                queue->flags |= flags & mask;
@@ -1257,7 +1338,7 @@ static int seq_show(struct seq_file *s, void *v)
                   inst->copy_mode, inst->copy_range,
                   inst->queue_dropped, inst->queue_user_dropped,
                   inst->id_sequence, 1);
-       return seq_has_overflowed(s);
+       return 0;
 }
 
 static const struct seq_operations nfqnl_seq_ops = {
@@ -1338,6 +1419,7 @@ static int __init nfnetlink_queue_init(void)
 
 cleanup_netlink_notifier:
        netlink_unregister_notifier(&nfqnl_rtnl_notifier);
+       unregister_pernet_subsys(&nfnl_queue_net_ops);
 out:
        return status;
 }