Merge "To generate debug rpm and debian package for kvmfornfv kernel"
[kvmfornfv.git] / kernel / drivers / net / tun.c
index e470ae5..c31d8e7 100644 (file)
@@ -111,6 +111,7 @@ do {                                                                \
 #define TUN_FASYNC     IFF_ATTACH_QUEUE
 /* High bits in flags field are unused. */
 #define TUN_VNET_LE     0x80000000
+#define TUN_VNET_BE     0x40000000
 
 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
                      IFF_MULTI_QUEUE)
@@ -146,7 +147,6 @@ struct tun_file {
        struct socket socket;
        struct socket_wq wq;
        struct tun_struct __rcu *tun;
-       struct net *net;
        struct fasync_struct *fasync;
        /* only used for fasnyc */
        unsigned int flags;
@@ -206,14 +206,68 @@ struct tun_struct {
        u32 flow_count;
 };
 
+#ifdef CONFIG_TUN_VNET_CROSS_LE
+static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
+{
+       return tun->flags & TUN_VNET_BE ? false :
+               virtio_legacy_is_little_endian();
+}
+
+static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
+{
+       int be = !!(tun->flags & TUN_VNET_BE);
+
+       if (put_user(be, argp))
+               return -EFAULT;
+
+       return 0;
+}
+
+static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
+{
+       int be;
+
+       if (get_user(be, argp))
+               return -EFAULT;
+
+       if (be)
+               tun->flags |= TUN_VNET_BE;
+       else
+               tun->flags &= ~TUN_VNET_BE;
+
+       return 0;
+}
+#else
+static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
+{
+       return virtio_legacy_is_little_endian();
+}
+
+static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
+{
+       return -EINVAL;
+}
+
+static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
+{
+       return -EINVAL;
+}
+#endif /* CONFIG_TUN_VNET_CROSS_LE */
+
+static inline bool tun_is_little_endian(struct tun_struct *tun)
+{
+       return tun->flags & TUN_VNET_LE ||
+               tun_legacy_is_little_endian(tun);
+}
+
 static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val)
 {
-       return __virtio16_to_cpu(tun->flags & TUN_VNET_LE, val);
+       return __virtio16_to_cpu(tun_is_little_endian(tun), val);
 }
 
 static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val)
 {
-       return __cpu_to_virtio16(tun->flags & TUN_VNET_LE, val);
+       return __cpu_to_virtio16(tun_is_little_endian(tun), val);
 }
 
 static inline u32 tun_hashfn(u32 rxhash)
@@ -493,10 +547,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
                            tun->dev->reg_state == NETREG_REGISTERED)
                                unregister_netdevice(tun->dev);
                }
-
-               BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED,
-                                &tfile->socket.flags));
-               sk_release_kernel(&tfile->sk);
+               sock_put(&tfile->sk);
        }
 }
 
@@ -516,11 +567,13 @@ static void tun_detach_all(struct net_device *dev)
        for (i = 0; i < n; i++) {
                tfile = rtnl_dereference(tun->tfiles[i]);
                BUG_ON(!tfile);
+               tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
                tfile->socket.sk->sk_data_ready(tfile->socket.sk);
                RCU_INIT_POINTER(tfile->tun, NULL);
                --tun->numqueues;
        }
        list_for_each_entry(tfile, &tun->disabled, next) {
+               tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
                tfile->socket.sk->sk_data_ready(tfile->socket.sk);
                RCU_INIT_POINTER(tfile->tun, NULL);
        }
@@ -570,11 +623,13 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte
 
        /* Re-attach the filter to persist device */
        if (!skip_filter && (tun->filter_attached == true)) {
-               err = sk_attach_filter(&tun->fprog, tfile->socket.sk);
+               err = __sk_attach_filter(&tun->fprog, tfile->socket.sk,
+                                        lockdep_rtnl_is_held());
                if (!err)
                        goto out;
        }
        tfile->queue_index = tun->numqueues;
+       tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN;
        rcu_assign_pointer(tfile->tun, tun);
        rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
        tun->numqueues++;
@@ -807,7 +862,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
        if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
                goto drop;
 
-       if (skb->sk) {
+       if (skb->sk && sk_fullsock(skb->sk)) {
                sock_tx_timestamp(skb->sk, &skb_shinfo(skb)->tx_flags);
                sw_tx_timestamp(skb);
        }
@@ -910,6 +965,7 @@ static const struct net_device_ops tap_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = tun_poll_controller,
 #endif
+       .ndo_features_check     = passthru_features_check,
 };
 
 static void tun_flow_init(struct tun_struct *tun)
@@ -948,7 +1004,6 @@ static void tun_net_init(struct net_device *dev)
                /* Zero header length */
                dev->type = ARPHRD_NONE;
                dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
-               dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
                break;
 
        case IFF_TAP:
@@ -960,7 +1015,6 @@ static void tun_net_init(struct net_device *dev)
 
                eth_hw_addr_random(dev);
 
-               dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
                break;
        }
 }
@@ -988,7 +1042,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
                mask |= POLLIN | POLLRDNORM;
 
        if (sock_writeable(sk) ||
-           (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
+           (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
             sock_writeable(sk)))
                mask |= POLLOUT | POLLWRNORM;
 
@@ -1054,9 +1108,11 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
        }
 
        if (tun->flags & IFF_VNET_HDR) {
-               if (len < tun->vnet_hdr_sz)
+               int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
+
+               if (len < vnet_hdr_sz)
                        return -EINVAL;
-               len -= tun->vnet_hdr_sz;
+               len -= vnet_hdr_sz;
 
                n = copy_from_iter(&gso, sizeof(gso), from);
                if (n != sizeof(gso))
@@ -1068,7 +1124,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 
                if (tun16_to_cpu(tun, gso.hdr_len) > len)
                        return -EINVAL;
-               iov_iter_advance(from, tun->vnet_hdr_sz - sizeof(gso));
+               iov_iter_advance(from, vnet_hdr_sz - sizeof(gso));
        }
 
        if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) {
@@ -1247,7 +1303,7 @@ static ssize_t tun_put_user(struct tun_struct *tun,
                vlan_hlen = VLAN_HLEN;
 
        if (tun->flags & IFF_VNET_HDR)
-               vnet_hdr_sz = tun->vnet_hdr_sz;
+               vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
 
        total = skb->len + vlan_hlen + vnet_hdr_sz;
 
@@ -1357,9 +1413,6 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
        if (!iov_iter_count(to))
                return 0;
 
-       if (tun->dev->reg_state != NETREG_REGISTERED)
-               return -EIO;
-
        /* Read frames from queue */
        skb = __skb_recv_datagram(tfile->socket.sk, noblock ? MSG_DONTWAIT : 0,
                                  &peeked, &off, &err);
@@ -1411,6 +1464,8 @@ static void tun_setup(struct net_device *dev)
 
        dev->ethtool_ops = &tun_ethtool_ops;
        dev->destructor = tun_free_netdev;
+       /* We prefer our own queue length */
+       dev->tx_queue_len = TUN_READQ_SIZE;
 }
 
 /* Trivial set of netlink ops to allow deleting tun or tap
@@ -1436,7 +1491,7 @@ static void tun_sock_write_space(struct sock *sk)
        if (!sock_writeable(sk))
                return;
 
-       if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
+       if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags))
                return;
 
        wqueue = sk_sleep(sk);
@@ -1492,18 +1547,10 @@ out:
        return ret;
 }
 
-static int tun_release(struct socket *sock)
-{
-       if (sock->sk)
-               sock_put(sock->sk);
-       return 0;
-}
-
 /* Ops structure to mimic raw sockets with tun */
 static const struct proto_ops tun_socket_ops = {
        .sendmsg = tun_sendmsg,
        .recvmsg = tun_recvmsg,
-       .release = tun_release,
 };
 
 static struct proto tun_proto = {
@@ -1760,7 +1807,7 @@ static void tun_detach_filter(struct tun_struct *tun, int n)
 
        for (i = 0; i < n; i++) {
                tfile = rtnl_dereference(tun->tfiles[i]);
-               sk_detach_filter(tfile->socket.sk);
+               __sk_detach_filter(tfile->socket.sk, lockdep_rtnl_is_held());
        }
 
        tun->filter_attached = false;
@@ -1773,7 +1820,8 @@ static int tun_attach_filter(struct tun_struct *tun)
 
        for (i = 0; i < tun->numqueues; i++) {
                tfile = rtnl_dereference(tun->tfiles[i]);
-               ret = sk_attach_filter(&tun->fprog, tfile->socket.sk);
+               ret = __sk_attach_filter(&tun->fprog, tfile->socket.sk,
+                                        lockdep_rtnl_is_held());
                if (ret) {
                        tun_detach_filter(tun, i);
                        return ret;
@@ -1865,7 +1913,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
        if (cmd == TUNSETIFF && !tun) {
                ifr.ifr_name[IFNAMSIZ-1] = '\0';
 
-               ret = tun_set_iff(tfile->net, file, &ifr);
+               ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr);
 
                if (ret)
                        goto unlock;
@@ -2056,6 +2104,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                        tun->flags &= ~TUN_VNET_LE;
                break;
 
+       case TUNGETVNETBE:
+               ret = tun_get_vnet_be(tun, argp);
+               break;
+
+       case TUNSETVNETBE:
+               ret = tun_set_vnet_be(tun, argp);
+               break;
+
        case TUNATTACHFILTER:
                /* Can be set only for TAPs */
                ret = -EINVAL;
@@ -2154,16 +2210,16 @@ out:
 
 static int tun_chr_open(struct inode *inode, struct file * file)
 {
+       struct net *net = current->nsproxy->net_ns;
        struct tun_file *tfile;
 
        DBG1(KERN_INFO, "tunX: tun_chr_open\n");
 
-       tfile = (struct tun_file *)sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL,
-                                           &tun_proto);
+       tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
+                                           &tun_proto, 0);
        if (!tfile)
                return -ENOMEM;
        RCU_INIT_POINTER(tfile->tun, NULL);
-       tfile->net = get_net(current->nsproxy->net_ns);
        tfile->flags = 0;
        tfile->ifindex = 0;
 
@@ -2174,13 +2230,11 @@ static int tun_chr_open(struct inode *inode, struct file * file)
        tfile->socket.ops = &tun_socket_ops;
 
        sock_init_data(&tfile->socket, &tfile->sk);
-       sk_change_net(&tfile->sk, tfile->net);
 
        tfile->sk.sk_write_space = tun_sock_write_space;
        tfile->sk.sk_sndbuf = INT_MAX;
 
        file->private_data = tfile;
-       set_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags);
        INIT_LIST_HEAD(&tfile->next);
 
        sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
@@ -2191,10 +2245,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 static int tun_chr_close(struct inode *inode, struct file *file)
 {
        struct tun_file *tfile = file->private_data;
-       struct net *net = tfile->net;
 
        tun_detach(tfile, true);
-       put_net(net);
 
        return 0;
 }