X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=kernel%2Fdrivers%2Fnet%2Ftun.c;h=c31d8e74f131ed5da2e7cad82e655096d7126996;hb=4bfeded9ee7ddc1cf05f6bd2c388894a7d736291;hp=e470ae59d40536fe7530774cb473ebe57000f9a8;hpb=9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00;p=kvmfornfv.git diff --git a/kernel/drivers/net/tun.c b/kernel/drivers/net/tun.c index e470ae59d..c31d8e74f 100644 --- a/kernel/drivers/net/tun.c +++ b/kernel/drivers/net/tun.c @@ -111,6 +111,7 @@ do { \ #define TUN_FASYNC IFF_ATTACH_QUEUE /* High bits in flags field are unused. */ #define TUN_VNET_LE 0x80000000 +#define TUN_VNET_BE 0x40000000 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ IFF_MULTI_QUEUE) @@ -146,7 +147,6 @@ struct tun_file { struct socket socket; struct socket_wq wq; struct tun_struct __rcu *tun; - struct net *net; struct fasync_struct *fasync; /* only used for fasnyc */ unsigned int flags; @@ -206,14 +206,68 @@ struct tun_struct { u32 flow_count; }; +#ifdef CONFIG_TUN_VNET_CROSS_LE +static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) +{ + return tun->flags & TUN_VNET_BE ? false : + virtio_legacy_is_little_endian(); +} + +static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) +{ + int be = !!(tun->flags & TUN_VNET_BE); + + if (put_user(be, argp)) + return -EFAULT; + + return 0; +} + +static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) +{ + int be; + + if (get_user(be, argp)) + return -EFAULT; + + if (be) + tun->flags |= TUN_VNET_BE; + else + tun->flags &= ~TUN_VNET_BE; + + return 0; +} +#else +static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) +{ + return virtio_legacy_is_little_endian(); +} + +static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) +{ + return -EINVAL; +} + +static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) +{ + return -EINVAL; +} +#endif /* CONFIG_TUN_VNET_CROSS_LE */ + +static inline bool tun_is_little_endian(struct tun_struct *tun) +{ + return tun->flags & TUN_VNET_LE || + tun_legacy_is_little_endian(tun); +} + static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) { - return __virtio16_to_cpu(tun->flags & TUN_VNET_LE, val); + return __virtio16_to_cpu(tun_is_little_endian(tun), val); } static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val) { - return __cpu_to_virtio16(tun->flags & TUN_VNET_LE, val); + return __cpu_to_virtio16(tun_is_little_endian(tun), val); } static inline u32 tun_hashfn(u32 rxhash) @@ -493,10 +547,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } - - BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED, - &tfile->socket.flags)); - sk_release_kernel(&tfile->sk); + sock_put(&tfile->sk); } } @@ -516,11 +567,13 @@ static void tun_detach_all(struct net_device *dev) for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); BUG_ON(!tfile); + tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; tfile->socket.sk->sk_data_ready(tfile->socket.sk); RCU_INIT_POINTER(tfile->tun, NULL); --tun->numqueues; } list_for_each_entry(tfile, &tun->disabled, next) { + tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; tfile->socket.sk->sk_data_ready(tfile->socket.sk); RCU_INIT_POINTER(tfile->tun, NULL); } @@ -570,11 +623,13 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte /* Re-attach the filter to persist device */ if (!skip_filter && (tun->filter_attached == true)) { - err = sk_attach_filter(&tun->fprog, tfile->socket.sk); + err = __sk_attach_filter(&tun->fprog, tfile->socket.sk, + lockdep_rtnl_is_held()); if (!err) goto out; } tfile->queue_index = tun->numqueues; + tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN; rcu_assign_pointer(tfile->tun, tun); rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); tun->numqueues++; @@ -807,7 +862,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) goto drop; - if (skb->sk) { + if (skb->sk && sk_fullsock(skb->sk)) { sock_tx_timestamp(skb->sk, &skb_shinfo(skb)->tx_flags); sw_tx_timestamp(skb); } @@ -910,6 +965,7 @@ static const struct net_device_ops tap_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = tun_poll_controller, #endif + .ndo_features_check = passthru_features_check, }; static void tun_flow_init(struct tun_struct *tun) @@ -948,7 +1004,6 @@ static void tun_net_init(struct net_device *dev) /* Zero header length */ dev->type = ARPHRD_NONE; dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; - dev->tx_queue_len = TUN_READQ_SIZE; /* We prefer our own queue length */ break; case IFF_TAP: @@ -960,7 +1015,6 @@ static void tun_net_init(struct net_device *dev) eth_hw_addr_random(dev); - dev->tx_queue_len = TUN_READQ_SIZE; /* We prefer our own queue length */ break; } } @@ -988,7 +1042,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) mask |= POLLIN | POLLRDNORM; if (sock_writeable(sk) || - (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags) && + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && sock_writeable(sk))) mask |= POLLOUT | POLLWRNORM; @@ -1054,9 +1108,11 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } if (tun->flags & IFF_VNET_HDR) { - if (len < tun->vnet_hdr_sz) + int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); + + if (len < vnet_hdr_sz) return -EINVAL; - len -= tun->vnet_hdr_sz; + len -= vnet_hdr_sz; n = copy_from_iter(&gso, sizeof(gso), from); if (n != sizeof(gso)) @@ -1068,7 +1124,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (tun16_to_cpu(tun, gso.hdr_len) > len) return -EINVAL; - iov_iter_advance(from, tun->vnet_hdr_sz - sizeof(gso)); + iov_iter_advance(from, vnet_hdr_sz - sizeof(gso)); } if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) { @@ -1247,7 +1303,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, vlan_hlen = VLAN_HLEN; if (tun->flags & IFF_VNET_HDR) - vnet_hdr_sz = tun->vnet_hdr_sz; + vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); total = skb->len + vlan_hlen + vnet_hdr_sz; @@ -1357,9 +1413,6 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, if (!iov_iter_count(to)) return 0; - if (tun->dev->reg_state != NETREG_REGISTERED) - return -EIO; - /* Read frames from queue */ skb = __skb_recv_datagram(tfile->socket.sk, noblock ? MSG_DONTWAIT : 0, &peeked, &off, &err); @@ -1411,6 +1464,8 @@ static void tun_setup(struct net_device *dev) dev->ethtool_ops = &tun_ethtool_ops; dev->destructor = tun_free_netdev; + /* We prefer our own queue length */ + dev->tx_queue_len = TUN_READQ_SIZE; } /* Trivial set of netlink ops to allow deleting tun or tap @@ -1436,7 +1491,7 @@ static void tun_sock_write_space(struct sock *sk) if (!sock_writeable(sk)) return; - if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) + if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; wqueue = sk_sleep(sk); @@ -1492,18 +1547,10 @@ out: return ret; } -static int tun_release(struct socket *sock) -{ - if (sock->sk) - sock_put(sock->sk); - return 0; -} - /* Ops structure to mimic raw sockets with tun */ static const struct proto_ops tun_socket_ops = { .sendmsg = tun_sendmsg, .recvmsg = tun_recvmsg, - .release = tun_release, }; static struct proto tun_proto = { @@ -1760,7 +1807,7 @@ static void tun_detach_filter(struct tun_struct *tun, int n) for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); - sk_detach_filter(tfile->socket.sk); + __sk_detach_filter(tfile->socket.sk, lockdep_rtnl_is_held()); } tun->filter_attached = false; @@ -1773,7 +1820,8 @@ static int tun_attach_filter(struct tun_struct *tun) for (i = 0; i < tun->numqueues; i++) { tfile = rtnl_dereference(tun->tfiles[i]); - ret = sk_attach_filter(&tun->fprog, tfile->socket.sk); + ret = __sk_attach_filter(&tun->fprog, tfile->socket.sk, + lockdep_rtnl_is_held()); if (ret) { tun_detach_filter(tun, i); return ret; @@ -1865,7 +1913,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, if (cmd == TUNSETIFF && !tun) { ifr.ifr_name[IFNAMSIZ-1] = '\0'; - ret = tun_set_iff(tfile->net, file, &ifr); + ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr); if (ret) goto unlock; @@ -2056,6 +2104,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, tun->flags &= ~TUN_VNET_LE; break; + case TUNGETVNETBE: + ret = tun_get_vnet_be(tun, argp); + break; + + case TUNSETVNETBE: + ret = tun_set_vnet_be(tun, argp); + break; + case TUNATTACHFILTER: /* Can be set only for TAPs */ ret = -EINVAL; @@ -2154,16 +2210,16 @@ out: static int tun_chr_open(struct inode *inode, struct file * file) { + struct net *net = current->nsproxy->net_ns; struct tun_file *tfile; DBG1(KERN_INFO, "tunX: tun_chr_open\n"); - tfile = (struct tun_file *)sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL, - &tun_proto); + tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, + &tun_proto, 0); if (!tfile) return -ENOMEM; RCU_INIT_POINTER(tfile->tun, NULL); - tfile->net = get_net(current->nsproxy->net_ns); tfile->flags = 0; tfile->ifindex = 0; @@ -2174,13 +2230,11 @@ static int tun_chr_open(struct inode *inode, struct file * file) tfile->socket.ops = &tun_socket_ops; sock_init_data(&tfile->socket, &tfile->sk); - sk_change_net(&tfile->sk, tfile->net); tfile->sk.sk_write_space = tun_sock_write_space; tfile->sk.sk_sndbuf = INT_MAX; file->private_data = tfile; - set_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags); INIT_LIST_HEAD(&tfile->next); sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); @@ -2191,10 +2245,8 @@ static int tun_chr_open(struct inode *inode, struct file * file) static int tun_chr_close(struct inode *inode, struct file *file) { struct tun_file *tfile = file->private_data; - struct net *net = tfile->net; tun_detach(tfile, true); - put_net(net); return 0; }