Adding breaktrace & disabling timer migration
[kvmfornfv.git] / kernel/net/core/dev.c
index 349de9d..aa6165e 100644
@@ -99,6 +99,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/stat.h>
 #include <net/dst.h>
+#include <net/dst_metadata.h>
 #include <net/pkt_sched.h>
 #include <net/checksum.h>
 #include <net/xfrm.h>
 #include <linux/if_macvlan.h>
 #include <linux/errqueue.h>
 #include <linux/hrtimer.h>
+#include <linux/netfilter_ingress.h>
 
 #include "net-sysfs.h"
 
@@ -469,10 +471,14 @@ EXPORT_SYMBOL(dev_remove_pack);
  */
 void dev_add_offload(struct packet_offload *po)
 {
-       struct list_head *head = &offload_base;
+       struct packet_offload *elem;
 
        spin_lock(&offload_lock);
-       list_add_rcu(&po->list, head);
+       list_for_each_entry(elem, &offload_base, list) {
+               if (po->priority < elem->priority)
+                       break;
+       }
+       list_add_rcu(&po->list, elem->list.prev);
        spin_unlock(&offload_lock);
 }
 EXPORT_SYMBOL(dev_add_offload);
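
dev_add_offload() now keeps offload_base sorted by ascending priority
instead of always prepending, so lower-priority entries are consulted
first during offload lookups. A minimal registration sketch relying on
this ordering (the callback and priority value are hypothetical):

    static struct sk_buff *my_gso_segment(struct sk_buff *skb,
                                          netdev_features_t features);

    static struct packet_offload my_offload __read_mostly = {
        .type     = cpu_to_be16(ETH_P_IP),
        .priority = 10, /* hypothetical: sort after priority-0 entries */
        .callbacks = {
            .gso_segment = my_gso_segment, /* hypothetical callback */
        },
    };

    dev_add_offload(&my_offload);
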
@@ -673,14 +679,36 @@ int dev_get_iflink(const struct net_device *dev)
        if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
                return dev->netdev_ops->ndo_get_iflink(dev);
 
-       /* If dev->rtnl_link_ops is set, it's a virtual interface. */
-       if (dev->rtnl_link_ops)
-               return 0;
-
        return dev->ifindex;
 }
 EXPORT_SYMBOL(dev_get_iflink);
 
+/**
+ *     dev_fill_metadata_dst - Retrieve tunnel egress information.
+ *     @dev: targeted interface
+ *     @skb: The packet.
+ *
+ *     For better visibility of tunnel traffic, OVS needs to retrieve
+ *     egress tunnel information for a packet. The following API lets
+ *     the caller retrieve this info.
+ */
+int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+       struct ip_tunnel_info *info;
+
+       if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst)
+               return -EINVAL;
+
+       info = skb_tunnel_info_unclone(skb);
+       if (!info)
+               return -ENOMEM;
+       if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
+               return -EINVAL;
+
+       return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
+}
+EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
+
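
A hedged sketch of how a caller (e.g. OVS) might use this helper; dev
and skb are assumed to be set up by the caller, and the drop label is
hypothetical:

    struct ip_tunnel_info *info;
    int err;

    err = dev_fill_metadata_dst(dev, skb);
    if (err)
        goto drop; /* no ndo_fill_metadata_dst, or no TX tunnel info */

    /* egress tunnel parameters are now available via the metadata dst */
    info = skb_tunnel_info(skb);
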
 /**
  *     __dev_get_by_name       - find a device by its name
  *     @net: the applicable net namespace
@@ -1636,7 +1664,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
 }
 EXPORT_SYMBOL(call_netdevice_notifiers);
 
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_INGRESS
 static struct static_key ingress_needed __read_mostly;
 
 void net_inc_ingress_queue(void)
@@ -1654,24 +1682,19 @@ EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
 
 static struct static_key netstamp_needed __read_mostly;
 #ifdef HAVE_JUMP_LABEL
-/* We are not allowed to call static_key_slow_dec() from irq context
- * If net_disable_timestamp() is called from irq context, defer the
- * static_key_slow_dec() calls.
- */
 static atomic_t netstamp_needed_deferred;
-#endif
-
-void net_enable_timestamp(void)
+static void netstamp_clear(struct work_struct *work)
 {
-#ifdef HAVE_JUMP_LABEL
        int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
 
-       if (deferred) {
-               while (--deferred)
-                       static_key_slow_dec(&netstamp_needed);
-               return;
-       }
+       while (deferred--)
+               static_key_slow_dec(&netstamp_needed);
+}
+static DECLARE_WORK(netstamp_work, netstamp_clear);
 #endif
+
+void net_enable_timestamp(void)
+{
        static_key_slow_inc(&netstamp_needed);
 }
 EXPORT_SYMBOL(net_enable_timestamp);
@@ -1679,12 +1702,12 @@ EXPORT_SYMBOL(net_enable_timestamp);
 void net_disable_timestamp(void)
 {
 #ifdef HAVE_JUMP_LABEL
-       if (in_interrupt()) {
-               atomic_inc(&netstamp_needed_deferred);
-               return;
-       }
-#endif
+       /* net_disable_timestamp() can be called from non-process context */
+       atomic_inc(&netstamp_needed_deferred);
+       schedule_work(&netstamp_work);
+#else
        static_key_slow_dec(&netstamp_needed);
+#endif
 }
 EXPORT_SYMBOL(net_disable_timestamp);
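
static_key_slow_dec() may sleep, so it must not run in atomic context;
the rework above defers the decrement to a work item instead of relying
on the old (incomplete) in_interrupt() check. The same pattern in
isolation, as a hedged, self-contained sketch (all names hypothetical;
assumes <linux/workqueue.h> and <linux/jump_label.h>):

    static struct static_key example_key __read_mostly;
    static atomic_t example_deferred;

    static void example_clear(struct work_struct *work)
    {
        int deferred = atomic_xchg(&example_deferred, 0);

        while (deferred--)
            static_key_slow_dec(&example_key); /* process context: may sleep */
    }
    static DECLARE_WORK(example_work, example_clear);

    static void example_disable(void) /* safe to call from any context */
    {
        atomic_inc(&example_deferred);
        schedule_work(&example_work);
    }
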
 
@@ -2351,21 +2374,52 @@ void netif_device_attach(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_device_attach);
 
+/*
+ * Returns a Tx hash based on the given packet descriptor and the number
+ * of Tx queues to use as a distribution range.
+ */
+u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
+                 unsigned int num_tx_queues)
+{
+       u32 hash;
+       u16 qoffset = 0;
+       u16 qcount = num_tx_queues;
+
+       if (skb_rx_queue_recorded(skb)) {
+               hash = skb_get_rx_queue(skb);
+               while (unlikely(hash >= num_tx_queues))
+                       hash -= num_tx_queues;
+               return hash;
+       }
+
+       if (dev->num_tc) {
+               u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+               qoffset = dev->tc_to_txq[tc].offset;
+               qcount = dev->tc_to_txq[tc].count;
+       }
+
+       return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
+}
+EXPORT_SYMBOL(__skb_tx_hash);
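
Worked example (values hypothetical): with num_tx_queues = 8 and a
traffic class mapped to { .offset = 4, .count = 4 }, reciprocal_scale()
returns a value in [0, 3], so the selected queue index always lands in
the class's range [4, 7].
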
+
 static void skb_warn_bad_offload(const struct sk_buff *skb)
 {
        static const netdev_features_t null_features = 0;
        struct net_device *dev = skb->dev;
-       const char *driver = "";
+       const char *name = "";
 
        if (!net_ratelimit())
                return;
 
-       if (dev && dev->dev.parent)
-               driver = dev_driver_string(dev->dev.parent);
-
+       if (dev) {
+               if (dev->dev.parent)
+                       name = dev_driver_string(dev->dev.parent);
+               else
+                       name = netdev_name(dev);
+       }
        WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
             "gso_type=%d ip_summed=%d\n",
-            driver, dev ? &dev->features : &null_features,
+            name, dev ? &dev->features : &null_features,
             skb->sk ? &skb->sk->sk_route_caps : &null_features,
             skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
             skb_shinfo(skb)->gso_type, skb->ip_summed);
@@ -2411,7 +2465,7 @@ int skb_checksum_help(struct sk_buff *skb)
                        goto out;
        }
 
-       *(__sum16 *)(skb->data + offset) = csum_fold(csum);
+       *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
 out_set_summed:
        skb->ip_summed = CHECKSUM_NONE;
 out:
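
The "?: CSUM_MANGLED_0" above replaces a folded checksum of zero with
its all-ones equivalent. The two values are interchangeable in one's-
complement arithmetic, but a zero checksum field means "no checksum
computed" for UDP, so a computed checksum must never be emitted as zero.
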
@@ -2491,6 +2545,8 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
  *
  *     It may return NULL if the skb requires no segmentation.  This is
  *     only possible when GSO is used for verifying header integrity.
+ *
+ *     Segmentation preserves SKB_SGO_CB_OFFSET bytes of previous skb cb.
  */
 struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
                                  netdev_features_t features, bool tx_path)
@@ -2505,6 +2561,9 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
                        return ERR_PTR(err);
        }
 
+       BUILD_BUG_ON(SKB_SGO_CB_OFFSET +
+                    sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
+
        SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
        SKB_GSO_CB(skb)->encap_level = 0;
 
@@ -2594,9 +2653,9 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
        if (skb->ip_summed != CHECKSUM_NONE &&
            !can_checksum_protocol(features, type)) {
                features &= ~NETIF_F_ALL_CSUM;
-       } else if (illegal_highdma(skb->dev, skb)) {
-               features &= ~NETIF_F_SG;
        }
+       if (illegal_highdma(skb->dev, skb))
+               features &= ~NETIF_F_SG;
 
        return features;
 }
@@ -2780,6 +2839,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d
        }
        return head;
 }
+EXPORT_SYMBOL_GPL(validate_xmit_skb_list);
 
 static void qdisc_pkt_len_init(struct sk_buff *skb)
 {
@@ -2827,7 +2887,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
         * This permits __QDISC___STATE_RUNNING owner to get the lock more
         * often and dequeue packets faster.
         */
+#ifdef CONFIG_PREEMPT_RT_FULL
+       contended = true;
+#else
        contended = qdisc_is_running(q);
+#endif
        if (unlikely(contended))
                spin_lock(&q->busylock);
 
@@ -2887,16 +2951,53 @@ static void skb_update_prio(struct sk_buff *skb)
 #define skb_update_prio(skb)
 #endif
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+
+static inline int xmit_rec_read(void)
+{
+       return current->xmit_recursion;
+}
+
+static inline void xmit_rec_inc(void)
+{
+       current->xmit_recursion++;
+}
+
+static inline void xmit_rec_dec(void)
+{
+       current->xmit_recursion--;
+}
+
+#else
+
 DEFINE_PER_CPU(int, xmit_recursion);
 EXPORT_SYMBOL(xmit_recursion);
 
+static inline int xmit_rec_read(void)
+{
+       return __this_cpu_read(xmit_recursion);
+}
+
+static inline void xmit_rec_inc(void)
+{
+       __this_cpu_inc(xmit_recursion);
+}
+
+static inline void xmit_rec_dec(void)
+{
+       __this_cpu_dec(xmit_recursion);
+}
+#endif
+
 #define RECURSION_LIMIT 10
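
Rationale for the split (hedged): with PREEMPT_RT_FULL the transmit
path is preemptible and a sender may migrate between CPUs mid-transmit,
so a per-CPU counter could be charged to the wrong task; tracking the
depth in task_struct (current->xmit_recursion) keeps the
RECURSION_LIMIT check per-sender instead.
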
 
 /**
  *     dev_loopback_xmit - loop back @skb
+ *     @net: network namespace this loopback is happening in
+ *     @sk:  sk, needed so this function can be used as a netfilter okfn
  *     @skb: buffer to transmit
  */
-int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
+int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
        skb_reset_mac_header(skb);
        __skb_pull(skb, skb_network_offset(skb));
@@ -2909,6 +3010,85 @@ int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dev_loopback_xmit);
 
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+       struct xps_dev_maps *dev_maps;
+       struct xps_map *map;
+       int queue_index = -1;
+
+       rcu_read_lock();
+       dev_maps = rcu_dereference(dev->xps_maps);
+       if (dev_maps) {
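+               /* sender_cpu stores cpu id + 1, hence the -1 when indexing */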
+               map = rcu_dereference(
+                   dev_maps->cpu_map[skb->sender_cpu - 1]);
+               if (map) {
+                       if (map->len == 1)
+                               queue_index = map->queues[0];
+                       else
+                               queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
+                                                                          map->len)];
+                       if (unlikely(queue_index >= dev->real_num_tx_queues))
+                               queue_index = -1;
+               }
+       }
+       rcu_read_unlock();
+
+       return queue_index;
+#else
+       return -1;
+#endif
+}
+
+static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+{
+       struct sock *sk = skb->sk;
+       int queue_index = sk_tx_queue_get(sk);
+
+       if (queue_index < 0 || skb->ooo_okay ||
+           queue_index >= dev->real_num_tx_queues) {
+               int new_index = get_xps_queue(dev, skb);
+               if (new_index < 0)
+                       new_index = skb_tx_hash(dev, skb);
+
+               if (queue_index != new_index && sk &&
+                   sk_fullsock(sk) &&
+                   rcu_access_pointer(sk->sk_dst_cache))
+                       sk_tx_queue_set(sk, new_index);
+
+               queue_index = new_index;
+       }
+
+       return queue_index;
+}
+
+struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+                                   struct sk_buff *skb,
+                                   void *accel_priv)
+{
+       int queue_index = 0;
+
+#ifdef CONFIG_XPS
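+       /* 0 means "unset"; store cpu id + 1 so CPU 0 is distinguishable */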
+       if (skb->sender_cpu == 0)
+               skb->sender_cpu = raw_smp_processor_id() + 1;
+#endif
+
+       if (dev->real_num_tx_queues != 1) {
+               const struct net_device_ops *ops = dev->netdev_ops;
+               if (ops->ndo_select_queue)
+                       queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
+                                                           __netdev_pick_tx);
+               else
+                       queue_index = __netdev_pick_tx(dev, skb);
+
+               if (!accel_priv)
+                       queue_index = netdev_cap_txqueue(dev, queue_index);
+       }
+
+       skb_set_queue_mapping(skb, queue_index);
+       return netdev_get_tx_queue(dev, queue_index);
+}
+
 /**
  *     __dev_queue_xmit - transmit a buffer
  *     @skb: buffer to transmit
@@ -2962,6 +3142,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
        else
                skb_dst_force(skb);
 
+#ifdef CONFIG_NET_SWITCHDEV
+       /* Don't forward if offload device already forwarded */
+       if (skb->offload_fwd_mark &&
+           skb->offload_fwd_mark == dev->offload_fwd_mark) {
+               consume_skb(skb);
+               rc = NET_XMIT_SUCCESS;
+               goto out;
+       }
+#endif
+
        txq = netdev_pick_tx(dev, skb, accel_priv);
        q = rcu_dereference_bh(txq->qdisc);
 
@@ -2991,7 +3181,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 
                if (txq->xmit_lock_owner != cpu) {
 
-                       if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
+                       if (xmit_rec_read() > RECURSION_LIMIT)
                                goto recursion_alert;
 
                        skb = validate_xmit_skb(skb, dev);
@@ -3001,9 +3191,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
                        HARD_TX_LOCK(dev, txq, cpu);
 
                        if (!netif_xmit_stopped(txq)) {
-                               __this_cpu_inc(xmit_recursion);
+                               xmit_rec_inc();
                                skb = dev_hard_start_xmit(skb, dev, txq, &rc);
-                               __this_cpu_dec(xmit_recursion);
+                               xmit_rec_dec();
                                if (dev_xmit_complete(rc)) {
                                        HARD_TX_UNLOCK(dev, txq);
                                        goto out;
@@ -3034,11 +3224,11 @@ out:
        return rc;
 }
 
-int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb)
+int dev_queue_xmit(struct sk_buff *skb)
 {
        return __dev_queue_xmit(skb, NULL);
 }
-EXPORT_SYMBOL(dev_queue_xmit_sk);
+EXPORT_SYMBOL(dev_queue_xmit);
 
 int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
 {
@@ -3349,6 +3539,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
        local_irq_save(flags);
 
        rps_lock(sd);
+       if (!netif_running(skb->dev))
+               goto drop;
        qlen = skb_queue_len(&sd->input_pkt_queue);
        if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
                if (qlen) {
@@ -3370,6 +3562,7 @@ enqueue:
                goto enqueue;
        }
 
+drop:
        sd->dropped++;
        rps_unlock(sd);
 
@@ -3550,66 +3743,71 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev,
 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 #endif
 
-#ifdef CONFIG_NET_CLS_ACT
-/* TODO: Maybe we should just force sch_ingress to be compiled in
- * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
- * a compare and 2 stores extra right now if we dont have it on
- * but have CONFIG_NET_CLS_ACT
- * NOTE: This doesn't stop any functionality; if you dont have
- * the ingress scheduler, you just can't add policies on ingress.
- *
- */
-static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
-{
-       struct net_device *dev = skb->dev;
-       u32 ttl = G_TC_RTTL(skb->tc_verd);
-       int result = TC_ACT_OK;
-       struct Qdisc *q;
-
-       if (unlikely(MAX_RED_LOOP < ttl++)) {
-               net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
-                                    skb->skb_iif, dev->ifindex);
-               return TC_ACT_SHOT;
-       }
-
-       skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
-       skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
-
-       q = rcu_dereference(rxq->qdisc);
-       if (q != &noop_qdisc) {
-               spin_lock(qdisc_lock(q));
-               if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
-                       result = qdisc_enqueue_root(skb, q);
-               spin_unlock(qdisc_lock(q));
-       }
-
-       return result;
-}
-
 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
                                         struct packet_type **pt_prev,
                                         int *ret, struct net_device *orig_dev)
 {
-       struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
+#ifdef CONFIG_NET_CLS_ACT
+       struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
+       struct tcf_result cl_res;
 
-       if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc)
+       /* If there's at least one ingress present somewhere (so
+        * we get here via enabled static key), remaining devices
+        * that are not configured with an ingress qdisc will bail
+        * out here.
+        */
+       if (!cl)
                return skb;
-
        if (*pt_prev) {
                *ret = deliver_skb(skb, *pt_prev, orig_dev);
                *pt_prev = NULL;
        }
 
-       switch (ing_filter(skb, rxq)) {
+       qdisc_skb_cb(skb)->pkt_len = skb->len;
+       skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
+       qdisc_bstats_cpu_update(cl->q, skb);
+
+       switch (tc_classify(skb, cl, &cl_res, false)) {
+       case TC_ACT_OK:
+       case TC_ACT_RECLASSIFY:
+               skb->tc_index = TC_H_MIN(cl_res.classid);
+               break;
        case TC_ACT_SHOT:
+               qdisc_qstats_cpu_drop(cl->q);
        case TC_ACT_STOLEN:
+       case TC_ACT_QUEUED:
                kfree_skb(skb);
                return NULL;
+       case TC_ACT_REDIRECT:
+               /* skb_mac_header check was done by cls/act_bpf, so
+                * we can safely push the L2 header back before
+                * redirecting to another netdev
+                */
+               __skb_push(skb, skb->mac_len);
+               skb_do_redirect(skb);
+               return NULL;
+       default:
+               break;
        }
-
+#endif /* CONFIG_NET_CLS_ACT */
        return skb;
 }
-#endif
+
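Usage note (hedged): the rewritten handle_ing() classifies against the
device's ingress filter list directly instead of taking the qdisc lock
and enqueueing; filters are still attached the usual way, e.g.
"tc qdisc add dev eth0 handle ffff: ingress" followed by a
"tc filter add dev eth0 parent ffff: ..." rule (interface name
hypothetical).
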
+/**
+ *     netdev_is_rx_handler_busy - check if receive handler is registered
+ *     @dev: device to check
+ *
+ *     Check if a receive handler is already registered for a given device.
+ *     Return true if there is one.
+ *
+ *     The caller must hold the rtnl_mutex.
+ */
+bool netdev_is_rx_handler_busy(struct net_device *dev)
+{
+       ASSERT_RTNL();
+       return dev && rtnl_dereference(dev->rx_handler);
+}
+EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy);
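
A hedged sketch of the intended call pattern (my_rx_handler and my_priv
are hypothetical):

    int err;

    ASSERT_RTNL();
    if (netdev_is_rx_handler_busy(dev))
        return -EBUSY; /* e.g. dev already enslaved to a bridge/bond */

    err = netdev_rx_handler_register(dev, my_rx_handler, my_priv);
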
 
 /**
  *     netdev_rx_handler_register - register receive handler
@@ -3682,6 +3880,22 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
        }
 }
 
+static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
+                            int *ret, struct net_device *orig_dev)
+{
+#ifdef CONFIG_NETFILTER_INGRESS
+       if (nf_hook_ingress_active(skb)) {
+               if (*pt_prev) {
+                       *ret = deliver_skb(skb, *pt_prev, orig_dev);
+                       *pt_prev = NULL;
+               }
+
+               return nf_hook_ingress(skb);
+       }
+#endif /* CONFIG_NETFILTER_INGRESS */
+       return 0;
+}
+
 static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 {
        struct packet_type *ptype, *pt_prev;
@@ -3704,8 +3918,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 
        pt_prev = NULL;
 
-       rcu_read_lock();
-
 another_round:
        skb->skb_iif = skb->dev->ifindex;
 
@@ -3715,7 +3927,7 @@ another_round:
            skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
                skb = skb_vlan_untag(skb);
                if (unlikely(!skb))
-                       goto unlock;
+                       goto out;
        }
 
 #ifdef CONFIG_NET_CLS_ACT
@@ -3741,13 +3953,17 @@ another_round:
        }
 
 skip_taps:
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_INGRESS
        if (static_key_false(&ingress_needed)) {
                skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
                if (!skb)
-                       goto unlock;
-       }
+                       goto out;
 
+               if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
+                       goto out;
+       }
+#endif
+#ifdef CONFIG_NET_CLS_ACT
        skb->tc_verd = 0;
 ncls:
 #endif
@@ -3762,7 +3978,7 @@ ncls:
                if (vlan_do_receive(&skb))
                        goto another_round;
                else if (unlikely(!skb))
-                       goto unlock;
+                       goto out;
        }
 
        rx_handler = rcu_dereference(skb->dev->rx_handler);
@@ -3774,7 +3990,7 @@ ncls:
                switch (rx_handler(&skb)) {
                case RX_HANDLER_CONSUMED:
                        ret = NET_RX_SUCCESS;
-                       goto unlock;
+                       goto out;
                case RX_HANDLER_ANOTHER:
                        goto another_round;
                case RX_HANDLER_EXACT:
@@ -3828,8 +4044,7 @@ drop:
                ret = NET_RX_DROP;
        }
 
-unlock:
-       rcu_read_unlock();
+out:
        return ret;
 }
 
@@ -3860,29 +4075,30 @@ static int __netif_receive_skb(struct sk_buff *skb)
 
 static int netif_receive_skb_internal(struct sk_buff *skb)
 {
+       int ret;
+
        net_timestamp_check(netdev_tstamp_prequeue, skb);
 
        if (skb_defer_rx_timestamp(skb))
                return NET_RX_SUCCESS;
 
+       rcu_read_lock();
+
 #ifdef CONFIG_RPS
        if (static_key_false(&rps_needed)) {
                struct rps_dev_flow voidflow, *rflow = &voidflow;
-               int cpu, ret;
-
-               rcu_read_lock();
-
-               cpu = get_rps_cpu(skb->dev, skb, &rflow);
+               int cpu = get_rps_cpu(skb->dev, skb, &rflow);
 
                if (cpu >= 0) {
                        ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
                        rcu_read_unlock();
                        return ret;
                }
-               rcu_read_unlock();
        }
 #endif
-       return __netif_receive_skb(skb);
+       ret = __netif_receive_skb(skb);
+       rcu_read_unlock();
+       return ret;
 }
 
 /**
@@ -3900,13 +4116,13 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
  *     NET_RX_SUCCESS: no congestion
  *     NET_RX_DROP: packet was dropped
  */
-int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb)
+int netif_receive_skb(struct sk_buff *skb)
 {
        trace_netif_receive_skb_entry(skb);
 
        return netif_receive_skb_internal(skb);
 }
-EXPORT_SYMBOL(netif_receive_skb_sk);
+EXPORT_SYMBOL(netif_receive_skb);
 
 /* Network device is going away, flush any packets still pending
  * Called with irqs disabled.
@@ -4020,6 +4236,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 
                diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
                diffs |= p->vlan_tci ^ skb->vlan_tci;
+               diffs |= skb_metadata_dst_cmp(p, skb);
                if (maclen == ETH_HLEN)
                        diffs |= compare_ether_header(skb_mac_header(p),
                                                      skb_mac_header(skb));
@@ -4044,7 +4261,9 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
            pinfo->nr_frags &&
            !PageHighMem(skb_frag_page(frag0))) {
                NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
-               NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
+               NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
+                                                   skb_frag_size(frag0),
+                                                   skb->end - skb->tail);
        }
 }
 
@@ -4097,7 +4316,8 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
                NAPI_GRO_CB(skb)->same_flow = 0;
                NAPI_GRO_CB(skb)->flush = 0;
                NAPI_GRO_CB(skb)->free = 0;
-               NAPI_GRO_CB(skb)->udp_mark = 0;
+               NAPI_GRO_CB(skb)->encap_mark = 0;
+               NAPI_GRO_CB(skb)->recursion_counter = 0;
                NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
 
                /* Setup for GRO checksum validation */
@@ -4217,10 +4437,12 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
                break;
 
        case GRO_MERGED_FREE:
-               if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+               if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
+                       skb_dst_drop(skb);
                        kmem_cache_free(skbuff_head_cache, skb);
-               else
+               } else {
                        __kfree_skb(skb);
+               }
                break;
 
        case GRO_HELD:
@@ -4432,8 +4654,10 @@ static int process_backlog(struct napi_struct *napi, int quota)
                struct sk_buff *skb;
 
                while ((skb = __skb_dequeue(&sd->process_queue))) {
+                       rcu_read_lock();
                        local_irq_enable();
                        __netif_receive_skb(skb);
+                       rcu_read_unlock();
                        local_irq_disable();
                        input_queue_head_incr(sd);
                        if (++work >= quota) {
@@ -4485,6 +4709,7 @@ void __napi_schedule(struct napi_struct *n)
 }
 EXPORT_SYMBOL(__napi_schedule);
 
+#ifndef CONFIG_PREEMPT_RT_FULL
 /**
  * __napi_schedule_irqoff - schedule for receive
  * @n: entry to schedule
@@ -4496,6 +4721,7 @@ void __napi_schedule_irqoff(struct napi_struct *n)
        ____napi_schedule(this_cpu_ptr(&softnet_data), n);
 }
 EXPORT_SYMBOL(__napi_schedule_irqoff);
+#endif
 
 void __napi_complete(struct napi_struct *n)
 {
@@ -4635,6 +4861,8 @@ void napi_disable(struct napi_struct *n)
 
        while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
                msleep(1);
+       while (test_and_set_bit(NAPI_STATE_NPSVC, &n->state))
+               msleep(1);
 
        hrtimer_cancel(&n->timer);
 
@@ -4720,13 +4948,21 @@ static void net_rx_action(struct softirq_action *h)
        struct softnet_data *sd = this_cpu_ptr(&softnet_data);
        unsigned long time_limit = jiffies + 2;
        int budget = netdev_budget;
+       struct sk_buff_head tofree_q;
+       struct sk_buff *skb;
        LIST_HEAD(list);
        LIST_HEAD(repoll);
 
+       __skb_queue_head_init(&tofree_q);
+
        local_irq_disable();
+       skb_queue_splice_init(&sd->tofree_queue, &tofree_q);
        list_splice_init(&sd->poll_list, &list);
        local_irq_enable();
 
+       while ((skb = __skb_dequeue(&tofree_q)))
+               kfree_skb(skb);
+
        for (;;) {
                struct napi_struct *n;
 
@@ -4756,7 +4992,7 @@ static void net_rx_action(struct softirq_action *h)
        list_splice_tail(&repoll, &list);
        list_splice(&list, &sd->poll_list);
        if (!list_empty(&sd->poll_list))
-               __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+               __raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ);
 
        net_rps_action_and_irq_enable(sd);
 }
@@ -4777,8 +5013,7 @@ struct netdev_adjacent {
        struct rcu_head rcu;
 };
 
-static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
-                                                struct net_device *adj_dev,
+static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
                                                 struct list_head *adj_list)
 {
        struct netdev_adjacent *adj;
@@ -4804,7 +5039,7 @@ bool netdev_has_upper_dev(struct net_device *dev,
 {
        ASSERT_RTNL();
 
-       return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper);
+       return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper);
 }
 EXPORT_SYMBOL(netdev_has_upper_dev);
 
@@ -4917,7 +5152,7 @@ EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
  * Gets the next netdev_adjacent->private from the dev's lower neighbour
  * list, starting from iter position. The caller must hold either hold the
  * RTNL lock or its own locking that guarantees that the neighbour lower
- * list will remain unchainged.
+ * list will remain unchanged.
  */
 void *netdev_lower_get_next_private(struct net_device *dev,
                                    struct list_head **iter)
@@ -4972,7 +5207,7 @@ EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
  * Gets the next netdev_adjacent from the dev's lower neighbour
  * list, starting from iter position. The caller must hold RTNL lock or
  * its own locking that guarantees that the neighbour lower
- * list will remain unchainged.
+ * list will remain unchanged.
  */
 void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
 {
@@ -5060,16 +5295,17 @@ static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
 
 static int __netdev_adjacent_dev_insert(struct net_device *dev,
                                        struct net_device *adj_dev,
+                                       u16 ref_nr,
                                        struct list_head *dev_list,
                                        void *private, bool master)
 {
        struct netdev_adjacent *adj;
        int ret;
 
-       adj = __netdev_find_adj(dev, adj_dev, dev_list);
+       adj = __netdev_find_adj(adj_dev, dev_list);
 
        if (adj) {
-               adj->ref_nr++;
+               adj->ref_nr += ref_nr;
                return 0;
        }
 
@@ -5079,7 +5315,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 
        adj->dev = adj_dev;
        adj->master = master;
-       adj->ref_nr = 1;
+       adj->ref_nr = ref_nr;
        adj->private = private;
        dev_hold(adj_dev);
 
@@ -5118,11 +5354,12 @@ free_adj:
 
 static void __netdev_adjacent_dev_remove(struct net_device *dev,
                                         struct net_device *adj_dev,
+                                        u16 ref_nr,
                                         struct list_head *dev_list)
 {
        struct netdev_adjacent *adj;
 
-       adj = __netdev_find_adj(dev, adj_dev, dev_list);
+       adj = __netdev_find_adj(adj_dev, dev_list);
 
        if (!adj) {
                pr_err("tried to remove device %s from %s\n",
@@ -5130,10 +5367,10 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
                BUG();
        }
 
-       if (adj->ref_nr > 1) {
-               pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name,
-                        adj->ref_nr-1);
-               adj->ref_nr--;
+       if (adj->ref_nr > ref_nr) {
+               pr_debug("%s to %s ref_nr-%d = %d\n", dev->name, adj_dev->name,
+                        ref_nr, adj->ref_nr-ref_nr);
+               adj->ref_nr -= ref_nr;
                return;
        }
 
@@ -5152,21 +5389,22 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
 
 static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
                                            struct net_device *upper_dev,
+                                           u16 ref_nr,
                                            struct list_head *up_list,
                                            struct list_head *down_list,
                                            void *private, bool master)
 {
        int ret;
 
-       ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private,
-                                          master);
+       ret = __netdev_adjacent_dev_insert(dev, upper_dev, ref_nr, up_list,
+                                          private, master);
        if (ret)
                return ret;
 
-       ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private,
-                                          false);
+       ret = __netdev_adjacent_dev_insert(upper_dev, dev, ref_nr, down_list,
+                                          private, false);
        if (ret) {
-               __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
+               __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list);
                return ret;
        }
 
@@ -5174,9 +5412,10 @@ static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
 }
 
 static int __netdev_adjacent_dev_link(struct net_device *dev,
-                                     struct net_device *upper_dev)
+                                     struct net_device *upper_dev,
+                                     u16 ref_nr)
 {
-       return __netdev_adjacent_dev_link_lists(dev, upper_dev,
+       return __netdev_adjacent_dev_link_lists(dev, upper_dev, ref_nr,
                                                &dev->all_adj_list.upper,
                                                &upper_dev->all_adj_list.lower,
                                                NULL, false);
@@ -5184,17 +5423,19 @@ static int __netdev_adjacent_dev_link(struct net_device *dev,
 
 static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
                                               struct net_device *upper_dev,
+                                              u16 ref_nr,
                                               struct list_head *up_list,
                                               struct list_head *down_list)
 {
-       __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
-       __netdev_adjacent_dev_remove(upper_dev, dev, down_list);
+       __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list);
+       __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list);
 }
 
 static void __netdev_adjacent_dev_unlink(struct net_device *dev,
-                                        struct net_device *upper_dev)
+                                        struct net_device *upper_dev,
+                                        u16 ref_nr)
 {
-       __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
+       __netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr,
                                           &dev->all_adj_list.upper,
                                           &upper_dev->all_adj_list.lower);
 }
@@ -5203,17 +5444,17 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
                                                struct net_device *upper_dev,
                                                void *private, bool master)
 {
-       int ret = __netdev_adjacent_dev_link(dev, upper_dev);
+       int ret = __netdev_adjacent_dev_link(dev, upper_dev, 1);
 
        if (ret)
                return ret;
 
-       ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
+       ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 1,
                                               &dev->adj_list.upper,
                                               &upper_dev->adj_list.lower,
                                               private, master);
        if (ret) {
-               __netdev_adjacent_dev_unlink(dev, upper_dev);
+               __netdev_adjacent_dev_unlink(dev, upper_dev, 1);
                return ret;
        }
 
@@ -5223,8 +5464,8 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
 static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
                                                   struct net_device *upper_dev)
 {
-       __netdev_adjacent_dev_unlink(dev, upper_dev);
-       __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
+       __netdev_adjacent_dev_unlink(dev, upper_dev, 1);
+       __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1,
                                           &dev->adj_list.upper,
                                           &upper_dev->adj_list.lower);
 }
@@ -5233,6 +5474,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
                                   struct net_device *upper_dev, bool master,
                                   void *private)
 {
+       struct netdev_notifier_changeupper_info changeupper_info;
        struct netdev_adjacent *i, *j, *to_i, *to_j;
        int ret = 0;
 
@@ -5242,15 +5484,25 @@ static int __netdev_upper_dev_link(struct net_device *dev,
                return -EBUSY;
 
        /* To prevent loops, check if dev is not upper device to upper_dev. */
-       if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
+       if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper))
                return -EBUSY;
 
-       if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper))
+       if (__netdev_find_adj(upper_dev, &dev->adj_list.upper))
                return -EEXIST;
 
        if (master && netdev_master_upper_dev_get(dev))
                return -EBUSY;
 
+       changeupper_info.upper_dev = upper_dev;
+       changeupper_info.master = master;
+       changeupper_info.linking = true;
+
+       ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+                                           &changeupper_info.info);
+       ret = notifier_to_errno(ret);
+       if (ret)
+               return ret;
+
        ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
                                                   master);
        if (ret)
@@ -5265,7 +5517,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
                list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
                        pr_debug("Interlinking %s with %s, non-neighbour\n",
                                 i->dev->name, j->dev->name);
-                       ret = __netdev_adjacent_dev_link(i->dev, j->dev);
+                       ret = __netdev_adjacent_dev_link(i->dev, j->dev, i->ref_nr);
                        if (ret)
                                goto rollback_mesh;
                }
@@ -5275,7 +5527,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
        list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
                pr_debug("linking %s's upper device %s with %s\n",
                         upper_dev->name, i->dev->name, dev->name);
-               ret = __netdev_adjacent_dev_link(dev, i->dev);
+               ret = __netdev_adjacent_dev_link(dev, i->dev, i->ref_nr);
                if (ret)
                        goto rollback_upper_mesh;
        }
@@ -5284,12 +5536,13 @@ static int __netdev_upper_dev_link(struct net_device *dev,
        list_for_each_entry(i, &dev->all_adj_list.lower, list) {
                pr_debug("linking %s's lower device %s with %s\n", dev->name,
                         i->dev->name, upper_dev->name);
-               ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
+               ret = __netdev_adjacent_dev_link(i->dev, upper_dev, i->ref_nr);
                if (ret)
                        goto rollback_lower_mesh;
        }
 
-       call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
+       call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+                                     &changeupper_info.info);
        return 0;
 
 rollback_lower_mesh:
@@ -5297,7 +5550,7 @@ rollback_lower_mesh:
        list_for_each_entry(i, &dev->all_adj_list.lower, list) {
                if (i == to_i)
                        break;
-               __netdev_adjacent_dev_unlink(i->dev, upper_dev);
+               __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
        }
 
        i = NULL;
@@ -5307,7 +5560,7 @@ rollback_upper_mesh:
        list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
                if (i == to_i)
                        break;
-               __netdev_adjacent_dev_unlink(dev, i->dev);
+               __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
        }
 
        i = j = NULL;
@@ -5319,7 +5572,7 @@ rollback_mesh:
                list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
                        if (i == to_i && j == to_j)
                                break;
-                       __netdev_adjacent_dev_unlink(i->dev, j->dev);
+                       __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
                }
                if (i == to_i)
                        break;
@@ -5384,9 +5637,17 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
 void netdev_upper_dev_unlink(struct net_device *dev,
                             struct net_device *upper_dev)
 {
+       struct netdev_notifier_changeupper_info changeupper_info;
        struct netdev_adjacent *i, *j;
        ASSERT_RTNL();
 
+       changeupper_info.upper_dev = upper_dev;
+       changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
+       changeupper_info.linking = false;
+
+       call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+                                     &changeupper_info.info);
+
        __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
 
        /* Here is the tricky part. We must remove all dev's lower
@@ -5395,18 +5656,19 @@ void netdev_upper_dev_unlink(struct net_device *dev,
         */
        list_for_each_entry(i, &dev->all_adj_list.lower, list)
                list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
-                       __netdev_adjacent_dev_unlink(i->dev, j->dev);
+                       __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
 
        /* also remove the device itself from the lower/upper device
         * lists
         */
        list_for_each_entry(i, &dev->all_adj_list.lower, list)
-               __netdev_adjacent_dev_unlink(i->dev, upper_dev);
+               __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
 
        list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
-               __netdev_adjacent_dev_unlink(dev, i->dev);
+               __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
 
-       call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
+       call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+                                     &changeupper_info.info);
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
@@ -5512,7 +5774,7 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
 
        if (!lower_dev)
                return NULL;
-       lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower);
+       lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower);
        if (!lower)
                return NULL;
 
@@ -6006,6 +6268,26 @@ int dev_get_phys_port_name(struct net_device *dev,
 }
 EXPORT_SYMBOL(dev_get_phys_port_name);
 
+/**
+ *     dev_change_proto_down - update protocol port state information
+ *     @dev: device
+ *     @proto_down: new value
+ *
+ *     This info can be used by switch drivers to set the phys state of the
+ *     port.
+ */
+int dev_change_proto_down(struct net_device *dev, bool proto_down)
+{
+       const struct net_device_ops *ops = dev->netdev_ops;
+
+       if (!ops->ndo_change_proto_down)
+               return -EOPNOTSUPP;
+       if (!netif_device_present(dev))
+               return -ENODEV;
+       return ops->ndo_change_proto_down(dev, proto_down);
+}
+EXPORT_SYMBOL(dev_change_proto_down);
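
For context (hedged): this helper backs the IFLA_PROTO_DOWN rtnetlink
attribute and is reached from do_setlink(); with iproute2 that is
roughly "ip link set dev swp1 protodown on" (device name hypothetical).
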
+
 /**
  *     dev_new_index   -       allocate an ifindex
  *     @net: the applicable net namespace
@@ -6070,6 +6352,7 @@ static void rollback_registered_many(struct list_head *head)
                unlist_netdevice(dev);
 
                dev->reg_state = NETREG_UNREGISTERING;
+               on_each_cpu(flush_backlog, dev, 1);
        }
 
        synchronize_net();
@@ -6129,6 +6412,48 @@ static void rollback_registered(struct net_device *dev)
        list_del(&single);
 }
 
+static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
+       struct net_device *upper, netdev_features_t features)
+{
+       netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
+       netdev_features_t feature;
+       int feature_bit;
+
+       for_each_netdev_feature(&upper_disables, feature_bit) {
+               feature = __NETIF_F_BIT(feature_bit);
+               if (!(upper->wanted_features & feature)
+                   && (features & feature)) {
+                       netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n",
+                                  &feature, upper->name);
+                       features &= ~feature;
+               }
+       }
+
+       return features;
+}
+
+static void netdev_sync_lower_features(struct net_device *upper,
+       struct net_device *lower, netdev_features_t features)
+{
+       netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
+       netdev_features_t feature;
+       int feature_bit;
+
+       for_each_netdev_feature(&upper_disables, feature_bit) {
+               feature = __NETIF_F_BIT(feature_bit);
+               if (!(features & feature) && (lower->features & feature)) {
+                       netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
+                                  &feature, lower->name);
+                       lower->wanted_features &= ~feature;
+                       netdev_update_features(lower);
+
+                       if (unlikely(lower->features & feature))
+                               netdev_WARN(upper, "failed to disable %pNF on %s!\n",
+                                           &feature, lower->name);
+               }
+       }
+}
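
Example of the intended behaviour (hedged): NETIF_F_UPPER_DISABLES
names the feature bits with this propagate-down semantic (only
NETIF_F_LRO at this point), so "ethtool -K bond0 lro off" on a bonding
master (device name hypothetical) now forces LRO off on every slave,
and netdev_sync_upper_features() keeps a slave from re-enabling it
while the upper device has it off.
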
+
 static netdev_features_t netdev_fix_features(struct net_device *dev,
        netdev_features_t features)
 {
@@ -6198,8 +6523,10 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
 
 int __netdev_update_features(struct net_device *dev)
 {
+       struct net_device *upper, *lower;
        netdev_features_t features;
-       int err = 0;
+       struct list_head *iter;
+       int err = -1;
 
        ASSERT_RTNL();
 
@@ -6211,26 +6538,42 @@ int __netdev_update_features(struct net_device *dev)
        /* driver might be less strict about feature dependencies */
        features = netdev_fix_features(dev, features);
 
+       /* some features can't be enabled if they're off on an upper device */
+       netdev_for_each_upper_dev_rcu(dev, upper, iter)
+               features = netdev_sync_upper_features(dev, upper, features);
+
        if (dev->features == features)
-               return 0;
+               goto sync_lower;
 
        netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
                &dev->features, &features);
 
        if (dev->netdev_ops->ndo_set_features)
                err = dev->netdev_ops->ndo_set_features(dev, features);
+       else
+               err = 0;
 
        if (unlikely(err < 0)) {
                netdev_err(dev,
                        "set_features() failed (%d); wanted %pNF, left %pNF\n",
                        err, &features, &dev->features);
+               /* return non-0 since some features might have changed and
+                * it's better to fire a spurious notification than miss it
+                */
                return -1;
        }
 
+sync_lower:
+       /* some features must be disabled on lower devices when disabled
+        * on an upper device (think: bonding master or bridge)
+        */
+       netdev_for_each_lower_dev(dev, lower, iter)
+               netdev_sync_lower_features(dev, lower, features);
+
        if (!err)
                dev->features = features;
 
-       return 1;
+       return err < 0 ? 0 : 1;
 }
 
 /**
@@ -6340,7 +6683,8 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
        struct netdev_queue *tx;
        size_t sz = count * sizeof(*tx);
 
-       BUG_ON(count < 1 || count > 0xffff);
+       if (count < 1 || count > 0xffff)
+               return -EINVAL;
 
        tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
        if (!tx) {
@@ -6356,6 +6700,17 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
        return 0;
 }
 
+void netif_tx_stop_all_queues(struct net_device *dev)
+{
+       unsigned int i;
+
+       for (i = 0; i < dev->num_tx_queues; i++) {
+               struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+               netif_tx_stop_queue(txq);
+       }
+}
+EXPORT_SYMBOL(netif_tx_stop_all_queues);
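
With the helper now out of line and exported, drivers can call it from
their stop paths. A hedged sketch (my_ndo_stop is hypothetical):

    static int my_ndo_stop(struct net_device *dev)
    {
        netif_tx_stop_all_queues(dev); /* no new tx work from the stack */
        netif_carrier_off(dev);
        /* ... quiesce hardware and reclaim the tx rings ... */
        return 0;
    }
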
+
 /**
  *     register_netdevice      - register a network device
  *     @dev: device to register
@@ -6693,8 +7048,6 @@ void netdev_run_todo(void)
 
                dev->reg_state = NETREG_UNREGISTERED;
 
-               on_each_cpu(flush_backlog, dev, 1);
-
                netdev_wait_allrefs(dev);
 
                /* paranoia */
@@ -6888,6 +7241,11 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
        dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
        setup(dev);
 
+       if (!dev->tx_queue_len) {
+               dev->priv_flags |= IFF_NO_QUEUE;
+               dev->tx_queue_len = 1;
+       }
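
Hedged rationale: virtual devices that leave tx_queue_len at 0 during
setup are tagged IFF_NO_QUEUE so the stack can attach the lightweight
noqueue discipline, while tx_queue_len is bumped to 1 so that any real
qdisc attached later still has a usable limit.
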
+
        dev->num_tx_queues = txqs;
        dev->real_num_tx_queues = txqs;
        if (netif_alloc_netdev_queues(dev))
@@ -6905,6 +7263,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
        dev->group = INIT_NETDEV_GROUP;
        if (!dev->ethtool_ops)
                dev->ethtool_ops = &default_ethtool_ops;
+
+       nf_hook_ingress_init(dev);
+
        return dev;
 
 free_all:
@@ -6970,7 +7331,7 @@ EXPORT_SYMBOL(free_netdev);
 void synchronize_net(void)
 {
        might_sleep();
-       if (rtnl_is_locked())
+       if (rtnl_is_locked() && !IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
                synchronize_rcu_expedited();
        else
                synchronize_rcu();
@@ -7218,7 +7579,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
                netif_rx_ni(skb);
                input_queue_head_incr(oldsd);
        }
-       while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
+       while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
                netif_rx_ni(skb);
                input_queue_head_incr(oldsd);
        }
@@ -7562,7 +7923,7 @@ static int __init net_dev_init(void)
        open_softirq(NET_RX_SOFTIRQ, net_rx_action);
 
        hotcpu_notifier(dev_cpu_callback, 0);
-       dst_init();
+       dst_subsys_init();
        rc = 0;
 out:
        return rc;