Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / net / bridge / br_mdb.c
diff --git a/kernel/net/bridge/br_mdb.c b/kernel/net/bridge/br_mdb.c
new file mode 100644 (file)
index 0000000..e29ad70
--- /dev/null
@@ -0,0 +1,499 @@
+#include <linux/err.h>
+#include <linux/igmp.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/if_ether.h>
+#include <net/ip.h>
+#include <net/netlink.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#endif
+
+#include "br_private.h"
+
+/*
+ * Append an MDBA_ROUTER nest to @skb holding one MDBA_ROUTER_PORT attribute
+ * (the port device's ifindex) for every port on the bridge's router list.
+ *
+ * Nothing is written when br->multicast_router is zero or the router list
+ * is empty.  @cb is not used here.
+ *
+ * Returns 0 on success, or -EMSGSIZE when the nest or one of the port
+ * attributes cannot be added; a partially filled nest is cancelled.
+ */
+static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
+                              struct net_device *dev)
+{
+       struct net_bridge *br = netdev_priv(dev);
+       struct net_bridge_port *port;
+       struct nlattr *router_nest;
+
+       if (!br->multicast_router)
+               return 0;
+       if (hlist_empty(&br->router_list))
+               return 0;
+
+       router_nest = nla_nest_start(skb, MDBA_ROUTER);
+       if (!router_nest)
+               return -EMSGSIZE;
+
+       hlist_for_each_entry_rcu(port, &br->router_list, rlist) {
+               if (!port)
+                       continue;
+
+               if (nla_put_u32(skb, MDBA_ROUTER_PORT, port->dev->ifindex)) {
+                       nla_nest_cancel(skb, router_nest);
+                       return -EMSGSIZE;
+               }
+       }
+
+       nla_nest_end(skb, router_nest);
+       return 0;
+}
+
+static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
+                           struct net_device *dev)
+{
+       struct net_bridge *br = netdev_priv(dev);
+       struct net_bridge_mdb_htable *mdb;
+       struct nlattr *nest, *nest2;
+       int i, err = 0;
+       int idx = 0, s_idx = cb->args[1];
+
+       if (br->multicast_disabled)
+               return 0;
+
+       mdb = rcu_dereference(br->mdb);
+       if (!mdb)
+               return 0;
+
+       nest = nla_nest_start(skb, MDBA_MDB);
+       if (nest == NULL)
+               return -EMSGSIZE;
+
+       for (i = 0; i < mdb->max; i++) {
+               struct net_bridge_mdb_entry *mp;
+               struct net_bridge_port_group *p;
+               struct net_bridge_port_group __rcu **pp;
+               struct net_bridge_port *port;
+
+               hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) {
+                       if (idx < s_idx)
+                               goto skip;
+
+                       nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY);
+                       if (nest2 == NULL) {
+                               err = -EMSGSIZE;
+                               goto out;
+                       }
+
+                       for (pp = &mp->ports;
+                            (p = rcu_dereference(*pp)) != NULL;
+                             pp = &p->next) {
+                               port = p->port;
+                               if (port) {
+                                       struct br_mdb_entry e;
+                                       memset(&e, 0, sizeof(e));
+                                       e.ifindex = port->dev->ifindex;
+                                       e.state = p->state;
+                                       if (p->addr.proto == htons(ETH_P_IP))
+                                               e.addr.u.ip4 = p->addr.u.ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+                                       if (p->addr.proto == htons(ETH_P_IPV6))
+                                               e.addr.u.ip6 = p->addr.u.ip6;
+#endif
+                                       e.addr.proto = p->addr.proto;
+                                       if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(e), &e)) {
+                                               nla_nest_cancel(skb, nest2);
+                                               err = -EMSGSIZE;
+                                               goto out;
+                                       }
+                               }
+                       }
+                       nla_nest_end(skb, nest2);
+               skip:
+                       idx++;
+               }
+       }
+
+out:
+       cb->args[1] = idx;
+       nla_nest_end(skb, nest);
+       return err;
+}
+
+/*
+ * Netlink dump callback registered for RTM_GETMDB.
+ *
+ * Walks all net devices of the requesting socket's namespace under RCU and,
+ * for each bridge device, emits one NLM_F_MULTI message containing its
+ * multicast database followed by its router ports.
+ *
+ * Resume state lives in @cb: args[0] is the index of the bridge to continue
+ * from, args[1] the entry index inside that bridge (reset to 0 once a
+ * bridge has been dumped completely).
+ *
+ * Returns skb->len.
+ */
+static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct net *net = sock_net(skb->sk);
+       struct nlmsghdr *nlh = NULL;
+       struct net_device *dev;
+       int first = cb->args[0];
+       int pos = 0;
+
+       rcu_read_lock();
+
+       /* In theory this could be wrapped to 0... */
+       cb->seq = net->dev_base_seq + br_mdb_rehash_seq;
+
+       for_each_netdev_rcu(net, dev) {
+               struct br_port_msg *bpm;
+
+               if (!(dev->priv_flags & IFF_EBRIDGE))
+                       continue;
+
+               if (pos >= first) {
+                       nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+                                       cb->nlh->nlmsg_seq, RTM_GETMDB,
+                                       sizeof(*bpm), NLM_F_MULTI);
+                       if (!nlh)
+                               break;
+
+                       bpm = nlmsg_data(nlh);
+                       memset(bpm, 0, sizeof(*bpm));
+                       bpm->ifindex = dev->ifindex;
+
+                       /*
+                        * On failure leave without advancing pos, so this
+                        * bridge is visited again on the next call.
+                        */
+                       if (br_mdb_fill_info(skb, cb, dev) < 0 ||
+                           br_rports_fill_info(skb, cb, dev) < 0)
+                               goto out;
+
+                       cb->args[1] = 0;
+                       nlmsg_end(skb, nlh);
+               }
+
+               pos++;
+       }
+
+out:
+       if (nlh)
+               nlmsg_end(skb, nlh);
+       rcu_read_unlock();
+       cb->args[0] = pos;
+       return skb->len;
+}
+
+/*
+ * Build a single MDB notification message in @skb.
+ *
+ * Layout: a struct br_port_msg header (family AF_BRIDGE, ifindex of @dev)
+ * followed by MDBA_MDB > MDBA_MDB_ENTRY > MDBA_MDB_ENTRY_INFO, the latter
+ * carrying a copy of @entry.
+ *
+ * @flags is accepted but not used: the netlink header is always created
+ * with flags 0.
+ *
+ * Returns 0 on success or -EMSGSIZE when @skb is too small, in which case
+ * the message is cancelled.
+ */
+static int nlmsg_populate_mdb_fill(struct sk_buff *skb,
+                                  struct net_device *dev,
+                                  struct br_mdb_entry *entry, u32 pid,
+                                  u32 seq, int type, unsigned int flags)
+{
+       struct nlattr *mdb_nest, *entry_nest;
+       struct br_port_msg *bpm;
+       struct nlmsghdr *nlh;
+
+       nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0);
+       if (!nlh)
+               return -EMSGSIZE;
+
+       bpm = nlmsg_data(nlh);
+       memset(bpm, 0, sizeof(*bpm));
+       bpm->ifindex = dev->ifindex;
+       bpm->family = AF_BRIDGE;
+
+       mdb_nest = nla_nest_start(skb, MDBA_MDB);
+       if (!mdb_nest)
+               goto err_cancel;
+
+       entry_nest = nla_nest_start(skb, MDBA_MDB_ENTRY);
+       if (!entry_nest ||
+           nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(*entry), entry)) {
+               nla_nest_end(skb, mdb_nest);
+               goto err_cancel;
+       }
+
+       nla_nest_end(skb, entry_nest);
+       nla_nest_end(skb, mdb_nest);
+       nlmsg_end(skb, nlh);
+       return 0;
+
+err_cancel:
+       nlmsg_cancel(skb, nlh);
+       return -EMSGSIZE;
+}
+
+/*
+ * Payload size needed for one MDB notification as built by
+ * nlmsg_populate_mdb_fill(): the br_port_msg header, the two (empty-payload)
+ * nest attribute headers MDBA_MDB and MDBA_MDB_ENTRY, and the
+ * MDBA_MDB_ENTRY_INFO attribute holding a struct br_mdb_entry.
+ *
+ * The nest headers must be counted too; otherwise the estimate is short by
+ * two attribute headers and the fill only succeeds if the allocator happens
+ * to hand out spare room.
+ */
+static inline size_t rtnl_mdb_nlmsg_size(void)
+{
+       return NLMSG_ALIGN(sizeof(struct br_port_msg))
+               + nla_total_size(0)     /* MDBA_MDB */
+               + nla_total_size(0)     /* MDBA_MDB_ENTRY */
+               + nla_total_size(sizeof(struct br_mdb_entry));
+}
+
+/*
+ * Send an MDB notification of the given @type, describing @entry on bridge
+ * @dev, to the RTNLGRP_MDB multicast group of @dev's network namespace.
+ *
+ * Allocation uses GFP_ATOMIC.  If the message cannot be allocated
+ * (-ENOBUFS) or filled, the error is reported to the group's listeners via
+ * rtnl_set_sk_err() instead.
+ */
+static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry,
+                           int type)
+{
+       struct net *net = dev_net(dev);
+       struct sk_buff *skb;
+       int err;
+
+       skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC);
+       if (!skb) {
+               rtnl_set_sk_err(net, RTNLGRP_MDB, -ENOBUFS);
+               return;
+       }
+
+       err = nlmsg_populate_mdb_fill(skb, dev, entry, 0, 0, type, NTF_SELF);
+       if (err < 0) {
+               kfree_skb(skb);
+               rtnl_set_sk_err(net, RTNLGRP_MDB, err);
+               return;
+       }
+
+       rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_ATOMIC);
+}
+
+void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
+                  struct br_ip *group, int type)
+{
+       struct br_mdb_entry entry;
+
+       memset(&entry, 0, sizeof(entry));
+       entry.ifindex = port->dev->ifindex;
+       entry.addr.proto = group->proto;
+       entry.addr.u.ip4 = group->u.ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+       entry.addr.u.ip6 = group->u.ip6;
+#endif
+       __br_mdb_notify(dev, &entry, type);
+}
+
+static bool is_valid_mdb_entry(struct br_mdb_entry *entry)
+{
+       if (entry->ifindex == 0)
+               return false;
+
+       if (entry->addr.proto == htons(ETH_P_IP)) {
+               if (!ipv4_is_multicast(entry->addr.u.ip4))
+                       return false;
+               if (ipv4_is_local_multicast(entry->addr.u.ip4))
+                       return false;
+#if IS_ENABLED(CONFIG_IPV6)
+       } else if (entry->addr.proto == htons(ETH_P_IPV6)) {
+               if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6))
+                       return false;
+#endif
+       } else
+               return false;
+       if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY)
+               return false;
+
+       return true;
+}
+
+/*
+ * Parse and validate an RTM_NEWMDB / RTM_DELMDB request.
+ *
+ * On success *@pdev is the bridge device named by the header's ifindex and
+ * *@pentry points at the struct br_mdb_entry inside the MDBA_SET_ENTRY
+ * attribute of @nlh (no copy is made).  Note that *@pdev is already set
+ * when the attribute checks fail.
+ *
+ * Returns 0 on success, the nlmsg_parse() error, -EINVAL for a zero
+ * ifindex or a missing/mis-sized/invalid entry, -ENODEV for an unknown
+ * ifindex, or -EOPNOTSUPP when the device is not a bridge.
+ */
+static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
+                       struct net_device **pdev, struct br_mdb_entry **pentry)
+{
+       struct nlattr *tb[MDBA_SET_ENTRY_MAX+1];
+       struct net *net = sock_net(skb->sk);
+       struct br_mdb_entry *entry;
+       struct net_device *bridge;
+       struct br_port_msg *bpm;
+       struct nlattr *attr;
+       int err;
+
+       err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, NULL);
+       if (err < 0)
+               return err;
+
+       bpm = nlmsg_data(nlh);
+       if (!bpm->ifindex) {
+               pr_info("PF_BRIDGE: br_mdb_parse() with invalid ifindex\n");
+               return -EINVAL;
+       }
+
+       bridge = __dev_get_by_index(net, bpm->ifindex);
+       if (!bridge) {
+               pr_info("PF_BRIDGE: br_mdb_parse() with unknown ifindex\n");
+               return -ENODEV;
+       }
+
+       if (!(bridge->priv_flags & IFF_EBRIDGE)) {
+               pr_info("PF_BRIDGE: br_mdb_parse() with non-bridge\n");
+               return -EOPNOTSUPP;
+       }
+
+       *pdev = bridge;
+
+       /* The attribute must be present and exactly one entry in size. */
+       attr = tb[MDBA_SET_ENTRY];
+       if (!attr || nla_len(attr) != sizeof(struct br_mdb_entry)) {
+               pr_info("PF_BRIDGE: br_mdb_parse() with invalid attr\n");
+               return -EINVAL;
+       }
+
+       entry = nla_data(attr);
+       if (!is_valid_mdb_entry(entry)) {
+               pr_info("PF_BRIDGE: br_mdb_parse() with invalid entry\n");
+               return -EINVAL;
+       }
+
+       *pentry = entry;
+       return 0;
+}
+
+/*
+ * Attach @port to multicast @group on bridge @br with the given @state,
+ * creating the group entry if it does not exist yet.
+ *
+ * The table is accessed through mlock_dereference(); the caller in this
+ * file invokes this function with br->multicast_lock held.
+ *
+ * No netlink notification is sent from here: br_mdb_add() already announces
+ * RTM_NEWMDB (with the entry supplied by user space) once the add has
+ * succeeded, so notifying here as well would deliver every add twice.
+ *
+ * Returns 0 on success, -EEXIST if @port is already a member of the group,
+ * -ENOMEM if the port group cannot be allocated, or the error returned by
+ * br_multicast_new_group().
+ */
+static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
+                           struct br_ip *group, unsigned char state)
+{
+       struct net_bridge_mdb_entry *mp;
+       struct net_bridge_port_group *p;
+       struct net_bridge_port_group __rcu **pp;
+       struct net_bridge_mdb_htable *mdb;
+
+       mdb = mlock_dereference(br->mdb, br);
+       mp = br_mdb_ip_get(mdb, group);
+       if (!mp) {
+               mp = br_multicast_new_group(br, port, group);
+               if (IS_ERR(mp))
+                       return PTR_ERR(mp);
+       }
+
+       /*
+        * Find the insertion point: stop at the first port group whose port
+        * pointer compares lower than @port.
+        */
+       for (pp = &mp->ports;
+            (p = mlock_dereference(*pp, br)) != NULL;
+            pp = &p->next) {
+               if (p->port == port)
+                       return -EEXIST;
+               if ((unsigned long)p->port < (unsigned long)port)
+                       break;
+       }
+
+       p = br_multicast_new_port_group(port, group, *pp, state);
+       if (unlikely(!p))
+               return -ENOMEM;
+       rcu_assign_pointer(*pp, p);
+
+       return 0;
+}
+
+static int __br_mdb_add(struct net *net, struct net_bridge *br,
+                       struct br_mdb_entry *entry)
+{
+       struct br_ip ip;
+       struct net_device *dev;
+       struct net_bridge_port *p;
+       int ret;
+
+       if (!netif_running(br->dev) || br->multicast_disabled)
+               return -EINVAL;
+
+       dev = __dev_get_by_index(net, entry->ifindex);
+       if (!dev)
+               return -ENODEV;
+
+       p = br_port_get_rtnl(dev);
+       if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+               return -EINVAL;
+
+       ip.proto = entry->addr.proto;
+       if (ip.proto == htons(ETH_P_IP))
+               ip.u.ip4 = entry->addr.u.ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+       else
+               ip.u.ip6 = entry->addr.u.ip6;
+#endif
+
+       spin_lock_bh(&br->multicast_lock);
+       ret = br_mdb_add_group(br, p, &ip, entry->state);
+       spin_unlock_bh(&br->multicast_lock);
+       return ret;
+}
+
+/*
+ * RTM_NEWMDB handler: parse the request, add the group to the bridge and,
+ * if that succeeded, broadcast an RTM_NEWMDB notification carrying the
+ * entry as supplied by user space.
+ *
+ * Returns 0 on success or the negative error from parsing / adding.
+ */
+static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+       struct net *net = sock_net(skb->sk);
+       struct br_mdb_entry *entry;
+       struct net_device *dev;
+       int err;
+
+       err = br_mdb_parse(skb, nlh, &dev, &entry);
+       if (err < 0)
+               return err;
+
+       err = __br_mdb_add(net, netdev_priv(dev), entry);
+       if (err)
+               return err;
+
+       __br_mdb_notify(dev, entry, RTM_NEWMDB);
+       return 0;
+}
+
+static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
+{
+       struct net_bridge_mdb_htable *mdb;
+       struct net_bridge_mdb_entry *mp;
+       struct net_bridge_port_group *p;
+       struct net_bridge_port_group __rcu **pp;
+       struct br_ip ip;
+       int err = -EINVAL;
+
+       if (!netif_running(br->dev) || br->multicast_disabled)
+               return -EINVAL;
+
+       ip.proto = entry->addr.proto;
+       if (ip.proto == htons(ETH_P_IP)) {
+               if (timer_pending(&br->ip4_other_query.timer))
+                       return -EBUSY;
+
+               ip.u.ip4 = entry->addr.u.ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+       } else {
+               if (timer_pending(&br->ip6_other_query.timer))
+                       return -EBUSY;
+
+               ip.u.ip6 = entry->addr.u.ip6;
+#endif
+       }
+
+       spin_lock_bh(&br->multicast_lock);
+       mdb = mlock_dereference(br->mdb, br);
+
+       mp = br_mdb_ip_get(mdb, &ip);
+       if (!mp)
+               goto unlock;
+
+       for (pp = &mp->ports;
+            (p = mlock_dereference(*pp, br)) != NULL;
+            pp = &p->next) {
+               if (!p->port || p->port->dev->ifindex != entry->ifindex)
+                       continue;
+
+               if (p->port->state == BR_STATE_DISABLED)
+                       goto unlock;
+
+               rcu_assign_pointer(*pp, p->next);
+               hlist_del_init(&p->mglist);
+               del_timer(&p->timer);
+               call_rcu_bh(&p->rcu, br_multicast_free_pg);
+               err = 0;
+
+               if (!mp->ports && !mp->mglist &&
+                   netif_running(br->dev))
+                       mod_timer(&mp->timer, jiffies);
+               break;
+       }
+
+unlock:
+       spin_unlock_bh(&br->multicast_lock);
+       return err;
+}
+
+/*
+ * RTM_DELMDB handler: parse the request, remove the group membership from
+ * the bridge and, if that succeeded, broadcast an RTM_DELMDB notification
+ * carrying the entry as supplied by user space.
+ *
+ * Returns 0 on success or the negative error from parsing / deleting.
+ */
+static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+       struct br_mdb_entry *entry;
+       struct net_device *dev;
+       int err;
+
+       err = br_mdb_parse(skb, nlh, &dev, &entry);
+       if (err < 0)
+               return err;
+
+       err = __br_mdb_del(netdev_priv(dev), entry);
+       if (err)
+               return err;
+
+       __br_mdb_notify(dev, entry, RTM_DELMDB);
+       return 0;
+}
+
+/*
+ * Register the PF_BRIDGE rtnetlink handlers for the multicast database:
+ * RTM_GETMDB is served by the dump callback only, RTM_NEWMDB and
+ * RTM_DELMDB by their respective doit callbacks.
+ */
+void br_mdb_init(void)
+{
+       /* Dump only: no doit handler. */
+       rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL);
+
+       /* Modifying requests: doit handler, no dump. */
+       rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL);
+       rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL);
+}
+
+/*
+ * Unregister the PF_BRIDGE rtnetlink handlers for RTM_GETMDB, RTM_NEWMDB
+ * and RTM_DELMDB, in that order.
+ */
+void br_mdb_uninit(void)
+{
+       static const int msgtypes[] = {
+               RTM_GETMDB,
+               RTM_NEWMDB,
+               RTM_DELMDB,
+       };
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(msgtypes); i++)
+               rtnl_unregister(PF_BRIDGE, msgtypes[i]);
+}