These changes are a raw update to a vanilla kernel 4.1.10, with the
[kvmfornfv.git] / kernel / net / bridge / br_mdb.c
1 #include <linux/err.h>
2 #include <linux/igmp.h>
3 #include <linux/kernel.h>
4 #include <linux/netdevice.h>
5 #include <linux/rculist.h>
6 #include <linux/skbuff.h>
7 #include <linux/if_ether.h>
8 #include <net/ip.h>
9 #include <net/netlink.h>
10 #if IS_ENABLED(CONFIG_IPV6)
11 #include <net/ipv6.h>
12 #include <net/addrconf.h>
13 #endif
14
15 #include "br_private.h"
16
17 static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
18                                struct net_device *dev)
19 {
20         struct net_bridge *br = netdev_priv(dev);
21         struct net_bridge_port *p;
22         struct nlattr *nest;
23
24         if (!br->multicast_router || hlist_empty(&br->router_list))
25                 return 0;
26
27         nest = nla_nest_start(skb, MDBA_ROUTER);
28         if (nest == NULL)
29                 return -EMSGSIZE;
30
31         hlist_for_each_entry_rcu(p, &br->router_list, rlist) {
32                 if (p && nla_put_u32(skb, MDBA_ROUTER_PORT, p->dev->ifindex))
33                         goto fail;
34         }
35
36         nla_nest_end(skb, nest);
37         return 0;
38 fail:
39         nla_nest_cancel(skb, nest);
40         return -EMSGSIZE;
41 }
42
43 static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
44                             struct net_device *dev)
45 {
46         struct net_bridge *br = netdev_priv(dev);
47         struct net_bridge_mdb_htable *mdb;
48         struct nlattr *nest, *nest2;
49         int i, err = 0;
50         int idx = 0, s_idx = cb->args[1];
51
52         if (br->multicast_disabled)
53                 return 0;
54
55         mdb = rcu_dereference(br->mdb);
56         if (!mdb)
57                 return 0;
58
59         nest = nla_nest_start(skb, MDBA_MDB);
60         if (nest == NULL)
61                 return -EMSGSIZE;
62
63         for (i = 0; i < mdb->max; i++) {
64                 struct net_bridge_mdb_entry *mp;
65                 struct net_bridge_port_group *p;
66                 struct net_bridge_port_group __rcu **pp;
67                 struct net_bridge_port *port;
68
69                 hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) {
70                         if (idx < s_idx)
71                                 goto skip;
72
73                         nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY);
74                         if (nest2 == NULL) {
75                                 err = -EMSGSIZE;
76                                 goto out;
77                         }
78
79                         for (pp = &mp->ports;
80                              (p = rcu_dereference(*pp)) != NULL;
81                               pp = &p->next) {
82                                 port = p->port;
83                                 if (port) {
84                                         struct br_mdb_entry e;
85                                         memset(&e, 0, sizeof(e));
86                                         e.ifindex = port->dev->ifindex;
87                                         e.state = p->state;
88                                         if (p->addr.proto == htons(ETH_P_IP))
89                                                 e.addr.u.ip4 = p->addr.u.ip4;
90 #if IS_ENABLED(CONFIG_IPV6)
91                                         if (p->addr.proto == htons(ETH_P_IPV6))
92                                                 e.addr.u.ip6 = p->addr.u.ip6;
93 #endif
94                                         e.addr.proto = p->addr.proto;
95                                         if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(e), &e)) {
96                                                 nla_nest_cancel(skb, nest2);
97                                                 err = -EMSGSIZE;
98                                                 goto out;
99                                         }
100                                 }
101                         }
102                         nla_nest_end(skb, nest2);
103                 skip:
104                         idx++;
105                 }
106         }
107
108 out:
109         cb->args[1] = idx;
110         nla_nest_end(skb, nest);
111         return err;
112 }
113
114 static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
115 {
116         struct net_device *dev;
117         struct net *net = sock_net(skb->sk);
118         struct nlmsghdr *nlh = NULL;
119         int idx = 0, s_idx;
120
121         s_idx = cb->args[0];
122
123         rcu_read_lock();
124
125         /* In theory this could be wrapped to 0... */
126         cb->seq = net->dev_base_seq + br_mdb_rehash_seq;
127
128         for_each_netdev_rcu(net, dev) {
129                 if (dev->priv_flags & IFF_EBRIDGE) {
130                         struct br_port_msg *bpm;
131
132                         if (idx < s_idx)
133                                 goto skip;
134
135                         nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
136                                         cb->nlh->nlmsg_seq, RTM_GETMDB,
137                                         sizeof(*bpm), NLM_F_MULTI);
138                         if (nlh == NULL)
139                                 break;
140
141                         bpm = nlmsg_data(nlh);
142                         memset(bpm, 0, sizeof(*bpm));
143                         bpm->ifindex = dev->ifindex;
144                         if (br_mdb_fill_info(skb, cb, dev) < 0)
145                                 goto out;
146                         if (br_rports_fill_info(skb, cb, dev) < 0)
147                                 goto out;
148
149                         cb->args[1] = 0;
150                         nlmsg_end(skb, nlh);
151                 skip:
152                         idx++;
153                 }
154         }
155
156 out:
157         if (nlh)
158                 nlmsg_end(skb, nlh);
159         rcu_read_unlock();
160         cb->args[0] = idx;
161         return skb->len;
162 }
163
164 static int nlmsg_populate_mdb_fill(struct sk_buff *skb,
165                                    struct net_device *dev,
166                                    struct br_mdb_entry *entry, u32 pid,
167                                    u32 seq, int type, unsigned int flags)
168 {
169         struct nlmsghdr *nlh;
170         struct br_port_msg *bpm;
171         struct nlattr *nest, *nest2;
172
173         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0);
174         if (!nlh)
175                 return -EMSGSIZE;
176
177         bpm = nlmsg_data(nlh);
178         memset(bpm, 0, sizeof(*bpm));
179         bpm->family  = AF_BRIDGE;
180         bpm->ifindex = dev->ifindex;
181         nest = nla_nest_start(skb, MDBA_MDB);
182         if (nest == NULL)
183                 goto cancel;
184         nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY);
185         if (nest2 == NULL)
186                 goto end;
187
188         if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(*entry), entry))
189                 goto end;
190
191         nla_nest_end(skb, nest2);
192         nla_nest_end(skb, nest);
193         nlmsg_end(skb, nlh);
194         return 0;
195
196 end:
197         nla_nest_end(skb, nest);
198 cancel:
199         nlmsg_cancel(skb, nlh);
200         return -EMSGSIZE;
201 }
202
203 static inline size_t rtnl_mdb_nlmsg_size(void)
204 {
205         return NLMSG_ALIGN(sizeof(struct br_port_msg))
206                 + nla_total_size(sizeof(struct br_mdb_entry));
207 }
208
209 static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry,
210                             int type)
211 {
212         struct net *net = dev_net(dev);
213         struct sk_buff *skb;
214         int err = -ENOBUFS;
215
216         skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC);
217         if (!skb)
218                 goto errout;
219
220         err = nlmsg_populate_mdb_fill(skb, dev, entry, 0, 0, type, NTF_SELF);
221         if (err < 0) {
222                 kfree_skb(skb);
223                 goto errout;
224         }
225
226         rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_ATOMIC);
227         return;
228 errout:
229         rtnl_set_sk_err(net, RTNLGRP_MDB, err);
230 }
231
232 void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
233                    struct br_ip *group, int type)
234 {
235         struct br_mdb_entry entry;
236
237         memset(&entry, 0, sizeof(entry));
238         entry.ifindex = port->dev->ifindex;
239         entry.addr.proto = group->proto;
240         entry.addr.u.ip4 = group->u.ip4;
241 #if IS_ENABLED(CONFIG_IPV6)
242         entry.addr.u.ip6 = group->u.ip6;
243 #endif
244         __br_mdb_notify(dev, &entry, type);
245 }
246
247 static bool is_valid_mdb_entry(struct br_mdb_entry *entry)
248 {
249         if (entry->ifindex == 0)
250                 return false;
251
252         if (entry->addr.proto == htons(ETH_P_IP)) {
253                 if (!ipv4_is_multicast(entry->addr.u.ip4))
254                         return false;
255                 if (ipv4_is_local_multicast(entry->addr.u.ip4))
256                         return false;
257 #if IS_ENABLED(CONFIG_IPV6)
258         } else if (entry->addr.proto == htons(ETH_P_IPV6)) {
259                 if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6))
260                         return false;
261 #endif
262         } else
263                 return false;
264         if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY)
265                 return false;
266
267         return true;
268 }
269
270 static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
271                         struct net_device **pdev, struct br_mdb_entry **pentry)
272 {
273         struct net *net = sock_net(skb->sk);
274         struct br_mdb_entry *entry;
275         struct br_port_msg *bpm;
276         struct nlattr *tb[MDBA_SET_ENTRY_MAX+1];
277         struct net_device *dev;
278         int err;
279
280         err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, NULL);
281         if (err < 0)
282                 return err;
283
284         bpm = nlmsg_data(nlh);
285         if (bpm->ifindex == 0) {
286                 pr_info("PF_BRIDGE: br_mdb_parse() with invalid ifindex\n");
287                 return -EINVAL;
288         }
289
290         dev = __dev_get_by_index(net, bpm->ifindex);
291         if (dev == NULL) {
292                 pr_info("PF_BRIDGE: br_mdb_parse() with unknown ifindex\n");
293                 return -ENODEV;
294         }
295
296         if (!(dev->priv_flags & IFF_EBRIDGE)) {
297                 pr_info("PF_BRIDGE: br_mdb_parse() with non-bridge\n");
298                 return -EOPNOTSUPP;
299         }
300
301         *pdev = dev;
302
303         if (!tb[MDBA_SET_ENTRY] ||
304             nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct br_mdb_entry)) {
305                 pr_info("PF_BRIDGE: br_mdb_parse() with invalid attr\n");
306                 return -EINVAL;
307         }
308
309         entry = nla_data(tb[MDBA_SET_ENTRY]);
310         if (!is_valid_mdb_entry(entry)) {
311                 pr_info("PF_BRIDGE: br_mdb_parse() with invalid entry\n");
312                 return -EINVAL;
313         }
314
315         *pentry = entry;
316         return 0;
317 }
318
319 static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
320                             struct br_ip *group, unsigned char state)
321 {
322         struct net_bridge_mdb_entry *mp;
323         struct net_bridge_port_group *p;
324         struct net_bridge_port_group __rcu **pp;
325         struct net_bridge_mdb_htable *mdb;
326         int err;
327
328         mdb = mlock_dereference(br->mdb, br);
329         mp = br_mdb_ip_get(mdb, group);
330         if (!mp) {
331                 mp = br_multicast_new_group(br, port, group);
332                 err = PTR_ERR(mp);
333                 if (IS_ERR(mp))
334                         return err;
335         }
336
337         for (pp = &mp->ports;
338              (p = mlock_dereference(*pp, br)) != NULL;
339              pp = &p->next) {
340                 if (p->port == port)
341                         return -EEXIST;
342                 if ((unsigned long)p->port < (unsigned long)port)
343                         break;
344         }
345
346         p = br_multicast_new_port_group(port, group, *pp, state);
347         if (unlikely(!p))
348                 return -ENOMEM;
349         rcu_assign_pointer(*pp, p);
350
351         return 0;
352 }
353
354 static int __br_mdb_add(struct net *net, struct net_bridge *br,
355                         struct br_mdb_entry *entry)
356 {
357         struct br_ip ip;
358         struct net_device *dev;
359         struct net_bridge_port *p;
360         int ret;
361
362         if (!netif_running(br->dev) || br->multicast_disabled)
363                 return -EINVAL;
364
365         dev = __dev_get_by_index(net, entry->ifindex);
366         if (!dev)
367                 return -ENODEV;
368
369         p = br_port_get_rtnl(dev);
370         if (!p || p->br != br || p->state == BR_STATE_DISABLED)
371                 return -EINVAL;
372
373         memset(&ip, 0, sizeof(ip));
374         ip.proto = entry->addr.proto;
375         if (ip.proto == htons(ETH_P_IP))
376                 ip.u.ip4 = entry->addr.u.ip4;
377 #if IS_ENABLED(CONFIG_IPV6)
378         else
379                 ip.u.ip6 = entry->addr.u.ip6;
380 #endif
381
382         spin_lock_bh(&br->multicast_lock);
383         ret = br_mdb_add_group(br, p, &ip, entry->state);
384         spin_unlock_bh(&br->multicast_lock);
385         return ret;
386 }
387
388 static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
389 {
390         struct net *net = sock_net(skb->sk);
391         struct br_mdb_entry *entry;
392         struct net_device *dev;
393         struct net_bridge *br;
394         int err;
395
396         err = br_mdb_parse(skb, nlh, &dev, &entry);
397         if (err < 0)
398                 return err;
399
400         br = netdev_priv(dev);
401
402         err = __br_mdb_add(net, br, entry);
403         if (!err)
404                 __br_mdb_notify(dev, entry, RTM_NEWMDB);
405         return err;
406 }
407
408 static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
409 {
410         struct net_bridge_mdb_htable *mdb;
411         struct net_bridge_mdb_entry *mp;
412         struct net_bridge_port_group *p;
413         struct net_bridge_port_group __rcu **pp;
414         struct br_ip ip;
415         int err = -EINVAL;
416
417         if (!netif_running(br->dev) || br->multicast_disabled)
418                 return -EINVAL;
419
420         memset(&ip, 0, sizeof(ip));
421         ip.proto = entry->addr.proto;
422         if (ip.proto == htons(ETH_P_IP)) {
423                 if (timer_pending(&br->ip4_other_query.timer))
424                         return -EBUSY;
425
426                 ip.u.ip4 = entry->addr.u.ip4;
427 #if IS_ENABLED(CONFIG_IPV6)
428         } else {
429                 if (timer_pending(&br->ip6_other_query.timer))
430                         return -EBUSY;
431
432                 ip.u.ip6 = entry->addr.u.ip6;
433 #endif
434         }
435
436         spin_lock_bh(&br->multicast_lock);
437         mdb = mlock_dereference(br->mdb, br);
438
439         mp = br_mdb_ip_get(mdb, &ip);
440         if (!mp)
441                 goto unlock;
442
443         for (pp = &mp->ports;
444              (p = mlock_dereference(*pp, br)) != NULL;
445              pp = &p->next) {
446                 if (!p->port || p->port->dev->ifindex != entry->ifindex)
447                         continue;
448
449                 if (p->port->state == BR_STATE_DISABLED)
450                         goto unlock;
451
452                 rcu_assign_pointer(*pp, p->next);
453                 hlist_del_init(&p->mglist);
454                 del_timer(&p->timer);
455                 call_rcu_bh(&p->rcu, br_multicast_free_pg);
456                 err = 0;
457
458                 if (!mp->ports && !mp->mglist &&
459                     netif_running(br->dev))
460                         mod_timer(&mp->timer, jiffies);
461                 break;
462         }
463
464 unlock:
465         spin_unlock_bh(&br->multicast_lock);
466         return err;
467 }
468
469 static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
470 {
471         struct net_device *dev;
472         struct br_mdb_entry *entry;
473         struct net_bridge *br;
474         int err;
475
476         err = br_mdb_parse(skb, nlh, &dev, &entry);
477         if (err < 0)
478                 return err;
479
480         br = netdev_priv(dev);
481
482         err = __br_mdb_del(br, entry);
483         if (!err)
484                 __br_mdb_notify(dev, entry, RTM_DELMDB);
485         return err;
486 }
487
488 void br_mdb_init(void)
489 {
490         rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL);
491         rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL);
492         rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL);
493 }
494
495 void br_mdb_uninit(void)
496 {
497         rtnl_unregister(PF_BRIDGE, RTM_GETMDB);
498         rtnl_unregister(PF_BRIDGE, RTM_NEWMDB);
499         rtnl_unregister(PF_BRIDGE, RTM_DELMDB);
500 }