Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / net / sched / sch_teql.c
1 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
2  *
3  *              This program is free software; you can redistribute it and/or
4  *              modify it under the terms of the GNU General Public License
5  *              as published by the Free Software Foundation; either version
6  *              2 of the License, or (at your option) any later version.
7  *
8  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  */
10
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/string.h>
16 #include <linux/errno.h>
17 #include <linux/if_arp.h>
18 #include <linux/netdevice.h>
19 #include <linux/init.h>
20 #include <linux/skbuff.h>
21 #include <linux/moduleparam.h>
22 #include <net/dst.h>
23 #include <net/neighbour.h>
24 #include <net/pkt_sched.h>
25
26 /*
27    How to setup it.
28    ----------------
29
30    After loading this module you will find a new device teqlN
31    and new qdisc with the same name. To join a slave to the equalizer
32    you should just set this qdisc on a device f.e.
33
34    # tc qdisc add dev eth0 root teql0
35    # tc qdisc add dev eth1 root teql0
36
37    That's all. Full PnP 8)
38
39    Applicability.
40    --------------
41
42    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
43       signal and generate EOI events. If you want to equalize virtual devices
44       like tunnels, use a normal eql device.
45    2. This device puts no limitations on physical slave characteristics
46       f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
47       Certainly, large difference in link speeds will make the resulting
48       equalized link unusable, because of huge packet reordering.
49       I estimate an upper useful difference as ~10 times.
50    3. If the slave requires address resolution, only protocols using
51       neighbour cache (IPv4/IPv6) will work over the equalized link.
52       Other protocols are still allowed to use the slave device directly,
53       which will not break load balancing, though native slave
54       traffic will have the highest priority.  */
55
56 struct teql_master {
57         struct Qdisc_ops qops;
58         struct net_device *dev;
59         struct Qdisc *slaves;
60         struct list_head master_list;
61         unsigned long   tx_bytes;
62         unsigned long   tx_packets;
63         unsigned long   tx_errors;
64         unsigned long   tx_dropped;
65 };
66
67 struct teql_sched_data {
68         struct Qdisc *next;
69         struct teql_master *m;
70         struct sk_buff_head q;
71 };
72
/*
 * Successor of slave qdisc @q in the circular slave list.  The expansion is
 * an lvalue: it is used both to walk the list and to relink it.
 */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Link-type flags that the master device copies from its slaves. */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
76
77 /* "teql*" qdisc routines */
78
79 static int
80 teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
81 {
82         struct net_device *dev = qdisc_dev(sch);
83         struct teql_sched_data *q = qdisc_priv(sch);
84
85         if (q->q.qlen < dev->tx_queue_len) {
86                 __skb_queue_tail(&q->q, skb);
87                 return NET_XMIT_SUCCESS;
88         }
89
90         return qdisc_drop(skb, sch);
91 }
92
93 static struct sk_buff *
94 teql_dequeue(struct Qdisc *sch)
95 {
96         struct teql_sched_data *dat = qdisc_priv(sch);
97         struct netdev_queue *dat_queue;
98         struct sk_buff *skb;
99         struct Qdisc *q;
100
101         skb = __skb_dequeue(&dat->q);
102         dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
103         q = rcu_dereference_bh(dat_queue->qdisc);
104
105         if (skb == NULL) {
106                 struct net_device *m = qdisc_dev(q);
107                 if (m) {
108                         dat->m->slaves = sch;
109                         netif_wake_queue(m);
110                 }
111         } else {
112                 qdisc_bstats_update(sch, skb);
113         }
114         sch->q.qlen = dat->q.qlen + q->q.qlen;
115         return skb;
116 }
117
118 static struct sk_buff *
119 teql_peek(struct Qdisc *sch)
120 {
121         /* teql is meant to be used as root qdisc */
122         return NULL;
123 }
124
125 static void
126 teql_reset(struct Qdisc *sch)
127 {
128         struct teql_sched_data *dat = qdisc_priv(sch);
129
130         skb_queue_purge(&dat->q);
131         sch->q.qlen = 0;
132 }
133
134 static void
135 teql_destroy(struct Qdisc *sch)
136 {
137         struct Qdisc *q, *prev;
138         struct teql_sched_data *dat = qdisc_priv(sch);
139         struct teql_master *master = dat->m;
140
141         prev = master->slaves;
142         if (prev) {
143                 do {
144                         q = NEXT_SLAVE(prev);
145                         if (q == sch) {
146                                 NEXT_SLAVE(prev) = NEXT_SLAVE(q);
147                                 if (q == master->slaves) {
148                                         master->slaves = NEXT_SLAVE(q);
149                                         if (q == master->slaves) {
150                                                 struct netdev_queue *txq;
151                                                 spinlock_t *root_lock;
152
153                                                 txq = netdev_get_tx_queue(master->dev, 0);
154                                                 master->slaves = NULL;
155
156                                                 root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
157                                                 spin_lock_bh(root_lock);
158                                                 qdisc_reset(rtnl_dereference(txq->qdisc));
159                                                 spin_unlock_bh(root_lock);
160                                         }
161                                 }
162                                 skb_queue_purge(&dat->q);
163                                 break;
164                         }
165
166                 } while ((prev = q) != master->slaves);
167         }
168 }
169
170 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
171 {
172         struct net_device *dev = qdisc_dev(sch);
173         struct teql_master *m = (struct teql_master *)sch->ops;
174         struct teql_sched_data *q = qdisc_priv(sch);
175
176         if (dev->hard_header_len > m->dev->hard_header_len)
177                 return -EINVAL;
178
179         if (m->dev == dev)
180                 return -ELOOP;
181
182         q->m = m;
183
184         skb_queue_head_init(&q->q);
185
186         if (m->slaves) {
187                 if (m->dev->flags & IFF_UP) {
188                         if ((m->dev->flags & IFF_POINTOPOINT &&
189                              !(dev->flags & IFF_POINTOPOINT)) ||
190                             (m->dev->flags & IFF_BROADCAST &&
191                              !(dev->flags & IFF_BROADCAST)) ||
192                             (m->dev->flags & IFF_MULTICAST &&
193                              !(dev->flags & IFF_MULTICAST)) ||
194                             dev->mtu < m->dev->mtu)
195                                 return -EINVAL;
196                 } else {
197                         if (!(dev->flags&IFF_POINTOPOINT))
198                                 m->dev->flags &= ~IFF_POINTOPOINT;
199                         if (!(dev->flags&IFF_BROADCAST))
200                                 m->dev->flags &= ~IFF_BROADCAST;
201                         if (!(dev->flags&IFF_MULTICAST))
202                                 m->dev->flags &= ~IFF_MULTICAST;
203                         if (dev->mtu < m->dev->mtu)
204                                 m->dev->mtu = dev->mtu;
205                 }
206                 q->next = NEXT_SLAVE(m->slaves);
207                 NEXT_SLAVE(m->slaves) = sch;
208         } else {
209                 q->next = sch;
210                 m->slaves = sch;
211                 m->dev->mtu = dev->mtu;
212                 m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
213         }
214         return 0;
215 }
216
217
218 static int
219 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
220                struct net_device *dev, struct netdev_queue *txq,
221                struct dst_entry *dst)
222 {
223         struct neighbour *n;
224         int err = 0;
225
226         n = dst_neigh_lookup_skb(dst, skb);
227         if (!n)
228                 return -ENOENT;
229
230         if (dst->dev != dev) {
231                 struct neighbour *mn;
232
233                 mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
234                 neigh_release(n);
235                 if (IS_ERR(mn))
236                         return PTR_ERR(mn);
237                 n = mn;
238         }
239
240         if (neigh_event_send(n, skb_res) == 0) {
241                 int err;
242                 char haddr[MAX_ADDR_LEN];
243
244                 neigh_ha_snapshot(haddr, n, dev);
245                 err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)),
246                                       haddr, NULL, skb->len);
247
248                 if (err < 0)
249                         err = -EINVAL;
250         } else {
251                 err = (skb_res == NULL) ? -EAGAIN : 1;
252         }
253         neigh_release(n);
254         return err;
255 }
256
257 static inline int teql_resolve(struct sk_buff *skb,
258                                struct sk_buff *skb_res,
259                                struct net_device *dev,
260                                struct netdev_queue *txq)
261 {
262         struct dst_entry *dst = skb_dst(skb);
263         int res;
264
265         if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
266                 return -ENODEV;
267
268         if (!dev->header_ops || !dst)
269                 return 0;
270
271         rcu_read_lock();
272         res = __teql_resolve(skb, skb_res, dev, txq, dst);
273         rcu_read_unlock();
274
275         return res;
276 }
277
278 static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
279 {
280         struct teql_master *master = netdev_priv(dev);
281         struct Qdisc *start, *q;
282         int busy;
283         int nores;
284         int subq = skb_get_queue_mapping(skb);
285         struct sk_buff *skb_res = NULL;
286
287         start = master->slaves;
288
289 restart:
290         nores = 0;
291         busy = 0;
292
293         q = start;
294         if (!q)
295                 goto drop;
296
297         do {
298                 struct net_device *slave = qdisc_dev(q);
299                 struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
300
301                 if (slave_txq->qdisc_sleeping != q)
302                         continue;
303                 if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
304                     !netif_running(slave)) {
305                         busy = 1;
306                         continue;
307                 }
308
309                 switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
310                 case 0:
311                         if (__netif_tx_trylock(slave_txq)) {
312                                 unsigned int length = qdisc_pkt_len(skb);
313
314                                 if (!netif_xmit_frozen_or_stopped(slave_txq) &&
315                                     netdev_start_xmit(skb, slave, slave_txq, false) ==
316                                     NETDEV_TX_OK) {
317                                         __netif_tx_unlock(slave_txq);
318                                         master->slaves = NEXT_SLAVE(q);
319                                         netif_wake_queue(dev);
320                                         master->tx_packets++;
321                                         master->tx_bytes += length;
322                                         return NETDEV_TX_OK;
323                                 }
324                                 __netif_tx_unlock(slave_txq);
325                         }
326                         if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
327                                 busy = 1;
328                         break;
329                 case 1:
330                         master->slaves = NEXT_SLAVE(q);
331                         return NETDEV_TX_OK;
332                 default:
333                         nores = 1;
334                         break;
335                 }
336                 __skb_pull(skb, skb_network_offset(skb));
337         } while ((q = NEXT_SLAVE(q)) != start);
338
339         if (nores && skb_res == NULL) {
340                 skb_res = skb;
341                 goto restart;
342         }
343
344         if (busy) {
345                 netif_stop_queue(dev);
346                 return NETDEV_TX_BUSY;
347         }
348         master->tx_errors++;
349
350 drop:
351         master->tx_dropped++;
352         dev_kfree_skb(skb);
353         return NETDEV_TX_OK;
354 }
355
356 static int teql_master_open(struct net_device *dev)
357 {
358         struct Qdisc *q;
359         struct teql_master *m = netdev_priv(dev);
360         int mtu = 0xFFFE;
361         unsigned int flags = IFF_NOARP | IFF_MULTICAST;
362
363         if (m->slaves == NULL)
364                 return -EUNATCH;
365
366         flags = FMASK;
367
368         q = m->slaves;
369         do {
370                 struct net_device *slave = qdisc_dev(q);
371
372                 if (slave == NULL)
373                         return -EUNATCH;
374
375                 if (slave->mtu < mtu)
376                         mtu = slave->mtu;
377                 if (slave->hard_header_len > LL_MAX_HEADER)
378                         return -EINVAL;
379
380                 /* If all the slaves are BROADCAST, master is BROADCAST
381                    If all the slaves are PtP, master is PtP
382                    Otherwise, master is NBMA.
383                  */
384                 if (!(slave->flags&IFF_POINTOPOINT))
385                         flags &= ~IFF_POINTOPOINT;
386                 if (!(slave->flags&IFF_BROADCAST))
387                         flags &= ~IFF_BROADCAST;
388                 if (!(slave->flags&IFF_MULTICAST))
389                         flags &= ~IFF_MULTICAST;
390         } while ((q = NEXT_SLAVE(q)) != m->slaves);
391
392         m->dev->mtu = mtu;
393         m->dev->flags = (m->dev->flags&~FMASK) | flags;
394         netif_start_queue(m->dev);
395         return 0;
396 }
397
/* Take the master device down: stop its transmit queue.  Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}
403
404 static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
405                                                      struct rtnl_link_stats64 *stats)
406 {
407         struct teql_master *m = netdev_priv(dev);
408
409         stats->tx_packets       = m->tx_packets;
410         stats->tx_bytes         = m->tx_bytes;
411         stats->tx_errors        = m->tx_errors;
412         stats->tx_dropped       = m->tx_dropped;
413         return stats;
414 }
415
416 static int teql_master_mtu(struct net_device *dev, int new_mtu)
417 {
418         struct teql_master *m = netdev_priv(dev);
419         struct Qdisc *q;
420
421         if (new_mtu < 68)
422                 return -EINVAL;
423
424         q = m->slaves;
425         if (q) {
426                 do {
427                         if (new_mtu > qdisc_dev(q)->mtu)
428                                 return -EINVAL;
429                 } while ((q = NEXT_SLAVE(q)) != m->slaves);
430         }
431
432         dev->mtu = new_mtu;
433         return 0;
434 }
435
436 static const struct net_device_ops teql_netdev_ops = {
437         .ndo_open       = teql_master_open,
438         .ndo_stop       = teql_master_close,
439         .ndo_start_xmit = teql_master_xmit,
440         .ndo_get_stats64 = teql_master_stats64,
441         .ndo_change_mtu = teql_master_mtu,
442 };
443
444 static __init void teql_master_setup(struct net_device *dev)
445 {
446         struct teql_master *master = netdev_priv(dev);
447         struct Qdisc_ops *ops = &master->qops;
448
449         master->dev     = dev;
450         ops->priv_size  = sizeof(struct teql_sched_data);
451
452         ops->enqueue    =       teql_enqueue;
453         ops->dequeue    =       teql_dequeue;
454         ops->peek       =       teql_peek;
455         ops->init       =       teql_qdisc_init;
456         ops->reset      =       teql_reset;
457         ops->destroy    =       teql_destroy;
458         ops->owner      =       THIS_MODULE;
459
460         dev->netdev_ops =       &teql_netdev_ops;
461         dev->type               = ARPHRD_VOID;
462         dev->mtu                = 1500;
463         dev->tx_queue_len       = 100;
464         dev->flags              = IFF_NOARP;
465         dev->hard_header_len    = LL_MAX_HEADER;
466         netif_keep_dst(dev);
467 }
468
469 static LIST_HEAD(master_dev_list);
470 static int max_equalizers = 1;
471 module_param(max_equalizers, int, 0);
472 MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
473
474 static int __init teql_init(void)
475 {
476         int i;
477         int err = -ENODEV;
478
479         for (i = 0; i < max_equalizers; i++) {
480                 struct net_device *dev;
481                 struct teql_master *master;
482
483                 dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
484                                    NET_NAME_UNKNOWN, teql_master_setup);
485                 if (!dev) {
486                         err = -ENOMEM;
487                         break;
488                 }
489
490                 if ((err = register_netdev(dev))) {
491                         free_netdev(dev);
492                         break;
493                 }
494
495                 master = netdev_priv(dev);
496
497                 strlcpy(master->qops.id, dev->name, IFNAMSIZ);
498                 err = register_qdisc(&master->qops);
499
500                 if (err) {
501                         unregister_netdev(dev);
502                         free_netdev(dev);
503                         break;
504                 }
505
506                 list_add_tail(&master->master_list, &master_dev_list);
507         }
508         return i ? 0 : err;
509 }
510
511 static void __exit teql_exit(void)
512 {
513         struct teql_master *master, *nxt;
514
515         list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
516
517                 list_del(&master->master_list);
518
519                 unregister_qdisc(&master->qops);
520                 unregister_netdev(master->dev);
521                 free_netdev(master->dev);
522         }
523 }
524
525 module_init(teql_init);
526 module_exit(teql_exit);
527
528 MODULE_LICENSE("GPL");