/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "datapath.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

int ovs_net_id __read_mostly;
EXPORT_SYMBOL_GPL(ovs_net_id);

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct nla_policy flow_policy[];

static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
        .name = OVS_FLOW_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
        .name = OVS_DATAPATH_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
        .name = OVS_VPORT_MCGROUP,
};

/* Check whether we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
                            unsigned int group)
{
        return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
               genl_has_listeners(family, genl_info_net(info), group);
}

static void ovs_notify(struct genl_family *family,
                       struct sk_buff *skb, struct genl_info *info)
{
        genl_notify(family, skb, genl_info_net(info), info->snd_portid,
                    0, info->nlhdr, GFP_KERNEL);
}

/**
 * DOC: Locking:
 *
 * All writes, e.g. writes to device state (add/remove datapath or port, set
 * operations on vports, etc.) and writes to other state (flow table
 * modifications, setting miscellaneous datapath parameters, etc.), are
 * protected by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */

static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
        mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
        mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
        if (debug_locks)
                return lockdep_is_held(&ovs_mutex);
        else
                return 1;
}
EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held);
#endif
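
/*
 * Illustrative sketch (editor's addition; example_writer/example_reader are
 * hypothetical helpers, not part of the datapath): the reader/writer
 * discipline described under "DOC: Locking" above.
 *
 *	static void example_writer(struct datapath *dp)
 *	{
 *		ovs_lock();
 *		// modify datapath, vport or flow-table state here
 *		ovs_unlock();
 *	}
 *
 *	static void example_reader(struct net *net, int ifindex)
 *	{
 *		struct datapath *dp;
 *
 *		rcu_read_lock();
 *		dp = get_dp_rcu(net, ifindex);
 *		// 'dp' may only be dereferenced under rcu_read_lock
 *		rcu_read_unlock();
 *	}
 */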

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
                             const struct sw_flow_key *,
                             const struct dp_upcall_info *);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
                                  const struct sw_flow_key *,
                                  const struct dp_upcall_info *);

/* Must be called with rcu_read_lock. */
static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
{
        struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);

        if (dev) {
                struct vport *vport = ovs_internal_dev_get_vport(dev);
                if (vport)
                        return vport->dp;
        }

        return NULL;
}

/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
 * returned dp pointer valid.
 */
static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
{
        struct datapath *dp;

        WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
        rcu_read_lock();
        dp = get_dp_rcu(net, dp_ifindex);
        rcu_read_unlock();

        return dp;
}

/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
        struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
        return vport->ops->get_name(vport);
}

static int get_dpifindex(const struct datapath *dp)
{
        struct vport *local;
        int ifindex;

        rcu_read_lock();

        local = ovs_vport_rcu(dp, OVSP_LOCAL);
        if (local)
                ifindex = netdev_vport_priv(local)->dev->ifindex;
        else
                ifindex = 0;

        rcu_read_unlock();

        return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        ovs_flow_tbl_destroy(&dp->table);
        free_percpu(dp->stats_percpu);
        kfree(dp->ports);
        kfree(dp);
}

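/* Map a port number to its bucket in dp->ports.  DP_VPORT_HASH_BUCKETS is a
 * power of two, so the mask below is a cheap modulo.
 */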
static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
                                            u16 port_no)
{
        return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
        struct vport *vport;
        struct hlist_head *head;

        head = vport_hash_bucket(dp, port_no);
        hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
                if (vport->port_no == port_no)
                        return vport;
        }
        return NULL;
}

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = ovs_vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;
                struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

                hlist_add_head_rcu(&vport->dp_hash_node, head);
        }
        return vport;
}

void ovs_dp_detach_port(struct vport *p)
{
        ASSERT_OVSL();

        /* First drop references to device. */
        hlist_del_rcu(&p->dp_hash_node);

        /* Then destroy it. */
        ovs_vport_del(p);
}

/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
        const struct vport *p = OVS_CB(skb)->input_vport;
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct dp_stats_percpu *stats;
        u64 *stats_counter;
        u32 n_mask_hit;

        stats = this_cpu_ptr(dp->stats_percpu);

        /* Look up flow. */
        flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;
                int error;

                upcall.cmd = OVS_PACKET_CMD_MISS;
                upcall.userdata = NULL;
                upcall.portid = ovs_vport_find_upcall_portid(p, skb);
                upcall.egress_tun_info = NULL;
                error = ovs_dp_upcall(dp, skb, key, &upcall);
                if (unlikely(error))
                        kfree_skb(skb);
                else
                        consume_skb(skb);
                stats_counter = &stats->n_missed;
                goto out;
        }

        ovs_flow_stats_update(flow, key->tp.flags, skb);
        sf_acts = rcu_dereference(flow->sf_acts);
        ovs_execute_actions(dp, skb, sf_acts, key);

        stats_counter = &stats->n_hit;

out:
        /* Update datapath statistics. */
        u64_stats_update_begin(&stats->syncp);
        (*stats_counter)++;
        stats->n_mask_hit += n_mask_hit;
        u64_stats_update_end(&stats->syncp);
}

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
                  const struct sw_flow_key *key,
                  const struct dp_upcall_info *upcall_info)
{
        struct dp_stats_percpu *stats;
        int err;

        if (upcall_info->portid == 0) {
                err = -ENOTCONN;
                goto err;
        }

        if (!skb_is_gso(skb))
                err = queue_userspace_packet(dp, skb, key, upcall_info);
        else
                err = queue_gso_packets(dp, skb, key, upcall_info);
        if (err)
                goto err;

        return 0;

err:
        stats = this_cpu_ptr(dp->stats_percpu);

        u64_stats_update_begin(&stats->syncp);
        stats->n_lost++;
        u64_stats_update_end(&stats->syncp);

        return err;
}

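/* Software-segment a GSO skb and queue each resulting segment to userspace
 * as its own upcall.  Called from ovs_dp_upcall() for GSO packets.
 */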
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
                             const struct sw_flow_key *key,
                             const struct dp_upcall_info *upcall_info)
{
        unsigned short gso_type = skb_shinfo(skb)->gso_type;
        struct sw_flow_key later_key;
        struct sk_buff *segs, *nskb;
        struct ovs_skb_cb ovs_cb;
        int err;

        ovs_cb = *OVS_CB(skb);
        segs = __skb_gso_segment(skb, NETIF_F_SG, false);
        *OVS_CB(skb) = ovs_cb;
        if (IS_ERR(segs))
                return PTR_ERR(segs);
        if (segs == NULL)
                return -EINVAL;

        if (gso_type & SKB_GSO_UDP) {
                /* The initial flow key extracted by ovs_flow_key_extract()
                 * in this case is for a first fragment, so we need to
                 * properly mark later fragments.
                 */
                later_key = *key;
                later_key.ip.frag = OVS_FRAG_TYPE_LATER;
        }

        /* Queue all of the segments. */
        skb = segs;
        do {
                *OVS_CB(skb) = ovs_cb;
                if (gso_type & SKB_GSO_UDP && skb != segs)
                        key = &later_key;

                err = queue_userspace_packet(dp, skb, key, upcall_info);
                if (err)
                        break;

        } while ((skb = skb->next));

        /* Free all of the segments. */
        skb = segs;
        do {
                nskb = skb->next;
                if (err)
                        kfree_skb(skb);
                else
                        consume_skb(skb);
        } while ((skb = nskb));
        return err;
}

static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
                              unsigned int hdrlen)
{
        size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
                + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
                + nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */

        /* OVS_PACKET_ATTR_USERDATA */
        if (upcall_info->userdata)
                size += NLA_ALIGN(upcall_info->userdata->nla_len);

        /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
        if (upcall_info->egress_tun_info)
                size += nla_total_size(ovs_tun_key_attr_size());

        return size;
}

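/* Build an OVS_PACKET Netlink message for 'skb' (flow key, optional userdata
 * and egress tunnel key, then the packet itself) and unicast it to the
 * upcall portid.
 */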
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
                                  const struct sw_flow_key *key,
                                  const struct dp_upcall_info *upcall_info)
{
        struct ovs_header *upcall;
        struct sk_buff *nskb = NULL;
        struct sk_buff *user_skb = NULL; /* to be queued to userspace */
        struct nlattr *nla;
        struct genl_info info = {
                .dst_sk = ovs_dp_get_net(dp)->genl_sock,
                .snd_portid = upcall_info->portid,
        };
        size_t len;
        unsigned int hlen;
        int err, dp_ifindex;

        dp_ifindex = get_dpifindex(dp);
        if (!dp_ifindex)
                return -ENODEV;

        if (skb_vlan_tag_present(skb)) {
                nskb = skb_clone(skb, GFP_ATOMIC);
                if (!nskb)
                        return -ENOMEM;

                nskb = __vlan_hwaccel_push_inside(nskb);
                if (!nskb)
                        return -ENOMEM;

                skb = nskb;
        }

        if (nla_attr_size(skb->len) > USHRT_MAX) {
                err = -EFBIG;
                goto out;
        }

        /* Complete checksum if needed */
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_checksum_help(skb)))
                goto out;

        /* Older versions of OVS user space enforce alignment of the last
         * Netlink attribute to NLA_ALIGNTO which would require extensive
         * padding logic. Only perform zerocopy if padding is not required.
         */
        if (dp->user_features & OVS_DP_F_UNALIGNED)
                hlen = skb_zerocopy_headlen(skb);
        else
                hlen = skb->len;

        len = upcall_msg_size(upcall_info, hlen);
        user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
        if (!user_skb) {
                err = -ENOMEM;
                goto out;
        }

        upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                             0, upcall_info->cmd);
        upcall->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
        BUG_ON(err);

        if (upcall_info->userdata)
                __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
                          nla_len(upcall_info->userdata),
                          nla_data(upcall_info->userdata));

        if (upcall_info->egress_tun_info) {
                nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
                err = ovs_nla_put_egress_tunnel_key(user_skb,
                                                    upcall_info->egress_tun_info);
                BUG_ON(err);
                nla_nest_end(user_skb, nla);
        }

        /* Only reserve room for attribute header, packet data is added
         * in skb_zerocopy() */
        if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
                err = -ENOBUFS;
                goto out;
        }
        nla->nla_len = nla_attr_size(skb->len);

        err = skb_zerocopy(user_skb, skb, skb->len, hlen);
        if (err)
                goto out;

        /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
        if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
                size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len;

                if (plen > 0)
                        memset(skb_put(user_skb, plen), 0, plen);
        }

        ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

        err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
        user_skb = NULL;
out:
        if (err)
                skb_tx_error(skb);
        kfree_skb(user_skb);
        kfree_skb(nskb);
        return err;
}

static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct ovs_header *ovs_header = info->userhdr;
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct datapath *dp;
        struct ethhdr *eth;
        struct vport *input_vport;
        int len;
        int err;
        bool log = !a[OVS_PACKET_ATTR_PROBE];

        err = -EINVAL;
        if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
            !a[OVS_PACKET_ATTR_ACTIONS])
                goto err;

        len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

        skb_reset_mac_header(packet);
        eth = eth_hdr(packet);

        /* Normally, setting the skb 'protocol' field would be handled by a
         * call to eth_type_trans(), but it assumes there's a sending
         * device, which we may not have. */
        if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
                packet->protocol = eth->h_proto;
        else
                packet->protocol = htons(ETH_P_802_2);

        /* Build an sw_flow for sending this packet. */
        flow = ovs_flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet,
                                             &flow->key, log);
        if (err)
                goto err_flow_free;

        err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
                                   &flow->key, &acts, log);
        if (err)
                goto err_flow_free;

        rcu_assign_pointer(flow->sf_acts, acts);
        OVS_CB(packet)->egress_tun_info = NULL;
        packet->priority = flow->key.phy.priority;
        packet->mark = flow->key.phy.skb_mark;

        rcu_read_lock();
        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;

        input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
        if (!input_vport)
                input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

        if (!input_vport)
                goto err_unlock;

        OVS_CB(packet)->input_vport = input_vport;
        sf_acts = rcu_dereference(flow->sf_acts);

        local_bh_disable();
        err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
        local_bh_enable();
        rcu_read_unlock();

        ovs_flow_free(flow, false);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_free:
        ovs_flow_free(flow, false);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
};

static const struct genl_ops dp_packet_genl_ops[] = {
        { .cmd = OVS_PACKET_CMD_EXECUTE,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = packet_policy,
          .doit = ovs_packet_cmd_execute
        }
};

static struct genl_family dp_packet_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_PACKET_FAMILY,
        .version = OVS_PACKET_VERSION,
        .maxattr = OVS_PACKET_ATTR_MAX,
        .netnsok = true,
        .parallel_ops = true,
        .ops = dp_packet_genl_ops,
        .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
};

static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
                         struct ovs_dp_megaflow_stats *mega_stats)
{
        int i;

        memset(mega_stats, 0, sizeof(*mega_stats));

        stats->n_flows = ovs_flow_tbl_count(&dp->table);
        mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

        stats->n_hit = stats->n_missed = stats->n_lost = 0;

        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned int start;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

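                /* Snapshot this CPU's counters consistently: retry if a
                 * writer updated them concurrently (u64_stats seqcount).
                 */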
                do {
                        start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
                        local_stats = *percpu_stats;
                } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));

                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
                mega_stats->n_mask_hit += local_stats.n_mask_hit;
        }
}

static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
{
        return ovs_identifier_is_ufid(sfid) &&
               !(ufid_flags & OVS_UFID_F_OMIT_KEY);
}

static bool should_fill_mask(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
}

static bool should_fill_actions(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
}

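/* Conservative upper bound on the Netlink message size needed to describe a
 * flow with the given actions, identifier and UFID flags.
 */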
static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
                                    const struct sw_flow_id *sfid,
                                    uint32_t ufid_flags)
{
        size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));

        /* OVS_FLOW_ATTR_UFID */
        if (sfid && ovs_identifier_is_ufid(sfid))
                len += nla_total_size(sfid->ufid_len);

        /* OVS_FLOW_ATTR_KEY */
        if (!sfid || should_fill_key(sfid, ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_MASK */
        if (should_fill_mask(ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_ACTIONS */
        if (should_fill_actions(ufid_flags))
                len += nla_total_size(acts->actions_len);

        return len
                + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
                + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
                + nla_total_size(8); /* OVS_FLOW_ATTR_USED */
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
                                   struct sk_buff *skb)
{
        struct ovs_flow_stats stats;
        __be16 tcp_flags;
        unsigned long used;

        ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

        if (used &&
            nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
                return -EMSGSIZE;

        if (stats.n_packets &&
            nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
                return -EMSGSIZE;

        if ((u8)ntohs(tcp_flags) &&
             nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
                return -EMSGSIZE;

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
                                     struct sk_buff *skb, int skb_orig_len)
{
        struct nlattr *start;
        int err;

        /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'.  This is unusual for
         * Netlink but individual action lists can be longer than
         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
         * The userspace caller can always fetch the actions separately if it
         * really wants them.  (Most userspace callers in fact don't care.)
         *
         * This can only fail for dump operations because the skb is always
         * properly sized for single flows.
         */
        start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
        if (start) {
                const struct sw_flow_actions *sf_acts;

                sf_acts = rcu_dereference_ovsl(flow->sf_acts);
                err = ovs_nla_put_actions(sf_acts->actions,
                                          sf_acts->actions_len, skb);

                if (!err)
                        nla_nest_end(skb, start);
                else {
                        if (skb_orig_len)
                                return err;

                        nla_nest_cancel(skb, start);
                }
        } else if (skb_orig_len) {
                return -EMSGSIZE;
        }

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
                                  struct sk_buff *skb, u32 portid,
                                  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
        const int skb_orig_len = skb->len;
        struct ovs_header *ovs_header;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
                                 flags, cmd);
        if (!ovs_header)
                return -EMSGSIZE;

        ovs_header->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_identifier(flow, skb);
        if (err)
                goto error;

        if (should_fill_key(&flow->id, ufid_flags)) {
                err = ovs_nla_put_masked_key(flow, skb);
                if (err)
                        goto error;
        }

        if (should_fill_mask(ufid_flags)) {
                err = ovs_nla_put_mask(flow, skb);
                if (err)
                        goto error;
        }

        err = ovs_flow_cmd_fill_stats(flow, skb);
        if (err)
                goto error;

        if (should_fill_actions(ufid_flags)) {
                err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
                if (err)
                        goto error;
        }

        genlmsg_end(skb, ovs_header);
        return 0;

error:
        genlmsg_cancel(skb, ovs_header);
        return err;
}

/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
                                               const struct sw_flow_id *sfid,
                                               struct genl_info *info,
                                               bool always,
                                               uint32_t ufid_flags)
{
        struct sk_buff *skb;
        size_t len;

        if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
                return NULL;

        len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
        skb = genlmsg_new_unicast(len, info, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        return skb;
}

/* Called with ovs_mutex. */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
                                               int dp_ifindex,
                                               struct genl_info *info, u8 cmd,
                                               bool always, u32 ufid_flags)
{
        struct sk_buff *skb;
        int retval;

        skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
                                      &flow->id, info, always, ufid_flags);
        if (IS_ERR_OR_NULL(skb))
                return skb;

        retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
                                        info->snd_portid, info->snd_seq, 0,
                                        cmd, ufid_flags);
        BUG_ON(retval < 0);
        return skb;
}

static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow *flow = NULL, *new_flow;
        struct sw_flow_mask mask;
        struct sk_buff *reply;
        struct datapath *dp;
        struct sw_flow_key key;
        struct sw_flow_actions *acts;
        struct sw_flow_match match;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error;
        bool log = !a[OVS_FLOW_ATTR_PROBE];

        /* Must have key and actions. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY]) {
                OVS_NLERR(log, "Flow key attr not present in new flow.");
                goto error;
        }
        if (!a[OVS_FLOW_ATTR_ACTIONS]) {
                OVS_NLERR(log, "Flow actions attr not present in new flow.");
                goto error;
        }
        /* Most of the time we need to allocate a new flow, so do it before
         * taking the lock.
         */
        new_flow = ovs_flow_alloc();
        if (IS_ERR(new_flow)) {
                error = PTR_ERR(new_flow);
                goto error;
        }

        /* Extract key. */
        ovs_match_init(&match, &key, &mask);
        error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
                                  a[OVS_FLOW_ATTR_MASK], log);
        if (error)
                goto err_kfree_flow;

        ovs_flow_mask_key(&new_flow->key, &key, &mask);

        /* Extract flow identifier. */
        error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
                                       &key, log);
        if (error)
                goto err_kfree_flow;

        /* Validate actions. */
        error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
                                     &acts, log);
        if (error) {
                OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
                goto err_kfree_flow;
        }

        reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
                                        ufid_flags);
        if (IS_ERR(reply)) {
                error = PTR_ERR(reply);
                goto err_kfree_acts;
        }

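        /* Note: 'reply' may be NULL here; ovs_flow_cmd_alloc_info() returns
         * NULL when no reply is needed (no listeners and NLM_F_ECHO unset),
         * so the fill and notify steps below are conditional on it.
         */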
        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }

        /* Check if this is a duplicate flow */
        if (ovs_identifier_is_ufid(&new_flow->id))
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
        if (!flow)
                flow = ovs_flow_tbl_lookup(&dp->table, &key);
        if (likely(!flow)) {
                rcu_assign_pointer(new_flow->sf_acts, acts);

                /* Put flow in bucket. */
                error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
                if (unlikely(error)) {
                        acts = NULL;
                        goto err_unlock_ovs;
                }

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(new_flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();
        } else {
                struct sw_flow_actions *old_acts;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request.  We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
                                                         | NLM_F_EXCL))) {
                        error = -EEXIST;
                        goto err_unlock_ovs;
                }
                /* The flow identifier has to be the same for flow updates.
                 * Look for any overlapping flow.
                 */
                if (unlikely(!ovs_flow_cmp(flow, &match))) {
                        if (ovs_identifier_is_key(&flow->id))
                                flow = ovs_flow_tbl_lookup_exact(&dp->table,
                                                                 &match);
                        else /* UFID matches but key is different */
                                flow = NULL;
                        if (!flow) {
                                error = -ENOENT;
                                goto err_unlock_ovs;
                        }
                }
                /* Update actions. */
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();

                ovs_nla_free_flow_actions(old_acts);
                ovs_flow_free(new_flow, false);
        }

        if (reply)
                ovs_notify(&dp_flow_genl_family, reply, info);
        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        kfree(acts);
err_kfree_flow:
        ovs_flow_free(new_flow, false);
error:
        return error;
}

/* Factor out action copy to avoid a "-Wframe-larger-than=1024" warning. */
static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
                                                const struct sw_flow_key *key,
                                                const struct sw_flow_mask *mask,
                                                bool log)
{
        struct sw_flow_actions *acts;
        struct sw_flow_key masked_key;
        int error;

        ovs_flow_mask_key(&masked_key, key, mask);
        error = ovs_nla_copy_actions(a, &masked_key, &acts, log);
        if (error) {
                OVS_NLERR(log,
                          "Actions may not be safe on all matching packets");
                return ERR_PTR(error);
        }

        return acts;
}

static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sw_flow *flow;
        struct sw_flow_mask mask;
        struct sk_buff *reply = NULL;
        struct datapath *dp;
        struct sw_flow_actions *old_acts = NULL, *acts = NULL;
        struct sw_flow_match match;
        struct sw_flow_id sfid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        /* Extract key. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY]) {
                OVS_NLERR(log, "Flow key attribute not present in set flow.");
                goto error;
        }

        ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
        ovs_match_init(&match, &key, &mask);
        error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
                                  a[OVS_FLOW_ATTR_MASK], log);
        if (error)
                goto error;

        /* Validate actions. */
        if (a[OVS_FLOW_ATTR_ACTIONS]) {
                acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask,
                                        log);
                if (IS_ERR(acts)) {
                        error = PTR_ERR(acts);
                        goto error;
                }

                /* With the actions known, the reply can be allocated before
                 * taking the lock. */
                reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
                                                ufid_flags);
                if (IS_ERR(reply)) {
                        error = PTR_ERR(reply);
                        goto err_kfree_acts;
                }
        }

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }
        /* Check that the flow exists. */
        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                error = -ENOENT;
                goto err_unlock_ovs;
        }

        /* Update actions, if present. */
        if (likely(acts)) {
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
        } else {
                /* Without the actions, the reply could not be allocated
                 * before taking the lock. */
                reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
                                                info, OVS_FLOW_CMD_NEW, false,
                                                ufid_flags);

                if (unlikely(IS_ERR(reply))) {
                        error = PTR_ERR(reply);
                        goto err_unlock_ovs;
                }
        }

        /* Clear stats. */
        if (a[OVS_FLOW_ATTR_CLEAR])
                ovs_flow_stats_clear(flow);
        ovs_unlock();

        if (reply)
                ovs_notify(&dp_flow_genl_family, reply, info);
        if (old_acts)
                ovs_nla_free_flow_actions(old_acts);

        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        kfree(acts);
error:
        return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct sw_flow_match match;
        struct sw_flow_id ufid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int err = 0;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(&match, &key, NULL);
                err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
                                        log);
        } else if (!ufid_present) {
                OVS_NLERR(log,
                          "Flow get message rejected: key attribute missing.");
                err = -EINVAL;
        }
        if (err)
                return err;

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                err = -ENODEV;
                goto unlock;
        }

        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (!flow) {
                err = -ENOENT;
                goto unlock;
        }

        reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
                                        OVS_FLOW_CMD_NEW, true, ufid_flags);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                goto unlock;
        }

        ovs_unlock();
        return genlmsg_reply(reply, info);
unlock:
        ovs_unlock();
        return err;
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow = NULL;
        struct datapath *dp;
        struct sw_flow_match match;
        struct sw_flow_id ufid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int err;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(&match, &key, NULL);
                err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
                                        log);
                if (unlikely(err))
                        return err;
        }

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                err = -ENODEV;
                goto unlock;
        }

        if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
                err = ovs_flow_tbl_flush(&dp->table);
                goto unlock;
        }

        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                err = -ENOENT;
                goto unlock;
        }

        ovs_flow_tbl_remove(&dp->table, flow);
        ovs_unlock();

        reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
                                        &flow->id, info, false, ufid_flags);
        if (likely(reply)) {
                if (likely(!IS_ERR(reply))) {
                        rcu_read_lock();        /* To keep the RCU checker happy. */
                        err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
                                                     reply, info->snd_portid,
                                                     info->snd_seq, 0,
                                                     OVS_FLOW_CMD_DEL,
                                                     ufid_flags);
                        rcu_read_unlock();
                        BUG_ON(err < 0);

                        ovs_notify(&dp_flow_genl_family, reply, info);
                } else {
                        netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
                }
        }

        ovs_flow_free(flow, true);
        return 0;
unlock:
        ovs_unlock();
        return err;
}

static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct nlattr *a[__OVS_FLOW_ATTR_MAX];
        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct table_instance *ti;
        struct datapath *dp;
        u32 ufid_flags;
        int err;

        err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
                            OVS_FLOW_ATTR_MAX, flow_policy);
        if (err)
                return err;
        ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);

        rcu_read_lock();
        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                rcu_read_unlock();
                return -ENODEV;
        }

        ti = rcu_dereference(dp->table.ti);
        for (;;) {
                struct sw_flow *flow;
                u32 bucket, obj;

                bucket = cb->args[0];
                obj = cb->args[1];
                flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
                if (!flow)
                        break;

                if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
                                           NETLINK_CB(cb->skb).portid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           OVS_FLOW_CMD_NEW, ufid_flags) < 0)
                        break;

                cb->args[0] = bucket;
                cb->args[1] = obj;
        }
        rcu_read_unlock();
        return skb->len;
}

static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
        [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
        [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
        [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
};

static const struct genl_ops dp_flow_genl_ops[] = {
        { .cmd = OVS_FLOW_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_new
        },
        { .cmd = OVS_FLOW_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_del
        },
        { .cmd = OVS_FLOW_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_get,
          .dumpit = ovs_flow_cmd_dump
        },
        { .cmd = OVS_FLOW_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_set,
        },
};

static struct genl_family dp_flow_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_FLOW_FAMILY,
        .version = OVS_FLOW_VERSION,
        .maxattr = OVS_FLOW_ATTR_MAX,
        .netnsok = true,
        .parallel_ops = true,
        .ops = dp_flow_genl_ops,
        .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
        .mcgrps = &ovs_dp_flow_multicast_group,
        .n_mcgrps = 1,
};

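/* Upper bound on the Netlink message size for a datapath command reply. */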
static size_t ovs_dp_cmd_msg_size(void)
{
        size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));

        msgsize += nla_total_size(IFNAMSIZ);
        msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
        msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
        msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */

        return msgsize;
}

/* Called with ovs_mutex. */
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
                                u32 portid, u32 seq, u32 flags, u8 cmd)
{
        struct ovs_header *ovs_header;
        struct ovs_dp_stats dp_stats;
        struct ovs_dp_megaflow_stats dp_megaflow_stats;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
                                   flags, cmd);
        if (!ovs_header)
                goto error;

        ovs_header->dp_ifindex = get_dpifindex(dp);

        err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
        if (err)
                goto nla_put_failure;

        get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
        if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
                        &dp_stats))
                goto nla_put_failure;

        if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
                        sizeof(struct ovs_dp_megaflow_stats),
                        &dp_megaflow_stats))
                goto nla_put_failure;

        if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
                goto nla_put_failure;

        genlmsg_end(skb, ovs_header);
        return 0;

nla_put_failure:
        genlmsg_cancel(skb, ovs_header);
error:
        return -EMSGSIZE;
}

static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
{
        return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
}

/* Called with rcu_read_lock or ovs_mutex. */
static struct datapath *lookup_datapath(struct net *net,
                                        const struct ovs_header *ovs_header,
                                        struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
        struct datapath *dp;

        if (!a[OVS_DP_ATTR_NAME])
                dp = get_dp(net, ovs_header->dp_ifindex);
        else {
                struct vport *vport;

                vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
                dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
        }
        return dp ? dp : ERR_PTR(-ENODEV);
}

static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
{
        struct datapath *dp;

        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
        if (IS_ERR(dp))
                return;

        WARN(dp->user_features, "Dropping previously announced user features\n");
        dp->user_features = 0;
}

static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
        if (a[OVS_DP_ATTR_USER_FEATURES])
                dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
}

1480 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1481 {
1482         struct nlattr **a = info->attrs;
1483         struct vport_parms parms;
1484         struct sk_buff *reply;
1485         struct datapath *dp;
1486         struct vport *vport;
1487         struct ovs_net *ovs_net;
1488         int err, i;
1489
1490         err = -EINVAL;
1491         if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1492                 goto err;
1493
1494         reply = ovs_dp_cmd_alloc_info(info);
1495         if (!reply)
1496                 return -ENOMEM;
1497
1498         err = -ENOMEM;
1499         dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1500         if (dp == NULL)
1501                 goto err_free_reply;
1502
1503         ovs_dp_set_net(dp, sock_net(skb->sk));
1504
1505         /* Allocate table. */
1506         err = ovs_flow_tbl_init(&dp->table);
1507         if (err)
1508                 goto err_free_dp;
1509
1510         dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1511         if (!dp->stats_percpu) {
1512                 err = -ENOMEM;
1513                 goto err_destroy_table;
1514         }
1515
1516         dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
1517                             GFP_KERNEL);
1518         if (!dp->ports) {
1519                 err = -ENOMEM;
1520                 goto err_destroy_percpu;
1521         }
1522
1523         for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1524                 INIT_HLIST_HEAD(&dp->ports[i]);
1525
1526         /* Set up our datapath device. */
1527         parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1528         parms.type = OVS_VPORT_TYPE_INTERNAL;
1529         parms.options = NULL;
1530         parms.dp = dp;
1531         parms.port_no = OVSP_LOCAL;
1532         parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
1533
1534         ovs_dp_change(dp, a);
1535
1536         /* So far only local changes have been made; now we need the lock. */
1537         ovs_lock();
1538
1539         vport = new_vport(&parms);
1540         if (IS_ERR(vport)) {
1541                 err = PTR_ERR(vport);
1542                 if (err == -EBUSY)
1543                         err = -EEXIST;
1544
1545                 if (err == -EEXIST) {
1546                         /* An outdated user space instance that does not understand
1547                          * the concept of user_features has attempted to create a new
1548                          * datapath and is likely to reuse it. Drop all user features.
1549                          */
1550                         if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1551                                 ovs_dp_reset_user_features(skb, info);
1552                 }
1553
1554                 goto err_destroy_ports_array;
1555         }
1556
1557         err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1558                                    info->snd_seq, 0, OVS_DP_CMD_NEW);
1559         BUG_ON(err < 0);
1560
1561         ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1562         list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1563
1564         ovs_unlock();
1565
1566         ovs_notify(&dp_datapath_genl_family, reply, info);
1567         return 0;
1568
1569 err_destroy_ports_array:
1570         ovs_unlock();
1571         kfree(dp->ports);
1572 err_destroy_percpu:
1573         free_percpu(dp->stats_percpu);
1574 err_destroy_table:
1575         ovs_flow_tbl_destroy(&dp->table);
1576 err_free_dp:
1577         kfree(dp);
1578 err_free_reply:
1579         kfree_skb(reply);
1580 err:
1581         return err;
1582 }
1583
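/* Tear down a datapath: detach every port except OVSP_LOCAL, unlink the
 * datapath from the per-netns list, detach the local port last, and
 * defer freeing of the remaining state to an RCU callback.
 */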
1584 /* Called with ovs_mutex. */
1585 static void __dp_destroy(struct datapath *dp)
1586 {
1587         int i;
1588
1589         for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1590                 struct vport *vport;
1591                 struct hlist_node *n;
1592
1593                 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1594                         if (vport->port_no != OVSP_LOCAL)
1595                                 ovs_dp_detach_port(vport);
1596         }
1597
1598         list_del_rcu(&dp->list_node);
1599
1600         /* OVSP_LOCAL is the datapath's internal port. All other ports must
1601          * be destroyed before the datapath itself is freed.
1602          */
1603         ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1604
1605         /* Destroy the flow table via an RCU callback. */
1606         call_rcu(&dp->rcu, destroy_dp_rcu);
1607 }
1608
1609 static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1610 {
1611         struct sk_buff *reply;
1612         struct datapath *dp;
1613         int err;
1614
1615         reply = ovs_dp_cmd_alloc_info(info);
1616         if (!reply)
1617                 return -ENOMEM;
1618
1619         ovs_lock();
1620         dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1621         err = PTR_ERR(dp);
1622         if (IS_ERR(dp))
1623                 goto err_unlock_free;
1624
1625         err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1626                                    info->snd_seq, 0, OVS_DP_CMD_DEL);
1627         BUG_ON(err < 0);
1628
1629         __dp_destroy(dp);
1630         ovs_unlock();
1631
1632         ovs_notify(&dp_datapath_genl_family, reply, info);
1633
1634         return 0;
1635
1636 err_unlock_free:
1637         ovs_unlock();
1638         kfree_skb(reply);
1639         return err;
1640 }
1641
1642 static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1643 {
1644         struct sk_buff *reply;
1645         struct datapath *dp;
1646         int err;
1647
1648         reply = ovs_dp_cmd_alloc_info(info);
1649         if (!reply)
1650                 return -ENOMEM;
1651
1652         ovs_lock();
1653         dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1654         err = PTR_ERR(dp);
1655         if (IS_ERR(dp))
1656                 goto err_unlock_free;
1657
1658         ovs_dp_change(dp, info->attrs);
1659
1660         err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1661                                    info->snd_seq, 0, OVS_DP_CMD_NEW);
1662         BUG_ON(err < 0);
1663
1664         ovs_unlock();
1665         ovs_notify(&dp_datapath_genl_family, reply, info);
1666
1667         return 0;
1668
1669 err_unlock_free:
1670         ovs_unlock();
1671         kfree_skb(reply);
1672         return err;
1673 }
1674
1675 static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1676 {
1677         struct sk_buff *reply;
1678         struct datapath *dp;
1679         int err;
1680
1681         reply = ovs_dp_cmd_alloc_info(info);
1682         if (!reply)
1683                 return -ENOMEM;
1684
1685         ovs_lock();
1686         dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1687         if (IS_ERR(dp)) {
1688                 err = PTR_ERR(dp);
1689                 goto err_unlock_free;
1690         }
1691         err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1692                                    info->snd_seq, 0, OVS_DP_CMD_NEW);
1693         BUG_ON(err < 0);
1694         ovs_unlock();
1695
1696         return genlmsg_reply(reply, info);
1697
1698 err_unlock_free:
1699         ovs_unlock();
1700         kfree_skb(reply);
1701         return err;
1702 }
1703
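/* Datapath dump callback.  cb->args[0] counts the entries already
 * emitted, so a dump that overflows one skb resumes at the correct
 * position in ovs_net->dps on the next call.
 */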
1704 static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1705 {
1706         struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1707         struct datapath *dp;
1708         int skip = cb->args[0];
1709         int i = 0;
1710
1711         ovs_lock();
1712         list_for_each_entry(dp, &ovs_net->dps, list_node) {
1713                 if (i >= skip &&
1714                     ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1715                                          cb->nlh->nlmsg_seq, NLM_F_MULTI,
1716                                          OVS_DP_CMD_NEW) < 0)
1717                         break;
1718                 i++;
1719         }
1720         ovs_unlock();
1721
1722         cb->args[0] = i;
1723
1724         return skb->len;
1725 }
1726
1727 static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1728         [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1729         [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1730         [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
1731 };
1732
1733 static const struct genl_ops dp_datapath_genl_ops[] = {
1734         { .cmd = OVS_DP_CMD_NEW,
1735           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1736           .policy = datapath_policy,
1737           .doit = ovs_dp_cmd_new
1738         },
1739         { .cmd = OVS_DP_CMD_DEL,
1740           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1741           .policy = datapath_policy,
1742           .doit = ovs_dp_cmd_del
1743         },
1744         { .cmd = OVS_DP_CMD_GET,
1745           .flags = 0,               /* OK for unprivileged users. */
1746           .policy = datapath_policy,
1747           .doit = ovs_dp_cmd_get,
1748           .dumpit = ovs_dp_cmd_dump
1749         },
1750         { .cmd = OVS_DP_CMD_SET,
1751           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1752           .policy = datapath_policy,
1753           .doit = ovs_dp_cmd_set,
1754         },
1755 };
1756
1757 static struct genl_family dp_datapath_genl_family = {
1758         .id = GENL_ID_GENERATE,
1759         .hdrsize = sizeof(struct ovs_header),
1760         .name = OVS_DATAPATH_FAMILY,
1761         .version = OVS_DATAPATH_VERSION,
1762         .maxattr = OVS_DP_ATTR_MAX,
1763         .netnsok = true,
1764         .parallel_ops = true,
1765         .ops = dp_datapath_genl_ops,
1766         .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
1767         .mcgrps = &ovs_dp_datapath_multicast_group,
1768         .n_mcgrps = 1,
1769 };
1770
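/* Illustrative userspace sketch (not part of this file): how a request
 * that lands in ovs_dp_cmd_get() above could be built with libnl-3.
 * The datapath name "ovs-system" and the omitted error handling are
 * assumptions for illustration only.
 *
 *      struct nl_sock *sk = nl_socket_alloc();
 *      genl_connect(sk);
 *      int family = genl_ctrl_resolve(sk, OVS_DATAPATH_FAMILY);
 *
 *      struct nl_msg *msg = nlmsg_alloc();
 *      struct ovs_header *hdr =
 *              genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family,
 *                          sizeof(*hdr), 0, OVS_DP_CMD_GET,
 *                          OVS_DATAPATH_VERSION);
 *      hdr->dp_ifindex = 0;            // look up by name instead
 *      nla_put_string(msg, OVS_DP_ATTR_NAME, "ovs-system");
 *      nl_send_auto(sk, msg);   // reply filled by ovs_dp_cmd_fill_info()
 */
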
1771 /* Called with ovs_mutex or RCU read lock. */
1772 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1773                                    u32 portid, u32 seq, u32 flags, u8 cmd)
1774 {
1775         struct ovs_header *ovs_header;
1776         struct ovs_vport_stats vport_stats;
1777         int err;
1778
1779         ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
1780                                  flags, cmd);
1781         if (!ovs_header)
1782                 return -EMSGSIZE;
1783
1784         ovs_header->dp_ifindex = get_dpifindex(vport->dp);
1785
1786         if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1787             nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1788             nla_put_string(skb, OVS_VPORT_ATTR_NAME,
1789                            vport->ops->get_name(vport)))
1790                 goto nla_put_failure;
1791
1792         ovs_vport_get_stats(vport, &vport_stats);
1793         if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
1794                     &vport_stats))
1795                 goto nla_put_failure;
1796
1797         if (ovs_vport_get_upcall_portids(vport, skb))
1798                 goto nla_put_failure;
1799
1800         err = ovs_vport_get_options(vport, skb);
1801         if (err == -EMSGSIZE)
1802                 goto error;
1803
1804         genlmsg_end(skb, ovs_header);
1805         return 0;
1806
1807 nla_put_failure:
1808         err = -EMSGSIZE;
1809 error:
1810         genlmsg_cancel(skb, ovs_header);
1811         return err;
1812 }
1813
1814 static struct sk_buff *ovs_vport_cmd_alloc_info(void)
1815 {
1816         return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1817 }
1818
1819 /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
1820 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1821                                          u32 seq, u8 cmd)
1822 {
1823         struct sk_buff *skb;
1824         int retval;
1825
1826         skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1827         if (!skb)
1828                 return ERR_PTR(-ENOMEM);
1829
1830         retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
1831         BUG_ON(retval < 0);
1832
1833         return skb;
1834 }
1835
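/* Resolve the vport a request refers to.  OVS_VPORT_ATTR_NAME takes
 * precedence and, if dp_ifindex is nonzero, must agree with it;
 * otherwise OVS_VPORT_ATTR_PORT_NO is looked up in the datapath named
 * by dp_ifindex.  With neither attribute the request is -EINVAL.
 */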
1836 /* Called with ovs_mutex or RCU read lock. */
1837 static struct vport *lookup_vport(struct net *net,
1838                                   const struct ovs_header *ovs_header,
1839                                   struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1840 {
1841         struct datapath *dp;
1842         struct vport *vport;
1843
1844         if (a[OVS_VPORT_ATTR_NAME]) {
1845                 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1846                 if (!vport)
1847                         return ERR_PTR(-ENODEV);
1848                 if (ovs_header->dp_ifindex &&
1849                     ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1850                         return ERR_PTR(-ENODEV);
1851                 return vport;
1852         } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1853                 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1854
1855                 if (port_no >= DP_MAX_PORTS)
1856                         return ERR_PTR(-EFBIG);
1857
1858                 dp = get_dp(net, ovs_header->dp_ifindex);
1859                 if (!dp)
1860                         return ERR_PTR(-ENODEV);
1861
1862                 vport = ovs_vport_ovsl_rcu(dp, port_no);
1863                 if (!vport)
1864                         return ERR_PTR(-ENODEV);
1865                 return vport;
1866         } else
1867                 return ERR_PTR(-EINVAL);
1868 }
1869
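/* OVS_VPORT_CMD_NEW handler.  A caller-supplied port number must not
 * already be in use (-EBUSY); without one, the first free slot from 1
 * upward is taken (port 0 is OVSP_LOCAL).  If new_vport() returns
 * -EAGAIN, the handler restarts from the datapath lookup.
 */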
1870 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1871 {
1872         struct nlattr **a = info->attrs;
1873         struct ovs_header *ovs_header = info->userhdr;
1874         struct vport_parms parms;
1875         struct sk_buff *reply;
1876         struct vport *vport;
1877         struct datapath *dp;
1878         u32 port_no;
1879         int err;
1880
1881         if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1882             !a[OVS_VPORT_ATTR_UPCALL_PID])
1883                 return -EINVAL;
1884
1885         port_no = a[OVS_VPORT_ATTR_PORT_NO]
1886                 ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
1887         if (port_no >= DP_MAX_PORTS)
1888                 return -EFBIG;
1889
1890         reply = ovs_vport_cmd_alloc_info();
1891         if (!reply)
1892                 return -ENOMEM;
1893
1894         ovs_lock();
1895 restart:
1896         dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1897         err = -ENODEV;
1898         if (!dp)
1899                 goto exit_unlock_free;
1900
1901         if (port_no) {
1902                 vport = ovs_vport_ovsl(dp, port_no);
1903                 err = -EBUSY;
1904                 if (vport)
1905                         goto exit_unlock_free;
1906         } else {
1907                 for (port_no = 1; ; port_no++) {
1908                         if (port_no >= DP_MAX_PORTS) {
1909                                 err = -EFBIG;
1910                                 goto exit_unlock_free;
1911                         }
1912                         vport = ovs_vport_ovsl(dp, port_no);
1913                         if (!vport)
1914                                 break;
1915                 }
1916         }
1917
1918         parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1919         parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1920         parms.options = a[OVS_VPORT_ATTR_OPTIONS];
1921         parms.dp = dp;
1922         parms.port_no = port_no;
1923         parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
1924
1925         vport = new_vport(&parms);
1926         err = PTR_ERR(vport);
1927         if (IS_ERR(vport)) {
1928                 if (err == -EAGAIN)
1929                         goto restart;
1930                 goto exit_unlock_free;
1931         }
1932
1933         err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
1934                                       info->snd_seq, 0, OVS_VPORT_CMD_NEW);
1935         BUG_ON(err < 0);
1936         ovs_unlock();
1937
1938         ovs_notify(&dp_vport_genl_family, reply, info);
1939         return 0;
1940
1941 exit_unlock_free:
1942         ovs_unlock();
1943         kfree_skb(reply);
1944         return err;
1945 }
1946
1947 static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1948 {
1949         struct nlattr **a = info->attrs;
1950         struct sk_buff *reply;
1951         struct vport *vport;
1952         int err;
1953
1954         reply = ovs_vport_cmd_alloc_info();
1955         if (!reply)
1956                 return -ENOMEM;
1957
1958         ovs_lock();
1959         vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1960         err = PTR_ERR(vport);
1961         if (IS_ERR(vport))
1962                 goto exit_unlock_free;
1963
1964         if (a[OVS_VPORT_ATTR_TYPE] &&
1965             nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
1966                 err = -EINVAL;
1967                 goto exit_unlock_free;
1968         }
1969
1970         if (a[OVS_VPORT_ATTR_OPTIONS]) {
1971                 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
1972                 if (err)
1973                         goto exit_unlock_free;
1974         }
1975
1977         if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
1978                 struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
1979
1980                 err = ovs_vport_set_upcall_portids(vport, ids);
1981                 if (err)
1982                         goto exit_unlock_free;
1983         }
1984
1985         err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
1986                                       info->snd_seq, 0, OVS_VPORT_CMD_NEW);
1987         BUG_ON(err < 0);
1988
1989         ovs_unlock();
1990         ovs_notify(&dp_vport_genl_family, reply, info);
1991         return 0;
1992
1993 exit_unlock_free:
1994         ovs_unlock();
1995         kfree_skb(reply);
1996         return err;
1997 }
1998
1999 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2000 {
2001         struct nlattr **a = info->attrs;
2002         struct sk_buff *reply;
2003         struct vport *vport;
2004         int err;
2005
2006         reply = ovs_vport_cmd_alloc_info();
2007         if (!reply)
2008                 return -ENOMEM;
2009
2010         ovs_lock();
2011         vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2012         err = PTR_ERR(vport);
2013         if (IS_ERR(vport))
2014                 goto exit_unlock_free;
2015
2016         if (vport->port_no == OVSP_LOCAL) {
2017                 err = -EINVAL;
2018                 goto exit_unlock_free;
2019         }
2020
2021         err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2022                                       info->snd_seq, 0, OVS_VPORT_CMD_DEL);
2023         BUG_ON(err < 0);
2024         ovs_dp_detach_port(vport);
2025         ovs_unlock();
2026
2027         ovs_notify(&dp_vport_genl_family, reply, info);
2028         return 0;
2029
2030 exit_unlock_free:
2031         ovs_unlock();
2032         kfree_skb(reply);
2033         return err;
2034 }
2035
2036 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2037 {
2038         struct nlattr **a = info->attrs;
2039         struct ovs_header *ovs_header = info->userhdr;
2040         struct sk_buff *reply;
2041         struct vport *vport;
2042         int err;
2043
2044         reply = ovs_vport_cmd_alloc_info();
2045         if (!reply)
2046                 return -ENOMEM;
2047
2048         rcu_read_lock();
2049         vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2050         err = PTR_ERR(vport);
2051         if (IS_ERR(vport))
2052                 goto exit_unlock_free;
2053         err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2054                                       info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2055         BUG_ON(err < 0);
2056         rcu_read_unlock();
2057
2058         return genlmsg_reply(reply, info);
2059
2060 exit_unlock_free:
2061         rcu_read_unlock();
2062         kfree_skb(reply);
2063         return err;
2064 }
2065
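/* Vport dump callback with a two-part resume cursor: cb->args[0] is the
 * hash bucket and cb->args[1] the number of vports already emitted from
 * that bucket, so an interrupted dump resumes mid-bucket.
 */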
2066 static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2067 {
2068         struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
2069         struct datapath *dp;
2070         int bucket = cb->args[0], skip = cb->args[1];
2071         int i, j = 0;
2072
2073         rcu_read_lock();
2074         dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
2075         if (!dp) {
2076                 rcu_read_unlock();
2077                 return -ENODEV;
2078         }
2079         for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2080                 struct vport *vport;
2081
2082                 j = 0;
2083                 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2084                         if (j >= skip &&
2085                             ovs_vport_cmd_fill_info(vport, skb,
2086                                                     NETLINK_CB(cb->skb).portid,
2087                                                     cb->nlh->nlmsg_seq,
2088                                                     NLM_F_MULTI,
2089                                                     OVS_VPORT_CMD_NEW) < 0)
2090                                 goto out;
2091
2092                         j++;
2093                 }
2094                 skip = 0;
2095         }
2096 out:
2097         rcu_read_unlock();
2098
2099         cb->args[0] = i;
2100         cb->args[1] = j;
2101
2102         return skb->len;
2103 }
2104
2105 static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2106         [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2107         [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
2108         [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
2109         [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2110         [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
2111         [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2112 };
2113
2114 static const struct genl_ops dp_vport_genl_ops[] = {
2115         { .cmd = OVS_VPORT_CMD_NEW,
2116           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2117           .policy = vport_policy,
2118           .doit = ovs_vport_cmd_new
2119         },
2120         { .cmd = OVS_VPORT_CMD_DEL,
2121           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2122           .policy = vport_policy,
2123           .doit = ovs_vport_cmd_del
2124         },
2125         { .cmd = OVS_VPORT_CMD_GET,
2126           .flags = 0,               /* OK for unprivileged users. */
2127           .policy = vport_policy,
2128           .doit = ovs_vport_cmd_get,
2129           .dumpit = ovs_vport_cmd_dump
2130         },
2131         { .cmd = OVS_VPORT_CMD_SET,
2132           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2133           .policy = vport_policy,
2134           .doit = ovs_vport_cmd_set,
2135         },
2136 };
2137
2138 struct genl_family dp_vport_genl_family = {
2139         .id = GENL_ID_GENERATE,
2140         .hdrsize = sizeof(struct ovs_header),
2141         .name = OVS_VPORT_FAMILY,
2142         .version = OVS_VPORT_VERSION,
2143         .maxattr = OVS_VPORT_ATTR_MAX,
2144         .netnsok = true,
2145         .parallel_ops = true,
2146         .ops = dp_vport_genl_ops,
2147         .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
2148         .mcgrps = &ovs_dp_vport_multicast_group,
2149         .n_mcgrps = 1,
2150 };
2151
2152 static struct genl_family * const dp_genl_families[] = {
2153         &dp_datapath_genl_family,
2154         &dp_vport_genl_family,
2155         &dp_flow_genl_family,
2156         &dp_packet_genl_family,
2157 };
2158
2159 static void dp_unregister_genl(int n_families)
2160 {
2161         int i;
2162
2163         for (i = 0; i < n_families; i++)
2164                 genl_unregister_family(dp_genl_families[i]);
2165 }
2166
2167 static int dp_register_genl(void)
2168 {
2169         int err;
2170         int i;
2171
2172         for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2174                 err = genl_register_family(dp_genl_families[i]);
2175                 if (err)
2176                         goto error;
2177         }
2178
2179         return 0;
2180
2181 error:
2182         dp_unregister_genl(i);
2183         return err;
2184 }
2185
2186 static int __net_init ovs_init_net(struct net *net)
2187 {
2188         struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2189
2190         INIT_LIST_HEAD(&ovs_net->dps);
2191         INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2192         return 0;
2193 }
2194
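/* Scan one namespace's datapaths and collect on *head every internal
 * vport whose backing net device belongs to the namespace being
 * destroyed (dnet).
 */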
2195 static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
2196                                             struct list_head *head)
2197 {
2198         struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2199         struct datapath *dp;
2200
2201         list_for_each_entry(dp, &ovs_net->dps, list_node) {
2202                 int i;
2203
2204                 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2205                         struct vport *vport;
2206
2207                         hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
2208                                 struct netdev_vport *netdev_vport;
2209
2210                                 if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
2211                                         continue;
2212
2213                                 netdev_vport = netdev_vport_priv(vport);
2214                                 if (dev_net(netdev_vport->dev) == dnet)
2215                                         list_add(&vport->detach_list, head);
2216                         }
2217                 }
2218         }
2219 }
2220
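/* Namespace teardown: destroy the dying namespace's own datapaths, then
 * detach internal vports in other namespaces whose net devices live in
 * it, and finally flush the deferred notification work.
 */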
2221 static void __net_exit ovs_exit_net(struct net *dnet)
2222 {
2223         struct datapath *dp, *dp_next;
2224         struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
2225         struct vport *vport, *vport_next;
2226         struct net *net;
2227         LIST_HEAD(head);
2228
2229         ovs_lock();
2230         list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2231                 __dp_destroy(dp);
2232
2233         rtnl_lock();
2234         for_each_net(net)
2235                 list_vports_from_net(net, dnet, &head);
2236         rtnl_unlock();
2237
2238         /* Detach all vports whose net devices live in the dying namespace. */
2239         list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
2240                 list_del(&vport->detach_list);
2241                 ovs_dp_detach_port(vport);
2242         }
2243
2244         ovs_unlock();
2245
2246         cancel_work_sync(&ovs_net->dp_notify_work);
2247 }
2248
2249 static struct pernet_operations ovs_net_ops = {
2250         .init = ovs_init_net,
2251         .exit = ovs_exit_net,
2252         .id   = &ovs_net_id,
2253         .size = sizeof(struct ovs_net),
2254 };
2255
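/* Module init.  Every step below is unwound in reverse order on
 * failure; dp_cleanup() mirrors the same sequence on unload.
 */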
2256 static int __init dp_init(void)
2257 {
2258         int err;
2259
2260         BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
2261
2262         pr_info("Open vSwitch switching datapath\n");
2263
2264         err = action_fifos_init();
2265         if (err)
2266                 goto error;
2267
2268         err = ovs_internal_dev_rtnl_link_register();
2269         if (err)
2270                 goto error_action_fifos_exit;
2271
2272         err = ovs_flow_init();
2273         if (err)
2274                 goto error_unreg_rtnl_link;
2275
2276         err = ovs_vport_init();
2277         if (err)
2278                 goto error_flow_exit;
2279
2280         err = register_pernet_device(&ovs_net_ops);
2281         if (err)
2282                 goto error_vport_exit;
2283
2284         err = register_netdevice_notifier(&ovs_dp_device_notifier);
2285         if (err)
2286                 goto error_netns_exit;
2287
2288         err = ovs_netdev_init();
2289         if (err)
2290                 goto error_unreg_notifier;
2291
2292         err = dp_register_genl();
2293         if (err < 0)
2294                 goto error_unreg_netdev;
2295
2296         return 0;
2297
2298 error_unreg_netdev:
2299         ovs_netdev_exit();
2300 error_unreg_notifier:
2301         unregister_netdevice_notifier(&ovs_dp_device_notifier);
2302 error_netns_exit:
2303         unregister_pernet_device(&ovs_net_ops);
2304 error_vport_exit:
2305         ovs_vport_exit();
2306 error_flow_exit:
2307         ovs_flow_exit();
2308 error_unreg_rtnl_link:
2309         ovs_internal_dev_rtnl_link_unregister();
2310 error_action_fifos_exit:
2311         action_fifos_exit();
2312 error:
2313         return err;
2314 }
2315
2316 static void dp_cleanup(void)
2317 {
2318         dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2319         ovs_netdev_exit();
2320         unregister_netdevice_notifier(&ovs_dp_device_notifier);
2321         unregister_pernet_device(&ovs_net_ops);
2322         rcu_barrier();
2323         ovs_vport_exit();
2324         ovs_flow_exit();
2325         ovs_internal_dev_rtnl_link_unregister();
2326         action_fifos_exit();
2327 }
2328
2329 module_init(dp_init);
2330 module_exit(dp_cleanup);
2331
2332 MODULE_DESCRIPTION("Open vSwitch switching datapath");
2333 MODULE_LICENSE("GPL");