Add support for igmp and multicast
[samplevnf.git] / VNFs / DPPD-PROX / handle_swap.c
1 /*
2 // Copyright (c) 2010-2017 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include <rte_mbuf.h>
18 #include <rte_udp.h>
19
20 #include "task_init.h"
21 #include "task_base.h"
22 #include "lconf.h"
23 #include "log.h"
24 #include "prox_port_cfg.h"
25 #include "mpls.h"
26 #include "qinq.h"
27 #include "gre.h"
28 #include "prefetch.h"
29 #include "igmp.h"
30 #include "prox_cksum.h"
31
/* Per-task state of the "swap" task. */
struct task_swap {
        struct task_base base;
        /* Mempool the mbufs of locally generated IGMP messages are taken from */
        struct rte_mempool *igmp_pool;
        /* Copy of the task_args flags taken at init; the TASK_ARG_*_MAC_SET bits are tested per packet */
        uint32_t runtime_flags;
        /* Multicast group currently joined, stored in network byte order; 0 means no group */
        uint32_t igmp_address;
        /* Bytes 0-5: configured destination MAC, bytes 6-11: source MAC (from config or tx port) */
        uint8_t src_dst_mac[12];
        /* Source IPv4 address of generated IGMP messages, stored in network byte order */
        uint32_t local_ipv4;
        /* IPv4/UDP tx checksum offload bits requested on the reachable port, handed to prox_ip_udp_cksum */
        int offload_crc;
};
41
/* Geometry of the per-task IGMP mempool created in init_task_swap() */
#define NB_IGMP_MBUF            1024    /* number of elements in the pool */
#define IGMP_MBUF_SIZE          2048    /* size of one pool element, in bytes */
#define NB_CACHE_IGMP_MBUF      256     /* cache size passed to rte_mempool_create */
45
46 static void write_src_and_dst_mac(struct task_swap *task, struct rte_mbuf *mbuf)
47 {
48         struct ether_hdr *hdr;
49         struct ether_addr mac;
50
51         if (unlikely((task->runtime_flags & (TASK_ARG_DST_MAC_SET|TASK_ARG_SRC_MAC_SET)) == (TASK_ARG_DST_MAC_SET|TASK_ARG_SRC_MAC_SET))) {
52                 /* Source and Destination mac hardcoded */
53                 hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
54                 rte_memcpy(hdr, task->src_dst_mac, sizeof(task->src_dst_mac));
55         } else {
56                 hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
57                 if (likely((task->runtime_flags & TASK_ARG_SRC_MAC_SET) == 0)) {
58                         /* dst mac will be used as src mac */
59                         ether_addr_copy(&hdr->d_addr, &mac);
60                 }
61
62                 if (unlikely(task->runtime_flags & TASK_ARG_DST_MAC_SET))
63                         ether_addr_copy((struct ether_addr *)&task->src_dst_mac[0], &hdr->d_addr);
64                 else
65                         ether_addr_copy(&hdr->s_addr, &hdr->d_addr);
66
67                 if (unlikely(task->runtime_flags & TASK_ARG_SRC_MAC_SET)) {
68                         ether_addr_copy((struct ether_addr *)&task->src_dst_mac[6], &hdr->s_addr);
69                 } else {
70                         ether_addr_copy(&mac, &hdr->s_addr);
71                 }
72         }
73 }
74
75 static inline void build_mcast_mac(uint32_t ip, struct ether_addr *dst_mac)
76 {
77         // MAC address is 01:00:5e followed by 23 LSB of IP address
78         uint64_t mac = 0x0000005e0001L | ((ip & 0xFFFF7F00L) << 16);
79         memcpy(dst_mac, &mac, sizeof(struct ether_addr));
80 }
81
82 static inline void build_igmp_message(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t ip, uint8_t igmp_message)
83 {
84         struct task_swap *task = (struct task_swap *)tbase;
85         struct ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
86         struct ether_addr dst_mac;
87         build_mcast_mac(ip, &dst_mac);
88
89         rte_pktmbuf_pkt_len(mbuf) = 46;
90         rte_pktmbuf_data_len(mbuf) = 46;
91         init_mbuf_seg(mbuf);
92
93         ether_addr_copy(&dst_mac, &hdr->d_addr);
94         ether_addr_copy((struct ether_addr *)&task->src_dst_mac[6], &hdr->s_addr);
95         hdr->ether_type = ETYPE_IPv4;
96
97         struct ipv4_hdr *ip_hdr = (struct ipv4_hdr *)(hdr + 1);
98         ip_hdr->version_ihl = 0x45;             /**< version and header length */
99         ip_hdr->type_of_service = 0;    /**< type of service */
100         ip_hdr->total_length = rte_cpu_to_be_16(32);            /**< length of packet */
101         ip_hdr->packet_id = 0;          /**< packet ID */
102         ip_hdr->fragment_offset = 0;    /**< fragmentation offset */
103         ip_hdr->time_to_live = 1;               /**< time to live */
104         ip_hdr->next_proto_id = IPPROTO_IGMP;           /**< protocol ID */
105         ip_hdr->hdr_checksum = 0;               /**< header checksum */
106         ip_hdr->src_addr = task->local_ipv4;            /**< source address */
107         ip_hdr->dst_addr = ip;  /**< destination address */
108         struct igmpv2_hdr *pigmp = (struct igmpv2_hdr *)(ip_hdr + 1);
109         pigmp->type = igmp_message;
110         pigmp->max_resp_time = 0;
111         pigmp->checksum = 0;
112         pigmp->group_address = ip;
113         prox_ip_udp_cksum(mbuf, ip_hdr, sizeof(struct ether_hdr), sizeof(struct ipv4_hdr), task->offload_crc);
114 }
115
116 static int handle_swap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
117 {
118         struct task_swap *task = (struct task_swap *)tbase;
119         struct ether_hdr *hdr;
120         struct ether_addr mac;
121         struct ipv4_hdr *ip_hdr;
122         struct udp_hdr *udp_hdr;
123         struct gre_hdr *pgre;
124         struct ipv4_hdr *inner_ip_hdr;
125         uint32_t ip;
126         uint16_t port;
127         uint8_t out[64] = {0};
128         struct mpls_hdr *mpls;
129         uint32_t mpls_len = 0;
130         struct qinq_hdr *qinq;
131         struct vlan_hdr *vlan;
132         uint16_t j;
133         struct igmpv2_hdr *pigmp;
134         uint8_t type;
135
136         for (j = 0; j < n_pkts; ++j) {
137                 PREFETCH0(mbufs[j]);
138         }
139         for (j = 0; j < n_pkts; ++j) {
140                 PREFETCH0(rte_pktmbuf_mtod(mbufs[j], void *));
141         }
142
143         // TODO 1: check packet is long enough for Ethernet + IP + UDP = 42 bytes
144         for (uint16_t j = 0; j < n_pkts; ++j) {
145                 hdr = rte_pktmbuf_mtod(mbufs[j], struct ether_hdr *);
146                 switch (hdr->ether_type) {
147                 case ETYPE_MPLSU:
148                         mpls = (struct mpls_hdr *)(hdr + 1);
149                         while (!(mpls->bytes & 0x00010000)) {
150                                 // TODO: verify pcket length
151                                 mpls++;
152                                 mpls_len += sizeof(struct mpls_hdr);
153                         }
154                         mpls_len += sizeof(struct mpls_hdr);
155                         ip_hdr = (struct ipv4_hdr *)(mpls + 1);
156                         break;
157                 case ETYPE_8021ad:
158                         qinq = (struct qinq_hdr *)hdr;
159                         if (qinq->cvlan.eth_proto != ETYPE_VLAN) {
160                                 plog_warn("Unexpected proto in QinQ = %#04x\n", qinq->cvlan.eth_proto);
161                                 out[j] = OUT_DISCARD;
162                                 continue;
163                         }
164                         ip_hdr = (struct ipv4_hdr *)(qinq + 1);
165                         break;
166                 case ETYPE_VLAN:
167                         vlan = (struct vlan_hdr *)(hdr + 1);
168                         if (vlan->eth_proto == ETYPE_IPv4) {
169                                 ip_hdr = (struct ipv4_hdr *)(vlan + 1);
170                         } else if (vlan->eth_proto == ETYPE_VLAN) {
171                                 vlan = (struct vlan_hdr *)(vlan + 1);
172                                 if (vlan->eth_proto == ETYPE_IPv4) {
173                                         ip_hdr = (struct ipv4_hdr *)(vlan + 1);
174                                 }
175                                 else if (vlan->eth_proto == ETYPE_IPv6) {
176                                         plog_warn("Unsupported IPv6\n");
177                                         out[j] = OUT_DISCARD;
178                                         continue;
179                                 }
180                                 else {
181                                         plog_warn("Unsupported packet type\n");
182                                         out[j] = OUT_DISCARD;
183                                         continue;
184                                 }
185                         } else {
186                                 plog_warn("Unsupported packet type\n");
187                                 out[j] = OUT_DISCARD;
188                                 continue;
189                         }
190                         break;
191                 case ETYPE_IPv4:
192                         ip_hdr = (struct ipv4_hdr *)(hdr + 1);
193                         break;
194                 case ETYPE_IPv6:
195                         plog_warn("Unsupported IPv6\n");
196                         out[j] = OUT_DISCARD;
197                         continue;
198                 case ETYPE_LLDP:
199                         out[j] = OUT_DISCARD;
200                         continue;
201                 default:
202                         plog_warn("Unsupported ether_type 0x%x\n", hdr->ether_type);
203                         out[j] = OUT_DISCARD;
204                         continue;
205                 }
206                 // TODO 2 : check packet is long enough for Ethernet + IP + UDP + extra header (VLAN, MPLS, ...)
207                 ip = ip_hdr->dst_addr;
208
209                 switch (ip_hdr->next_proto_id) {
210                 case IPPROTO_GRE:
211                         ip_hdr->dst_addr = ip_hdr->src_addr;
212                         ip_hdr->src_addr = ip;
213
214                         pgre = (struct gre_hdr *)(ip_hdr + 1);
215                         inner_ip_hdr = ((struct ipv4_hdr *)(pgre + 1));
216                         ip = inner_ip_hdr->dst_addr;
217                         inner_ip_hdr->dst_addr = inner_ip_hdr->src_addr;
218                         inner_ip_hdr->src_addr = ip;
219
220                         udp_hdr = (struct udp_hdr *)(inner_ip_hdr + 1);
221                         // TODO 3.1 : verify proto is UPD or TCP
222                         port = udp_hdr->dst_port;
223                         udp_hdr->dst_port = udp_hdr->src_port;
224                         udp_hdr->src_port = port;
225                         write_src_and_dst_mac(task, mbufs[j]);
226                         break;
227                 case IPPROTO_UDP:
228                 case IPPROTO_TCP:
229                         if (task->igmp_address && IS_IPV4_MCAST(rte_be_to_cpu_32(ip))) {
230                                 out[j] = OUT_DISCARD;
231                                 continue;
232                         }
233                         udp_hdr = (struct udp_hdr *)(ip_hdr + 1);
234                         ip_hdr->dst_addr = ip_hdr->src_addr;
235                         ip_hdr->src_addr = ip;
236
237                         port = udp_hdr->dst_port;
238                         udp_hdr->dst_port = udp_hdr->src_port;
239                         udp_hdr->src_port = port;
240                         write_src_and_dst_mac(task, mbufs[j]);
241                         break;
242                 case IPPROTO_IGMP:
243                         pigmp = (struct igmpv2_hdr *)(ip_hdr + 1);
244                         // TODO: check packet len
245                         type = pigmp->type;
246                         if (type == IGMP_MEMBERSHIP_QUERY) {
247                                 if (task->igmp_address) {
248                                         // We have an address registered
249                                         if ((task->igmp_address == pigmp->group_address) || (pigmp->group_address == 0)) {
250                                                 // We get a request for the registered address, or to 0.0.0.0
251                                                 build_igmp_message(tbase, mbufs[j], task->igmp_address, IGMP_MEMBERSHIP_REPORT);        // replace Membership query packet with a response
252                                         } else {
253                                                 // Discard as either we are not registered or this is a query for a different group
254                                                 out[j] = OUT_DISCARD;
255                                                 continue;
256                                         }
257                                 } else {
258                                         // Discard as either we are not registered
259                                         out[j] = OUT_DISCARD;
260                                         continue;
261                                 }
262                         } else {
263                                 // Do not forward other IGMP packets back
264                                 out[j] = OUT_DISCARD;
265                                 continue;
266                         }
267                         break;
268                 default:
269                         plog_warn("Unsupported IP protocol 0x%x\n", ip_hdr->next_proto_id);
270                         out[j] = OUT_DISCARD;
271                         continue;
272                 }
273         }
274         return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
275 }
276
277 void igmp_join_group(struct task_base *tbase, uint32_t igmp_address)
278 {
279         struct task_swap *task = (struct task_swap *)tbase;
280         struct rte_mbuf *igmp_mbuf;
281         uint8_t out[64] = {0};
282         int ret;
283
284         task->igmp_address = igmp_address;
285         ret = rte_mempool_get(task->igmp_pool, (void **)&igmp_mbuf);
286         if (ret != 0) {
287                 plog_err("Unable to allocate igmp mbuf\n");
288                 return;
289         }
290         build_igmp_message(tbase, igmp_mbuf, task->igmp_address, IGMP_MEMBERSHIP_REPORT);
291         task->base.tx_pkt(&task->base, &igmp_mbuf, 1, out);
292 }
293
294 void igmp_leave_group(struct task_base *tbase)
295 {
296         struct task_swap *task = (struct task_swap *)tbase;
297         struct rte_mbuf *igmp_mbuf;
298         uint8_t out[64] = {0};
299         int ret;
300
301         task->igmp_address = 0;
302         ret = rte_mempool_get(task->igmp_pool, (void **)&igmp_mbuf);
303         if (ret != 0) {
304                 plog_err("Unable to allocate igmp mbuf\n");
305                 return;
306         }
307         build_igmp_message(tbase, igmp_mbuf, task->igmp_address, IGMP_LEAVE_GROUP);
308         task->base.tx_pkt(&task->base, &igmp_mbuf, 1, out);
309 }
310
311 static void init_task_swap(struct task_base *tbase, struct task_args *targ)
312 {
313         struct task_swap *task = (struct task_swap *)tbase;
314         struct ether_addr *src_addr, *dst_addr;
315
316         /*
317          * The destination MAC of the outgoing packet is based on the config file:
318          *    - 'dst mac=xx:xx:xx:xx:xx:xx' => the pre-configured mac will be used as dst mac
319          *    - 'dst mac=packet'            => the src mac of the incoming packet is used as dst mac
320          *    - (default - no 'dst mac')    => the src mac from the incoming packet is used as dst mac
321          *
322          * The source MAC of the outgoing packet is based on the config file:
323          *    - 'src mac=xx:xx:xx:xx:xx:xx' => the pre-configured mac will be used as src mac
324          *    - 'src mac=packet'            => the dst mac of the incoming packet is used as src mac
325          *    - 'src mac=hw'                => the mac address of the tx port is used as src mac
326          *                                     An error is returned if there are no physical tx ports
327          *    - (default - no 'src mac')    => if there is physical tx port, the mac of that port is used as src mac
328          *    - (default - no 'src mac')       if there are no physical tx ports the dst mac of the incoming packet
329          */
330
331         if (targ->flags & TASK_ARG_DST_MAC_SET) {
332                 dst_addr = &targ->edaddr;
333                 memcpy(&task->src_dst_mac[0], dst_addr, sizeof(*src_addr));
334         }
335
336         PROX_PANIC(targ->flags & TASK_ARG_DO_NOT_SET_SRC_MAC, "src mac must be set in swap mode, by definition => src mac=no is not supported\n");
337         PROX_PANIC(targ->flags & TASK_ARG_DO_NOT_SET_DST_MAC, "dst mac must be set in swap mode, by definition => dst mac=no is not supported\n");
338
339         if (targ->flags & TASK_ARG_SRC_MAC_SET) {
340                 src_addr =  &targ->esaddr;
341                 memcpy(&task->src_dst_mac[6], src_addr, sizeof(*dst_addr));
342                 plog_info("\t\tCore %d: src mac set from config file\n", targ->lconf->id);
343         } else {
344                 if (targ->flags & TASK_ARG_HW_SRC_MAC)
345                         PROX_PANIC(targ->nb_txports == 0, "src mac set to hw but no tx port\n");
346                 if (targ->nb_txports) {
347                         src_addr = &prox_port_cfg[task->base.tx_params_hw.tx_port_queue[0].port].eth_addr;
348                         memcpy(&task->src_dst_mac[6], src_addr, sizeof(*dst_addr));
349                         targ->flags |= TASK_ARG_SRC_MAC_SET;
350                         plog_info("\t\tCore %d: src mac set from port\n", targ->lconf->id);
351                 }
352         }
353         task->runtime_flags = targ->flags;
354         task->igmp_address =  rte_cpu_to_be_32(targ->igmp_address);
355         if (task->igmp_pool == NULL) {
356                 static char name[] = "igmp0_pool";
357                 name[4]++;
358                 struct rte_mempool *ret = rte_mempool_create(name, NB_IGMP_MBUF, IGMP_MBUF_SIZE, NB_CACHE_IGMP_MBUF,
359                         sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0,
360                         rte_socket_id(), 0);
361                 PROX_PANIC(ret == NULL, "Failed to allocate IGMP memory pool on socket %u with %u elements\n",
362                         rte_socket_id(), NB_IGMP_MBUF);
363                 plog_info("\t\tMempool %p (%s) size = %u * %u cache %u, socket %d\n", ret, name, NB_IGMP_MBUF,
364                         IGMP_MBUF_SIZE, NB_CACHE_IGMP_MBUF, rte_socket_id());
365                 task->igmp_pool = ret;
366         }
367         task->local_ipv4 = rte_cpu_to_be_32(targ->local_ipv4);
368
369         struct prox_port_cfg *port = find_reachable_port(targ);
370         if (port) {
371                 task->offload_crc = port->requested_tx_offload & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM);
372         }
373 }
374
/* Description of the "swap" task mode, handed to reg_task() by reg_task_swap() */
static struct task_init task_init_swap = {
        .mode_str = "swap",
        .init = init_task_swap,
        .handle = handle_swap_bulk,
        .flag_features = 0,
        .size = sizeof(struct task_swap),       /* size of the per-task state */
};
382
383 __attribute__((constructor)) static void reg_task_swap(void)
384 {
385         reg_task(&task_init_swap);
386 }